In [44]:
# Import statements
import pandas as pd
import requests
from bs4 import BeautifulSoup
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import players
from nba_api.stats.static import teams

In [45]:
# Define the list of 30 NBA teams in a list
nba_teams = teams.get_teams()
all_teams  = []
for team in nba_teams:
    all_teams.append(team['full_name'])

In [46]:
all_teams

['Atlanta Hawks',
 'Boston Celtics',
 'Cleveland Cavaliers',
 'New Orleans Pelicans',
 'Chicago Bulls',
 'Dallas Mavericks',
 'Denver Nuggets',
 'Golden State Warriors',
 'Houston Rockets',
 'Los Angeles Clippers',
 'Los Angeles Lakers',
 'Miami Heat',
 'Milwaukee Bucks',
 'Minnesota Timberwolves',
 'Brooklyn Nets',
 'New York Knicks',
 'Orlando Magic',
 'Indiana Pacers',
 'Philadelphia 76ers',
 'Phoenix Suns',
 'Portland Trail Blazers',
 'Sacramento Kings',
 'San Antonio Spurs',
 'Oklahoma City Thunder',
 'Toronto Raptors',
 'Utah Jazz',
 'Memphis Grizzlies',
 'Washington Wizards',
 'Detroit Pistons',
 'Charlotte Hornets']

In [47]:
# Extract year from the Date column of injuries
injuries = pd.read_csv('injuries.csv')
injuries = injuries.drop(['Unnamed: 0', 'Acquired'], axis=1).dropna()
injuries['Date'] = pd.to_datetime(injuries['Date'], errors='coerce')
injuries['Year'] = injuries['Date'].dt.year

# Filter by the short timeframe years that we need from 2018 to 2023
short_injuries = injuries[(injuries['Year'] >= 2018) & (injuries['Year'] <= 2023)]

short_injuries

Unnamed: 0,Date,Team,Relinquished,Notes,Year
26718,2018-01-01,Lakers,Thomas Bryant,placed on IL,2018
26720,2018-01-01,Raptors,Bruno Caboclo,placed on IL,2018
26721,2018-01-01,Raptors,Lorenzo Brown,placed on IL,2018
26723,2018-01-02,Cavaliers,Ante Zizic,placed on IL,2018
26727,2018-01-02,Kings,Frank Mason,placed on IL with bruised right heel,2018
...,...,...,...,...,...
37624,2023-04-09,Grizzlies,Ja Morant,placed on IL with right hand injury,2023
37625,2023-04-09,Grizzlies,Jaren Jackson Jr.,placed on IL with sore left elbow,2023
37630,2023-04-09,Kings,Davion Mitchell,placed on IL with sore left knee,2023
37631,2023-04-09,Knicks,Trevor Keels,placed on IL with illness,2023


In [74]:
# Pain Metrics

# Feature 1 - Injury Type / Severity
# Keyword mapping through feature engineering
def map_severity_score(note):
    note = str(note).lower()
    if 'surgery' in note or 'ruptured' in note:
        return 5
    elif 'torn' in note or 'acl' in note or 'achilles' in note:
        return 4
    elif 'fracture' in note or 'broken' in note:
        return 3
    elif 'strain' in note or 'sprain' in note:
        return 2
    elif 'sore' in note or 'tightness' in note:
        return 1
    else:
        return 1

short_injuries['Severity'] = short_injuries['Notes'].apply(map_severity_score)

# Feature 2 - Position / Star Impact
# Count how many times each player appears in the 'Relinquished' column
player_freq = short_injuries['Relinquished'].value_counts()

# If player appears more than usual, assume star player because of the notion that they play frequently on their respective team
def estimate_impact(player):
    count = player_freq.get(player, 1)
    if count >= 15:
        return 3  
    elif count >= 5:
        return 2  
    else:
        return 1  

short_injuries['PlayerImpact'] = short_injuries['Relinquished'].apply(estimate_impact)

# Feature 3 - Time Missed
# Days missed based on keyword feature engineering similar to severity score
def estimate_days_missed(note):
    note = str(note).lower()
    if 'season-ending' in note or 'out for season' in note:
        return 10
    elif 'out indefinitely' in note:
        return 8
    elif 'multiple weeks' in note or 'surgery' in note:
        return 6
    elif 'week-to-week' in note:
        return 4
    elif 'day-to-day' in note or 'sore' in note or 'illness' in note:
        return 2
    else:
        return 3  # Default value if none of a keywords are a match

short_injuries['DaysMissedScore'] = short_injuries['Notes'].apply(estimate_days_missed)

# Calculate pain score based on a made up formula and assigning certain weights based on the significance of the metric
short_injuries['PainScore'] = (
    short_injuries['Severity'] * 1.5 +
    short_injuries['PlayerImpact'] * 1.0 +
    short_injuries['DaysMissedScore'] * 1.2
)

# Group and aggregate by team and year
pain_summary = short_injuries.groupby(['Team', 'Year']).agg(
    TotalInjuries=('Relinquished', 'count'),
    TotalPainScore=('PainScore', 'sum'),
    AvgSeverity=('Severity', 'mean'),
    AvgTimeMissed=('DaysMissedScore', 'mean')
).reset_index()

pain_summary = pain_summary.sort_values('TotalPainScore', ascending=True)
pain_summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_injuries['Severity'] = short_injuries['Notes'].apply(map_severity_score)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_injuries['PlayerImpact'] = short_injuries['Relinquished'].apply(estimate_impact)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  short_injuries['DaysMissedScore'] = sho

Unnamed: 0,Team,Year,TotalInjuries,TotalPainScore,AvgSeverity,AvgTimeMissed
155,Suns,2023,5,35.6,1.200000,2.600000
167,Timberwolves,2023,8,52.7,1.125000,2.625000
161,Thunder,2023,8,61.9,1.375000,2.750000
62,Hornets,2020,9,65.4,1.555556,2.444444
83,Knicks,2023,10,68.0,1.200000,2.500000
...,...,...,...,...,...,...
148,Spurs,2022,71,519.0,1.098592,2.816901
3,76ers,2021,71,529.4,1.183099,2.915493
135,Raptors,2021,65,532.4,1.200000,3.415385
129,Pistons,2021,72,589.1,1.347222,3.305556
