In [1]:
import pandas as pd

In [6]:
# Load the dataset (A manually edited version of the dataset found of Kaggle that removed columns and added a 'Season' column)

data = pd.read_csv("NBA Player Injury Stats.csv")

In [7]:
# Ensure the 'Date' column is formatted correctly 
data['Date'] = pd.to_datetime(data['Date'])

In [8]:
# Start a dictionary to keep track of players' inactive and active dates by season
player_status = {}

for index, row in data.iterrows():
    player = row['Inactive']
    team = row['Team']
    date = row['Date']
    season = row['Season']

# Looking for when a player is inactive (injured)
    if pd.notna(player):  
        if player not in player_status:
            player_status[player] = {}
        if season not in player_status[player]:
            player_status[player][season] = {'team': team, 'inactive_dates': [], 'active_dates': []}
        player_status[player][season]['inactive_dates'].append(date)
    else:
        player = row['Active']
        if pd.notna(player):  # When a player rejoins the lineup 
            if player in player_status and season in player_status[player]:
                player_status[player][season]['active_dates'].append(date)

In [9]:
# Calculate the total inactive days for each team for each season
team_inactive_days = {}

for player, seasons in player_status.items():
    for season, info in seasons.items():
        inactive_dates = sorted(info['inactive_dates'])
        active_dates = sorted(info['active_dates'])
        team = info['team']

        # Calculate the inactive periods
        total_inactive_days = 0
        for i, inactive_date in enumerate(inactive_dates):
            if i < len(active_dates):
                active_date = active_dates[i]
                inactive_period = (active_date - inactive_date).days
                total_inactive_days += inactive_period

        if (team, season) not in team_inactive_days:
            team_inactive_days[(team, season)] = 0
        team_inactive_days[(team, season)] += total_inactive_days

In [11]:
# Save as Dataframe
team_inactive_days_df = pd.DataFrame(list(team_inactive_days.items()), columns=['Team_Season', 'Total Inactive Days'])
team_inactive_days_df[['Team', 'Season']] = pd.DataFrame(team_inactive_days_df['Team_Season'].tolist(), index=team_inactive_days_df.index)
team_inactive_days_df.drop(columns=['Team_Season'], inplace=True)

In [15]:
team_inactive_days_df

Unnamed: 0,Total Inactive Days,Team,Season
0,383,Blazers,1997
1,499,Blazers,1998
2,327,Blazers,1999
3,504,Blazers,2000
4,397,Pacers,2005
...,...,...,...
798,602,Grizzlies,2022
799,377,Spurs,2023
800,219,Thunder,2023
801,131,Bulls,2019


In [ ]:
# Save as csv

In [16]:
team_inactive_days_df.to_csv("team_inactive_days.csv")

In [ ]:
# Change team names to full team names

In [17]:
data = pd.read_csv("team_inactive_days.csv")

unique_teams = data['Team'].unique()

print(unique_teams.tolist())

['Blazers', 'Pacers', 'Raptors', 'Heat', 'Celtics', 'Suns', 'Warriors', 'Grizzlies', 'Spurs', 'Knicks', 'Nets', 'Sonics', '76ers', 'Bucks', 'Rockets', 'Bullets', 'Bulls', 'Cavaliers', 'Wizards', 'Magic', 'Clippers', 'Kings', 'Mavericks', 'Hornets', 'Jazz', 'Lakers', 'Nuggets', 'Pistons', 'Timberwolves', 'Hawks', 'Bobcats', 'Thunder', 'Pelicans']


In [18]:
team_full_names = {
    'Blazers': 'Portland Trail Blazers',
    'Pacers': 'Indiana Pacers',
    'Raptors': 'Toronto Raptors',
    'Heat': 'Miami Heat',
    'Celtics': 'Boston Celtics',
    'Suns': 'Phoenix Suns',
    'Warriors': 'Golden State Warriors',
    'Grizzlies': 'Memphis Grizzlies',
    'Spurs': 'San Antonio Spurs',
    'Knicks': 'New York Knicks',
    'Nets': 'Brooklyn Nets',
    'Sonics': 'Seattle SuperSonics',
    '76ers': 'Philadelphia 76ers',
    'Bucks': 'Milwaukee Bucks',
    'Rockets': 'Houston Rockets',
    'Bullets': 'Washington Bullets',
    'Bulls': 'Chicago Bulls',
    'Cavaliers': 'Cleveland Cavaliers',
    'Wizards': 'Washington Wizards',
    'Magic': 'Orlando Magic',
    'Clippers': 'Los Angeles Clippers',
    'Kings': 'Sacramento Kings',
    'Mavericks': 'Dallas Mavericks',
    'Hornets': 'Charlotte Hornets',
    'Jazz': 'Utah Jazz',
    'Lakers': 'Los Angeles Lakers',
    'Nuggets': 'Denver Nuggets',
    'Pistons': 'Detroit Pistons',
    'Timberwolves': 'Minnesota Timberwolves',
    'Hawks': 'Atlanta Hawks',
    'Bobcats': 'Charlotte Bobcats',
    'Thunder': 'Oklahoma City Thunder',
    'Pelicans': 'New Orleans Pelicans'
}

In [19]:
data["Team"] = data["Team"].replace(team_full_names)

In [20]:
data

Unnamed: 0.1,Unnamed: 0,Total Inactive Days,Team,Season
0,0,383,Portland Trail Blazers,1997
1,1,499,Portland Trail Blazers,1998
2,2,327,Portland Trail Blazers,1999
3,3,504,Portland Trail Blazers,2000
4,4,397,Indiana Pacers,2005
...,...,...,...,...
798,798,602,Memphis Grizzlies,2022
799,799,377,San Antonio Spurs,2023
800,800,219,Oklahoma City Thunder,2023
801,801,131,Chicago Bulls,2019


In [21]:
data.to_csv("total_inactive_days_1.csv")

In [ ]:
# Changing team names for teams that were relocated and have a name change throughout the dataset

In [22]:
def replace_team (row):
    if row["Season"] < 2013 and row["Team"] == 'Brooklyn Nets':
        return 'New Jersey Nets'
    return row ['Team']

data["Team"] = data.apply(replace_team, axis=1)

In [24]:
data

Unnamed: 0.1,Unnamed: 0,Total Inactive Days,Team,Season
0,0,383,Portland Trail Blazers,1997
1,1,499,Portland Trail Blazers,1998
2,2,327,Portland Trail Blazers,1999
3,3,504,Portland Trail Blazers,2000
4,4,397,Indiana Pacers,2005
...,...,...,...,...
798,798,602,Memphis Grizzlies,2022
799,799,377,San Antonio Spurs,2023
800,800,219,Oklahoma City Thunder,2023
801,801,131,Chicago Bulls,2019


In [25]:
def replace_team_1 (row):
    if 2005 <= row["Season"] <= 2014 and row["Team"] == 'Charlotte Hornets':
        return 'Charlotte Bobcats'
    return row ['Team']

data["Team"] = data.apply(replace_team_1, axis=1)

In [ ]:
#Save as csv and merge with total stats file

In [34]:
data.to_csv("total_inactive_days_2.csv")

In [54]:
inactive = pd.read_csv("total_inactive_days_2.csv")

In [55]:
total_stats = pd.read_csv("final_stats_NBA.csv")

In [56]:
total = total_stats.merge(inactive, how='outer', on=["Team", "Season"])

In [58]:
total

Unnamed: 0,Team,G,MP Per Game,FG Per Game,FGA Per Game,3P Per Game,3PA Per Game,2P Per Game,2PA Per Game,FT Per Game,...,G (PF),W (PF),L (PF),G (P Car),W (P Car),L (P Car),top 3 conference,rk conference,count_playoff_games,Total Inactive Days
0,Atlanta Hawks,82,241.5,34.3,76.9,8.0,22.4,26.3,54.5,18.2,...,34,13,21,144,68,76,0.0,4.0,154.0,282
1,Atlanta Hawks,82,242.4,35.2,77.5,4.1,12.4,31.1,65.1,21.3,...,38,14,24,148,69,79,0.0,5.0,269.0,234
2,Atlanta Hawks,50,241.5,30.8,75.2,3.9,12.9,26.8,62.3,20.8,...,47,17,30,157,72,85,0.0,4.0,314.0,86
3,Atlanta Hawks,82,241.8,36.6,83.0,3.1,9.9,33.4,73.1,18.0,...,47,17,30,157,72,85,0.0,14.0,0.0,124
4,Atlanta Hawks,82,240.6,35.1,81.3,4.1,11.4,31.0,69.9,16.8,...,0,0,0,0,0,0,0.0,13.0,0.0,292
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,Washington Wizards,82,243.0,42.1,90.1,11.3,33.3,30.8,56.8,18.4,...,19,9,10,92,48,44,0.0,9.0,0.0,193
802,Washington Wizards,72,241.0,41.5,90.9,12.0,32.6,29.5,58.3,19.4,...,19,9,10,92,48,44,0.0,10.0,0.0,674
803,Washington Wizards,72,241.7,43.2,90.9,10.2,29.0,33.0,61.9,20.1,...,24,10,14,97,49,48,0.0,9.0,0.0,165
804,Washington Wizards,82,241.8,40.6,86.0,10.5,30.6,30.1,55.4,17.0,...,0,0,0,0,0,0,0.0,14.0,100.0,387


In [59]:
total.to_csv("combined_newest.csv")