In [1]:
import time
from functools import lru_cache

import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder, leaguedashplayerstats, commonteamroster, boxscoretraditionalv2
from nba_api.stats.static import teams
from sklearn.preprocessing import MinMaxScaler

In [2]:
team_id = 1610612744  # Golden State Warriors ID
# Seasons to train on: "2018-19" through "2024-25", in the "YYYY-YY" form the API calls below receive.
seasons = [f"{start}-{(start + 1) % 100:02d}" for start in range(2018, 2025)]

In [3]:
# Accumulator for the per-season game-log DataFrames; the fetch loop appends one frame per season.
all_games = []

In [4]:
# Start from an empty accumulator so that re-running this cell does not append
# a second copy of every season to a list left over from a previous run.
all_games = []

# NOTE(review): no season-type filter is passed to LeagueGameFinder, and the
# resulting frame shows more than one SEASON_ID prefix (e.g. 12018 and 22024).
# Presumably several game types are mixed here -- confirm that is intended.
for season in seasons:
    print(f"Fetching data for season {season}...")
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, season_nullable=season)
    season_games = gamefinder.get_data_frames()[0]
    # Tag each row with the "YYYY-YY" season string; later cells use it to pick
    # the matching roster and player-rating table.
    season_games["SEASON"] = season
    all_games.append(season_games)
    time.sleep(0.5)  # pause between consecutive API requests

Fetching data for season 2018-19...
Fetching data for season 2019-20...
Fetching data for season 2020-21...
Fetching data for season 2021-22...
Fetching data for season 2022-23...
Fetching data for season 2023-24...
Fetching data for season 2024-25...


In [5]:
# Stack the per-season frames into one table with a fresh 0..n-1 index.
df = pd.concat(all_games, ignore_index=True)

In [6]:
# Encode the game result as 1 for a win ('W') and 0 for anything else.
# The dtype guard makes the cell safe to re-run: without it, a second run would
# compare the already-encoded integers to 'W' and turn every win into 0.
if not pd.api.types.is_numeric_dtype(df['WL']):
    df['WL'] = (df['WL'] == 'W').astype('int64')

In [7]:
# Parse the date column and put the games in chronological order.
df = (
    df.assign(GAME_DATE=pd.to_datetime(df['GAME_DATE']))
    .sort_values('GAME_DATE')
    .reset_index(drop=True)
)

# Whole days elapsed since the previous row; the first row has no predecessor and gets 0.
gap_since_previous = df['GAME_DATE'].diff()
df['DaysSinceLast'] = gap_since_previous.dt.days.fillna(0)

In [8]:
# 1 when the matchup string contains the literal "vs." (e.g. "GSW vs. MIN"), 0 otherwise (e.g. "GSW @ LAL").
df['IS_HOME_GAME'] = df['MATCHUP'].str.contains("vs.", regex=False).astype('int64')

In [9]:
def get_team_dict():
    """Return a dict mapping each team's ``id`` to its ``abbreviation``.

    The entries come from ``teams.get_teams()``.
    """
    id_to_abbreviation = {}
    for entry in teams.get_teams():
        id_to_abbreviation[entry['id']] = entry['abbreviation']
    return id_to_abbreviation

In [10]:
def calculate_player_ratings(season):
    """Compute a composite rating for every player in one season.

    Each contributing stat column is min-max scaled to [0, 1], multiplied by its
    normalized weight, and summed; the result is scaled by 100 and rounded to
    two decimals.

    Args:
        season: Season value forwarded to ``LeagueDashPlayerStats``.

    Returns:
        DataFrame with columns ``PLAYER_NAME`` and ``PLAYER_RATING``, ordered
        from the highest rating to the lowest, with a fresh integer index.
    """
    # Relative importance of each stat. The raw values do not sum to 1, so each
    # one is divided by their total when it is applied below.
    raw_weights = {
        'PTS': 0.4,
        'REB': 0.2,
        'AST': 0.15,
        'STL': 0.1,
        'BLK': 0.1,
        'PLUS_MINUS': 0.05,
        'NBA_FANTASY_PTS': 0.3,
    }
    weight_total = sum(raw_weights.values())
    stat_columns = list(raw_weights)

    stats = leaguedashplayerstats.LeagueDashPlayerStats(season=season).get_data_frames()[0]

    # Rescale every stat column in place so they are comparable before weighting.
    stats[stat_columns] = MinMaxScaler().fit_transform(stats[stat_columns])

    # Accumulate the weighted columns one at a time, in weight-table order.
    composite = 0
    for column, raw_weight in raw_weights.items():
        composite = composite + stats[column] * (raw_weight / weight_total)

    stats['PLAYER_RATING'] = (composite * 100).round(2)

    ranked = stats.sort_values(by='PLAYER_RATING', ascending=False).reset_index(drop=True)
    return ranked[['PLAYER_NAME', 'PLAYER_RATING']]

In [11]:
def generate_player_ratings(seasons):
    """Build a ``{season: {player_name: rating}}`` lookup.

    Args:
        seasons: Iterable of season values; each is passed to
            ``calculate_player_ratings``.

    Returns:
        Dict keyed by season whose values map ``PLAYER_NAME`` to
        ``PLAYER_RATING`` for that season.
    """
    return {
        season: calculate_player_ratings(season)
        .set_index('PLAYER_NAME')['PLAYER_RATING']
        .to_dict()
        for season in seasons
    }

In [12]:
# Rating lookup for every training season: {season: {player name: rating}}.
player_ratings = generate_player_ratings(seasons)

In [13]:
@lru_cache(maxsize=None)
def _fetch_roster_names(season, team_id):
    """Return the ``PLAYER`` column of the team roster for one season as a tuple.

    Memoized: the roster is requested once per (season, team) instead of once
    per game. A tuple is returned so the cached value cannot be mutated.
    """
    roster_data = commonteamroster.CommonTeamRoster(season=season, team_id=team_id).get_data_frames()[0]
    return tuple(roster_data['PLAYER'].tolist())


@lru_cache(maxsize=None)
def _fetch_box_score_names(game_id):
    """Return the ``PLAYER_NAME`` column of a game's box score as a tuple.

    Memoized: the caller asks about both teams of the same game, so the box
    score would otherwise be downloaded twice per game.
    """
    box_score_data = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id=game_id).get_data_frames()[0]
    return tuple(box_score_data['PLAYER_NAME'].tolist())


def find_injured_players(game_id, season, team_abbr):
    """List roster players of a team who do not appear in a game's box score.

    Names are compared after ``normalize_name``; the returned names are the
    original roster spellings, in roster order.

    NOTE(review): "injured" is a heuristic here -- a player is reported simply
    because their name is absent from the box score table. If that table also
    lists players who were available but never entered the game, or omits
    players for reasons other than injury, the result will differ from a true
    injury report. Confirm against the endpoint's data.

    Args:
        game_id: Game identifier passed to ``BoxScoreTraditionalV2``.
        season: Season value passed to ``CommonTeamRoster``.
        team_abbr: Team abbreviation, resolved to an id via ``get_team_dict``.

    Returns:
        List of roster player names not found in the box score.

    Raises:
        ValueError: If ``team_abbr`` is not a known abbreviation.
    """
    team_dict = get_team_dict()

    # Invert id -> abbreviation so the abbreviation can be resolved to an id.
    reverse_team_dict = {abbr: tid for tid, abbr in team_dict.items()}

    if team_abbr not in reverse_team_dict:
        raise ValueError(f"Team abbreviation '{team_abbr}' not found.")

    team_id = reverse_team_dict[team_abbr]

    roster_players = _fetch_roster_names(season, team_id)

    # A set gives constant-time membership checks for each roster name.
    players_played_normalized = {
        normalize_name(player) for player in _fetch_box_score_names(game_id)
    }

    return [
        player
        for player in roster_players
        if normalize_name(player) not in players_played_normalized
    ]


In [14]:
def normalize_name(name):
    """Return ``name`` with surrounding whitespace removed and lower-cased."""
    trimmed = name.strip()
    return trimmed.lower()

In [15]:
def calculate_injury_score_sequential(df, player_ratings, find_injured_players, team_id):
    """Add an ``INJURY_SCORE`` column to ``df``, one game at a time.

    For each row, the summed ratings of the opponent's missing players are
    subtracted from the summed ratings of the team's own missing players, so a
    positive score means the team is missing more rated talent than its
    opponent. Players without an entry in the season's rating table count as 0.

    A row whose processing raises is reported and scored ``None``; the loop
    then continues with the next row.

    Args:
        df: Game log with ``GAME_ID``, ``SEASON`` and ``MATCHUP`` columns. The
            opponent abbreviation is taken as the last whitespace-separated
            token of ``MATCHUP``. The frame is modified in place.
        player_ratings: ``{season: {player_name: rating}}`` lookup.
        find_injured_players: Callable ``(game_id, season, team_abbr)``
            returning the list of missing player names for that team.
        team_id: Id of the team the game log belongs to; must be a key of
            ``get_team_dict()``.

    Returns:
        The same ``df`` object with ``INJURY_SCORE`` added.

    Raises:
        ValueError: If ``team_id`` is not a known team id.
    """
    team_dict = get_team_dict()
    if team_id not in team_dict:
        raise ValueError(f"Team ID '{team_id}' not found.")
    team_abbr = team_dict[team_id]

    injury_scores = []

    for _, row in df.iterrows():
        game_id = row['GAME_ID']
        season = row['SEASON']

        matchup = row['MATCHUP']
        opponent_abbr = matchup.split()[-1]

        try:
            team_injured = find_injured_players(game_id, season, team_abbr)
            opponent_injured = find_injured_players(game_id, season, opponent_abbr)

            season_ratings = player_ratings[season]
            team_injury_rating = sum(season_ratings.get(player, 0) for player in team_injured)
            opponent_injury_rating = sum(season_ratings.get(player, 0) for player in opponent_injured)

            injury_score = team_injury_rating - opponent_injury_rating
        except Exception as e:
            # Best effort: keep going so one bad game does not abort the whole run.
            print(f"Error processing GAME_ID {game_id}: {e}")
            injury_score = None
        else:
            # Report success only when the row was actually scored.
            print(f"Successfully parsed {game_id}")

        injury_scores.append(injury_score)

        time.sleep(0.5)  # pause before the next game's lookups

    df['INJURY_SCORE'] = injury_scores
    return df

In [16]:
# Score every game in the log. The function pauses 0.5 s after each row and
# performs per-game lookups, so this cell takes a while for the full log.
df = calculate_injury_score_sequential(df, player_ratings, find_injured_players, team_id)

Successfully parsed 0011800005
Successfully parsed 0011800039
Successfully parsed 0011800054
Successfully parsed 0011800065
Successfully parsed 0011800079
Successfully parsed 0021800002
Successfully parsed 0021800024
Successfully parsed 0021800038
Successfully parsed 0021800047
Successfully parsed 0021800062
Successfully parsed 0021800068
Successfully parsed 0021800083
Successfully parsed 0021800091
Successfully parsed 0021800108
Successfully parsed 0021800124
Successfully parsed 0021800147
Successfully parsed 0021800166
Successfully parsed 0021800182
Successfully parsed 0021800198
Successfully parsed 0021800201
Successfully parsed 0021800213
Successfully parsed 0021800232
Successfully parsed 0021800238
Successfully parsed 0021800264
Successfully parsed 0021800277
Successfully parsed 0021800285
Successfully parsed 0021800300
Successfully parsed 0021800316
Successfully parsed 0021800331
Successfully parsed 0021800344
Successfully parsed 0021800355
Successfully parsed 0021800377
Successf

In [17]:
# Completion marker; presumably here to make the end of the long-running scoring cell visible.
print("Done")

Done


In [19]:
# Display the game log, which now carries the INJURY_SCORE column.
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,AST,STL,BLK,TOV,PF,PLUS_MINUS,SEASON,DaysSinceLast,IS_HOME_GAME,INJURY_SCORE
0,12018,1610612744,GSW,Golden State Warriors,0011800005,2018-09-29,GSW vs. MIN,0,241,110,...,33,9,8,17,23,-4.0,2018-19,0.0,1,-35.15
1,12018,1610612744,GSW,Golden State Warriors,0011800039,2018-10-05,GSW vs. SAC,1,241,122,...,29,8,6,12,18,28.0,2018-19,6.0,1,-24.66
2,12018,1610612744,GSW,Golden State Warriors,0011800054,2018-10-08,GSW vs. PHX,0,239,109,...,26,5,6,18,26,-8.0,2018-19,3.0,1,-124.33
3,12018,1610612744,GSW,Golden State Warriors,0011800065,2018-10-10,GSW @ LAL,0,241,113,...,23,11,3,25,38,-10.0,2018-19,2.0,0,-53.67
4,12018,1610612744,GSW,Golden State Warriors,0011800079,2018-10-12,GSW vs. LAL,0,241,105,...,25,8,4,16,30,-18.0,2018-19,2.0,1,-72.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
590,22024,1610612744,GSW,Golden State Warriors,0022400446,2024-12-30,GSW vs. CLE,0,241,95,...,20,5,8,14,14,-18.0,2024-25,2.0,1,21.89
591,22024,1610612744,GSW,Golden State Warriors,0022400467,2025-01-02,GSW vs. PHI,1,242,139,...,41,5,8,11,22,34.0,2024-25,3.0,1,-50.95
592,22024,1610612744,GSW,Golden State Warriors,0022400485,2025-01-04,GSW vs. MEM,1,241,121,...,32,13,5,18,18,8.0,2024-25,2.0,1,-4.08
593,22024,1610612744,GSW,Golden State Warriors,0022400492,2025-01-05,GSW vs. SAC,0,240,99,...,22,10,3,22,21,-30.0,2024-25,1.0,1,1.80


In [20]:
# Persist the enriched game log without the index column.
# NOTE(review): GAME_ID values are displayed with leading zeros (e.g. 0011800005);
# a reader of this CSV presumably needs to load GAME_ID as a string to keep them -- confirm.
df.to_csv("Warriors_7_Seasons_Data.csv", index=False)

In [21]:
# Display the same frame again after the CSV export.
df

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,AST,STL,BLK,TOV,PF,PLUS_MINUS,SEASON,DaysSinceLast,IS_HOME_GAME,INJURY_SCORE
0,12018,1610612744,GSW,Golden State Warriors,0011800005,2018-09-29,GSW vs. MIN,0,241,110,...,33,9,8,17,23,-4.0,2018-19,0.0,1,-35.15
1,12018,1610612744,GSW,Golden State Warriors,0011800039,2018-10-05,GSW vs. SAC,1,241,122,...,29,8,6,12,18,28.0,2018-19,6.0,1,-24.66
2,12018,1610612744,GSW,Golden State Warriors,0011800054,2018-10-08,GSW vs. PHX,0,239,109,...,26,5,6,18,26,-8.0,2018-19,3.0,1,-124.33
3,12018,1610612744,GSW,Golden State Warriors,0011800065,2018-10-10,GSW @ LAL,0,241,113,...,23,11,3,25,38,-10.0,2018-19,2.0,0,-53.67
4,12018,1610612744,GSW,Golden State Warriors,0011800079,2018-10-12,GSW vs. LAL,0,241,105,...,25,8,4,16,30,-18.0,2018-19,2.0,1,-72.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
590,22024,1610612744,GSW,Golden State Warriors,0022400446,2024-12-30,GSW vs. CLE,0,241,95,...,20,5,8,14,14,-18.0,2024-25,2.0,1,21.89
591,22024,1610612744,GSW,Golden State Warriors,0022400467,2025-01-02,GSW vs. PHI,1,242,139,...,41,5,8,11,22,34.0,2024-25,3.0,1,-50.95
592,22024,1610612744,GSW,Golden State Warriors,0022400485,2025-01-04,GSW vs. MEM,1,241,121,...,32,13,5,18,18,8.0,2024-25,2.0,1,-4.08
593,22024,1610612744,GSW,Golden State Warriors,0022400492,2025-01-05,GSW vs. SAC,0,240,99,...,22,10,3,22,21,-30.0,2024-25,1.0,1,1.80
