## Import Required Libraries

In [None]:
!pip install --q -r requirements.txt

In [None]:
from nba_api.stats.endpoints import teamdetails, commonteamyears, teamgamelogs, playergamelogs, commonplayerinfo, boxscoreadvancedv3, boxscoretraditionalv3, boxscoremiscv3, scoreboardv2
from nba_api.live.nba.endpoints import playbyplay
import pandas as pd
from tqdm import tqdm
import os
import pandavro as pdx
import numpy as np
import time
from datetime import datetime, date
from warnings import filterwarnings
import json
filterwarnings('ignore')

## Get All Team IDs

In [None]:
# Build the static team lookup once; skipped when the parquet file already exists.
if 'all_teams.parquet' not in os.listdir('Static Files'):
    teams = commonteamyears.CommonTeamYears()
    print('Established connection')
    teams = teams.get_dict()
    print('Retrieved list of dicts')
    # Positions 1-4 of each row are kept and labelled by the DataFrame columns below
    # (presumably TEAM_ID, MIN_YEAR, MAX_YEAR, ABBREVIATION - confirm against the
    # endpoint's 'headers' entry). No request happens per row, so no sleep is needed.
    team_ids = [
        (team[1], team[2], team[3], team[4])
        for team in teams['resultSets'][0]['rowSet']
    ]
    print('Established list of tuples')
    teams = pd.DataFrame(team_ids, columns=['team_id', 'year_founded', 'year_depreciated', 'abbreviation'])
    print('Created DataFrame')
    teams.to_parquet('Static Files/all_teams.parquet', index=False)

## Get All Team Details

In [None]:
# Fetch the TeamDetails record of every cached team and store the combined result.
if 'team_details.parquet' not in os.listdir('Static Files'):
    teams = pd.read_parquet('Static Files/all_teams.parquet')
    detail_frames = []
    for current_team in tqdm(teams['team_id']):
        # Only the first result frame of the endpoint is kept
        detail_frames.append(
            teamdetails.TeamDetails(team_id=current_team, timeout=60).get_data_frames()[0]
        )
        # Pause between requests
        time.sleep(0.5)
    pd.concat(detail_frames).to_parquet('Static Files/team_details.parquet')

## Get All Team Game Logs

In [None]:
# Download team game logs for every season from 2000-01 through 2024-25.
if 'team_game_logs.parquet' not in os.listdir('Static Files'):
    # Season labels are '<start year>-<two-digit end year>', e.g. '2009-10'.
    # Generating them from the start year avoids the counter bookkeeping that
    # previously skipped the 2009-10 season.
    seasons = [f'{year}-{(year + 1) % 100:02d}' for year in range(2000, 2025)]
    initial_df = []
    for season in tqdm(seasons):
        get_game_stats = teamgamelogs.TeamGameLogs(season_nullable=season)
        initial_df.append(get_game_stats.get_data_frames()[0])
        # Pause between requests
        time.sleep(0.5)

    output_df = pd.concat(initial_df)
    output_df.to_parquet('Static Files/team_game_logs.parquet')

## Get All Current Player Injury Reports

In [None]:
# Keep a local copy of the ESPN injury report and refresh it when it was not
# written today (or does not exist yet).
injury_report_path = 'Static Files/player_injury_reports.avro'
refresh_injuries = True
if 'player_injury_reports.avro' in os.listdir('Static Files'):
    player_injuries = pdx.read_avro(injury_report_path)
    # Compare calendar dates only. The previous check compared the bool returned by
    # Series.all() with a datetime, which is never equal, so every run re-downloaded.
    # NOTE(review): assumes the avro round trip preserves the written wall-clock
    # time once the timezone is dropped - confirm with pandavro's timestamp handling.
    last_updated = pd.to_datetime(player_injuries['date_updated'], utc=True).dt.tz_localize(None)
    is_current = (last_updated.dt.date == datetime.today().date()).all()
    refresh_injuries = player_injuries.empty or not is_current

if refresh_injuries:
    player_injuries = pd.concat(pd.read_html('https://www.espn.com/nba/injuries')).fillna('No Info Available')
    player_injuries['date_updated'] = datetime.today()
    pdx.to_avro(injury_report_path, player_injuries)

## Get All Player Game Logs

In [None]:
# Download player game logs for every season from 2000-01 through 2024-25.
if 'player_game_logs.parquet' not in os.listdir('Static Files'):
    # Season labels are '<start year>-<two-digit end year>', e.g. '2009-10'.
    # Generating them from the start year avoids the counter bookkeeping that
    # previously skipped the 2009-10 season.
    seasons = [f'{year}-{(year + 1) % 100:02d}' for year in range(2000, 2025)]
    initial_df = []
    for season in tqdm(seasons):
        get_game_stats = playergamelogs.PlayerGameLogs(season_nullable=season)
        initial_df.append(get_game_stats.get_data_frames()[0])
        # Pause between requests
        time.sleep(0.5)

    output_df = pd.concat(initial_df)
    output_df.to_parquet('Static Files/player_game_logs.parquet')

## Get Player Statuses

In [None]:
# Maintain a PERSON_ID -> ROSTERSTATUS lookup for every player seen in the game logs.
# Only players that are not yet in the avro file are requested from the API.
statuses_path = 'Static Files/player_statuses.avro'
players = pd.read_parquet('Static Files/player_game_logs.parquet')[['PLAYER_ID']].drop_duplicates()

if 'player_statuses.avro' in os.listdir('Static Files'):
    statuses = pdx.read_avro(statuses_path)
    # Anti join: players without a stored status
    missing_players = players.loc[~players['PLAYER_ID'].isin(statuses['PERSON_ID']), 'PLAYER_ID']
else:
    # First run: nothing stored yet, so every player is missing
    statuses = pd.DataFrame(columns=['PERSON_ID', 'ROSTERSTATUS'])
    missing_players = players['PLAYER_ID']

# Iterate over the ID values (iterating the DataFrame itself yields column labels)
for player in tqdm(missing_players):
    player_status = commonplayerinfo.CommonPlayerInfo(player_id=player).get_data_frames()[0][['PERSON_ID', 'ROSTERSTATUS']]
    pdx.to_avro(statuses_path, player_status, append=True)
    # Pause between requests
    time.sleep(0.5)

# Reload so `statuses` includes the rows appended above
if len(missing_players) > 0:
    statuses = pdx.read_avro(statuses_path)

## Get Current Player Game Logs

In [None]:
# Keep only the game logs of players whose roster status is "Active".
if 'current_player_game_logs.parquet' not in os.listdir('Static Files'):
    player_game_logs = pd.read_parquet('Static Files/player_game_logs.parquet')
    # Keep the filtered frame in the variable; to_parquet() returns None, so chaining
    # it into the assignment left `current_player_game_logs` set to None.
    current_player_game_logs = (
        player_game_logs
        .merge(statuses, left_on='PLAYER_ID', right_on='PERSON_ID')
        .query('ROSTERSTATUS == "Active"')
    )
    current_player_game_logs.to_parquet('Static Files/current_player_game_logs.parquet')

## Test Out Live Play-By-Play Endpoint

In [None]:
# Game ID for the live game you are tracking (replace with actual game ID)
game_id = "0022301196"

# Track the last processed play to avoid duplicates
last_processed_play = None
pbp_data = playbyplay.PlayByPlay(game_id).get_dict()
plays = pbp_data['game']['actions']

# Sort plays by actionNumber to process them in order
plays.sort(key=lambda x: x['actionNumber'])

# Print the total number of plays
print(f"Total number of plays: {len(plays)}")

# One dict per processed play. The DataFrame is built once after the loop instead
# of being re-concatenated for every play, which copied all earlier rows each time.
player_rows = []

for play in tqdm(plays, desc="Processing plays"):
    action_number = play['actionNumber']

    # Only process the play if it's new (not processed yet)
    if last_processed_play is not None and action_number <= last_processed_play:
        continue

    player_id = play.get('personId', None)

    try:
        # NOTE(review): the three box scores are requested again for every play;
        # if the game is already finished these responses presumably never change
        # and could be fetched once before the loop - confirm before hoisting.
        traditional_stats = boxscoretraditionalv3.BoxScoreTraditionalV3(game_id=game_id).get_data_frames()[0]
        advanced_stats = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id=game_id).get_data_frames()[0]
        misc_stats = boxscoremiscv3.BoxScoreMiscV3(game_id=game_id).get_data_frames()[0]

        # Find the stats for the player involved in the play
        traditional_player_stats = traditional_stats[traditional_stats['personId'] == player_id]
        advanced_player_stats = advanced_stats[advanced_stats['personId'] == player_id]
        misc_player_stats = misc_stats[misc_stats['personId'] == player_id]

        # Plays whose player is missing from any of the three box scores are skipped
        if not (traditional_player_stats.empty or advanced_player_stats.empty or misc_player_stats.empty):
            # Play-level fields first
            player_data = {
                'player_id': player_id,
                'action_type': play.get('actionType', ''),
                'game_id': game_id,
                'clock': play.get('clock', ''),
                'description': play.get("description", ""),
                'descriptor': play.get("descriptor", ""),
                'period': play.get("period", ""),
                'period_type': play.get("periodType", ""),
                'qualifiers': play.get("qualifiers", ""),
                'shot_distance': play.get("shotDistance", ""),
                'shot_result': play.get("shotResult", ""),
                'side': play.get("side", "")
            }

            # Then every column of the player's first row in each box score;
            # later frames overwrite equally named keys from earlier ones
            for stats in (traditional_player_stats, advanced_player_stats, misc_player_stats):
                player_data.update(dict(zip(stats.columns.tolist(), stats.iloc[0].tolist())))

            player_rows.append(player_data)

    except Exception as e:
        # Best effort: report the failing play and continue with the next one
        tqdm.write(f"Error processing play {action_number}: {e}")

    # Update the last processed play
    last_processed_play = action_number

    # Sleep before fetching the next batch of plays (adjust this time to API rate limit)
    time.sleep(0.5)

# DataFrame with all player stats collected during the game
player_stats_df = pd.DataFrame(player_rows)



In [None]:
# Tidy the per-play stats frame and print the highest-PIE row for each team.
# Every step is safe to repeat, so the cell gives the same result on the first
# run and on re-runs (previously the rename/summary only ran after a KeyError).
redundant_columns = ['gameId', 'teamTricode', 'teamSlug', 'personId', 'firstName', 'familyName', 'playerSlug', 'estimatedOffensiveRating', 'estimatedDefensiveRating', 'estimatedNetRating', 'offensiveReboundPercentage', 'defensiveReboundPercentage', 'turnoverRatio', 'estimatedUsagePercentage', 'estimatedPace', 'jerseyNum', 'teamCity', 'teamName']

# team_name can only be derived while its two source columns are still present
if {'teamCity', 'teamName'}.issubset(player_stats_df.columns):
    player_stats_df['team_name'] = player_stats_df['teamCity'] + " " + player_stats_df['teamName']
player_stats_df.drop(columns=redundant_columns, errors='ignore', inplace=True)

# NOTE(review): rename() silently ignores keys that are not columns - verify the
# percentage keys (e.g. 'fieldGoalPercentage') against the actual API column names.
player_stats_df.rename(columns={'teamId':'team_id', 'nameI':'player_name', 'fieldGoalsMade':'fg_made', 'fieldGoalsAttempted':'fg_attempts', 'fieldGoalPercentage':'fg_percentage','threePointersMade':'fg3_made', 'threePointersAttempted':'fg3_attempts', 'threePointersPercentage':'fg3_percentage', 'freeThrowsMade':'ft_made', 'freeThrowsAttempted':'ft_attempts', 'freeThrowPercentage':'ft_percentage', 'reboundsOffensive':'offensive_rebounds', 'reboundsDefensive':'defensive_rebounds', 'reboundsTotal':'total_rebounds', 'foulsPersonal':'personal_fouls', 'plusMinusPoints':'plus_minus', 'offensiveRating':'offensive_rating', 'defensiveRating':'defensive_rating', 'netRating':'net_rating', 'assistPercentage':'assist_percentage', 'assistToTurnover':'assist_turnover_ratio', 'assistRatio':'assist_ratio', 'reboundPercentage':'rebound_percentage', 'effectiveFieldGoalPercentage':'eff_fg_percentage', 'trueShootingPercentage':'true_shooting_percentage', 'usagePercentage':'usage_percentage', 'pointsOffTurnovers':'turnover_points', 'pointsSecondChance':'second_chance_points', 'pointsFastBreak':'fast_break_points', 'pointsPaint':'paint_points', 'blocksAgainst':'blocks_against', 'foulsDrawn':'fouls_drawn'}, inplace=True)

# Strip the duration markers from the clock text. Series.str.replace works on
# substrings; Series.replace only swaps values that match the whole cell.
player_stats_df['clock'] = (
    player_stats_df['clock']
    .str.replace('PT', '', regex=False)
    .str.replace('M', '', regex=False)
    .str.replace('.00S', '', regex=False)
)
pd.set_option("display.max_columns", 0)

# Row with the highest PIE per team; the summary line assumes two teams are present
best_players = player_stats_df.groupby('team_name')['PIE'].idxmax()
best_players = player_stats_df.loc[best_players, ['player_name', 'team_name', 'PIE']]
team_1 = best_players['team_name'].iloc[0]
player_1 = best_players['player_name'].iloc[0]
player_1_pie = best_players['PIE'].iloc[0]
team_2 = best_players['team_name'].iloc[1]
player_2 = best_players['player_name'].iloc[1]
player_2_pie = best_players['PIE'].iloc[1]
display(player_stats_df.head())
print(f'{team_1}: {player_1} ({player_1_pie}) | {team_2}: {player_2} ({player_2_pie})')

In [None]:
# Collect the game IDs of every day between the two dates below,
# keyed by 'YYYY-MM-DD'. Days without games get no entry.
start = datetime.strptime("2024-10-04", "%Y-%m-%d")
end = datetime.strptime("2025-04-15", "%Y-%m-%d")

date_range = pd.date_range(start=start, end=end).to_list()

games = {}
# The loop variable is deliberately not called `date`: that name would shadow the
# `date` class imported from datetime for the rest of the notebook session.
for game_day in tqdm(date_range):
    scoreboard = json.loads(scoreboardv2.ScoreboardV2(game_date=game_day).get_json())

    # resultSets[0] is the 'GameHeader' set; index 2 of each row is the 'GAME_ID'
    day_game_ids = [game[2] for game in scoreboard['resultSets'][0]['rowSet']]
    if day_game_ids:
        games[game_day.strftime('%Y-%m-%d')] = day_game_ids

    time.sleep(0.5)  # Delay to avoid rate limits

In [None]:
# Store one row per (game_date, game_id) pair; skipped when the file already exists.
if '2024-25_game_ids.parquet' not in os.listdir('Static Files'):
    games_2025 = pd.DataFrame(list(games.items()), columns=['game_date', 'game_id'])
    # Each date maps to a list of IDs; explode() turns every ID into its own row
    games_2025 = games_2025.explode(column='game_id')
    games_2025.to_parquet('Static Files/2024-25_game_ids.parquet')

In [None]:
# Load the cached game logs: `game_log_df` holds the current-player logs and
# `game_log_df2` the team logs. Both names are read by later cells.
game_log_df = pd.read_parquet('Static Files/current_player_game_logs.parquet')
game_log_df2 = pd.read_parquet('Static Files/team_game_logs.parquet')

def simulation(game_id):
    """Return the play-by-play actions of a game as a DataFrame.

    Fetches the actions for ``game_id`` from the live play-by-play endpoint and
    keeps the period, remaining period time, description and both scores of
    every action that carries all of those fields.

    Args:
        game_id: Game identifier passed to ``playbyplay.PlayByPlay``.

    Returns:
        DataFrame with the columns ``game_id``, ``period``,
        ``period_time_remaining``, ``description``, ``home_score`` and
        ``away_score``. It is empty when the feed has no usable actions.
    """
    print(f"Starting mock live updates for game: {game_id}")
    columns = ['game_id', 'period', 'period_time_remaining', 'description', 'home_score', 'away_score']
    plays = playbyplay.PlayByPlay(game_id).get_dict()['game']['actions']

    if not plays:
        # Previously unreachable; an empty feed crashed in pd.concat([]) instead
        print("no plays yet...")
        return pd.DataFrame(columns=columns)

    rows = []
    for latest_play in plays:
        try:
            rows.append({
                'game_id': game_id,
                'period': latest_play['period'],
                # Drop the 'PT' prefix, turn 'M' into ':' and keep the first five characters
                'period_time_remaining': latest_play['clock'].replace('PT', '').replace('M', ':')[:5],
                'description': latest_play['description'],
                'home_score': latest_play['scoreHome'],
                'away_score': latest_play['scoreAway'],
            })
        except KeyError:
            # Actions that lack any of the fields above are skipped
            continue

    return pd.DataFrame(rows, columns=columns)
# Build the play-by-play frame for one hard-coded game ID and show it as the cell output.
current_game = simulation('0022000196')
current_game

## Get Advanced Player Game Stats

In [None]:

# Append the advanced box-score rows of every player in each 2023-24 game to
# 'player_advanced_stats.csv' (written without a header row).
if 'current_player_advanced_game_stats.parquet' not in os.listdir('Static Files'):

    # Filter games for the 2023-24 season
    games = game_log_df[game_log_df['SEASON_YEAR'] == '2023-24']['GAME_ID'].unique()

    # Iterate over each game
    for game_id in tqdm(games):
        # Fetch box score data for the current game (first result frame)
        game = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id).get_data_frames()[0]

        # Build all rows of the game in one step. The key order below is the CSV
        # column order. No request happens per row, so no per-row sleep is needed.
        game_rows = pd.DataFrame({
            'game_id': game_id,
            'team_id': game['teamId'],
            'team_name': game['teamCity'] + " " + game['teamName'],
            'player_id': game['personId'],
            'player_name': game['firstName'] + " " + game['familyName'],
            'position': game['position'],
            'comment': game['comment'],
            'offensive_rating': game['offensiveRating'],
            'defensive_rating': game['defensiveRating'],
            'net_rating': game['netRating'],
            'assist_percentage': game['assistPercentage'],
            'assist_turnover_ratio': game['assistToTurnover'],
            'assist_ratio': game['assistRatio'],
            'usage_percentage': game['usagePercentage'],
            'possessions': game['possessions'],
            'effective_fg_percentage': game['effectiveFieldGoalPercentage'],
            'true_shooting_percentage': game['trueShootingPercentage'],
            'player_impact_score': game['PIE']
        })

        # Append the game's rows to the CSV
        game_rows.to_csv('player_advanced_stats.csv', mode='a', index=False, header=False)

        # Sleep to avoid hitting the rate limit
        time.sleep(0.5)

# manually add csv headers and then convert csv to parquet
# df = pd.read_csv('player_advanced_stats.csv')
# df.to_parquet('Static Files/current_player_advanced_game_stats.parquet')

## Get Advanced Team Game Stats

In [None]:

# Append the advanced box-score rows of each 2023-24 game's second result
# frame to 'team_advanced_stats.csv' (written without a header row).
if 'current_team_advanced_game_stats.parquet' not in os.listdir('Static Files'):

    # Filter games for the 2023-24 season
    games = game_log_df[game_log_df['SEASON_YEAR'] == '2023-24']['GAME_ID'].unique()

    # Iterate over each game
    for game_id in tqdm(games):
        # Fetch box score data for the current game (second result frame;
        # presumably one row per team - confirm against the endpoint docs)
        game = boxscoreadvancedv3.BoxScoreAdvancedV3(game_id).get_data_frames()[1]

        # Build all rows of the game in one step. The key order below is the CSV
        # column order. No request happens per row, so no per-row sleep is needed.
        game_rows = pd.DataFrame({
            'game_id': game_id,
            'team_id': game['teamId'],
            'team_name': game['teamCity'] + " " + game['teamName'],
            'offensive_rating': game['offensiveRating'],
            'defensive_rating': game['defensiveRating'],
            'net_rating': game['netRating'],
            'assist_percentage': game['assistPercentage'],
            'assist_turnover_ratio': game['assistToTurnover'],
            'assist_ratio': game['assistRatio'],
            'possessions': game['possessions'],
            'effective_fg_percentage': game['effectiveFieldGoalPercentage'],
            'true_shooting_percentage': game['trueShootingPercentage'],
        })

        # Append the game's rows to the CSV
        game_rows.to_csv('team_advanced_stats.csv', mode='a', index=False, header=False)

        # Sleep to avoid hitting the rate limit
        time.sleep(0.5)

# manually add csv headers and then convert csv to parquet
# df = pd.read_csv('team_advanced_stats.csv')
# df.to_parquet('Static Files/current_team_advanced_game_stats.parquet')

## Test out ML capabilities
### In this section, I will create two base models:
#### - One to predict the game winner
#### - One to predict the MVP for the game

### Get base dataset

In [None]:
# Join the advanced player stats with the player and team game logs.
df = pd.read_parquet('Static Files/current_player_advanced_game_stats.parquet')
df_columns = ['game_id', 'team_id', 'player_id']
game_log_columns = ['GAME_ID', 'TEAM_ID', 'PLAYER_ID']
game_log_columns2 = ['GAME_ID', 'TEAM_ID']

# Join keys are compared as text on both sides
df = df.astype({column: str for column in df_columns})

# astype() returns new frames, so the shared `game_log_df` / `game_log_df2` stay
# untouched. Trimming GAME_ID in place removed two more characters on every
# re-run of this cell and altered the IDs other cells read from those frames.
player_logs = game_log_df.astype({column: str for column in game_log_columns})
team_logs = game_log_df2.astype({column: str for column in game_log_columns2})

# Drop the first two characters of the log GAME_ID so it lines up with `game_id`
# (presumably the leading '00' lost when the IDs were stored as numbers - confirm)
player_logs['GAME_ID'] = player_logs['GAME_ID'].str[2:]
team_logs['GAME_ID'] = team_logs['GAME_ID'].str[2:]

player_game_stats = df.merge(player_logs, left_on=['game_id', 'team_id', 'player_id'], right_on=['GAME_ID', 'TEAM_ID', 'PLAYER_ID'], how='left')
player_game_stats = player_game_stats.merge(team_logs, left_on=['game_id', 'team_id'], right_on=['GAME_ID', 'TEAM_ID'], how='left', suffixes=('_player', '_team'))

# column operations
player_game_stats = player_game_stats[player_game_stats.columns.drop(list(player_game_stats.filter(regex='_RANK_team')))]
player_game_stats = player_game_stats[player_game_stats.columns.drop(list(player_game_stats.filter(regex='_RANK_player')))]
player_game_stats.columns = [x.lower() for x in player_game_stats.columns]

# Matchups containing the literal text 'vs.' are flagged as home games. regex=False
# keeps the dot literal; rows without a matchup are removed by the dropna() below.
player_game_stats['is_home'] = np.where(player_game_stats['matchup_player'].str.contains('vs.', regex=False, na=False), 1, 0)
# Lower-casing can produce repeated names; keep the first occurrence of each
player_game_stats = player_game_stats.loc[:, ~player_game_stats.columns.duplicated()]
player_game_stats.dropna(subset=['pts_player'], inplace=True)
player_game_stats

In [None]:
# Key columns that must be text so they can be joined with the game-log keys
team_advanced_columns = ['game_id', 'team_id']

# Load the cached team-level advanced stats and cast the key columns in one step
team_advanced_stats = pd.read_parquet(
    'Static Files/current_team_advanced_game_stats.parquet'
).astype({key: str for key in team_advanced_columns})
team_advanced_stats.info()

### Prep data for first model

In [None]:
# drop all unnecessary columns
game_winner_train = player_game_stats[['game_id', 'team_id', 'is_home', 'fgm_team', 'fga_team', 'fg_pct_team', 'fg3m_team', 'fg3a_team', 'fg3_pct_team', 'ftm_team', 'fta_team', 'ft_pct_team', 'oreb_team', 'dreb_team', 'reb_team', 'ast_team', 'tov_team', 'stl_team', 'blk_team', 'pf_team', 'pfd_team', 'pts_team', 'wl_team']]
game_winner_train = game_winner_train.merge(team_advanced_stats, on=['game_id', 'team_id'])
game_winner_train.drop_duplicates(subset=['game_id', 'team_id'], inplace=True)
games = []
for game in game_winner_train['game_id'].unique():
    games.append("00"+game)
game_winner_train.drop(columns=['game_id', 'team_id', 'team_name'], inplace=True)

# rename columns
game_winner_train.columns = game_winner_train.columns.str.replace('_team', '', regex=False)

# convert boolean labels to boolean types
game_winner_train['wl'] = game_winner_train['wl'].str.replace('L','0').replace('W','1')
game_winner_train['wl'] = np.where(game_winner_train['wl'] == '0', 0, 1)

game_winner_train.reset_index().drop(columns='index', inplace=True)
game_winner_train

### Fit Training Data to First Model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes classifier predicting the `wl` label
model = GaussianNB()

# Besides the label, exclude the 'prediction' / 'outcome' columns that the evaluation
# cell adds to this frame; otherwise re-running this cell would train on them.
X = game_winner_train.drop(columns=['wl', 'prediction', 'outcome'], errors='ignore')
y = game_winner_train['wl']
# Fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Store the prediction for every row and whether it matches the label
game_winner_train['prediction'] = model.predict(X)
game_winner_train['outcome'] = game_winner_train['wl'] == game_winner_train['prediction']
# Accuracy over all rows includes the rows the model was fitted on
print(f'Accuracy: {round(model.score(X, y)*100, 2)}%')
# Accuracy on the held-out split is the estimate for unseen games
print(f'Accuracy (held-out test set): {round(model.score(X_test, y_test)*100, 2)}%')
game_winner_train

In [None]:
import re
def camel_to_snake(name):
    """Return ``name`` converted from camelCase/PascalCase to snake_case.

    The conversion runs in two passes: the first inserts an underscore before
    a capitalised word (an upper-case letter followed by lower-case letters),
    the second between a lower-case letter or digit and the upper-case letter
    that follows it. The result is lower-cased.
    """
    word_boundaries = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', name)
    all_boundaries = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', word_boundaries)
    return all_boundaries.lower()

# Convert all column headers in the DataFrame. The converted names belong to
# `player_stats_df`; assigning them to `player_game_stats` (a different frame with a
# different number of columns) fails with a length mismatch.
player_stats_df.columns = [camel_to_snake(col) for col in player_stats_df.columns]
# Per-player maximum of every column from position 19 onwards
player_stats_df.groupby(['player_id'])[list(player_stats_df.columns[19:])].max().reset_index()