# Retrieve every gamelog for every NBA player from the previous two seasons

In [None]:
import requests
import time
import pandas as pd
from datetime import datetime

# Headers necessary for communication with the NBA API
headers = {
    "Connection": "keep-alive",
    "Accept": "application/json, text/plain, */*",
    "x-nba-stats-token": "true",
    # Adjacent literals are joined by Python into one single-line User-Agent value.
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.130 Safari/537.36"
    ),
    "x-nba-stats-origin": "stats",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-Mode": "cors",
    "Referer": "https://stats.nba.com/",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
}

# Empty gamelog table: one column per field, in the order listed below
gamelogs_df = pd.DataFrame(columns="""
    season dateUTC playerTeamId opposingTeamId playerId position isStarter minutes
    points assists reboundsDefensive reboundsOffensive reboundsTotal
    steals blocks turnovers fieldGoalsAttempted fieldGoalsMade freeThrowsAttempted
    freeThrowsMade fouls
""".split())

# Get the game ids for all regular-season games played in the previous two seasons
current_year = datetime.now().year  # read once so both halves of the season label agree
unique_game_ids = set()
for seasons_back in range(2):
    # Season label in the form 'YYYY-YY', e.g. '2023-24'
    season_label = f'{current_year - (seasons_back + 1)}-{str(current_year - seasons_back)[2:]}'
    game_ids_response = requests.get(
        f'https://stats.nba.com/stats/leaguegamefinder?LeagueID=00&Season={season_label}&SeasonType=Regular Season',
        headers=headers,
    )
    # Fail with the HTTP status instead of an opaque JSON/KeyError when the request is rejected
    game_ids_response.raise_for_status()
    # Index 4 of each result row is taken as the game id; the set drops repeated ids
    # (presumably one row per team per game -- confirm against the API response headers).
    unique_game_ids.update(game[4] for game in game_ids_response.json()['resultSets'][0]['rowSet'])
game_ids = list(unique_game_ids)

# Gets player data, i.e. position, from Sleeper's API
players_response = requests.get('https://api.sleeper.app/v1/players/nba')
players_data = players_response.json()

# Group the Sleeper players by first name once, so each boxscore player only has to be
# compared against the few Sleeper players sharing that first name instead of all of them.
sleeper_players_by_first_name = {}
for sleeper_player in players_data.values():
    sleeper_players_by_first_name.setdefault(sleeper_player.get('first_name'), []).append(sleeper_player)


def _lookup_position(first_name, family_name):
    """Return the Sleeper position of the first player matching the given name.

    A Sleeper player matches when the first names are equal and the Sleeper last name
    is contained in the boxscore family name. Entries without a last name are skipped.
    Returns the string 'NaN' when nothing matches.
    """
    for candidate in sleeper_players_by_first_name.get(first_name, []):
        last_name = candidate.get('last_name')
        if last_name and last_name in family_name:
            return candidate['position']
    return 'NaN'


def _minutes_played(clock):
    """Convert a boxscore duration (presumably of the form 'PT25M01.00S') to decimal minutes."""
    whole_minutes, _, remainder = clock.partition('M')
    # Skip the two-character prefix; only the first two characters after 'M' count as seconds.
    return float(whole_minutes[2:]) + float(remainder[:2]) / 60


def _team_gamelogs(team, opposing_team_id, season, game_date):
    """Build one gamelog row per player of `team` who has no 'notPlayingReason'."""
    rows = []
    for boxscore_player in team['players']:
        # Players carrying a 'notPlayingReason' did not take part in the game
        if boxscore_player.get('notPlayingReason') is not None:
            continue

        stats = boxscore_player['statistics']
        rows.append({
            'season': season, 'dateUTC': game_date, 'playerTeamId': team['teamId'],
            'opposingTeamId': opposing_team_id, 'playerId': boxscore_player['personId'],
            # Position the player plays, i.e. center, pointguard, smallforward, etc
            'position': _lookup_position(boxscore_player['firstName'], boxscore_player['familyName']),
            'isStarter': boxscore_player['starter'],
            'minutes': _minutes_played(stats['minutes']),
            'points': stats['points'], 'assists': stats['assists'],
            'reboundsDefensive': stats['reboundsDefensive'],
            'reboundsOffensive': stats['reboundsOffensive'],
            'reboundsTotal': stats['reboundsTotal'],
            'steals': stats['steals'], 'blocks': stats['blocks'], 'turnovers': stats['turnovers'],
            'fieldGoalsAttempted': stats['fieldGoalsAttempted'],
            'fieldGoalsMade': stats['fieldGoalsMade'],
            'freeThrowsAttempted': stats['freeThrowsAttempted'],
            'freeThrowsMade': stats['freeThrowsMade'],
            'threesAttempted': stats['threePointersAttempted'],
            'threesMade': stats['threePointersMade'],
            'fouls': stats['foulsPersonal'],
        })
    return rows


# Get the gamelogs for each player in each game
gamelog_rows = []
for game_id in game_ids:
    try:
        # Get the game data from the NBA API
        game_response = requests.get(f'https://cdn.nba.com/static/json/liveData/boxscore/boxscore_{game_id}.json')
        game = game_response.json()['game']

        # Get the date of the game; games dated January-September are counted
        # towards the season that started in the previous calendar year.
        game_date = game['gameTimeUTC'].split('T')[0]
        date_parts = game_date.split('-')
        season = int(date_parts[0])
        if 1 <= int(date_parts[1]) < 10:
            season -= 1

        home_team = game['homeTeam']
        away_team = game['awayTeam']

        # Rows are only kept once both teams were processed without error,
        # so a failing game contributes nothing rather than half a boxscore.
        game_rows = _team_gamelogs(home_team, away_team['teamId'], season, game_date)
        game_rows += _team_gamelogs(away_team, home_team['teamId'], season, game_date)
        gamelog_rows.extend(game_rows)
    except Exception as e:
        # Best effort: report the failure and carry on with the next game
        print(f'Error: {e}')
    finally:
        # Brief pause between consecutive requests
        time.sleep(0.05)

# Append everything in one go; concatenating inside the loop copies the whole table per game
if gamelog_rows:
    gamelogs_df = pd.concat([gamelogs_df, pd.DataFrame(gamelog_rows)], ignore_index=True)
        
# Single-number game score per gamelog row: points plus weighted makes, rebounds, steals,
# assists and blocks, minus weighted attempts, missed free throws, fouls and turnovers.
missed_free_throws = gamelogs_df['freeThrowsAttempted'] - gamelogs_df['freeThrowsMade']

game_score = gamelogs_df['points'] + 0.4 * gamelogs_df['fieldGoalsMade']
game_score = game_score - 0.7 * gamelogs_df['fieldGoalsAttempted']
game_score = game_score - 0.4 * missed_free_throws
game_score = game_score + 0.7 * gamelogs_df['reboundsOffensive']
game_score = game_score + 0.3 * gamelogs_df['reboundsDefensive']
game_score = game_score + gamelogs_df['steals']
game_score = game_score + 0.7 * gamelogs_df['assists']
game_score = game_score + 0.7 * gamelogs_df['blocks']
game_score = game_score - 0.4 * gamelogs_df['fouls']
game_score = game_score - gamelogs_df['turnovers']

gamelogs_df['gamescore'] = game_score

gamelogs_df

# Save dataframe to CSV format

In [None]:
# Write the gamelog table to nba-player-gamelogs.csv; index=False keeps the row index out of the file
gamelogs_df.to_csv("nba-player-gamelogs.csv", index=False)

In [None]:
import requests
from datetime import datetime

# Request headers sent with calls to stats.nba.com
headers = {
    "Connection": "keep-alive",
    "Accept": "application/json, text/plain, */*",
    "x-nba-stats-token": "true",
    # Adjacent literals are used instead of a backslash continuation inside the string,
    # which would embed the next line's indentation in the header value.
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
    ),
    "x-nba-stats-origin": "stats",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-Mode": "cors",
    "Referer": "https://stats.nba.com/",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
}

# Ask the league game finder for the 2023-24 regular season and collect the distinct game ids
game_ids_endpoint = "https://stats.nba.com/stats/leaguegamefinder?LeagueID=00&Season=2023-24&SeasonType=Regular Season"
game_ids_response = requests.get(game_ids_endpoint, headers=headers)

# Index 4 of every result row is taken as the game id; the set removes repeats
season_rows = game_ids_response.json()["resultSets"][0]["rowSet"]
game_ids = list({row[4] for row in season_rows})
game_ids

In [10]:
import pandas as pd
import requests

# Fetch the stored gamelogs from the local API
gamelogs = requests.get('http://localhost:8000/api/v1/gamelogs').json()

# Flatten the nested JSON records and keep only the columns used downstream
wanted_columns = [
    'playerId', 'dateUTC', 'position', 'isStarter', 'opposingTeam.teamId', 'minutes',
    'fieldGoalsAttempted', 'fieldGoalsMade', 'freeThrowsAttempted', 'freeThrowsMade',
    'points', 'threesMade', 'steals', 'blocks', 'assists', 'reboundsTotal', 'turnovers',
]
gamelogs_df = pd.json_normalize(gamelogs)[wanted_columns]

# Parse the dates so they can be compared against timestamps later on
gamelogs_df['dateUTC'] = pd.to_datetime(gamelogs_df['dateUTC'])
gamelogs_df

Unnamed: 0,playerId,dateUTC,position,isStarter,opposingTeam.teamId,minutes,fieldGoalsAttempted,fieldGoalsMade,freeThrowsAttempted,freeThrowsMade,points,threesMade,steals,blocks,assists,reboundsTotal,turnovers
0,202330,2023-10-30,SF,True,1610612751,31.266667,11,6,1,1,15,2,2,1,5,0,0
1,1629023,2023-10-30,PF,True,1610612751,35.366667,14,7,1,0,15,1,0,0,2,12,1
2,1631109,2023-10-30,C,True,1610612751,23.216667,8,8,2,2,18,0,0,0,1,7,1
3,1626179,2023-10-30,PG,True,1610612751,40.700000,19,10,1,1,23,2,1,1,9,3,2
4,1630163,2023-10-30,PG,True,1610612751,21.750000,12,3,2,2,8,0,2,0,8,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63463,1629216,2024-04-21,PG,False,1610612743,7.700000,0,0,0,0,0,0,0,0,1,1,1
63464,1629637,2024-04-21,C,False,1610612743,4.383333,0,0,0,0,0,0,0,0,0,3,0
63465,1631108,2024-04-21,PG,False,1610612743,0.000000,0,0,0,0,0,0,0,0,0,0,0
63466,1641721,2024-04-21,PF,False,1610612743,0.000000,0,0,0,0,0,0,0,0,0,0,0


In [13]:


from datetime import datetime, timedelta
current_date = pd.to_datetime(datetime.utcnow().date())

# Start from the games played in the 50 days before today
before_today = gamelogs_df['dateUTC'] < current_date
prev_gms = gamelogs_df[before_today & (gamelogs_df['dateUTC'] > (current_date - timedelta(days=50)))]

# With fewer than 20 distinct game dates in that window, fall back to every game before today
if len(prev_gms['dateUTC'].unique()) < 20:
    prev_gms = gamelogs_df[before_today]

# Group the remaining stat columns by opponent, position and starter flag
gamelogs_aggregate_df = (prev_gms.drop(columns=['dateUTC', 'playerId'])
                        .groupby(['opposingTeam.teamId', 'position', 'isStarter']))

# Per-minute rates: each group's summed stats divided by that group's summed minutes
defense_df = (gamelogs_aggregate_df.sum().div(gamelogs_aggregate_df['minutes'].sum(), axis=0))

# Example lookup: rates for starting centers in games against team 1610612737
defense_df.loc[('1610612737', 'C', True)]

minutes                1.000000
fieldGoalsAttempted    0.333383
fieldGoalsMade         0.183063
freeThrowsAttempted    0.096741
freeThrowsMade         0.075904
points                 0.465843
threesMade             0.023813
steals                 0.026790
blocks                 0.043161
assists                0.083346
reboundsTotal          0.308082
turnovers              0.063998
Name: (1610612737, C, True), dtype: float64

In [None]:
# Load the schedule entries flagged as current week, and the player list, from the local API
api_root = 'http://localhost:8000/api/v1'
schedules = requests.get(f'{api_root}/matchups/schedules?is_current_week=True').json()
players = requests.get(f'{api_root}/players').json()
players

In [None]:
from datetime import datetime, timedelta
import numpy as np

current_date = pd.to_datetime(datetime.utcnow().date())
projections_list = []

# Stats that get both a per-player average and an opponent per-minute rating
stat_columns = ['fieldGoalsAttempted', 'fieldGoalsMade',
                'threesMade', 'freeThrowsAttempted', 'freeThrowsMade',
                'points', 'steals', 'blocks',
                'assists', 'reboundsTotal', 'turnovers']

# Only games strictly before today feed the projections
past_games = gamelogs_df[gamelogs_df['dateUTC'] < current_date]

for player in players:
    is_starter = player['depthChartOrder'] == 1
    role_flag = int(is_starter)

    # Gets all the games previously played by the player, oldest first so that
    # tail(5) really is the five most recent games.
    prev_gms = past_games[past_games['playerId'] == player['playerId']].sort_values('dateUTC')

    # Games by any player with the same position and the same starter/bench role
    position_role_games = past_games[(past_games['isStarter'] == role_flag)
                                     & (past_games['position'] == player['position'])]

    # The role counts as changed when the depth chart disagrees with the last five games:
    # a listed starter who started fewer than 3 of them, or a bench player who started 3+.
    recent_starts = prev_gms.tail(5)['isStarter'].sum()
    role_changed = (is_starter and recent_starts < 3) or (not is_starter and recent_starts > 2)

    if len(prev_gms) == 0:
        # No history at all: use the plain average for the position and role
        averages = position_role_games[stat_columns].mean().to_dict()
    elif role_changed:
        # Prefer the player's own games in the new role, if there are at least three of them
        player_role_games = prev_gms[prev_gms['isStarter'] == role_flag]
        sample = player_role_games if len(player_role_games) >= 3 else position_role_games
        averages = sample[stat_columns].mean().to_dict()
    else:
        # Weighted average of the player's games, each weighted by 0.98 ** (days since the game)
        weights = .98 ** (current_date - prev_gms['dateUTC']).dt.days
        weights_sum = weights.sum()
        if weights_sum > 0:
            averages = (prev_gms[stat_columns].mul(weights, axis=0).sum() / weights_sum).to_dict()
        else:
            averages = dict.fromkeys(stat_columns)

    base_projection = {f"avg{stat[0].upper()+stat[1:]}": averages[stat] for stat in stat_columns}
    base_projection['playerId'] = player['playerId']

    for schedule in schedules:
        player_team_id = str(player['team']['teamId'])

        # The opponent is the *other* side of the matchup
        if str(schedule['awayTeamId']) == player_team_id:
            opponent_team_id = schedule['homeTeamId']
        elif str(schedule['homeTeamId']) == player_team_id:
            opponent_team_id = schedule['awayTeamId']
        else:
            continue

        # Games played against this opponent by players with the same position and role
        vs_opponent = position_role_games[position_role_games['opposingTeam.teamId'] == str(opponent_team_id)]

        # Use the previous 30 days when they hold at least 10 such gamelogs,
        # otherwise every earlier game against this opponent.
        recent_vs_opponent = vs_opponent[vs_opponent['dateUTC'] > (current_date - timedelta(days=30))]
        opp_gms = recent_vs_opponent if len(recent_vs_opponent) >= 10 else vs_opponent

        # Total of each stat accumulated against this team divided by the total minutes played
        ratings = opp_gms[stat_columns].sum() / opp_gms['minutes'].sum()

        # Copy the player averages per game; appending one shared dict would make
        # every row of this player end up with the last game's values.
        game_projection = dict(base_projection)
        game_projection.update({f"oppDRating{stat[0].upper()+stat[1:]}": ratings[stat] for stat in stat_columns})
        game_projection['gameId'] = schedule['gameId']
        projections_list.append(game_projection)

projections_input_df = pd.DataFrame(projections_list)

projections_input_df

In [None]:
from sklearn.preprocessing import StandardScaler

# Extract the playerId and gameId columns; they identify rows and must not be scaled
id_columns = ['playerId', 'gameId']
player_id_game_id = projections_input_df[id_columns]

# Select only the numerical feature columns for standard scaling. The identifiers are
# dropped first so that numeric ids are neither scaled nor duplicated in the result.
numerical_cols = (projections_input_df.drop(columns=id_columns)
                  .select_dtypes(include=['float64', 'int64']).columns)

# Initialize the StandardScaler
scaler = StandardScaler()

# Standardize the numerical columns
scaled_numerical = scaler.fit_transform(projections_input_df[numerical_cols])

# Convert the standardized numerical data back to a DataFrame, reusing the source
# index so the concatenation below lines rows up one-to-one.
scaled_df = pd.DataFrame(scaled_numerical, columns=numerical_cols, index=projections_input_df.index)

# Concatenate the standardized numerical DataFrame with the playerId and gameId columns
final_df = pd.concat([player_id_game_id, scaled_df], axis=1)

# Display the resulting DataFrame
final_df