# Retrieve every gamelog for every NBA player from the previous two seasons

In [31]:
import requests
import time
import pandas as pd
from datetime import datetime

# Headers neccessary for communication with the NBA api
headers = {
    'Connection': 'keep-alive',
    'Accept': 'application/json, text/plain, */*',
    'x-nba-stats-token': 'true',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
    'x-nba-stats-origin': 'stats',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Referer': 'https://stats.nba.com/',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
}

# Dataframe structure
gamelogs_df = pd.DataFrame(columns=[
    'season', 'date', 'playerTeamId', 'opposingTeamId', 'playerId', 'position', 'isStarter', 'minutes', 
    'points', 'assists', 'reboundsDefensive', 'reboundsOffensive', 'reboundsTotal',
    'steals', 'blocks', 'turnovers', 'fieldGoalsAttempted', 'fieldGoalsMade', 'freeThrowsAttempted', 
    'freeThrowsMade', 'fouls'
])

# Get the game ids for all games played in the previous three seasons
game_ids = []
for i in range(2):
    game_ids_response = requests.get(f'https://stats.nba.com/stats/leaguegamefinder?LeagueID=00&Season={int(datetime.now().year)-(i+1)}-{str(datetime.now().year - i)[2:]}&SeasonType=Regular Season', headers=headers)
    game_ids = list(set(game_ids + [game[4] for game in game_ids_response.json()['resultSets'][0]['rowSet']]))

# Gets player data, i.e. position, from Sleeper's API
players_response = requests.get('https://api.sleeper.app/v1/players/nba')
players_data = players_response.json()

# Get the gamelogs for each player in each game
for game_id in game_ids:
    try:
        # Get the game data from the NBA API
        game_response = requests.get(f'https://cdn.nba.com/static/json/liveData/boxscore/boxscore_{game_id}.json')
        game = game_response.json()['game']
        
        # Get the date of the game
        game_date = game['gameTimeUTC'].split('T')[0]
        season = int(game_date.split('-')[0])
        month_number = int(game_date.split('-')[1])
        if month_number >= 1 and month_number < 10:
            season -= 1

        # Get home team players and team id
        home_team = game['homeTeam']
        home_team_id = home_team['teamId']
        home_players = home_team['players']

        # Get away team players and team id
        away_team = game['awayTeam']
        away_team_id = away_team['teamId']
        away_players = away_team['players']

        data = []
        # Get the gamelogs for the home players
        for home_player in home_players:
            stats = home_player['statistics']
            position = 'NaN' # Position the player plays, i.e. powerforward, shootingguard, etc
            minutes = float(stats['minutes'].split('M')[0][2:])
            seconds = float(stats['minutes'].split('M')[1][:2])
            minutes_played = minutes + (seconds / 60)
            is_active = int(home_player.get('notPlayingReason') is None)
            
            if not is_active:
                continue
            
            # Get the player's position from the player data fetched from Sleeper's API
            for key, player in players_data.items():
                if player['first_name']==home_player['firstName'] and player['last_name'] in home_player['familyName']:
                    position = player['position']
                    break
                    
            data.append({
                'season': season, 'date': game_date, 'playerTeamId': home_team_id, 
                'opposingTeamId': away_team_id, 'playerId': home_player['personId'], 
                'position': position, 'isStarter': home_player['starter'],
                'minutes': minutes_played, 'points': stats['points'], 'assists': stats['assists'], 
                'reboundsDefensive': stats['reboundsDefensive'], 
                'reboundsOffensive': stats['reboundsOffensive'], 
                'reboundsTotal': stats['reboundsTotal'], 
                'steals': stats['steals'], 
                'blocks': stats['blocks'], 'turnovers': stats['turnovers'], 
                'fieldGoalsAttempted': stats['fieldGoalsAttempted'], 
                'fieldGoalsMade': stats['fieldGoalsMade'], 
                'freeThrowsAttempted': stats['freeThrowsAttempted'], 
                'freeThrowsMade': stats['freeThrowsMade'],
                'threesAttempted': stats['threePointersAttempted'],
                'threesMade': stats['threePointersMade'],
                'fouls': stats['foulsPersonal']
            })

        # Get the gamelogs for the away players
        for away_player in away_players:
            stats = away_player['statistics']
            position = 'NaN'
            minutes = float(stats['minutes'].split('M')[0][2:])
            seconds = float(stats['minutes'].split('M')[1][:2])
            minutes_played = minutes + (seconds / 60)
            is_active = int(away_player.get('notPlayingReason') is None)
            
            if not is_active:
                continue
            
            # Get the player's position from the player data fetched from Sleeper's API
            for key, player in players_data.items():
                if player['first_name']==away_player['firstName'] and player['last_name'] in away_player['familyName']:
                    position = player['position'] # Position the player plays, i.e. center, pointguard, smallforward, etc
                    
            data.append({
                'season': season, 'date': game_date, 'playerTeamId': away_team_id, 
                'opposingTeamId': home_team_id, 'playerId': away_player['personId'], 
                'position': position, 'isStarter': away_player['starter'], 'minutes': minutes_played,
                'points': stats['points'], 'assists': stats['assists'], 
                'reboundsDefensive': stats['reboundsDefensive'], 
                'reboundsOffensive': stats['reboundsOffensive'],
                'reboundsTotal': stats['reboundsTotal'],
                'steals': stats['steals'], 'blocks': stats['blocks'], 'turnovers': stats['turnovers'], 
                'fieldGoalsAttempted': stats['fieldGoalsAttempted'], 
                'fieldGoalsMade': stats['fieldGoalsMade'], 
                'freeThrowsAttempted': stats['freeThrowsAttempted'], 
                'freeThrowsMade': stats['freeThrowsMade'],
                'threesAttempted': stats['threePointersAttempted'],
                'threesMade': stats['threePointersMade'],
                'fouls': stats['foulsPersonal']
            })
        gamelogs_df = pd.concat([gamelogs_df, pd.DataFrame(data)])
    except Exception as e:
        print('Error: ' + e)
    finally:
        time.sleep(0.05)
        
gamelogs_df['gamescore'] = (gamelogs_df['points'] + 0.4*gamelogs_df['fieldGoalsMade'] 
                            - 0.7*gamelogs_df['fieldGoalsAttempted']
                            - 0.4*(gamelogs_df['freeThrowsAttempted'] - gamelogs_df['freeThrowsMade'])
                            + 0.7*gamelogs_df['reboundsOffensive'] 
                            + 0.3*gamelogs_df['reboundsDefensive']
                            + gamelogs_df['steals'] + 0.7*gamelogs_df['assists']
                            + 0.7*gamelogs_df['blocks'] - 0.4*gamelogs_df['fouls']
                            - gamelogs_df['turnovers'])

gamelogs_df

Unnamed: 0,season,date,playerTeamId,opposingTeamId,playerId,position,isStarter,minutes,points,assists,...,blocks,turnovers,fieldGoalsAttempted,fieldGoalsMade,freeThrowsAttempted,freeThrowsMade,fouls,threesAttempted,threesMade,gamescore
0,2022,2023-03-27,1610612765,1610612749,1630587,SF,1,34.500000,9,2,...,0,0,5,4,0,0,2,1.0,1.0,9.9
1,2022,2023-03-27,1610612765,1610612749,1628963,PF,1,35.733333,16,2,...,0,0,11,6,4,3,0,2.0,1.0,16.2
2,2022,2023-03-27,1610612765,1610612749,1630164,C,1,20.333333,14,1,...,0,1,12,5,4,4,1,1.0,0.0,10.9
3,2022,2023-03-27,1610612765,1610612749,1631093,PG,1,31.683333,32,8,...,0,4,19,9,12,11,4,6.0,3.0,24.7
4,2022,2023-03-27,1610612765,1610612749,1630165,PG,1,37.300000,14,5,...,0,0,17,6,5,2,4,3.0,0.0,8.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21,2023,2024-03-19,1610612757,1610612741,1641871,C,0,16.950000,14,0,...,0,0,6,5,2,2,3,3.0,2.0,11.3
22,2023,2024-03-19,1610612757,1610612741,1641712,PG,0,3.683333,0,0,...,0,0,2,0,0,0,0,1.0,0.0,-1.1
23,2023,2024-03-19,1610612757,1610612741,1630641,C,0,0.000000,0,0,...,0,0,0,0,0,0,0,0.0,0.0,0.0
24,2023,2024-03-19,1610612757,1610612741,1629650,C,0,0.000000,0,0,...,0,0,0,0,0,0,0,0.0,0.0,0.0


# Save dataframe to CSV format

In [33]:
gamelogs_df.to_csv("nba-player-gamelogs.csv", index=False)