In [1]:
from nba_api.stats.static import players,teams
import pandas as pd
import requests
import sys
import os
import time
from datetime import datetime

def format_date_to_url(date):
    """Return a ``YYYYMMDD`` date as the URL-encoded string ``MM%2FDD%2FYYYY``.

    Args:
        date: The date as an int or str in ``YYYYMMDD`` form; it is passed
            through ``str()`` before parsing.

    Returns:
        The month, day and year joined by ``%2F`` (a URL-encoded ``/``).

    Raises:
        ValueError: If ``str(date)`` does not match ``%Y%m%d``.
    """
    parsed = datetime.strptime(str(date), '%Y%m%d')

    # Render the three fields in one strftime call, then join them with the
    # percent-encoded slash expected in the query string.
    month, day, year = parsed.strftime('%m %d %Y').split(' ')
    return '%2F'.join((month, day, year))

# Example usage: format_date_to_url(20241022) returns '10%2F22%2F2024'

def pull_data(url):
    """Fetch one stats.nba.com endpoint and return its rows as a DataFrame.

    Two payload layouts are handled:

    * ``resultSets`` is a list: the first entry's ``headers`` and ``rowSet``
      are used.
    * ``resultSets`` is a single mapping: the column names are read from
      ``headers[1]['columnNames']``, matching how the original code handled
      the non-list layout.

    Args:
        url: Fully built request URL.

    Returns:
        A ``pandas.DataFrame`` with one row per ``rowSet`` record.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
        KeyError / IndexError: If the payload lacks the expected keys.
    """
    headers = {
        "Host": "stats.nba.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Referer": "https://stats.nba.com/",
        "Origin": "https://stats.nba.com",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
    }

    # Without a timeout a single stalled connection blocks the whole scrape
    # indefinitely; with one, the caller's per-date error handling can move on.
    response = requests.get(url, headers=headers, timeout=60)
    # Surface HTTP errors directly instead of as a confusing JSON/KeyError.
    response.raise_for_status()
    payload = response.json()

    result_sets = payload["resultSets"]
    if isinstance(result_sets, dict):
        # Single-mapping layout: second header entry carries the column names.
        data = result_sets["rowSet"]
        columns = result_sets["headers"][1]['columnNames']
    else:
        # List layout: only the first result set is consumed.
        data = result_sets[0]["rowSet"]
        columns = result_sets[0]["headers"]

    df = pd.DataFrame.from_records(data, columns=columns)

    # Pause between successive requests so back-to-back calls are spaced out.
    time.sleep(1.2)
    return df


def pull_game_level(dateframe, start_year,end_year,ps=False):
    """Download per-date player stats for each season and cache them as CSVs.

    For every game date of every season, a series of stats.nba.com endpoints
    is queried with ``DateFrom == DateTo`` and the results are left-merged on
    ``PLAYER_ID`` into one frame per date. Progress is checkpointed to
    ``year_files/<year><trail>_games.csv`` every 10 dates, and dates already
    present in that file are skipped on the next run.

    Args:
        dateframe: DataFrame with at least ``year`` and ``GAME_DATE`` columns;
            ``GAME_DATE`` values must be parseable as ``YYYYMMDD``.
        start_year: First season to pull (inclusive). The season string is
            built as ``f"{year - 1}-{yy}"``, so ``year`` is the year in which
            the season ends.
        end_year: Season at which to stop (exclusive).
        ps: When truthy, pull playoff data and use the ``ps`` file suffix.

    Returns:
        All seasons' frames concatenated, or an empty DataFrame when nothing
        could be loaded or downloaded.
    """
    stype = 'Regular%20Season'
    trail=''
    if ps:
        # Previously this was the literal text '{stype}', which produced an
        # invalid SeasonType query value for every playoff request.
        stype='Playoffs'
        trail='ps'
    dframes = []
    shotcolumns = ['FGA_FREQUENCY', 'FGM', 'FGA', 'FG_PCT', 'EFG_PCT', 'FG2A_FREQUENCY', 'FG2M', 'FG2A', 'FG2_PCT',
                   'FG3A_FREQUENCY', 'FG3M', 'FG3A', 'FG3_PCT']

    unit='Player'
    # The checkpoint and final writes both target this directory.
    os.makedirs('year_files', exist_ok=True)

    for year in range(start_year, end_year):
        count=0
        countframe=dateframe[dateframe.year==year].reset_index()
        print(len(dateframe))
        print(len(countframe))
        year_frame=[]
        year_path = 'year_files/'+str(year)+trail+'_games.csv'

        year_dates = countframe['GAME_DATE'].unique().tolist()
        if os.path.exists(year_path):
            df= pd.read_csv(year_path)

            year_frame.append(df)

            # Build the lookup once instead of re-listing the column per date.
            done_dates = set(df['date'].unique().tolist())
            year_dates=[int(date) for date in year_dates if date not in done_dates]
            year_dates=year_dates[::-1]


        season = str(year - 1) + '-' + str(year)[-2:]
        print(year_dates)
        for date in year_dates:
            # Bound before the try so the except handler can always report it.
            date_num = date
            try:
                date_num = int(date)
                date = format_date_to_url(date)


                url = f'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType={stype}&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='

                df = pull_data(url)

                url2 = f'https://stats.nba.com/stats/leaguedashplayerstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType={stype}&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df2 = pull_data(url2)

                url3 = f'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam={unit}&PlayerPosition=&PtMeasureType=Passing&Season={season}&SeasonSegment=&SeasonType={stype}&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df3 = pull_data(url3)

                url4 = f'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam={unit}&PlayerPosition=&PtMeasureType=Drives&Season={season}&SeasonSegment=&SeasonType={stype}&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df4 = pull_data(url4)

                url5 = f'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam={unit}&PlayerPosition=&PtMeasureType=Possessions&Season={season}&SeasonSegment=&SeasonType={stype}&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df5 = pull_data(url5)

                url6 = f'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam={unit}&PlayerPosition=&PtMeasureType=Rebounding&Season={season}&SeasonSegment=&SeasonType={stype}&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df6 = pull_data(url6)

                # Shot frames by closest-defender distance; each gets a
                # prefix so the shared shot columns do not collide on merge.
                url7 = f'https://stats.nba.com/stats/leaguedashplayerptshot?CloseDefDistRange=0-2%20Feet%20-%20Very%20Tight&College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&DribbleRange=&GameScope=&GameSegment=&GeneralRange=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season={season}&SeasonSegment=&SeasonType={stype}&ShotClockRange=&ShotDistRange=&StarterBench=&TeamID=0&TouchTimeRange=&VsConference=&VsDivision=&Weight='
                df7 = pull_data(url7)

                term = 'very_tight_'
                df7.rename(columns={col: term + col for col in shotcolumns}, inplace=True)

                url8 = 'https://stats.nba.com/stats/leaguedashplayerptshot?CloseDefDistRange=2-4%20Feet%20-%20Tight&College=&Conference=&Country=&DateFrom=' + date + '&DateTo=' + date + '&Division=&DraftPick=&DraftYear=&DribbleRange=&GameScope=&GameSegment=&GeneralRange=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season=' + season + '&SeasonSegment=&SeasonType='+stype+'&ShotClockRange=&ShotDistRange=&StarterBench=&TeamID=0&TouchTimeRange=&VsConference=&VsDivision=&Weight='
                df8 = pull_data(url8)
                term = 'tight_'
                df8.rename(columns={col: term + col for col in shotcolumns},inplace=True)

                url9 = 'https://stats.nba.com/stats/leaguedashplayerptshot?CloseDefDistRange=4-6%20Feet%20-%20Open&College=&Conference=&Country=&DateFrom=' + date + '&DateTo=' + date + '&Division=&DraftPick=&DraftYear=&DribbleRange=&GameScope=&GameSegment=&GeneralRange=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season=' + season + '&SeasonSegment=&SeasonType='+stype+'&ShotClockRange=&ShotDistRange=&StarterBench=&TeamID=0&TouchTimeRange=&VsConference=&VsDivision=&Weight='
                df9 = pull_data(url9)
                term = 'open_'
                df9.rename(columns={col: term + col for col in shotcolumns},inplace=True)

                url10 = 'https://stats.nba.com/stats/leaguedashplayerptshot?CloseDefDistRange=6%2B%20Feet%20-%20Wide%20Open&College=&Conference=&Country=&DateFrom=' + date + '&DateTo=' + date + '&Division=&DraftPick=&DraftYear=&DribbleRange=&GameScope=&GameSegment=&GeneralRange=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season=' + season + '&SeasonSegment=&SeasonType='+stype+'&ShotClockRange=&ShotDistRange=&StarterBench=&TeamID=0&TouchTimeRange=&VsConference=&VsDivision=&Weight='
                df10 = pull_data(url10)
                term = 'wide_open_'
                df10.rename(columns={col: term + col for col in shotcolumns},inplace=True)

                url11 = 'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom=' + date + '&DateTo=' + date + '&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=PullUpShot&Season=' + season + '&SeasonSegment=&SeasonType='+stype+'&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df11 = pull_data(url11)
                shotcolumns2=shotcolumns+['EFG%']
                term='pullup_'
                # rename() ignores keys that are not present, so only the
                # columns this endpoint actually shares get the prefix.
                df11.rename(columns={col: term + col for col in shotcolumns2},inplace=True)

                url12 = 'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom=' + date + '&DateTo=' + date + '&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=Efficiency&Season=' + season + '&SeasonSegment=&SeasonType='+stype+'&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df12 = pull_data(url12)

                url13=f"https://stats.nba.com/stats/leaguedashplayershotlocations?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&DistanceRange=By%20Zone&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType={stype}&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
                df13=pull_data(url13)

                # Positional relabeling of the by-zone columns.
                # NOTE(review): assumes the endpoint returns the zones in
                # exactly this order - confirm against a live response.
                zone_columns=['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'AGE', 'NICKNAME',
                              'RA_FGM', 'RA_FGA', 'RA_FG_PCT',                                      # Restricted Area
                              'ITP_FGM', 'ITP_FGA', 'ITP_FG_PCT',                                   # In The Paint (Non-RA)
                              'MID_FGM', 'MID_FGA', 'MID_FG_PCT',                                   # Mid Range
                              'LEFT_CORNER_3_FGM', 'LEFT_CORNER_3_FGA', 'LEFT_CORNER_3_FG_PCT',     # Left Corner 3
                              'RIGHT_CORNER_3_FGM', 'RIGHT_CORNER_3_FGA', 'RIGHT_CORNER_3_FG_PCT',  # Right Corner 3
                              'ABOVE_BREAK_3_FGM', 'ABOVE_BREAK_3_FGA', 'ABOVE_BREAK_3_FG_PCT',     # Above the Break 3
                              'BACKCOURT_FGM', 'BACKCOURT_FGA', 'BACKCOURT_FG_PCT',                 # Backcourt
                              'CORNER_3_FGM', 'CORNER_3_FGA', 'CORNER_3_FG_PCT']                    # All Corner 3s

                df13.columns=zone_columns

                # Fixed: the query previously read 'DateFrom{date}=', which
                # sent an unknown parameter and left DateFrom unset.
                url14=f"https://stats.nba.com/stats/leaguedashptdefend?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&DefenseCategory=Less%20Than%206Ft&Division=&DraftPick=&DraftYear=&GameSegment=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&Season={season}&SeasonSegment=&SeasonType={stype}&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
                df14=pull_data(url14)
                # Align the defender id with the merge key used below.
                df14.rename(columns={'CLOSE_DEF_PERSON_ID':'PLAYER_ID'},inplace=True)

                url15=f"https://stats.nba.com/stats/leaguedashplayershotlocations?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&DistanceRange=5ft%20Range&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType={stype}&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="
                df15=pull_data(url15)
                df15.columns=['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBR', 'AGE', 'NICKNAME',
                 'FGM_LT_5', 'FGA_LT_5', 'FGP_LT_5',      # Less than 5 feet
                 'FGM_5_9', 'FGA_5_9', 'FGP_5_9',         # 5-9 feet
                 'FGM_10_14', 'FGA_10_14', 'FGP_10_14',   # 10-14 feet
                 'FGM_15_19', 'FGA_15_19', 'FGP_15_19',   # 15-19 feet
                 'FGM_20_24', 'FGA_20_24', 'FGP_20_24',   # 20-24 feet
                 'FGM_25_29', 'FGA_25_29', 'FGP_25_29',   # 25-29 feet
                 'FGM_30_34', 'FGA_30_34', 'FGP_30_34',   # 30-34 feet
                 'FGM_35_39', 'FGA_35_39', 'FGP_35_39',   # 35-39 feet
                 'FGM_40_PLUS', 'FGA_40_PLUS', 'FGP_40_PLUS'  # 40+ feet
                ]

                url16 = f'https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=&PerMode=Totals&PlayerExperience=&PlayerOrTeam={unit}&PlayerPosition=&PtMeasureType=CatchShoot&Season={season}&SeasonSegment=&SeasonType={stype}&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df16=pull_data(url16)

                # Fixed: SeasonType was hard-coded to 'Playoffs', so
                # regular-season pulls asked for the wrong season type.
                url17 = f'https://stats.nba.com/stats/leaguedashteamstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&GameSegment=&Height=&ISTRound=&LastNGames=0&LeagueID=00&Location=&MeasureType=Advanced&Month=0&OpponentTeamID=0&Outcome=&PORound=&PaceAdjust=N&PerMode=Totals&Period=0&PlayerExperience=&PlayerPosition=&PlusMinus=N&Rank=N&Season={season}&SeasonSegment=&SeasonType={stype}&ShotClockRange=&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight='
                df17 = pull_data(url17)
                poss_map=dict(zip(df17['TEAM_ID'],df17['POSS']))

                df['team_poss']=df['TEAM_ID'].map(poss_map)

                frames = [df2, df3, df4, df5, df6, df7, df8, df9, df10,df11,df12,df13,df14,df15,df16]
                for frame in frames:
                    # Bring over only columns the base frame lacks, plus the key.
                    joined_columns = list(set(frame.columns) - set(df.columns))
                    joined_columns.append('PLAYER_ID')
                    frame = frame[joined_columns]

                    df = df.merge(frame, on='PLAYER_ID',how='left').reset_index(drop=True)

                df['year'] = year
                df['date']=date_num

                year_frame.append(df)
                count+=1
                print(date_num)
                if count %10==0:
                    # Checkpoint into year_files/ so an interrupted run is
                    # picked up by the resume check at the top of the loop.
                    yeardata=pd.concat(year_frame)
                    print(len(yeardata))
                    yeardata['playoffs']=ps
                    yeardata.to_csv(year_path,index=False)
            except Exception as e:
                # Best effort: report the failed date and continue; it will
                # be retried on the next run because it is not in the CSV.
                print(str(e))
                print(str(date_num))
                time.sleep(1)

        if not year_frame:
            # Nothing cached and nothing downloaded: concat would raise.
            print(f"Year: {year}")
            continue

        yeardata=pd.concat(year_frame)
        print(len(yeardata))
        yeardata['playoffs']=ps
        yeardata.to_csv(year_path,index=False)
        dframes.append(yeardata)
        print(f"Year: {year}")

    if not dframes:
        return pd.DataFrame()
    total = pd.concat(dframes)
    return total

# Season bounds passed to get_dates() below. They are consumed via
# range(start_year, end_year), so end_year itself is not processed.
start_year=2014
end_year=2026



def get_dates(start_year,end_year):
    """Collect the distinct player/team/game rows from the per-team shot files.

    For each year in ``range(start_year, end_year)`` and each team returned by
    ``teams.get_teams()``, the file
    ``../../shot_data/team/<year>/<team_id>.csv`` is read when it exists.

    Args:
        start_year: First year directory to read (inclusive).
        end_year: Year at which to stop (exclusive).

    Returns:
        A DataFrame with columns ``PLAYER_ID``, ``TEAM_ID``, ``HTM``, ``VTM``,
        ``GAME_DATE``, ``GAME_ID`` and ``year``, de-duplicated within each
        file. Row indices are kept from the source files and may repeat.

    Raises:
        ValueError: If no shot file exists for the requested range.
    """
    wanted = ['PLAYER_ID','TEAM_ID','HTM','VTM','GAME_DATE','GAME_ID']
    # The team list does not depend on the year, so fetch it once.
    team_ids = [team['id'] for team in teams.get_teams()]

    dates=[]
    for year in range(start_year,end_year):
        for team_id in team_ids:
            path = '../../shot_data/team/'+str(year)+'/'+str(team_id)+'.csv'
            if not os.path.exists(path):
                continue
            # Chain into new frames rather than mutating a column slice in
            # place, which triggers pandas' SettingWithCopyWarning.
            df = pd.read_csv(path)[wanted].drop_duplicates().assign(year=year)
            dates.append(df)

    if not dates:
        raise ValueError(
            'No shot data files found under ../../shot_data/team/ for years '
            + str(start_year) + '-' + str(end_year - 1)
        )
    return pd.concat(dates)
# Build the game index for the configured seasons; this reads the per-team
# shot CSVs from disk as soon as the cell runs.
dateframe=get_dates(start_year,end_year)

# Distinct GAME_DATE values across every loaded season.
dates=dateframe['GAME_DATE'].unique().tolist()

#df= pull_game_level(dateframe,start_year,end_year)
#data=pull_game_level(dates)
#df

In [2]:
# Split each season's player-date log into one CSV per game.
#
# The season files carry no GAME_ID, so it is attached from `dateframe`
# (built from the shot files) using a cascade of fallbacks:
#   1. match on (TEAM_ID, date, year);
#   2. match on (PLAYER_ID, date, year), taking the team from the shot data;
#   3. try the player's teams from index_master for that year, one at a time.
# Rows still unmatched after that are printed and left out of the output.
frames= []
count=0
index_master=pd.read_csv('index_master.csv')
# .copy() so the casts below modify an independent frame, not a filtered view.
index_master=index_master[index_master.team!='TOT'].copy()
index_master['team_id']=index_master['team_id'].astype(int)
index_master['nba_id']=index_master['nba_id'].astype(int)

# Number of index_master rows tried per player in fallback 3; the original
# code hard-coded three copies of the same pass (iloc[0], iloc[1], iloc[2]).
MAX_TEAM_FALLBACKS = 3
team_date_keys = ['TEAM_ID', 'date', 'year']

for year in range(2014, 2026):
    # Load the game data for the specific year.
    df = pd.read_csv(f'year_files/{year}_games.csv')

    team_map=dict(zip(df['TEAM_ID'],df['TEAM_ABBREVIATION']))

    # Filter index_master for the current year.
    year_index = index_master[index_master['year'] == year].reset_index()

    # The per-game files are written into a directory named after the year.
    os.makedirs(str(year), exist_ok=True)

    # Process each unique date in the dataset.
    for date in df['date'].unique().tolist():
        datedf=df[df.date==date].reset_index(drop=True)
        datedf=datedf.drop_duplicates(subset=['PLAYER_ID','date'])

        # Shot-data rows for this date. drop=True keeps the old row index from
        # leaking into the output as a stray 'index' column.
        gameframe = dateframe[dateframe['GAME_DATE'] == date].reset_index(drop=True)
        gameframe = gameframe.rename(columns={'GAME_DATE':'date'})
        # Unique team/game pairs for the date.
        to_merge = gameframe[['TEAM_ID', 'GAME_ID', 'date', 'year']].drop_duplicates().reset_index(drop=True)

        # Attempt 1: match on the team recorded in the stats row.
        save_frame=datedf.merge(to_merge,on=team_date_keys,how='left')
        save_frame=save_frame.drop_duplicates()

        # Attempt 2: the recorded team did not play that day; look the player
        # up directly in the shot data and take team and game from there.
        unresolved=save_frame['GAME_ID'].isna()
        if unresolved.any():
            missing=save_frame[unresolved].reset_index(drop=True)
            missing=missing.drop(columns=['GAME_ID','TEAM_ID','TEAM_ABBREVIATION'])

            missing=missing.merge(gameframe,on=['PLAYER_ID','year','date'],how='left')
            missing['TEAM_ABBREVIATION']=missing['TEAM_ID'].map(team_map)

            save_frame=pd.concat([save_frame[~unresolved],missing],ignore_index=True)

        # Attempt 3: walk through the player's index_master teams in row order.
        for attempt in range(MAX_TEAM_FALLBACKS):
            unresolved=save_frame['GAME_ID'].isna()
            if not unresolved.any():
                break

            missing=save_frame[unresolved].reset_index(drop=True)
            missing=missing.drop(columns='GAME_ID')
            missed=[]

            for missed_player in missing['PLAYER_ID'].unique().tolist():
                missing_frame=missing[missing.PLAYER_ID==missed_player].reset_index(drop=True)
                temp_index=year_index[year_index.nba_id==missed_player].reset_index(drop=True)

                if len(temp_index) > attempt:
                    candidate=temp_index.iloc[attempt]
                    missing_frame['TEAM_ID']=int(candidate['team_id'])
                    missing_frame['TEAM_ABBREVIATION']=candidate['team']
                    missing_frame=missing_frame.merge(to_merge,on=team_date_keys,how='left')
                # else: no further team to try. The frame has no GAME_ID
                # column, so the concat below fills it with NaN and the row
                # stays unresolved instead of raising IndexError.
                missed.append(missing_frame)

            save_frame=pd.concat([save_frame[~unresolved],*missed],ignore_index=True)

        # Report whatever could not be matched, then drop it: casting a
        # column that still contains NaN to int would raise.
        unresolved=save_frame['GAME_ID'].isna()
        if unresolved.any():
            print('test point')
            print(save_frame[unresolved])

        # Remove any duplicate entries after the merges.
        save_frame=save_frame[~unresolved].drop_duplicates().astype({'GAME_ID': int})

        # Save each game by unique GAME_ID.
        for game_id in save_frame['GAME_ID'].unique():
            gameid_frame = save_frame[save_frame['GAME_ID'] == game_id].reset_index(drop=True)
            gameid_frame.to_csv(f'{year}/{game_id}.csv', index=False)
            count += 1
        

In [3]:
# Per-team, per-date totals of selected stats, restricted to two teams.
# `df` is whatever frame an earlier cell left in scope.
selected_teams=['LAL','MIN']
stat_columns = [
    'very_tight_FG3A','wide_open_FG3A','open_FG3A','tight_FG3A',
    'very_tight_FG3M','wide_open_FG3M','open_FG3M','tight_FG3M',
    'FGA','FTA','PULL_UP_FGA','PULL_UP_FGM','PULL_UP_FG3M','DRIVES',
    'POTENTIAL_AST','TOV','RA_FGA','FRONT_CT_TOUCHES',
]

# Sum every numeric column per group, then keep only the stats of interest.
team_totals = df.groupby(['TEAM_ID','TEAM_ABBREVIATION','date']).sum(numeric_only=True)
sumframe = team_totals[stat_columns].reset_index()

is_selected = sumframe.TEAM_ABBREVIATION.isin(selected_teams)
sumframe = sumframe[is_selected]

sumframe


Unnamed: 0,TEAM_ID,TEAM_ABBREVIATION,date,very_tight_FG3A,wide_open_FG3A,open_FG3A,tight_FG3A,very_tight_FG3M,wide_open_FG3M,open_FG3M,...,FGA,FTA,PULL_UP_FGA,PULL_UP_FGM,PULL_UP_FG3M,DRIVES,POTENTIAL_AST,TOV,RA_FGA,FRONT_CT_TOUCHES
24,1610612747,LAL,20241022,0.0,18.0,9.0,3.0,0.0,3.0,1.0,...,95,25,21,7,1.0,34.0,54.0,7,35.0,268.0
25,1610612747,LAL,20241025,0.0,13.0,9.0,5.0,0.0,10.0,3.0,...,75,39,18,6,4.0,33.0,38.0,12,30.0,187.0
26,1610612747,LAL,20241026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,92,24,0,0,0.0,0.0,0.0,19,31.0,0.0
31,1610612750,MIN,20241022,0.0,16.0,21.0,4.0,0.0,4.0,9.0,...,85,27,28,9,4.0,42.0,42.0,15,27.0,211.0
32,1610612750,MIN,20241024,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,86,17,0,0,0.0,0.0,0.0,16,7.0,0.0
33,1610612750,MIN,20241026,0.0,10.0,19.0,8.0,0.0,3.0,7.0,...,83,26,25,9,6.0,48.0,41.0,13,22.0,235.0


In [4]:
# Display the column labels of the filtered team-by-date summary.
sumframe.columns

Index(['TEAM_ID', 'TEAM_ABBREVIATION', 'date', 'very_tight_FG3A',
       'wide_open_FG3A', 'open_FG3A', 'tight_FG3A', 'very_tight_FG3M',
       'wide_open_FG3M', 'open_FG3M', 'tight_FG3M', 'FGA', 'FTA',
       'PULL_UP_FGA', 'PULL_UP_FGM', 'PULL_UP_FG3M', 'DRIVES', 'POTENTIAL_AST',
       'TOV', 'RA_FGA', 'FRONT_CT_TOUCHES'],
      dtype='object')

In [5]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

def get_colors():
    """Return the primary and secondary colour maps for the 30 NBA teams.

    Returns:
        A ``(primary, secondary)`` tuple of dicts, each mapping a team
        abbreviation (e.g. ``'LAL'``) to a ``'#RRGGBB'`` hex string. Both
        dicts are rebuilt on every call and share the same key order.
    """
    # One row per team: (abbreviation, primary hex, secondary hex).
    palette = (
        # Eastern Conference - Atlantic Division
        ('BOS', '#007A33', '#BA9653'),  # Boston Celtics - Green / Gold
        ('BKN', '#000000', '#FFFFFF'),  # Brooklyn Nets - Black / White
        ('NYK', '#006BB6', '#F58426'),  # New York Knicks - Blue / Orange
        ('PHI', '#006BB6', '#ED174C'),  # Philadelphia 76ers - Blue / Red
        ('TOR', '#CE1141', '#000000'),  # Toronto Raptors - Red / Black
        # Eastern Conference - Central Division
        ('CHI', '#CE1141', '#000000'),  # Chicago Bulls - Red / Black
        ('CLE', '#860038', '#FDBB30'),  # Cleveland Cavaliers - Wine / Gold
        ('DET', '#C8102E', '#1D42BA'),  # Detroit Pistons - Red / Blue
        ('IND', '#002D62', '#FDBB30'),  # Indiana Pacers - Navy / Gold
        ('MIL', '#00471B', '#EEE1C6'),  # Milwaukee Bucks - Green / Cream
        # Eastern Conference - Southeast Division
        ('ATL', '#E03A3E', '#C1D32F'),  # Atlanta Hawks - Red / Volt Green
        ('CHA', '#1D1160', '#00788C'),  # Charlotte Hornets - Purple / Teal
        ('MIA', '#98002E', '#F9A01B'),  # Miami Heat - Red / Yellow
        ('ORL', '#0077C0', '#C4CED4'),  # Orlando Magic - Blue / Silver
        ('WAS', '#002B5C', '#E31837'),  # Washington Wizards - Navy / Red
        # Western Conference - Northwest Division
        ('DEN', '#0E2240', '#FEC524'),  # Denver Nuggets - Navy / Gold
        ('MIN', '#0C2340', '#236192'),  # Minnesota Timberwolves - Navy / Blue
        ('OKC', '#007AC1', '#EF3B24'),  # Oklahoma City Thunder - Blue / Orange
        ('POR', '#E03A3E', '#000000'),  # Portland Trail Blazers - Red / Black
        ('UTA', '#002B5C', '#00A9E0'),  # Utah Jazz - Navy / Light Blue
        # Western Conference - Pacific Division
        ('GSW', '#1D428A', '#FFC72C'),  # Golden State Warriors - Blue / Gold
        ('LAC', '#C8102E', '#1D428A'),  # LA Clippers - Red / Blue
        ('LAL', '#552583', '#FDB927'),  # Los Angeles Lakers - Purple / Gold
        ('PHX', '#1D1160', '#E56020'),  # Phoenix Suns - Purple / Orange
        ('SAC', '#5A2D81', '#63727A'),  # Sacramento Kings - Purple / Silver
        # Western Conference - Southwest Division
        ('DAL', '#00538C', '#002B5E'),  # Dallas Mavericks - Blue / Navy
        ('HOU', '#CE1141', '#000000'),  # Houston Rockets - Red / Black
        ('MEM', '#5D76A9', '#12173F'),  # Memphis Grizzlies - Blue / Navy
        ('NOP', '#0C2340', '#C8102E'),  # New Orleans Pelicans - Navy / Red
        ('SAS', '#C4CED4', '#000000'),  # San Antonio Spurs - Silver / Black
    )

    NBA_TEAM_COLORS = {abbr: primary for abbr, primary, _ in palette}
    NBA_TEAM_SECONDARY_COLORS = {abbr: secondary for abbr, _, secondary in palette}
    return NBA_TEAM_COLORS,NBA_TEAM_SECONDARY_COLORS




def _team_bar(name, labels, values, fill_color, outline_color, showlegend):
    """Build one team's bar trace using the shared fill-plus-outline styling."""
    return go.Bar(
        name=name,
        x=labels,
        y=values,
        marker=dict(
            color=fill_color,
            # Thick outline in the team's secondary color.
            line=dict(color=outline_color, width=5),
        ),
        showlegend=showlegend,
    )


def _add_comparison_panel(fig, teams, columns, labels, row, col, showlegend):
    """Add one bar trace per team to the subplot at (row, col).

    ``teams`` is an iterable of ``(frame, name, fill_color, outline_color)``
    tuples; each team's bar heights are read from the first row of its frame.
    """
    for frame, name, fill_color, outline_color in teams:
        values = [frame[column].iloc[0] for column in columns]
        fig.add_trace(
            _team_bar(name, list(labels), values, fill_color, outline_color, showlegend),
            row=row,
            col=col,
        )


def create_team_comparison(df, team1, team2):
    """Build a grouped-bar figure comparing two teams across five stat panels.

    The rows of ``df`` belonging to the two teams are summed per
    ``(TEAM_ID, TEAM_ABBREVIATION, date)`` group, 3PT percentages are derived
    for each defender-distance bucket, and the first aggregated row of each
    team is drawn side by side in a 3x2 subplot grid (the sixth cell stays
    empty).

    NOTE(review): only the first aggregated row per team is plotted and the
    title date comes from ``team2``'s first row, so this presumably expects
    ``df`` to hold a single game -- confirm against the caller.

    Args:
        df: DataFrame with ``TEAM_ID``, ``TEAM_ABBREVIATION``, ``date`` and the
            numeric stat columns listed in ``stat_columns`` below.
        team1: Abbreviation of the first team; must be a key of both
            dictionaries returned by ``get_colors()``.
        team2: Abbreviation of the second team; same requirement.

    Returns:
        The figure object produced by ``make_subplots`` with all traces and
        layout applied.

    Raises:
        KeyError: If a required column is missing from ``df`` or a team
            abbreviation is missing from the color dictionaries.
        IndexError: If either team has no rows in ``df``.
    """
    group_keys = ['TEAM_ID', 'TEAM_ABBREVIATION', 'date']
    stat_columns = [
        'very_tight_FG3A', 'wide_open_FG3A', 'open_FG3A', 'tight_FG3A',
        'very_tight_FG3M', 'wide_open_FG3M', 'open_FG3M', 'tight_FG3M',
        'CATCH_SHOOT_FGA', 'FGA', 'FTA', 'PULL_UP_FGA', 'PULL_UP_FGM',
        'PULL_UP_FG3M', 'DRIVES', 'OREB', 'DREB', 'POTENTIAL_AST', 'TOV',
        'RA_FGA', 'FRONT_CT_TOUCHES', 'FGM_LT_5',
    ]

    # Only the two requested teams are ever plotted, so drop every other team
    # before aggregating instead of summing the whole table and filtering after.
    selected_rows = df[df['TEAM_ABBREVIATION'].isin([team1, team2])]
    sumframe = (
        selected_rows.groupby(group_keys)
        .sum(numeric_only=True)[stat_columns]
        .reset_index()
    )

    main_colors, secondary_colors = get_colors()

    # 3PT percentage per defender-distance bucket. A bucket with zero attempts
    # divides by zero, which pandas turns into NaN/inf rather than an error.
    for bucket in ('very_tight', 'wide_open', 'open', 'tight'):
        sumframe[f"{bucket}_fg3%"] = (
            100 * sumframe[f"{bucket}_FG3M"] / sumframe[f"{bucket}_FG3A"]
        )

    team1color = main_colors[team1]
    team2color = main_colors[team2]
    team1second_color = secondary_colors[team1]
    team2second_color = secondary_colors[team2]

    df1 = sumframe[sumframe.TEAM_ABBREVIATION == team1]
    df2 = sumframe[sumframe.TEAM_ABBREVIATION == team2]

    team1_name = df1['TEAM_ABBREVIATION'].iloc[0]
    team2_name = df2['TEAM_ABBREVIATION'].iloc[0]
    date = df2['date'].iloc[0]

    # 3x2 grid; titles are assigned row by row, so the fifth title lands on
    # (row 3, col 1) and the last cell is left without a subplot title.
    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=("3PT Shot Distribution", "3PT Shot Making", "Possessions", "Shot Selection", "Creation"),
        vertical_spacing=0.15,
        horizontal_spacing=0.1
    )

    distance_labels = ['Very Tight', 'Tight', 'Open', 'Wide Open']

    # (row, col, source columns, x-axis labels) for each panel, in draw order.
    panels = [
        # 1. 3PT shot distribution (top left)
        (1, 1, ['very_tight_FG3A', 'tight_FG3A', 'open_FG3A', 'wide_open_FG3A'], distance_labels),
        # 2. 3PT shot making (top right)
        (1, 2, ['very_tight_fg3%', 'tight_fg3%', 'open_fg3%', 'wide_open_fg3%'], distance_labels),
        # 3. Possessions: turnovers and offensive rebounds (middle left)
        (2, 1, ['TOV', 'OREB'], ['Turnovers', 'OREB']),
        # 4. Shot selection (middle right)
        # NOTE(review): 'FGM_LT_5' is plotted under the label 'Rim Attempt'
        # while 'RA_FGA' is aggregated but never used -- confirm which is intended.
        (2, 2, ['FGM_LT_5', 'FTA', 'PULL_UP_FGA', 'CATCH_SHOOT_FGA'],
         ['Rim Attempt', 'FTA', 'Pull Up FGA', 'Catch&Shoot FGA']),
        # 5. Creation: potential assists and drives (bottom left)
        (3, 1, ['POTENTIAL_AST', 'DRIVES'], ['Potential Assists', 'Drives']),
    ]

    teams = [
        (df1, team1_name, team1color, team1second_color),
        (df2, team2_name, team2color, team2second_color),
    ]

    for index, (row, col, columns, labels) in enumerate(panels):
        # Only the first panel contributes legend entries, so each team is
        # listed once instead of once per panel.
        _add_comparison_panel(fig, teams, columns, labels, row, col, showlegend=(index == 0))

    # The title date is sliced as YYYY/MM/DD, so `date` is presumably a
    # YYYYMMDD value -- confirm against the data source.
    date_text = str(date)
    fig.update_layout(
        height=900,
        width=1350,
        title_text=f"{team1} vs {team2}<br>{date_text[0:4]}/{date_text[4:6]}/{date_text[6:]}",
        title_x=.5,
        title_font_size=30,
        barmode='group',
        bargap=0.25,
        template='plotly_white',
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )

    fig.update_yaxes(title_text="Attempts", row=1, col=1)
    fig.update_yaxes(title_text="FG%", row=1, col=2)
    fig.update_yaxes(title_text="Per Game", row=2, col=1)
    fig.update_yaxes(title_text="Total", row=2, col=2)
    fig.update_yaxes(title_text="Possession Creation", row=3, col=1)
    fig.update_xaxes(tickfont=dict(size=15))

    return fig

# Matchup to chart, given as team abbreviations.
team1, team2 = 'CLE', 'TOR'

# The relative path is resolved against the current working directory.
df = pd.read_csv('2025_games.csv')

# Build the comparison figure and render it.
fig = create_team_comparison(df, team1, team2)
fig.show()


FileNotFoundError: [Errno 2] No such file or directory: '2025_games.csv'

In [None]:
# NOTE(review): in the visible code `sumframe` is only assigned as a local
# inside create_team_comparison; unless another cell defines it at module
# level, this lookup raises NameError -- confirm.
sumframe['POTENTIAL_AST']