In [1]:
import pandas as pd

# Specify the file path
file_path = 'datasets/regular season/Player game logs.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Display the DataFrame headers
df.columns

Index(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2',
       'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK',
       'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK',
       'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK',
       'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK',
       'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK',
       'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK',
       'WNBA_FANTASY_PTS_RANK', 'AVAILABLE_FLAG', 'PlayerID', 'PlayerName'],
      dtype='object')

In [2]:
df = df.loc[:, :'PLUS_MINUS']
df.columns

Index(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS'],
      dtype='object')

In [3]:
# To calculate the efficiency of each game, we need to sum the points, rebounds, assists, steals, and blocks of a player, and then subtract the missed field goals, missed free throws, and turnovers.
df['Efficiency'] = (df['PTS'] + df['REB'] + df['AST'] + df['STL'] + df['BLK']) - (df['FGA'] - df['FGM']) - (df['FTA'] - df['FTM']) - df['TOV']

In [4]:
# Specify the file path
file_path = 'nba_game_matchups_2023_2024(include playyoffs).csv'

# Read the CSV file into a DataFrame
matchup_df = pd.read_csv(file_path)

matchup_df.info()

matchup_df.drop_duplicates(subset='GAME_ID', inplace=True)

matchup_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3704 entries, 0 to 3703
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   GAME_ID         3704 non-null   int64 
 1   HOME_TEAM_NAME  3704 non-null   object
 2   AWAY_TEAM_NAME  3704 non-null   object
dtypes: int64(1), object(2)
memory usage: 86.9+ KB
<class 'pandas.core.frame.DataFrame'>
Index: 1852 entries, 0 to 3700
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   GAME_ID         1852 non-null   int64 
 1   HOME_TEAM_NAME  1852 non-null   object
 2   AWAY_TEAM_NAME  1852 non-null   object
dtypes: int64(1), object(2)
memory usage: 57.9+ KB


In [5]:
df.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,Efficiency
0,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301190,2024-04-14T00:00:00,NYK vs. CHI,...,2,2,0,1,0,3,0,4,-2,9
1,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301175,2024-04-12T00:00:00,NYK vs. BKN,...,0,0,0,0,0,0,0,5,3,8
2,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301167,2024-04-11T00:00:00,NYK @ BOS,...,0,1,0,1,2,0,0,2,-9,2
3,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301139,2024-04-07T00:00:00,NYK @ MIL,...,0,0,0,0,0,1,0,0,5,-1
4,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301119,2024-04-05T00:00:00,NYK @ CHI,...,1,1,1,0,1,4,0,0,-2,3


In [6]:
import numpy as np

# Left join the two DataFrames on the GAME_ID column
merged_df = df.merge(matchup_df, on='GAME_ID', how='left')
merged_df.head()

merged_df['LOCATION'] = np.where(merged_df['HOME_TEAM_NAME'] == merged_df['TEAM_ABBREVIATION'], 'HOME', 'AWAY')
merged_df.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,Efficiency,HOME_TEAM_NAME,AWAY_TEAM_NAME,LOCATION
0,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301190,2024-04-14T00:00:00,NYK vs. CHI,...,1,0,3,0,4,-2,9,CHI,NYK,AWAY
1,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301175,2024-04-12T00:00:00,NYK vs. BKN,...,0,0,0,0,5,3,8,NYK,BKN,HOME
2,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301167,2024-04-11T00:00:00,NYK @ BOS,...,1,2,0,0,2,-9,2,NYK,BOS,HOME
3,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301139,2024-04-07T00:00:00,NYK @ MIL,...,0,0,1,0,0,5,-1,NYK,MIL,HOME
4,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301119,2024-04-05T00:00:00,NYK @ CHI,...,0,1,4,0,0,-2,3,NYK,CHI,HOME


In [7]:
# Check for missing values
merged_df.sample(10)

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,Efficiency,HOME_TEAM_NAME,AWAY_TEAM_NAME,LOCATION
838,2023-24,1630175,Cole Anthony,Cole,1610612753,ORL,Orlando Magic,22300172,2023-11-09T00:00:00,ORL vs. ATL,...,0,0,3,3,7,-3,1,ATL,ORL,AWAY
1964,2023-24,1630699,MarJon Beauchamp,MarJon,1610612749,MIL,Milwaukee Bucks,22300840,2024-02-27T00:00:00,MIL vs. CHA,...,0,0,0,0,7,-8,11,MIL,CHA,HOME
24060,2023-24,1629731,Dean Wade,Dean,1610612739,CLE,Cleveland Cavaliers,22300191,2023-11-13T00:00:00,CLE @ SAC,...,0,0,1,0,0,-18,0,SAC,CLE,AWAY
26292,2023-24,203469,Cody Zeller,Cody,1610612740,NOP,New Orleans Pelicans,22301157,2024-04-09T00:00:00,NOP @ POR,...,0,0,1,0,0,-10,1,POR,NOP,AWAY
17673,2023-24,1626220,Royce O'Neale,Royce,1610612756,PHX,Phoenix Suns,22300796,2024-02-22T00:00:00,PHX @ DAL,...,0,1,3,0,10,-7,13,PHX,DAL,HOME
12226,2023-24,203999,Nikola Jokic,Nikola,1610612743,DEN,Denver Nuggets,22300855,2024-02-29T00:00:00,DEN vs. MIA,...,2,0,4,5,18,12,26,MIA,DEN,AWAY
25901,2023-24,1629627,Zion Williamson,Zion,1610612740,NOP,New Orleans Pelicans,22300525,2024-01-10T00:00:00,NOP @ GSW,...,0,3,2,4,19,23,22,NOP,GSW,HOME
5424,2023-24,201939,Stephen Curry,Stephen,1610612744,GSW,Golden State Warriors,22300727,2024-02-07T00:00:00,GSW @ PHI,...,1,1,2,3,9,13,5,GSW,PHI,HOME
18038,2023-24,1630647,Eugene Omoruyi,Eugene,1610612764,WAS,Washington Wizards,22301137,2024-04-07T00:00:00,WAS @ TOR,...,0,1,1,4,4,-14,3,TOR,WAS,AWAY
3630,2023-24,1628973,Jalen Brunson,Jalen,1610612752,NYK,New York Knicks,22301201,2023-12-05T00:00:00,NYK @ MIL,...,0,0,1,8,24,-14,16,NYK,MIL,HOME


In [8]:
logo_df = pd.read_csv('datasets/nba_team_logos.csv')
player_headshot_df = pd.read_csv('datasets/nba_players_headshots_2023_2024.csv')

merged_df.head()

merged_df = merged_df.merge(logo_df, on='TEAM_NAME', how='left')

# Rename the column to match the column in the player_headshot_df DataFrame
player_headshot_df.rename(columns={'PERSON_ID': 'PLAYER_ID'}, inplace=True)

merged_df = merged_df.merge(player_headshot_df, on='PLAYER_ID', how='left')
merged_df.columns

Index(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID_x',
       'TEAM_ABBREVIATION_x', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'Efficiency',
       'HOME_TEAM_NAME', 'AWAY_TEAM_NAME', 'LOCATION', 'GP', 'W', 'L',
       'Logo_URL', 'DISPLAY_FIRST_LAST', 'TEAM_ID_y', 'TEAM_ABBREVIATION_y',
       'HEADSHOT_URL'],
      dtype='object')

In [9]:
merged_df.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID_x,TEAM_ABBREVIATION_x,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,AWAY_TEAM_NAME,LOCATION,GP,W,L,Logo_URL,DISPLAY_FIRST_LAST,TEAM_ID_y,TEAM_ABBREVIATION_y,HEADSHOT_URL
0,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301190,2024-04-14T00:00:00,NYK vs. CHI,...,NYK,AWAY,82,50,32,https://loodibee.com/wp-content/uploads/nba-ne...,Precious Achiuwa,1610612752,NYK,https://cdn.nba.com/headshots/nba/latest/1040x...
1,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301175,2024-04-12T00:00:00,NYK vs. BKN,...,BKN,HOME,82,50,32,https://loodibee.com/wp-content/uploads/nba-ne...,Precious Achiuwa,1610612752,NYK,https://cdn.nba.com/headshots/nba/latest/1040x...
2,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301167,2024-04-11T00:00:00,NYK @ BOS,...,BOS,HOME,82,50,32,https://loodibee.com/wp-content/uploads/nba-ne...,Precious Achiuwa,1610612752,NYK,https://cdn.nba.com/headshots/nba/latest/1040x...
3,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301139,2024-04-07T00:00:00,NYK @ MIL,...,MIL,HOME,82,50,32,https://loodibee.com/wp-content/uploads/nba-ne...,Precious Achiuwa,1610612752,NYK,https://cdn.nba.com/headshots/nba/latest/1040x...
4,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301119,2024-04-05T00:00:00,NYK @ CHI,...,CHI,HOME,82,50,32,https://loodibee.com/wp-content/uploads/nba-ne...,Precious Achiuwa,1610612752,NYK,https://cdn.nba.com/headshots/nba/latest/1040x...


In [10]:
average_data = pd.DataFrame(merged_df)


# List of columns to calculate the mean for
columns_to_average = ['MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
                      'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
                      'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'Efficiency']

# Create a dictionary for aggregation
agg_dict = {col: 'mean' for col in columns_to_average}

# Group by PLAYER_NAME, TEAM_NAME, and LOCATION and calculate the mean for the specified columns
average_stats = average_data.groupby(['PLAYER_NAME', 'PLAYER_ID','TEAM_NAME', 'LOCATION']).agg(agg_dict).reset_index()

# Calculate overall averages without grouping by location
overall_stats = average_data.groupby(['PLAYER_NAME', 'PLAYER_ID','TEAM_NAME']).agg(agg_dict).reset_index()
overall_stats['LOCATION'] = 'OVERALL'

# Concatenate the two DataFrames
final_stats = pd.concat([average_stats, overall_stats], ignore_index=True)

# Display the resulting DataFrame
final_stats.head()
final_stats = final_stats.round(2)
final_stats



Unnamed: 0,PLAYER_NAME,PLAYER_ID,TEAM_NAME,LOCATION,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,...,AST,TOV,STL,BLK,BLKA,PF,PFD,PTS,PLUS_MINUS,Efficiency
0,AJ Green,1631260,Milwaukee Bucks,AWAY,9.70,1.41,3.11,0.36,1.19,2.78,...,0.44,0.19,0.07,0.07,0.07,0.74,0.30,4.11,0.22,3.96
1,AJ Green,1631260,Milwaukee Bucks,HOME,12.15,1.55,3.86,0.36,1.28,3.24,...,0.62,0.24,0.24,0.07,0.03,1.00,0.41,4.86,1.52,4.31
2,AJ Griffin,1631100,Atlanta Hawks,AWAY,9.13,1.12,3.62,0.26,0.50,2.00,...,0.12,0.50,0.00,0.12,0.12,0.00,0.00,2.75,-4.75,0.75
3,AJ Griffin,1631100,Atlanta Hawks,HOME,8.14,0.75,2.75,0.33,0.50,1.92,...,0.33,0.33,0.08,0.08,0.17,0.50,0.08,2.17,-1.08,1.33
4,AJ Lawson,1630639,Dallas Mavericks,AWAY,9.37,1.32,3.16,0.33,0.37,1.37,...,0.58,0.47,0.37,0.11,0.32,0.58,0.47,3.63,0.74,3.53
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1942,Zach LaVine,203897,Chicago Bulls,OVERALL,34.87,6.80,15.04,0.43,2.36,6.76,...,3.92,2.08,0.84,0.32,0.80,2.28,2.80,19.48,-3.68,18.80
1943,Zavier Simpson,1630285,Memphis Grizzlies,OVERALL,23.05,2.43,7.71,0.28,0.71,2.43,...,3.57,1.43,1.00,0.43,1.00,1.57,0.43,6.00,-4.14,7.00
1944,Zeke Nnaji,1630192,Denver Nuggets,OVERALL,9.93,1.19,2.57,0.39,0.10,0.40,...,0.55,0.47,0.26,0.66,0.40,1.40,1.33,3.21,-2.90,4.64
1945,Ziaire Williams,1630533,Memphis Grizzlies,OVERALL,20.35,2.94,7.41,0.34,1.14,3.71,...,1.47,1.29,0.71,0.18,0.53,1.67,1.45,8.24,-4.84,8.10


In [11]:
df = pd.DataFrame(final_stats)

# Separate the dataset into HOME and AWAY
home_df = df[df['LOCATION'] == 'HOME'].groupby(['PLAYER_NAME', 'TEAM_NAME']).agg({'Efficiency': 'mean'}).reset_index()
home_df['LOCATION'] = 'HOME'

away_df = df[df['LOCATION'] == 'AWAY'].groupby(['PLAYER_NAME', 'TEAM_NAME']).agg({'Efficiency': 'mean'}).reset_index()
away_df['LOCATION'] = 'AWAY'

# Merge home and away dataframes on PLAYER_NAME and TEAM_NAME
merged_df = pd.merge(home_df, away_df, on=['PLAYER_NAME', 'TEAM_NAME'], suffixes=('_HOME', '_AWAY'))

# Calculate the difference in Efficiency between home and away
merged_df['Efficiency_DIFF'] = (merged_df['Efficiency_HOME'] - merged_df['Efficiency_AWAY']).abs()

merged_df = merged_df.sort_values(by='Efficiency_DIFF', ascending=False)

# Top 20 players with the highest difference in Efficiency between home and away games
merged_df.head(20)

Unnamed: 0,PLAYER_NAME,TEAM_NAME,Efficiency_HOME,LOCATION_HOME,Efficiency_AWAY,LOCATION_AWAY,Efficiency_DIFF
451,Mouhamed Gueye,Atlanta Hawks,-1.0,HOME,15.0,AWAY,16.0
465,Nicolas Batum,LA Clippers,2.5,HOME,16.0,AWAY,13.5
291,Jeff Dowtin Jr.,Philadelphia 76ers,16.0,HOME,2.62,AWAY,13.38
602,Tristan Vukcevic,Washington Wizards,2.0,HOME,14.17,AWAY,12.17
571,Taze Moore,Portland Trail Blazers,1.67,HOME,13.0,AWAY,11.33
549,Shake Milton,Detroit Pistons,17.0,HOME,6.0,AWAY,11.0
161,Dexter Dennis,Dallas Mavericks,-2.0,HOME,8.0,AWAY,10.0
421,Marques Bolden,Charlotte Hornets,12.0,HOME,2.75,AWAY,9.25
509,Quentin Grimes,Detroit Pistons,-1.0,HOME,7.0,AWAY,8.0
177,Dylan Windler,Atlanta Hawks,4.4,HOME,12.0,AWAY,7.6


In [12]:
final_stats['Season Type'] = 'Regular Season'

playoff_data = pd.read_csv('average_stats_playoffs.csv')

playoff_data['Season Type'] = 'Playoffs'

# Concatenate the regular season and playoff DataFrames
final_data = pd.concat([final_stats, playoff_data], ignore_index=True)



In [13]:
logo_df = pd.read_csv('datasets/nba_team_logos.csv')
player_headshot_df = pd.read_csv('datasets/nba_players_headshots_2023_2024.csv')

# Merge the final_data DataFrame with the logo_df DataFrame on TEAM_NAME
final_data = pd.merge(final_data, logo_df, on='TEAM_NAME')

# Merge the final_data DataFrame with the player_headshot_df DataFrame on PLAYER_ID
final_data = pd.merge(final_data, player_headshot_df, on='PLAYER_ID')

final_data.columns 


Index(['PLAYER_NAME', 'PLAYER_ID', 'TEAM_NAME', 'LOCATION', 'MIN', 'FGM',
       'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
       'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD',
       'PTS', 'PLUS_MINUS', 'Efficiency', 'Season Type', 'GP', 'W', 'L',
       'Logo_URL', 'DISPLAY_FIRST_LAST', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'HEADSHOT_URL'],
      dtype='object')

In [14]:
final_data['PLAYER_ID'] = final_data['PLAYER_ID'].astype(int)

position_df = pd.read_csv('active_nba_players_with_positions.csv')

# merge the position_df DataFrame with the final_data DataFrame
final_data = final_data.merge(position_df, on='PLAYER_ID', how='left')



In [15]:
regular_def = pd.read_csv('datasets/regular season/team_defense_stats_2023_2024.csv')
playoff_def = pd.read_csv('datasets/playoff/Playoff_team_defense_stats_2023_2024.csv')

In [16]:
playoff_def['Season Type'] = 'Playoffs'
regular_def['Season Type'] = 'Regular Season'

In [17]:
concatenated_data_def = pd.concat([regular_def, playoff_def], ignore_index=True)
concatenated_data_def.sample(10)

Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,DEF_RATING,DREB,DREB_PCT,...,DEF_RATING_RANK,DREB_RANK,DREB_PCT_RANK,STL_RANK,BLK_RANK,OPP_PTS_OFF_TOV_RANK,OPP_PTS_2ND_CHANCE_RANK,OPP_PTS_FB_RANK,OPP_PTS_PAINT_RANK,Season Type
31,1610612739,Cleveland Cavaliers,12,5,7,0.417,576.0,111.1,373,0.699,...,8,7,9,5,7,10,11,10,10,Playoffs
26,1610612759,San Antonio Spurs,82,22,60,0.268,3966.0,115.6,2778,0.721,...,21,8,13,22,3,27,23,17,28,Regular Season
0,1610612737,Atlanta Hawks,82,36,46,0.439,3971.0,118.4,2639,0.717,...,27,22,16,16,26,18,12,28,25,Regular Season
40,1610612740,New Orleans Pelicans,4,0,4,0.0,192.0,109.6,133,0.797,...,4,15,1,11,14,3,1,3,1,Playoffs
24,1610612757,Portland Trail Blazers,82,21,61,0.256,3976.0,116.6,2469,0.697,...,23,30,28,12,27,28,22,29,24,Regular Season
38,1610612749,Milwaukee Bucks,6,2,4,0.333,293.0,119.4,199,0.697,...,14,10,10,13,12,2,6,3,6,Playoffs
39,1610612750,Minnesota Timberwolves,16,9,7,0.563,768.0,110.9,509,0.762,...,7,4,3,3,4,14,10,13,13,Playoffs
11,1610612754,Indiana Pacers,82,47,35,0.573,3941.0,117.6,2578,0.7,...,24,28,26,11,8,12,26,11,30,Regular Season
3,1610612766,Charlotte Hornets,82,21,61,0.256,3946.0,119.2,2538,0.709,...,29,29,22,25,25,20,21,12,23,Regular Season
27,1610612761,Toronto Raptors,82,25,57,0.305,3961.0,118.1,2607,0.694,...,26,26,29,10,19,23,29,18,26,Regular Season


In [18]:
final_data = pd.merge(final_data, concatenated_data_def, on=['TEAM_ID', 'Season Type'])

In [19]:
playoff_all_shot = pd.read_csv('datasets/playoff/All shots.csv')
regular_all_shot = pd.read_csv('datasets/regular season/all_players_shot_data_2023_2024.csv')

playoff_all_shot['Season Type'] = 'Playoffs'
regular_all_shot['Season Type'] = 'Regular Season'

In [20]:
# Concatenate the regular season and playoff DataFrames
all_shot_data = pd.concat([regular_all_shot, playoff_all_shot], ignore_index=True)
all_shot_data.sample(10)

matchup_data = pd.read_csv('nba_game_matchups_2023_2024(include playyoffs).csv')

# Merge the all_shot_data DataFrame with the matchup_data DataFrame

# Drop duplicates in the GAME_ID column
matchup_df.drop_duplicates(subset='GAME_ID', inplace=True)


all_shot_data = pd.merge(all_shot_data, matchup_df, on='GAME_ID')


all_shot_data['LOCATION'] = np.where(all_shot_data['HOME_TEAM_NAME'] == all_shot_data['HTM'], 'HOME', 'AWAY')
all_shot_data.loc[0:3]

all_shot_data.to_csv('all_shot_data_final.csv', index=False)


In [21]:
all_shot_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 232541 entries, 0 to 232540
Data columns (total 29 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   GRID_TYPE            232541 non-null  object
 1   GAME_ID              232541 non-null  int64 
 2   GAME_EVENT_ID        232541 non-null  int64 
 3   PLAYER_ID            232541 non-null  int64 
 4   PLAYER_NAME          232541 non-null  object
 5   TEAM_ID              232541 non-null  int64 
 6   TEAM_NAME            232541 non-null  object
 7   PERIOD               232541 non-null  int64 
 8   MINUTES_REMAINING    232541 non-null  int64 
 9   SECONDS_REMAINING    232541 non-null  int64 
 10  EVENT_TYPE           232541 non-null  object
 11  ACTION_TYPE          232541 non-null  object
 12  SHOT_TYPE            232541 non-null  object
 13  SHOT_ZONE_BASIC      232541 non-null  object
 14  SHOT_ZONE_AREA       232541 non-null  object
 15  SHOT_ZONE_RANGE      232541 non-nu

In [22]:
# Define the list of Eastern Conference teams
east_teams = ["Atlanta Hawks", "Boston Celtics", "Brooklyn Nets", "Charlotte Hornets", "Chicago Bulls",
              "Cleveland Cavaliers", "Detroit Pistons", "Indiana Pacers", "Miami Heat", "Milwaukee Bucks",
              "New York Knicks", "Orlando Magic", "Philadelphia 76ers", "Toronto Raptors", "Washington Wizards"]

# Add the Conference column
final_data["Conference"] = final_data["TEAM_NAME_x"].apply(lambda x: "East" if x in east_teams else "West")

# Display the updated DataFrame
final_data.columns

Index(['PLAYER_NAME', 'PLAYER_ID', 'TEAM_NAME_x', 'LOCATION', 'MIN_x', 'FGM',
       'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT',
       'OREB', 'DREB_x', 'REB', 'AST', 'TOV', 'STL_x', 'BLK_x', 'BLKA', 'PF',
       'PFD', 'PTS', 'PLUS_MINUS', 'Efficiency', 'Season Type', 'GP_x', 'W_x',
       'L_x', 'Logo_URL', 'DISPLAY_FIRST_LAST_x', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'HEADSHOT_URL', 'DISPLAY_FIRST_LAST_y', 'POSITION',
       'TEAM_NAME_y', 'GP_y', 'W_y', 'L_y', 'W_PCT', 'MIN_y', 'DEF_RATING',
       'DREB_y', 'DREB_PCT', 'STL_y', 'BLK_y', 'OPP_PTS_OFF_TOV',
       'OPP_PTS_2ND_CHANCE', 'OPP_PTS_FB', 'OPP_PTS_PAINT', 'GP_RANK',
       'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'DEF_RATING_RANK',
       'DREB_RANK', 'DREB_PCT_RANK', 'STL_RANK', 'BLK_RANK',
       'OPP_PTS_OFF_TOV_RANK', 'OPP_PTS_2ND_CHANCE_RANK', 'OPP_PTS_FB_RANK',
       'OPP_PTS_PAINT_RANK', 'Conference'],
      dtype='object')

In [23]:
final_data.to_csv('final_data.csv', index=False)

In [24]:
all_shot_data.columns

Index(['GRID_TYPE', 'GAME_ID', 'GAME_EVENT_ID', 'PLAYER_ID', 'PLAYER_NAME',
       'TEAM_ID', 'TEAM_NAME', 'PERIOD', 'MINUTES_REMAINING',
       'SECONDS_REMAINING', 'EVENT_TYPE', 'ACTION_TYPE', 'SHOT_TYPE',
       'SHOT_ZONE_BASIC', 'SHOT_ZONE_AREA', 'SHOT_ZONE_RANGE', 'SHOT_DISTANCE',
       'LOC_X', 'LOC_Y', 'SHOT_ATTEMPTED_FLAG', 'SHOT_MADE_FLAG', 'GAME_DATE',
       'HTM', 'VTM', 'player_name', 'Season Type', 'HOME_TEAM_NAME',
       'AWAY_TEAM_NAME', 'LOCATION'],
      dtype='object')

In [25]:
len(all_shot_data[all_shot_data['TEAM_NAME'] == 'Los Angeles Lakers'])

7600

In [26]:
final_data[final_data['OPP_PTS_PAINT'] > 50]

Unnamed: 0,PLAYER_NAME,PLAYER_ID,TEAM_NAME_x,LOCATION,MIN_x,FGM,FGA,FG_PCT,FG3M,FG3A,...,DEF_RATING_RANK,DREB_RANK,DREB_PCT_RANK,STL_RANK,BLK_RANK,OPP_PTS_OFF_TOV_RANK,OPP_PTS_2ND_CHANCE_RANK,OPP_PTS_FB_RANK,OPP_PTS_PAINT_RANK,Conference
0,AJ Green,1631260,Milwaukee Bucks,AWAY,9.70,1.41,3.11,0.36,1.19,2.78,...,19,3,4,26,15,6,12,10,19,East
1,AJ Green,1631260,Milwaukee Bucks,HOME,12.15,1.55,3.86,0.36,1.28,3.24,...,19,3,4,26,15,6,12,10,19,East
2,AJ Griffin,1631100,Atlanta Hawks,AWAY,9.13,1.12,3.62,0.26,0.50,2.00,...,27,22,16,16,26,18,12,28,25,East
3,AJ Griffin,1631100,Atlanta Hawks,HOME,8.14,0.75,2.75,0.33,0.50,1.92,...,27,22,16,16,26,18,12,28,25,East
4,AJ Lawson,1630639,Dallas Mavericks,AWAY,9.37,1.32,3.16,0.33,0.37,1.37,...,18,14,21,24,17,7,24,26,20,West
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2413,Tyrese Maxey,1630178,Philadelphia 76ers,OVERALL,44.58,11.00,23.00,0.47,3.67,9.17,...,11,12,15,12,9,5,8,7,4,East
2414,Wendell Carter Jr.,1628976,Orlando Magic,OVERALL,26.43,2.71,6.71,0.39,1.00,3.57,...,1,9,4,9,10,7,3,8,8,East
2415,Wendell Moore Jr.,1631111,Minnesota Timberwolves,OVERALL,3.08,0.50,1.17,0.28,0.17,0.67,...,7,4,3,3,4,14,10,13,13,West
2416,Xavier Tillman,1630214,Boston Celtics,OVERALL,8.64,0.62,1.00,0.56,0.12,0.12,...,3,2,2,2,2,15,14,14,15,East


In [27]:
playoff_df = pd.read_csv('datasets/playoff/playoffs player game_logs.csv')

In [28]:
regular_df = pd.read_csv('datasets/regular season/Player game logs.csv')

In [29]:
playoff_df.columns

Index(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2',
       'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK',
       'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK',
       'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK',
       'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK',
       'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK',
       'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK',
       'WNBA_FANTASY_PTS_RANK', 'AVAILABLE_FLAG', 'PlayerID', 'PlayerName'],
      dtype='object')

In [30]:
regular_df.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,AVAILABLE_FLAG,PlayerID,PlayerName
0,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301190,2024-04-14T00:00:00,NYK vs. CHI,...,45,49,37,55,10,1,53,1,1630173,Precious Achiuwa
1,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301175,2024-04-12T00:00:00,NYK vs. BKN,...,45,45,26,63,10,1,61,1,1630173,Precious Achiuwa
2,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301167,2024-04-11T00:00:00,NYK @ BOS,...,45,58,55,61,10,1,61,1,1630173,Precious Achiuwa
3,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301139,2024-04-07T00:00:00,NYK @ MIL,...,45,67,20,74,10,1,74,1,1630173,Precious Achiuwa
4,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301119,2024-04-05T00:00:00,NYK @ CHI,...,45,67,37,64,10,1,64,1,1630173,Precious Achiuwa


In [31]:
# Dropping specified columns from the dataframe
columns_to_drop = ['PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3', 'WNBA_FANTASY_PTS', 
                   'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 
                   'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK', 
                   'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK', 
                   'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 
                   'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK', 
                   'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'WNBA_FANTASY_PTS_RANK', 
                   'AVAILABLE_FLAG']

regular_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')
playoff_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

In [32]:
regular_df['Season Type'] = 'Regular Season'

playoff_df['Season Type'] = 'Playoffs'

all_gamelog_df = pd.concat([regular_df, playoff_df], ignore_index=True)


In [33]:
all_gamelog_df[all_gamelog_df['Season Type'] == 'Playoffs']

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,TOV,STL,BLK,BLKA,PF,PFD,PTS,PlayerID,PlayerName,Season Type
26401,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,42300217,2024-05-19T00:00:00,NYK vs. IND,...,0,0,0,1,3,2,4,1630173,Precious Achiuwa,Playoffs
26402,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,42300216,2024-05-17T00:00:00,NYK @ IND,...,0,2,2,2,3,5,12,1630173,Precious Achiuwa,Playoffs
26403,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,42300215,2024-05-14T00:00:00,NYK vs. IND,...,0,2,2,0,4,1,4,1630173,Precious Achiuwa,Playoffs
26404,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,42300214,2024-05-12T00:00:00,NYK @ IND,...,0,0,0,1,0,1,8,1630173,Precious Achiuwa,Playoffs
26405,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,42300213,2024-05-10T00:00:00,NYK @ IND,...,2,0,3,1,2,1,5,1630173,Precious Achiuwa,Playoffs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28081,2023-24,1627826,Ivica Zubac,Ivica,1610612746,LAC,LA Clippers,42300175,2024-05-01T00:00:00,LAC vs. DAL,...,1,1,0,0,4,2,15,1627826,Ivica Zubac,Playoffs
28082,2023-24,1627826,Ivica Zubac,Ivica,1610612746,LAC,LA Clippers,42300174,2024-04-28T00:00:00,LAC @ DAL,...,1,1,1,1,4,2,13,1627826,Ivica Zubac,Playoffs
28083,2023-24,1627826,Ivica Zubac,Ivica,1610612746,LAC,LA Clippers,42300173,2024-04-26T00:00:00,LAC @ DAL,...,2,1,1,2,4,4,19,1627826,Ivica Zubac,Playoffs
28084,2023-24,1627826,Ivica Zubac,Ivica,1610612746,LAC,LA Clippers,42300172,2024-04-23T00:00:00,LAC vs. DAL,...,2,1,0,1,2,5,13,1627826,Ivica Zubac,Playoffs


In [34]:
log1_df = pd.read_csv('nba_player_game_logs_active_players_2021_22_playoff.csv')
log11_df = pd.read_csv('nba_player_game_logs_active_players_2021_22.csv')

log2_df = pd.read_csv('nba_player_game_logs_active_players_2022_23_playoff.csv')
log22_df = pd.read_csv('nba_player_game_logs_active_players_2022_23.csv')

log11_df['Season Type'] = 'Regular Season'
log22_df['Season Type'] = 'Regular Season'

log2_df['Season Type'] = 'Playoffs'
log1_df['Season Type'] = 'Playoffs'

log2_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')
log1_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')
log11_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')
log22_df.drop(columns=columns_to_drop, inplace=True, errors='ignore')   



In [35]:
all_gamelog_2021_2024_df = pd.concat([all_gamelog_df, log1_df, log11_df, log22_df, log2_df], ignore_index=True)

all_gamelog_2021_2024_df.head()

all_gamelog_2021_2024_df.to_csv('player_gamelog_2021_2024.csv', index=False)

In [36]:
all_gamelog_2021_2024_df.columns

Index(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PlayerID', 'PlayerName', 'Season Type',
       'Season'],
      dtype='object')

In [37]:
match_df1 = pd.read_csv('nba_game_matchups_2021_2022.csv')
match_df2 = pd.read_csv('nba_game_matchups_2021_2022_playoff.csv')
match_df3 = pd.read_csv('nba_game_matchups_2022_2023.csv')
match_df4 = pd.read_csv('nba_game_matchups_2022_2023_playoff.csv')
match_df5 = pd.read_csv('nba_game_matchups_2023_2024(include playyoffs).csv')

all_matchup = pd.concat([match_df1, match_df2, match_df3, match_df4, match_df5], ignore_index=True)

all_matchup.drop_duplicates(subset='GAME_ID', inplace=True)



In [38]:
all_matchup.head()

Unnamed: 0,GAME_ID,HOME_TEAM_NAME,AWAY_TEAM_NAME
0,22101223,MEM,BOS
1,22101226,TOR,NYK
2,22101230,POR,UTA
3,22101219,DAL,SAS
4,22101217,CHA,WAS


In [39]:
all_gamelog_2021_2024_df = all_gamelog_2021_2024_df.merge(all_matchup, on='GAME_ID', how='left')

all_gamelog_2021_2024_df.head()

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,BLKA,PF,PFD,PTS,PlayerID,PlayerName,Season Type,Season,HOME_TEAM_NAME,AWAY_TEAM_NAME
0,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301190,2024-04-14T00:00:00,NYK vs. CHI,...,0,3,0,4,1630173,Precious Achiuwa,Regular Season,,CHI,NYK
1,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301175,2024-04-12T00:00:00,NYK vs. BKN,...,0,0,0,5,1630173,Precious Achiuwa,Regular Season,,NYK,BKN
2,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301167,2024-04-11T00:00:00,NYK @ BOS,...,2,0,0,2,1630173,Precious Achiuwa,Regular Season,,NYK,BOS
3,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301139,2024-04-07T00:00:00,NYK @ MIL,...,0,1,0,0,1630173,Precious Achiuwa,Regular Season,,NYK,MIL
4,2023-24,1630173,Precious Achiuwa,Precious,1610612752,NYK,New York Knicks,22301119,2024-04-05T00:00:00,NYK @ CHI,...,1,4,0,0,1630173,Precious Achiuwa,Regular Season,,NYK,CHI


In [40]:
all_gamelog_2021_2024_df['Opponent Team'] = np.where(all_gamelog_2021_2024_df['HOME_TEAM_NAME'] == all_gamelog_2021_2024_df['TEAM_ABBREVIATION'], all_gamelog_2021_2024_df['AWAY_TEAM_NAME'], all_gamelog_2021_2024_df['HOME_TEAM_NAME'])

In [41]:
all_gamelog_2021_2024_df.sample(10)

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,PF,PFD,PTS,PlayerID,PlayerName,Season Type,Season,HOME_TEAM_NAME,AWAY_TEAM_NAME,Opponent Team
21435,2023-24,1641767,Ben Sheppard,Ben,1610612754,IND,Indiana Pacers,22300039,2023-11-21T00:00:00,IND @ ATL,...,0,0,5,1641767,Ben Sheppard,Regular Season,,IND,ATL,ATL
56173,2022-23,1630217,Desmond Bane,Desmond,1610612763,MEM,Memphis Grizzlies,22201052,2023-03-17T00:00:00,MEM @ SAS,...,5,2,21,1630217,Desmond Bane,Regular Season,2022-23,MEM,SAS,SAS
16118,2023-24,1629003,Shake Milton,Shake,1610612765,DET,Detroit Pistons,22300776,2024-02-13T00:00:00,DET @ LAL,...,1,0,8,1629003,Shake Milton,Regular Season,,LAL,DET,LAL
40773,2021-22,1629659,Talen Horton-Tucker,Talen,1610612747,LAL,Los Angeles Lakers,22100365,2021-12-07T00:00:00,LAL vs. BOS,...,3,2,12,1629659,Talen Horton-Tucker,Regular Season,2021-22,LAL,BOS,BOS
41902,2021-22,1630552,Jalen Johnson,Jalen,1610612737,ATL,Atlanta Hawks,22100202,2021-11-15T00:00:00,ATL vs. ORL,...,0,0,0,1630552,Jalen Johnson,Regular Season,2021-22,ATL,ORL,ORL
81234,2022-23,202083,Wesley Matthews,Wesley,1610612749,MIL,Milwaukee Bucks,42200101,2023-04-16T00:00:00,MIL vs. MIA,...,2,1,6,202083,Wesley Matthews,Playoffs,2022-23,MIA,MIL,MIA
50072,2021-22,203082,Terrence Ross,Terrence,1610612753,ORL,Orlando Magic,22100071,2021-10-29T00:00:00,ORL @ TOR,...,2,2,9,203082,Terrence Ross,Regular Season,2021-22,ORL,TOR,TOR
38916,2021-22,201145,Jeff Green,Jeff,1610612743,DEN,Denver Nuggets,22100500,2021-12-26T00:00:00,DEN @ LAC,...,1,1,4,201145,Jeff Green,Regular Season,2021-22,LAC,DEN,LAC
62633,2022-23,1628983,Shai Gilgeous-Alexander,Shai,1610612760,OKC,Oklahoma City Thunder,22200611,2023-01-10T00:00:00,OKC @ MIA,...,2,6,26,1628983,Shai Gilgeous-Alexander,Regular Season,2022-23,MIA,OKC,MIA
48215,2021-22,203486,Mason Plumlee,Mason,1610612766,CHA,Charlotte Hornets,22101001,2022-03-11T00:00:00,CHA @ NOP,...,4,1,9,203486,Mason Plumlee,Regular Season,2021-22,CHA,NOP,NOP


In [42]:
all_gamelog_2021_2024_df.to_csv('player_gamelog_2021_2024.csv', index=False)

In [44]:
all_gamelog_2021_2024_df.iloc[0]

SEASON_YEAR                      2023-24
PLAYER_ID                        1630173
PLAYER_NAME             Precious Achiuwa
NICKNAME                        Precious
TEAM_ID                       1610612752
TEAM_ABBREVIATION                    NYK
TEAM_NAME                New York Knicks
GAME_ID                         22301190
GAME_DATE            2024-04-14T00:00:00
MATCHUP                      NYK vs. CHI
WL                                     W
MIN                            18.506667
FGM                                    2
FGA                                    3
FG_PCT                             0.667
FG3M                                   0
FG3A                                   0
FG3_PCT                              0.0
FTM                                    0
FTA                                    0
FT_PCT                               0.0
OREB                                   1
DREB                                   4
REB                                    5
AST             

In [45]:
all_gamelog_2021_2024_df['Double_Double_rebound'] = all_gamelog_2021_2024_df.apply(lambda row: 1 if row['PTS'] >= 10 and row['REB'] >= 10 else 0, axis=1)
all_gamelog_2021_2024_df['Double_Double_assist'] = all_gamelog_2021_2024_df.apply(lambda row: 1 if row['PTS'] >= 10 and row['AST'] >= 10 else 0, axis=1)

In [60]:
all_gamelog_2021_2024_df['Double_Double_rebound'].value_counts()

Double_Double_rebound
0    76381
1     5603
Name: count, dtype: int64

In [61]:
all_gamelog_2021_2024_df['Double_Double_assist'].value_counts()

Double_Double_assist
0    80183
1     1801
Name: count, dtype: int64