## Library Imports

In [71]:
import pandas as pd
from nba_api.stats.endpoints import leaguedashplayerstats

## Data Prep

In [72]:
def get_rookie_stats_season(season, per_mode):
    """Fetch rookie regular-season player stats for a single season.

    Queries the ``LeagueDashPlayerStats`` endpoint restricted to rookies and
    the regular season, takes the first data frame it returns, and tags every
    row with the requested season.

    Args:
        season: Season label forwarded to the endpoint; it is also written to
            the ``SEASON`` column of the result.
        per_mode: Value forwarded as ``per_mode_detailed`` (the notebook
            passes ``'PerGame'``).

    Returns:
        The endpoint's first data frame with an added ``SEASON`` column, or an
        empty ``DataFrame`` if anything raised while fetching or tagging.
    """
    try:
        endpoint = leaguedashplayerstats.LeagueDashPlayerStats(
            season=season,
            season_type_all_star='Regular Season',
            player_experience_nullable='Rookie',
            per_mode_detailed=per_mode,
        )
        rookie_frame = endpoint.get_data_frames()[0]
        rookie_frame['SEASON'] = season
    except Exception as err:
        # Best effort: report the failure and let the caller skip this season.
        print('Error: Could not get rookie stats for season', season)
        print(err)
        return pd.DataFrame()

    return rookie_frame

In [73]:
def format_season(season):
    """Build a season label from its starting year, e.g. 1999 -> '1999-00'.

    Args:
        season: Starting year of the season as an integer.

    Returns:
        The starting year, a hyphen, and the last two characters of the
        following year.
    """
    end_year_suffix = str(season + 1)[-2:]
    return '{}-{}'.format(season, end_year_suffix)

In [74]:
# Gather one frame per season and concatenate once at the end. Calling
# pd.concat inside the loop would re-copy every previously fetched row on
# each iteration.
season_frames = []

for season in range(1950, 2025):
    season_str = format_season(season)
    rookie_stats = get_rookie_stats_season(season_str, 'PerGame')

    # An empty frame means the fetch failed or returned no rows for the season.
    if rookie_stats.empty:
        print(f'No rookie stats found for {season_str} season')
        continue

    print(f'Found {rookie_stats.shape[0]} rookie stats for {season_str} season')
    season_frames.append(rookie_stats)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no season produced data.
df_rookie_stats = pd.concat(season_frames) if season_frames else pd.DataFrame()

No rookie stats found for 1950-51 season
No rookie stats found for 1951-52 season
No rookie stats found for 1952-53 season
No rookie stats found for 1953-54 season
No rookie stats found for 1954-55 season
No rookie stats found for 1955-56 season
No rookie stats found for 1956-57 season
No rookie stats found for 1957-58 season
No rookie stats found for 1958-59 season
No rookie stats found for 1959-60 season
No rookie stats found for 1960-61 season
No rookie stats found for 1961-62 season
No rookie stats found for 1962-63 season
No rookie stats found for 1963-64 season
No rookie stats found for 1964-65 season
No rookie stats found for 1965-66 season
No rookie stats found for 1966-67 season
No rookie stats found for 1967-68 season
No rookie stats found for 1968-69 season
No rookie stats found for 1969-70 season
No rookie stats found for 1970-71 season
No rookie stats found for 1971-72 season
No rookie stats found for 1972-73 season
No rookie stats found for 1973-74 season
No rookie stats 

In [75]:
# Preview the first rows of the combined rookie stats.
df_rookie_stats.head()

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,...,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,SEASON
0,1059,Aleksandar Djordjevic,Aleksandar,1610612757,POR,29.0,8,4,4,0.5,...,57,60,22,34,6,54,20,3,44,1996-97
1,947,Allen Iverson,Allen,1610612755,PHI,22.0,76,20,56,0.263,...,3,3,22,1,60,1,2,3,1,1996-97
2,1043,Amal McCaskill,Amal,1610612753,ORL,23.0,17,9,8,0.529,...,44,59,22,50,42,50,20,3,50,1996-97
3,952,Antoine Walker,Antoine,1610612738,BOS,20.0,82,15,67,0.183,...,2,2,21,3,62,2,1,1,2,1996-97
4,989,Ben Davis,Ben,1610612756,PHX,24.0,20,12,8,0.6,...,25,50,22,57,33,58,20,3,58,1996-97


In [76]:
# Number of rows in the combined frame for each SEASON value.
df_rookie_stats.groupby('SEASON').size()

SEASON
1996-97     63
1997-98     64
1998-99     67
1999-00     63
2000-01     68
2001-02     69
2002-03     64
2003-04     67
2004-05     70
2005-06     85
2006-07     80
2007-08     64
2008-09     62
2009-10     57
2010-11     67
2011-12     81
2012-13     78
2013-14     76
2014-15     81
2015-16     73
2016-17     88
2017-18    116
2018-19    105
2019-20    118
2020-21     94
2021-22    126
2022-23     85
2023-24    101
dtype: int64

In [77]:
# Drop the nickname, team and WNBA fantasy columns. Reassigning (instead of
# inplace=True) together with errors='ignore' keeps this cell re-runnable:
# a second execution no longer raises KeyError for already-removed columns.
df_rookie_stats = df_rookie_stats.drop(
    columns=['NICKNAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'WNBA_FANTASY_PTS_RANK', 'WNBA_FANTASY_PTS'],
    errors='ignore',
)
# Trim surrounding whitespace from player names.
df_rookie_stats['PLAYER_NAME'] = df_rookie_stats['PLAYER_NAME'].str.strip()
df_rookie_stats.columns

Index(['PLAYER_ID', 'PLAYER_NAME', 'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN',
       'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
       'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3',
       'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK',
       'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK',
       'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK',
       'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK',
       'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK',
       'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'SEASON'],
      dtype='object')

In [85]:
# Load the ROY winners table from the local CSV and preview its first rows.
roy_winners = pd.read_csv('../data/roy_winners.csv')
roy_winners.head()

Unnamed: 0,Season,Lg,Player,Voting,Age,Tm,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,-9999
0,2023-24,NBA,Victor Wembanyama,(V),20,SAS,71,29.7,21.4,10.6,3.9,1.2,3.6,0.465,0.325,0.796,3.7,0.085,wembavi01
1,2022-23,NBA,Paolo Banchero,(V),20,ORL,72,33.8,20.0,6.9,3.7,0.8,0.5,0.427,0.298,0.738,2.4,0.047,banchpa01
2,2021-22,NBA,Scottie Barnes,(V),20,TOR,74,35.4,15.3,7.5,3.5,1.1,0.7,0.492,0.301,0.735,6.6,0.122,barnesc01
3,2020-21,NBA,LaMelo Ball,(V),19,CHO,51,28.8,15.7,5.9,6.1,1.6,0.4,0.436,0.352,0.758,2.8,0.093,ballla01
4,2019-20,NBA,Ja Morant,(V),20,MEM,67,31.0,17.8,3.9,7.3,0.9,0.3,0.477,0.335,0.776,3.8,0.088,moranja01


In [86]:
# Normalise the winner names: remove the '*' and ' (Tie)' markers, replace
# 'č'/'ć' with a plain 'c', and trim surrounding whitespace.
#
# regex=False is passed explicitly because every pattern is a literal
# substring. '*', '(' and ')' are regex metacharacters, and pandas versions
# before 2.0 default to regex=True, where ' (Tie)' would be parsed as a
# capture group and never match the literal text.
roy_winners['Player'] = (
    roy_winners['Player']
    .str.replace('*', '', regex=False)
    .str.replace(' (Tie)', '', regex=False)
    .str.replace('č', 'c', regex=False)
    .str.replace('ć', 'c', regex=False)
    .str.strip()
)

In [87]:
# Keep only rows whose league is 'NBA', reduce to the columns this cell needs,
# and add a constant ROY=1 flag. Selecting with .loc and adding the flag via
# assign() builds a new frame, avoiding the SettingWithCopyWarning raised when
# a column is assigned on a boolean-filtered slice.
# NOTE: this cell reads the 'Lg' column, which the result no longer contains,
# so reload the CSV before running it a second time.
roy_winners = (
    roy_winners
    .loc[roy_winners['Lg'].str.strip() == 'NBA', ['Player', 'Season']]
    .assign(ROY=1)
)

In [88]:
# Left-join the ROY winners onto the rookie stats by (player name, season).
# Rookies without a matching winner row get NaN in the columns that come
# from roy_winners.
df_rookie_stats_roy = pd.merge(
    df_rookie_stats,
    roy_winners,
    how='left',
    left_on=['PLAYER_NAME', 'SEASON'],
    right_on=['Player', 'Season'],
)

In [89]:
# Replace missing ROY values with 0.
df_rookie_stats_roy = df_rookie_stats_roy.assign(
    ROY=df_rookie_stats_roy['ROY'].fillna(0)
)

In [92]:
# Remove the 'Player' and 'Season' columns. Reassigning with errors='ignore'
# (instead of inplace=True) makes the cell safe to re-run: a second execution
# no longer raises KeyError for columns that are already gone.
df_rookie_stats_roy = df_rookie_stats_roy.drop(
    columns=['Player', 'Season'],
    errors='ignore',
)

In [93]:
# List the columns of the merged frame.
df_rookie_stats_roy.columns

Index(['PLAYER_ID', 'PLAYER_NAME', 'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN',
       'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK', 'BLKA',
       'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2', 'TD3',
       'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK',
       'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK',
       'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK',
       'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK',
       'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK',
       'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK', 'SEASON', 'ROY'],
      dtype='object')