In [225]:
from nba_api.stats.endpoints import leagueleaders
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# PREPARE DATA

## Stats

In [226]:
# Columns kept from each league-leaders result frame (see get_players_stats).
COLUMNS = [
    "PLAYER", "GP", "MIN",
    "FGM", "FGA", "FG_PCT",
    "FG3M", "FG3A", "FG3_PCT",
    "FTM", "FTA", "FT_PCT",
    "OREB", "DREB", "REB",
    "AST", "STL", "BLK", "TOV", "PF",
    "PTS", "EFF", "AST_TOV", "STL_TOV",
]

# First and last season start years; both ends are included in SEASONS.
SEASONS_FROM = 1995
SEASONS_TO = 2023

# Season labels in "YYYY-YY" form; the suffix is the following year modulo
# 100, zero-padded to two digits (so 1999 becomes "1999-00").
SEASONS = [
    "{}-{}".format(start_year, str((start_year + 1) % 100).zfill(2))
    for start_year in range(SEASONS_FROM, SEASONS_TO + 1)
]

# Number of leading rows get_players_stats keeps from each season's frame.
N_BEST_PLAYERS = 100

In [227]:
# Quick sanity check of the generated season labels.
print("Seasons: " + str(SEASONS))

Seasons: ['1995-96', '1996-97', '1997-98', '1998-99', '1999-00', '2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07', '2007-08', '2008-09', '2009-10', '2010-11', '2011-12', '2012-13', '2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']


In [228]:
def get_players_stats(seasons, rookies_only=False):
    """Fetch league-leader stats for several seasons, laid out side by side.

    For every season label the ``LeagueLeaders`` endpoint is queried, the
    first frame it returns is restricted to ``COLUMNS`` and truncated to its
    first ``N_BEST_PLAYERS`` rows.

    Parameters
    ----------
    seasons : iterable of str
        Season labels forwarded to the endpoint as ``season``.
    rookies_only : bool, default False
        When True the endpoint is queried with ``scope="Rookies"``,
        otherwise with ``scope="S"``.

    Returns
    -------
    pandas.DataFrame
        Row ``i`` holds the ``i``-th row returned for each season. Columns
        are a two-level MultiIndex of ``(season, column)``. Seasons that
        return fewer rows than others are padded with NaN. An empty frame is
        returned when ``seasons`` is empty.
    """
    scope = "Rookies" if rookies_only else "S"

    # Collect the per-season frames and concatenate once at the end; calling
    # pd.concat inside the loop re-copies everything gathered so far on every
    # iteration.
    labels = []
    frames = []
    for season in seasons:
        leaders = leagueleaders.LeagueLeaders(
            season=season, scope=scope
        ).get_data_frames()[0]
        labels.append(season)
        frames.append(leaders[COLUMNS].head(N_BEST_PLAYERS))

    # pd.concat raises on an empty list, so keep the empty-input result.
    if not frames:
        return pd.DataFrame()

    # ``keys`` adds the season label as the outer column level.
    return pd.concat(frames, axis=1, keys=labels)

In [220]:
# Stats for every season in SEASONS, queried with the default (non-rookie) scope.
stats_all_nba_df = get_players_stats(SEASONS)
stats_all_nba_df

Unnamed: 0_level_0,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,...,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24
Unnamed: 0_level_1,PLAYER,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,...,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,Michael Jordan,82,3090,916,1850,0.495,111,260,0.427,548,...,647,686,99,38,282,149,2370,2580,2.43,0.35
1,Karl Malone,82,3113,789,1520,0.519,16,40,0.400,512,...,415,465,150,67,162,184,2254,2416,2.87,0.93
2,David Robinson,82,3019,711,1378,0.516,3,9,0.333,626,...,841,476,87,79,250,210,2222,2655,1.90,0.35
3,Hakeem Olajuwon,72,2797,768,1494,0.514,3,14,0.214,397,...,278,519,70,13,186,144,2212,1972,2.79,0.38
4,Mitch Richmond,81,2946,611,1368,0.447,225,515,0.437,425,...,976,708,108,68,237,194,2085,3039,2.99,0.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Shawn Bradley,79,2329,387,873,0.443,1,4,0.250,169,...,249,99,54,12,58,98,1000,932,1.71,0.93
96,Todd Day,79,1807,299,817,0.366,100,302,0.331,224,...,185,230,43,22,112,120,997,928,2.05,0.38
97,P.J. Brown,81,2942,354,798,0.444,3,15,0.200,204,...,409,127,43,189,79,191,990,1266,1.61,0.54
98,Eddie Jones,70,2184,337,685,0.492,83,227,0.366,136,...,514,386,51,44,168,111,986,1344,2.30,0.30


In [221]:
# Same query restricted to rookies; shorter seasons show up as NaN-padded rows.
stats_all_rookie_df = get_players_stats(SEASONS, rookies_only=True)
stats_all_rookie_df

Unnamed: 0_level_0,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,1995-96,...,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24,2023-24
Unnamed: 0_level_1,PLAYER,GP,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,...,REB,AST,STL,BLK,TOV,PF,PTS,EFF,AST_TOV,STL_TOV
0,Jerry Stackhouse,72.0,2701.0,452.0,1091.0,0.414,93.0,292.0,0.318,387.0,...,755.0,274.0,88.0,254.0,260.0,153.0,1522.0,1923.0,1.05,0.34
1,Damon Stoudamire,70.0,2865.0,481.0,1129.0,0.426,133.0,337.0,0.395,236.0,...,648.0,200.0,53.0,190.0,131.0,197.0,1357.0,1812.0,1.53,0.41
2,Joe Smith,82.0,2821.0,469.0,1024.0,0.458,10.0,28.0,0.357,303.0,...,315.0,175.0,66.0,42.0,132.0,184.0,1279.0,1108.0,1.33,0.50
3,Michael Finley,82.0,3212.0,465.0,976.0,0.476,61.0,186.0,0.328,242.0,...,211.0,332.0,36.0,7.0,188.0,91.0,974.0,842.0,1.77,0.19
4,Arvydas Sabonis,73.0,1735.0,394.0,723.0,0.545,39.0,104.0,0.375,231.0,...,285.0,195.0,77.0,20.0,110.0,167.0,889.0,967.0,1.77,0.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,,,,,,,,,,,...,,,,,,,,,,
96,,,,,,,,,,,...,,,,,,,,,,
97,,,,,,,,,,,...,,,,,,,,,,
98,,,,,,,,,,,...,,,,,,,,,,


## Awards

In [222]:
def postprocess_award(award, seasons=None):
    """Turn a scraped award table into one column of player names per season.

    Parameters
    ----------
    award : pandas.DataFrame
        Table with two-level columns. After the outer level is dropped it
        must contain a ``"Season"`` column; ``"Teams"`` and ``"Positions"``
        columns are discarded when present, and every remaining column is
        treated as holding player names.
    seasons : iterable of str, optional
        Season labels (with an ASCII hyphen) to extract. Defaults to the
        module-level ``SEASONS``.

    Returns
    -------
    pandas.DataFrame
        One column per requested season. Each column lists that season's
        names, taking the remaining table columns left to right and, within
        a column, rows top to bottom. Seasons with fewer names than the
        longest one are padded with NaN.

    Raises
    ------
    KeyError
        If a requested season is not present in the table.
    """
    if seasons is None:
        seasons = SEASONS

    table = (
        award.droplevel(0, axis=1)
        .drop(columns=["Teams", "Positions"], errors="ignore")
        .set_index("Season")
    )
    # The scraped season labels use an en dash; normalise to an ASCII hyphen
    # so they can be looked up with the labels in ``seasons``.
    table.index = table.index.str.replace("\u2013", "-", regex=False)

    # Cut everything from the first annotation marker onwards, together with
    # any whitespace in front of it. "§" and "‡" are included so names such
    # as "Marc Gasol§" are cleaned as well.
    annotation = r"\s*[*^\[(†‡§].*"

    names_by_season = {}
    for season in seasons:
        rows = table.loc[season]
        # A season occupying a single table row comes back as a Series;
        # promote it to a one-row frame so both shapes are handled alike.
        if isinstance(rows, pd.Series):
            rows = rows.to_frame().T

        # Positional access because the name columns may share one label.
        names = pd.concat(
            [rows.iloc[:, position] for position in range(rows.shape[1])],
            ignore_index=True,
        )
        names_by_season[season] = names.str.replace(
            annotation, "", regex=True
        ).str.strip()

    # Building the frame once aligns on the union of row positions, so a
    # season with more names than the first one is not silently truncated.
    return pd.DataFrame(names_by_season)

In [223]:
# NOTE(review): the tables are picked by position (7 and 8) on the page;
# presumably these are the selection tables covering SEASONS - confirm
# against the current page layout.
awards_all_nba_df = postprocess_award(
    pd.concat(pd.read_html("https://en.wikipedia.org/wiki/All-NBA_Team")[7:9])
)
awards_all_nba_df

Unnamed: 0,1995-96,1996-97,1997-98,1998-99,1999-00,2000-01,2001-02,2002-03,2003-04,2004-05,...,2014-15,2015-16,2016-17,2017-18,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24
0,Scottie Pippen,Karl Malone,Karl Malone,Karl Malone,Tim Duncan,Tim Duncan,Tim Duncan,Tim Duncan,Kevin Garnett,Tim Duncan,...,LeBron James,Kawhi Leonard,Kawhi Leonard,Kevin Durant,Giannis Antetokounmpo,LeBron James,Giannis Antetokounmpo,Giannis Antetokounmpo,Giannis Antetokounmpo,Shai Gilgeous-Alexander
1,Karl Malone,Grant Hill,Tim Duncan,Tim Duncan,Kevin Garnett,Chris Webber,Tracy McGrady,Kevin Garnett,Tim Duncan,Dirk Nowitzki,...,Anthony Davis,LeBron James,LeBron James,LeBron James,Paul George,Giannis Antetokounmpo,Kawhi Leonard,Jayson Tatum,Jayson Tatum,Nikola Jokić
2,David Robinson,Hakeem Olajuwon,Shaquille O'Neal,Alonzo Mourning,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,Shaquille O'Neal,...,Marc Gasol§,DeAndre Jordan,Anthony Davis,Anthony Davis,Nikola Jokić,Anthony Davis,Nikola Jokić,Nikola Jokić,Joel Embiid,Luka Dončić
3,Michael Jordan,Michael Jordan,Michael Jordan,Allen Iverson,Jason Kidd,Allen Iverson,Jason Kidd,Kobe Bryant,Kobe Bryant,Allen Iverson,...,James Harden,Stephen Curry,James Harden,James Harden,James Harden,James Harden,Stephen Curry,Devin Booker,Luka Dončić,Giannis Antetokounmpo
4,Penny Hardaway,Tim Hardaway,Gary Payton,Jason Kidd,Gary Payton,Jason Kidd,Kobe Bryant,Tracy McGrady,Jason Kidd,Steve Nash,...,Stephen Curry,Russell Westbrook,Russell Westbrook,Damian Lillard,Stephen Curry,Luka Dončić,Luka Dončić,Luka Dončić,Shai Gilgeous-Alexander,Jayson Tatum
5,Shawn Kemp,Scottie Pippen,Grant Hill,Chris Webber,Karl Malone,Kevin Garnett,Kevin Garnett,Dirk Nowitzki,Jermaine O'Neal,LeBron James,...,LaMarcus Aldridge,Kevin Durant,Kevin Durant,LaMarcus Aldridge,Kevin Durant,Kawhi Leonard,LeBron James,DeMar DeRozan,Jimmy Butler,Jalen Brunson
6,Grant Hill,Glen Rice,Vin Baker,Grant Hill,Grant Hill,Vince Carter,Chris Webber,Chris Webber,Peja Stojaković,Kevin Garnett,...,Pau Gasol,Draymond Green,Giannis Antetokounmpo,Giannis Antetokounmpo,Kawhi Leonard,Pascal Siakam,Julius Randle,Kevin Durant,Jaylen Brown,Anthony Edwards
7,Hakeem Olajuwon,Patrick Ewing,David Robinson,Shaquille O'Neal,Alonzo Mourning,Dikembe Mutombo,Dirk Nowitzki,Ben Wallace,Ben Wallace,Amare Stoudemire,...,DeMarcus Cousins,DeMarcus Cousins,Rudy Gobert,Joel Embiid,Joel Embiid,Nikola Jokić,Joel Embiid,Joel Embiid,Nikola Jokić,Kevin Durant
8,Gary Payton,Gary Payton,Tim Hardaway,Gary Payton,Allen Iverson,Kobe Bryant,Gary Payton,Jason Kidd,Sam Cassell,Dwyane Wade,...,Russell Westbrook,Damian Lillard,Stephen Curry,DeMar DeRozan,Damian Lillard,Damian Lillard,Damian Lillard,Stephen Curry,Stephen Curry,Kawhi Leonard
9,John Stockton,Mitch Richmond,Rod Strickland,Tim Hardaway,Kobe Bryant,Tracy McGrady,Allen Iverson,Allen Iverson,Tracy McGrady,Ray Allen,...,Chris Paul,Chris Paul,Isaiah Thomas,Russell Westbrook,Kyrie Irving,Chris Paul,Chris Paul,Ja Morant,Donovan Mitchell,Anthony Davis


In [224]:
# NOTE(review): table 5 is picked by position and the first 132 row labels
# are dropped; presumably those rows precede the first season in SEASONS -
# confirm against the current page layout.
awards_all_rookie_df = postprocess_award(
    pd.read_html("https://en.wikipedia.org/wiki/NBA_All-Rookie_Team")[5].drop(
        index=range(132)
    )
)
awards_all_rookie_df

Unnamed: 0,1995-96,1996-97,1997-98,1998-99,1999-00,2000-01,2001-02,2002-03,2003-04,2004-05,...,2014-15,2015-16,2016-17,2017-18,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24
0,Damon Stoudamire,Shareef Abdur-Rahim,Tim Duncan,Vince Carter,Elton Brand,Mike Miller,Pau Gasol,Yao Ming,Carmelo Anthony,Emeka Okafor,...,Andrew Wiggins,Karl-Anthony Towns,Malcolm Brogdon,Ben Simmons,Luka Dončić,Ja Morant,LaMelo Ball,Scottie Barnes,Paolo Banchero,Victor Wembanyama
1,Joe Smith,Allen Iverson,Keith Van Horn,Paul Pierce,Steve Francis,Kenyon Martin,Shane Battier,Amare Stoudemire,LeBron James,Dwight Howard,...,Nikola Mirotić,Kristaps Porziņģis,Dario Šarić,Donovan Mitchell,Trae Young,Kendrick Nunn,Anthony Edwards,Evan Mobley,Walker Kessler,Chet Holmgren
2,Jerry Stackhouse,Stephon Marbury,Brevin Knight,Jason Williams,Lamar Odom,Marc Jackson,Jason Richardson,Caron Butler,Dwyane Wade,Ben Gordon,...,Nerlens Noel,Devin Booker,Joel Embiid,Jayson Tatum,Deandre Ayton,Brandon Clarke,Tyrese Haliburton,Cade Cunningham,Bennedict Mathurin,Brandon Miller
3,Antonio McDyess,Marcus Camby,Žydrūnas Ilgauskas,Mike Bibby,Wally Szczerbiak,Morris Peterson,Tony Parker,Drew Gooden,Chris Bosh,Andre Iguodala,...,Elfrid Payton,Nikola Jokić,Buddy Hield,Kyle Kuzma,Jaren Jackson Jr.,Zion Williamson,Saddiq Bey,Franz Wagner,Keegan Murray,Jaime Jaquez Jr.
4,Arvydas Sabonis,Antoine Walker,Ron Mercer,Matt Harpring,Andre Miller,Darius Miles,Andrei Kirilenko,Nenê Hilario,Kirk Hinrich,Luol Deng,...,Jordan Clarkson,Jahlil Okafor,Willy Hernangómez,Lauri Markkanen,Marvin Bagley III,Eric Paschall,Jae'Sean Tate,Jalen Green,Jalen Williams,Brandin Podziemski
5,Michael Finley,Kerry Kittles,Tim Thomas,Michael Dickerson,Shawn Marion,Hedo Türkoğlu,Andrei Kirilenko,Manu Ginóbili,Josh Howard,Nenad Krstić,...,Marcus Smart,Justise Winslow,Jamal Murray,Dennis Smith Jr.,Shai Gilgeous-Alexander,Tyler Herro,Immanuel Quickley,Herbert Jones,Jalen Duren,Dereck Lively II
6,Kevin Garnett,Ray Allen,Cedric Henderson,Michael Doleac,Ron Artest,Desmond Mason,Jamaal Tinsley,Gordan Giriček,T. J. Ford,Josh Smith,...,Zach LaVine,D'Angelo Russell,Jaylen Brown,Lonzo Ball,Collin Sexton,Terence Davis,Desmond Bane,Chris Duarte,Tari Eason,GG Jackson
7,Bryant Reeves,Travis Knight,Derek Anderson,Cuttino Mobley,James Posey,Courtney Alexander,Richard Jefferson,Carlos Boozer,Udonis Haslem,Josh Childress,...,Bojan Bogdanović,Emmanuel Mudiay,Marquese Chriss,John Collins,Landry Shamet,Coby White,Isaiah Stewart,Bones Hyland,Jaden Ivey,Keyonte George
8,Brent Barry,Kobe Bryant,Maurice Taylor,Michael Olowokandi,Jason Terry,Marcus Fizer,Eddie Griffin,Jay Williams,Jarvis Hayes,Jameer Nelson,...,Jusuf Nurkić,Myles Turner,Brandon Ingram,Bogdan Bogdanović,Mitchell Robinson,P. J. Washington,Isaac Okoro,Ayo Dosunmu,Jabari Smith Jr.,Amen Thompson
9,Rasheed Wallace,Matt Maloney,Bobby Jackson,Antawn Jamison,Chucky Atkins,Chris Mihm,Željko Rebrača,J. R. Bremer,Marquis Daniels,Al Jefferson,...,Langston Galloway,Willie Cauley-Stein,Yogi Ferrell,Josh Jackson,Kevin Huerter,Rui Hachimura,Patrick Williams,Josh Giddey,Jeremy Sochan,Cason Wallace


## Combined

In [155]:
# append award at the end of the stats

# Train the model