In [42]:
pip install nba_api



In [43]:
#import team statistics from nba_api

from nba_api.stats.endpoints import leaguedashteamstats
import pandas as pd

#set season perameters for list to most recent NBA season
season = '2023-24'

#pull the team statistics for the specified season and put into dataframe
team_stats = leaguedashteamstats.LeagueDashTeamStats(season=season)
team_stats_df = team_stats.get_data_frames()[0]

#sort the dataframe by team wins (W)
sorted_team_stats_df = team_stats_df.sort_values(by='W', ascending=False)

#create list of teams for user to choose from
print("Select a team from the list below to view more detailed statistics:")
for index, team in enumerate(sorted_team_stats_df['TEAM_NAME'].unique()):
    print(f"{index + 1}: {team}")

#prompt the user to select a team from the list
team_choice = int(input("Enter the number of your chosen team: ")) - 1


#ensure user has entered a valid input and display the selected team's statistics
if 0 <= team_choice < len(sorted_team_stats_df['TEAM_NAME'].unique()):
    team_selected = sorted_team_stats_df['TEAM_NAME'].unique()[team_choice]
    team_stats_selected = sorted_team_stats_df[sorted_team_stats_df['TEAM_NAME'] == team_selected]
    print("\nDetailed statistics for the selected team:")
else:
    print("Invalid selection. Please run the code again and choose a valid number.")


#print the statistics of the selected team
team_stats_selected

Select a team from the list below to view more detailed statistics:
1: Boston Celtics
2: Denver Nuggets
3: Oklahoma City Thunder
4: Minnesota Timberwolves
5: LA Clippers
6: New York Knicks
7: Dallas Mavericks
8: New Orleans Pelicans
9: Phoenix Suns
10: Milwaukee Bucks
11: Cleveland Cavaliers
12: Philadelphia 76ers
13: Indiana Pacers
14: Los Angeles Lakers
15: Orlando Magic
16: Miami Heat
17: Golden State Warriors
18: Sacramento Kings
19: Houston Rockets
20: Chicago Bulls
21: Atlanta Hawks
22: Brooklyn Nets
23: Utah Jazz
24: Memphis Grizzlies
25: Toronto Raptors
26: San Antonio Spurs
27: Charlotte Hornets
28: Portland Trail Blazers
29: Washington Wizards
30: Detroit Pistons
Enter the number of your chosen team: 3

Detailed statistics for the selected team:


Unnamed: 0,TEAM_ID,TEAM_NAME,GP,W,L,W_PCT,MIN,FGM,FGA,FG_PCT,...,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
20,1610612760,Oklahoma City Thunder,82,57,25,0.695,3961.0,3653,7324,0.499,...,27,11,7,1,1,18,19,12,3,2


In [44]:
#removing all categories that are not rankings
mask = team_stats_selected.columns.str.endswith('RANK')
team_stats_selected = team_stats_selected.loc[:, mask]

#printing rankings of user selected team
team_stats_selected


Unnamed: 0,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,FGM_RANK,FGA_RANK,FG_PCT_RANK,FG3M_RANK,FG3A_RANK,...,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
20,1,2,2,2,10,2,15,3,8,16,...,27,11,7,1,1,18,19,12,3,2


In [46]:
columns_to_drop = ['GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG3M_RANK', 'FTM_RANK', 'PLUS_MINUS_RANK']

#drop these columns from the DataFrame
team_stats_selected = team_stats_selected.drop(columns=columns_to_drop, errors='ignore')

#now team_stats_selected contains only the relevant stats
print("DataFrame after dropping categorical and non-relevant columns:")
team_stats_selected

DataFrame after dropping categorical and non-relevant columns:


Unnamed: 0,FG_PCT_RANK,FG3A_RANK,FG3_PCT_RANK,FTA_RANK,FT_PCT_RANK,OREB_RANK,DREB_RANK,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK
20,3,16,1,17,4,29,12,27,11,7,1,1,18,19,12,3


In [47]:
#get the column names where the values are greater than or equal to 15
cols = team_stats_selected.columns[team_stats_selected.ge(15).any()]

#print the column names
print("Columns with values greater than or equal to 15:")
cols


Columns with values greater than or equal to 15:


Index(['FG3A_RANK', 'FTA_RANK', 'OREB_RANK', 'REB_RANK', 'BLKA_RANK',
       'PF_RANK'],
      dtype='object')

In [48]:
from nba_api.stats.endpoints import leaguedashplayerstats
import pandas as pd

player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season='2023-24')
player_stats_df = player_stats.get_data_frames()[0]

#removing 'RANK' from column names
performance_metrics = [col.replace('_RANK', '') for col in cols]

#create empty list to hold recommended players
top_players_dfs = []
#filter for players who are in the top 20% in the desired categories
for metric in performance_metrics:
    if metric in player_stats_df.columns:
        threshold = player_stats_df[metric].quantile(0.80)
        top_players_dfs.append(player_stats_df[player_stats_df[metric] >= threshold])

#using concat to put all dataFrames in the list into one dataFrame, removing duplicates
if top_players_dfs:
    top_players = pd.concat(top_players_dfs).drop_duplicates(subset='PLAYER_ID')
    #display the top players who can help improve these categories
    print("Recommended Players to Improve Team Performance:")
    print(top_players[['PLAYER_NAME', 'TEAM_ABBREVIATION'] + performance_metrics].drop_duplicates())
else:
    print("No players found that meet the criteria.")


Recommended Players to Improve Team Performance:
         PLAYER_NAME TEAM_ABBREVIATION  FG3A  FTA  OREB  REB  BLKA   PF
5      Aaron Nesmith               IND   334  137    63  275    51  241
11        Alec Burks               NYK   330  167    24  150    21   75
13       Alex Caruso               CHI   333   96    62  273    25  181
26   Anfernee Simons               POR   405  178    22  167    51   98
29   Anthony Edwards               MIN   532  506    52  430    56  141
..               ...               ...   ...  ...   ...  ...   ...  ...
484    Rui Hachimura               LAL   232  134    58  294    43  101
24   Andrew Nembhard               IND   171   56    36  141    21  143
225  Jaden McDaniels               MIN   255   79    56  224    28  219
357       Kyle Lowry               PHI   250   75    34  194    17  146
466    Peyton Watson               DEN   152  106    56  258    32  143

[223 rows x 8 columns]


In [49]:
#selecting the top 5 players based on the sum of their rankings in the performance metrics
top_5_players = top_players.sort_values(cols.tolist(), ascending=True).head(5)

#displaying the top 5 players
print("Top 5 Recommended Players to Improve Team Performance:")
print(top_5_players[['PLAYER_NAME', 'TEAM_ABBREVIATION'] + performance_metrics])


Top 5 Recommended Players to Improve Team Performance:
           PLAYER_NAME TEAM_ABBREVIATION  FG3A  FTA  OREB  REB  BLKA   PF
509      Stephen Curry               GSW   876  324    37  330    51  117
372        Luka Doncic               DAL   744  608    59  647    50  149
153   Donte DiVincenzo               NYK   705  114    63  297    32  166
348      Klay Thompson               GSW   692  137    35  254    27  127
45   Bogdan Bogdanovic               ATL   641  164    53  272    51  179
