In [1]:
import requests
import pandas as pd
import nba_api as nba

In [2]:
from nba_api.stats.endpoints import commonteamroster

# Returns a teams roster for the given season
def getTeamSeasonRoster(season, team_id):
    team_roster = commonteamroster.CommonTeamRoster(season=2018, team_id=1610612738)
    df_team_roster = team_roster.get_data_frames()[0]['PLAYER_ID']
    return df_team_roster

print(getTeamSeasonRoster(2018, 1610612738))

0     1628369
1     1626780
2     1627759
3      202954
4      202681
5     1626179
6      202694
7      202330
8     1628464
9     1626154
10    1627824
11     203935
12    1628400
13     201143
14    1629057
15     203382
16    1628408
Name: PLAYER_ID, dtype: int64


In [3]:
from nba_api.stats.endpoints import playercareerstats

# Returns a players all-time per-game stats
def getPlayerStats(player_id, season):
    player_stats = playercareerstats.PlayerCareerStats(per_mode36='Per36', player_id=player_id)
    df_player_stats = player_stats.get_data_frames()[0]
    df_player_stats = df_player_stats[df_player_stats['SEASON_ID'] == season]
    return df_player_stats

print(getPlayerStats(1628369, '2017-18'))

   PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0    1628369   2017-18        00  1610612738               BOS        20.0   

   GP  GS     MIN  FGM  ...  FT_PCT  OREB  DREB  REB  AST  STL  BLK  TOV   PF  \
0  80  80  2443.0  5.9  ...   0.826   0.7   5.2  5.9  1.9  1.2  0.9  1.7  2.5   

    PTS  
0  16.4  

[1 rows x 27 columns]


In [4]:
import time
def getTeamStatsByPlayers(team_id, season):
    player_roster = getTeamSeasonRoster(season, team_id)
    df_team_stats = pd.DataFrame()
    for player_id in player_roster:
        time.sleep(.600)
        df_team_stats = df_team_stats.append(getPlayerStats(player_id, season))
    return df_team_stats

print(getTeamStatsByPlayers(1610612738, '2017-18'))

    PLAYER_ID SEASON_ID LEAGUE_ID     TEAM_ID TEAM_ABBREVIATION  PLAYER_AGE  \
0     1628369   2017-18        00  1610612738               BOS        20.0   
1     1626780   2017-18        00  1610612738               BOS        30.0   
1     1627759   2017-18        00  1610612738               BOS        21.0   
6      202681   2017-18        00  1610612738               BOS        26.0   
2     1626179   2017-18        00  1610612738               BOS        24.0   
8      202694   2017-18        00  1610612738               BOS        28.0   
7      202330   2017-18        00  1610612738               BOS        28.0   
0     1628464   2017-18        00  1610612738               BOS        26.0   
2     1626154   2017-18        00  1610612745               HOU        24.0   
0     1627824   2017-18        00  1610612738               BOS        22.0   
3      203935   2017-18        00  1610612738               BOS        24.0   
0     1628400   2017-18        00  1610612738       

In [5]:
from nba_api.stats.endpoints import teamyearbyyearstats
from nba_api.stats.static import teams

def getAllTeamsStatsByYear():
    nba_teams = teams.get_teams()
    df_stats = pd.DataFrame()
    for i in range(len(nba_teams)):
        team_id = nba_teams[i]['id']
        team_stats = teamyearbyyearstats.TeamYearByYearStats(per_mode_simple='PerGame',team_id=team_id)
        df_stats = df_stats.append(team_stats.get_data_frames())
        time.sleep(.600)
    return df_stats

df_all_team_stats = getAllTeamsStatsByYear()

In [6]:
df_all_team_stats_clean = df_all_team_stats
df_all_team_stats_clean['YEAR'] = df_all_team_stats['YEAR'].apply(lambda x: int(x[0:4]))
print(df_all_team_stats_clean)

       TEAM_ID   TEAM_CITY   TEAM_NAME  YEAR  GP  WINS  LOSSES  WIN_PCT  \
0   1610612737  Tri-Cities  Blackhawks  1949  64    29      35    0.453   
1   1610612737  Tri-Cities  Blackhawks  1950  68    25      43    0.368   
2   1610612737   Milwaukee       Hawks  1951  66    17      49    0.258   
3   1610612737   Milwaukee       Hawks  1952  71    27      44    0.380   
4   1610612737   Milwaukee       Hawks  1953  72    21      51    0.292   
..         ...         ...         ...   ...  ..   ...     ...      ...   
27  1610612766   Charlotte     Hornets  2017  82    36      46    0.439   
28  1610612766   Charlotte     Hornets  2018  82    39      43    0.476   
29  1610612766   Charlotte     Hornets  2019  65    23      42    0.354   
30  1610612766   Charlotte     Hornets  2020  72    33      39    0.458   
31  1610612766   Charlotte     Hornets  2021  82    43      39    0.524   

    CONF_RANK  DIV_RANK  ...  OREB  DREB   REB   AST    PF  STL   TOV  BLK  \
0           0        

In [7]:
from nba_api.stats.endpoints import leaguegamefinder

all_games = leaguegamefinder.LeagueGameFinder(player_or_team_abbreviation='T', league_id_nullable='00', season_type_nullable='Regular Season')

In [8]:
df_teams = pd.DataFrame(teams.get_teams())
print(df_teams)

            id               full_name abbreviation       nickname  \
0   1610612737           Atlanta Hawks          ATL          Hawks   
1   1610612738          Boston Celtics          BOS        Celtics   
2   1610612739     Cleveland Cavaliers          CLE      Cavaliers   
3   1610612740    New Orleans Pelicans          NOP       Pelicans   
4   1610612741           Chicago Bulls          CHI          Bulls   
5   1610612742        Dallas Mavericks          DAL      Mavericks   
6   1610612743          Denver Nuggets          DEN        Nuggets   
7   1610612744   Golden State Warriors          GSW       Warriors   
8   1610612745         Houston Rockets          HOU        Rockets   
9   1610612746    Los Angeles Clippers          LAC       Clippers   
10  1610612747      Los Angeles Lakers          LAL         Lakers   
11  1610612748              Miami Heat          MIA           Heat   
12  1610612749         Milwaukee Bucks          MIL          Bucks   
13  1610612750  Minn

In [43]:
def getGamesYearByYear():
    df_all_games_total = pd.DataFrame()
    year = ['2000-01', '2001-02', '2002-03', '2003-04', '2004-05', '2005-06', '2006-07', '2007-08', '2008-09']
    for i in range(2000, 2021):
        if i < 2009:
            year = str(i) + '-0' + str((i + 1) % 100)
        else:
            year = str(i) + '-' + str((i + 1) % 100)
        print(year)
        new_df = leaguegamefinder.LeagueGameFinder(player_or_team_abbreviation='T', league_id_nullable='00', season_type_nullable='Regular Season', season_nullable=year)
        df_all_games_total = df_all_games_total.append(new_df.get_data_frames()[0])
        time.sleep(.600)
    return df_all_games_total

df_all_games_total = getGamesYearByYear()
print(df_all_games_total)

2000-01
2001-02
2002-03
2003-04
2004-05
2005-06
2006-07
2007-08
2008-09
2009-10
2010-11
2011-12
2012-13
2013-14
2014-15
2015-16
2016-17
2017-18
2018-19
2019-20
2020-21
     SEASON_ID     TEAM_ID TEAM_ABBREVIATION               TEAM_NAME  \
0        22000  1610612744               GSW   Golden State Warriors   
1        22000  1610612750               MIN  Minnesota Timberwolves   
2        22000  1610612743               DEN          Denver Nuggets   
3        22000  1610612763               VAN     Vancouver Grizzlies   
4        22000  1610612738               BOS          Boston Celtics   
...        ...         ...               ...                     ...   
2155     22020  1610612740               NOP    New Orleans Pelicans   
2156     22020  1610612751               BKN           Brooklyn Nets   
2157     22020  1610612744               GSW   Golden State Warriors   
2158     22020  1610612746               LAC             LA Clippers   
2159     22020  1610612747              

In [44]:
df_all_games = df_all_games_total
df_relevant = df_all_games[['TEAM_ID', 'MATCHUP', 'WL', 'SEASON_ID', 'GAME_ID']]
df_single_matchup = df_relevant[df_relevant['MATCHUP'].apply(lambda x: x[4] == '@')]

def convertMatchupToOpposingTeamID(matchup):
    team_2_abbr = matchup.split('@')[1]
    team_2_abbr = team_2_abbr[1:]
    team_2_id = df_teams[df_teams['abbreviation'] == team_2_abbr]
    if len(team_2_id) == 0:
        return 'NaN'
    return team_2_id['id'].values[0]

df_single_matchup['MATCHUP'] = df_single_matchup['MATCHUP'].apply(convertMatchupToOpposingTeamID)
df_single_matchup['SEASON_ID'] = df_single_matchup['SEASON_ID'].apply(lambda x: x[1:])
df_single_matchup.rename(columns={'SEASON_ID': 'YEAR'}, inplace=True)
df_single_matchup['YEAR'] = df_single_matchup['YEAR'].astype(int)
print(df_single_matchup)

         TEAM_ID     MATCHUP WL  YEAR     GAME_ID
1     1610612750  1610612742  L  2000  0020001185
3     1610612763  1610612744  W  2000  0020001188
5     1610612765  1610612752  L  2000  0020001180
6     1610612762  1610612756  L  2000  0020001187
7     1610612758  1610612743  L  2000  0020001186
...          ...         ... ..   ...         ...
2152  1610612764  1610612755  L  2020  0022000013
2153  1610612765  1610612750  L  2020  0022000018
2155  1610612740  1610612761  W  2020  0022000014
2157  1610612744  1610612751  L  2020  0022000001
2158  1610612746  1610612747  W  2020  0022000002

[25104 rows x 5 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_single_matchup['MATCHUP'] = df_single_matchup['MATCHUP'].apply(convertMatchupToOpposingTeamID)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_single_matchup['SEASON_ID'] = df_single_matchup['SEASON_ID'].apply(lambda x: x[1:])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from 

In [23]:
print (all_games.get_data_frames()[0])

      SEASON_ID     TEAM_ID TEAM_ABBREVIATION           TEAM_NAME     GAME_ID  \
0         22021  1610612762               UTA           Utah Jazz  0022101230   
1         22021  1610612742               DAL    Dallas Mavericks  0022101219   
2         22021  1610612764               WAS  Washington Wizards  0022101217   
3         22021  1610612741               CHI       Chicago Bulls  0022101224   
4         22021  1610612747               LAL  Los Angeles Lakers  0022101220   
...         ...         ...               ...                 ...         ...   
29995     22009  1610612758               SAC    Sacramento Kings  0020900437   
29996     22009  1610612741               CHI       Chicago Bulls  0020900432   
29997     22009  1610612759               SAS   San Antonio Spurs  0020900435   
29998     22009  1610612762               UTA           Utah Jazz  0020900436   
29999     22009  1610612751               NJN     New Jersey Nets  0020900431   

        GAME_DATE      MATC

In [45]:
pd.set_option('display.max_columns', None)
df_games_1 = df_single_matchup.merge(right=df_all_team_stats_clean, on=['TEAM_ID', 'YEAR'])
df_games_2 = df_single_matchup.merge(right=df_all_team_stats_clean, left_on=['MATCHUP', 'YEAR'], right_on=['TEAM_ID', 'YEAR'])
df_total_games = pd.merge(df_games_1, df_games_2, on=['GAME_ID'])
print(df_total_games.head())

      TEAM_ID   MATCHUP_x WL_x  YEAR_x     GAME_ID TEAM_CITY_x   TEAM_NAME_x  \
0  1610612750  1610612742    L    2000  0020001185   Minnesota  Timberwolves   
1  1610612750  1610612745    L    2000  0020001175   Minnesota  Timberwolves   
2  1610612750  1610612746    L    2000  0020001147   Minnesota  Timberwolves   
3  1610612750  1610612747    L    2000  0020001136   Minnesota  Timberwolves   
4  1610612750  1610612757    W    2000  0020001061   Minnesota  Timberwolves   

   GP_x  WINS_x  LOSSES_x  WIN_PCT_x  CONF_RANK_x  DIV_RANK_x  PO_WINS_x  \
0    82      47        35      0.573            8           4          1   
1    82      47        35      0.573            8           4          1   
2    82      47        35      0.573            8           4          1   
3    82      47        35      0.573            8           4          1   
4    82      47        35      0.573            8           4          1   

   PO_LOSSES_x  CONF_COUNT_x  DIV_COUNT_x NBA_FINALS_APPEARANC

In [46]:
df_games_cleaned = df_total_games[['WL_x', 'AST_x', 'AST_y', 'BLK_x', 'BLK_y', 'DREB_x', 'DREB_y', 'FG3_PCT_x', 'FG3_PCT_y', 'FG_PCT_x', 'FG_PCT_y', 'FT_PCT_x', 'FT_PCT_y', 'FTA_x', 'FTA_y', 'OREB_x', 'OREB_y', 'STL_x', 'STL_y', 'TOV_x', 'TOV_y']]
df_games_cleaned['WL_x'] = df_games_cleaned['WL_x'].apply(lambda x: 1 if x == 'W' else 0)
print(df_games_cleaned.head())
corrGames = df_games_cleaned.corr()
corrGames.style.background_gradient(cmap='coolwarm')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_games_cleaned['WL_x'] = df_games_cleaned['WL_x'].apply(lambda x: 1 if x == 'W' else 0)


   WL_x  AST_x  AST_y  BLK_x  BLK_y  DREB_x  DREB_y  FG3_PCT_x  FG3_PCT_y  \
0     0   25.4   21.2    5.6    6.0    30.1    31.4      0.357      0.381   
1     0   25.4   19.7    5.6    4.4    30.1    30.8      0.357      0.357   
2     0   25.4   19.3    5.6    6.3    30.1    31.2      0.357      0.339   
3     0   25.4   23.0    5.6    6.0    30.1    31.5      0.357      0.344   
4     1   25.4   23.9    5.6    5.1    30.1    30.3      0.357      0.349   

   FG_PCT_x  FG_PCT_y  FT_PCT_x  FT_PCT_y  FTA_x  FTA_y  OREB_x  OREB_y  \
0     0.458     0.459     0.785     0.794   21.2   23.8    12.2    10.1   
1     0.458     0.453     0.785     0.758   21.2   25.4    12.2    11.2   
2     0.458     0.448     0.785     0.693   21.2   25.1    12.2    11.7   
3     0.458     0.465     0.785     0.683   21.2   28.5    12.2    13.2   
4     0.458     0.468     0.785     0.762   21.2   23.2    12.2    11.7   

   STL_x  STL_y  TOV_x  TOV_y  
0    8.3    7.5   13.9   13.9  
1    8.3    7.2   13.9

Unnamed: 0,WL_x,AST_x,AST_y,BLK_x,BLK_y,DREB_x,DREB_y,FG3_PCT_x,FG3_PCT_y,FG_PCT_x,FG_PCT_y,FT_PCT_x,FT_PCT_y,FTA_x,FTA_y,OREB_x,OREB_y,STL_x,STL_y,TOV_x,TOV_y
WL_x,1.0,0.102944,-0.08716,0.06804,-0.096381,0.115981,-0.07841,0.145232,-0.153867,0.170871,-0.182414,0.051025,-0.052165,0.010022,-0.047802,-0.064243,0.024874,0.03831,-0.044049,-0.108902,0.10709
AST_x,0.102944,1.0,0.274794,0.149534,-0.010658,0.521001,0.383988,0.301971,0.044884,0.5319,0.104354,0.190362,0.111236,-0.257458,-0.179816,-0.321318,-0.199403,0.235588,0.038375,-0.075101,-0.070894
AST_y,-0.08716,0.274794,1.0,0.007579,0.139121,0.386019,0.516084,0.05692,0.311018,0.124322,0.537426,0.113725,0.212036,-0.17679,-0.264567,-0.202517,-0.318169,0.033394,0.22914,-0.070418,-0.093038
BLK_x,0.06804,0.149534,0.007579,1.0,0.019776,0.15461,-0.015471,0.025494,-0.033278,0.158329,-0.049401,-0.070951,-0.00998,0.117348,-0.031419,0.125924,0.05631,0.101207,0.03338,0.114011,0.012984
BLK_y,-0.096381,-0.010658,0.139121,0.019776,1.0,-0.049196,0.120525,-0.034282,0.02061,-0.055857,0.144339,-0.015931,-0.070219,-0.010247,0.122616,0.075469,0.137961,0.028544,0.091519,0.02281,0.106634
DREB_x,0.115981,0.521001,0.386019,0.15461,-0.049196,1.0,0.582429,0.232449,0.064561,0.353968,0.168821,0.172918,0.162752,-0.223854,-0.294693,-0.418961,-0.357239,-0.075817,0.069584,-0.12338,-0.124279
DREB_y,-0.07841,0.383988,0.516084,-0.015471,0.120525,0.582429,1.0,0.067024,0.243559,0.183059,0.341058,0.15622,0.192097,-0.282543,-0.243705,-0.353939,-0.419614,0.071359,-0.084655,-0.116261,-0.139197
FG3_PCT_x,0.145232,0.301971,0.05692,0.025494,-0.034282,0.232449,0.067024,1.0,0.043186,0.560696,0.088265,0.276837,0.04061,-0.164502,-0.011889,-0.391286,-0.080132,-0.114916,-0.023381,-0.295292,-0.035548
FG3_PCT_y,-0.153867,0.044884,0.311018,-0.033278,0.02061,0.064561,0.243559,0.043186,1.0,0.084834,0.554023,0.042027,0.282447,-0.011369,-0.159524,-0.080457,-0.408708,-0.035028,-0.130354,-0.040955,-0.299771
FG_PCT_x,0.170871,0.5319,0.124322,0.158329,-0.055857,0.353968,0.183059,0.560696,0.084834,1.0,0.186455,0.20231,0.085067,0.021801,-0.04821,-0.438314,-0.181819,0.048059,-0.062117,-0.159222,-0.086571


In [47]:
def scaleColumns(df, col_dict):
    df_scaled = df.copy()
    for key in col_dict:
        df_scaled[key] = df_scaled[key]/df_scaled[col_dict[key]]
        df_scaled = df_scaled.drop(columns=col_dict[key], axis=1)
    return df_scaled

df_scaled = scaleColumns(df_games_cleaned, {'AST_x': 'AST_y', 'BLK_x': 'BLK_y', 'DREB_x': 'DREB_y', 
                                            'FG3_PCT_x': 'FG3_PCT_y', 'FG_PCT_x': 'FG_PCT_y', 'FTA_x': 'FTA_y',
                                            'FT_PCT_x': 'FT_PCT_y', 'OREB_x': 'OREB_y', 'STL_x': 'STL_y', 'TOV_x': 'TOV_y'})
print(df_scaled)

       WL_x     AST_x     BLK_x    DREB_x  FG3_PCT_x  FG_PCT_x  FT_PCT_x  \
0         0  1.198113  0.933333  0.958599   0.937008  0.997821  0.988665   
1         0  1.289340  1.272727  0.977273   1.000000  1.011038  1.035620   
2         0  1.316062  0.888889  0.964744   1.053097  1.022321  1.132756   
3         0  1.104348  0.933333  0.955556   1.037791  0.984946  1.149341   
4         1  1.062762  1.098039  0.993399   1.022923  0.978632  1.030184   
...     ...       ...       ...       ...        ...       ...       ...   
23721     0  1.033582  1.066667  1.035398   0.997347  0.964948  0.977584   
23722     1  1.144628  0.923077  1.060423   1.071225  1.035398  1.034256   
23723     1  1.033582  1.142857  0.994334   1.016216  0.983193  0.992415   
23724     0  1.086275  1.043478  0.928571   0.966581  0.960986  1.032895   
23725     0  1.033582  0.905660  0.988732   0.959184  0.947368  0.976368   

          FTA_x    OREB_x     STL_x     TOV_x  
0      0.890756  1.207921  1.106667  1.

In [48]:
corrGames = df_scaled.corr()
corrGames.style.background_gradient(cmap='coolwarm')

Unnamed: 0,WL_x,AST_x,BLK_x,DREB_x,FG3_PCT_x,FG_PCT_x,FT_PCT_x,FTA_x,OREB_x,STL_x,TOV_x
WL_x,1.0,0.155784,0.115995,0.213477,0.215614,0.277687,0.073141,0.046981,-0.073407,0.056115,-0.152668
AST_x,0.155784,1.0,0.168657,0.238589,0.294753,0.532329,0.10311,-0.119583,-0.169835,0.224997,-0.020236
BLK_x,0.115995,0.168657,1.0,0.257729,0.050425,0.218304,-0.06641,0.158538,0.067682,0.069628,0.086349
DREB_x,0.213477,0.238589,0.257729,1.0,0.272993,0.293473,0.036625,0.092212,-0.11415,-0.237396,-0.019249
FG3_PCT_x,0.215614,0.294753,0.050425,0.272993,1.0,0.530398,0.246618,-0.176668,-0.371147,-0.094857,-0.26867
FG_PCT_x,0.277687,0.532329,0.218304,0.293473,0.530398,1.0,0.133609,0.081331,-0.335767,0.123195,-0.082234
FT_PCT_x,0.073141,0.10311,-0.06641,0.036625,0.246618,0.133609,1.0,-0.200609,-0.271647,-0.07506,-0.261923
FTA_x,0.046981,-0.119583,0.158538,0.092212,-0.176668,0.081331,-0.200609,1.0,0.252697,0.175915,0.286051
OREB_x,-0.073407,-0.169835,0.067682,-0.11415,-0.371147,-0.335767,-0.271647,0.252697,1.0,0.053227,0.183405
STL_x,0.056115,0.224997,0.069628,-0.237396,-0.094857,0.123195,-0.07506,0.175915,0.053227,1.0,0.182769


In [49]:
# df_games_cleaned.to_csv('nba.csv')
# df_scaled.to_csv('nba_scaled.csv')
df_scaled.to_csv('nba_larger_scaled.csv')

In [56]:
playoff_games = leaguegamefinder.LeagueGameFinder(player_or_team_abbreviation='T', league_id_nullable='00', season_type_nullable='Playoffs', season_nullable='2015-16')
print(playoff_games.get_data_frames())

[    SEASON_ID     TEAM_ID TEAM_ABBREVIATION              TEAM_NAME  \
0       42015  1610612739               CLE    Cleveland Cavaliers   
1       42015  1610612744               GSW  Golden State Warriors   
2       42015  1610612739               CLE    Cleveland Cavaliers   
3       42015  1610612744               GSW  Golden State Warriors   
4       42015  1610612739               CLE    Cleveland Cavaliers   
..        ...         ...               ...                    ...   
167     42015  1610612754               IND         Indiana Pacers   
168     42015  1610612761               TOR        Toronto Raptors   
169     42015  1610612737               ATL          Atlanta Hawks   
170     42015  1610612760               OKC  Oklahoma City Thunder   
171     42015  1610612742               DAL       Dallas Mavericks   

        GAME_ID   GAME_DATE      MATCHUP WL  MIN  PTS  FGM  FGA  FG_PCT  FG3M  \
0    0041500407  2016-06-19    CLE @ GSW  W  241   93   33   82   0.402     6

In [57]:
playoff_games = playoff_games.get_data_frames()[0]
df_relevant = playoff_games[['TEAM_ID', 'MATCHUP', 'WL', 'SEASON_ID', 'GAME_ID', 'PLUS_MINUS']]
df_single_matchup = df_relevant[df_relevant['MATCHUP'].apply(lambda x: x[4] == '@')]

df_single_matchup['MATCHUP'] = df_single_matchup['MATCHUP'].apply(convertMatchupToOpposingTeamID)
df_single_matchup['SEASON_ID'] = df_single_matchup['SEASON_ID'].apply(lambda x: x[1:])
df_single_matchup.rename(columns={'SEASON_ID': 'YEAR'}, inplace=True)
df_single_matchup['YEAR'] = df_single_matchup['YEAR'].astype(int)
print(df_single_matchup)

        TEAM_ID     MATCHUP WL  YEAR     GAME_ID  PLUS_MINUS
0    1610612739  1610612744  W  2015  0041500407         4.0
3    1610612744  1610612739  L  2015  0041500406       -14.0
4    1610612739  1610612744  W  2015  0041500405        15.0
7    1610612744  1610612739  W  2015  0041500404        11.0
9    1610612744  1610612739  L  2015  0041500403       -30.0
..          ...         ... ..   ...         ...         ...
161  1610612763  1610612759  L  2015  0041500151       -32.0
165  1610612745  1610612744  L  2015  0041500141       -26.0
166  1610612738  1610612737  L  2015  0041500131        -1.0
167  1610612754  1610612761  W  2015  0041500111        10.0
171  1610612742  1610612760  L  2015  0041500161       -38.0

[86 rows x 6 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_single_matchup['MATCHUP'] = df_single_matchup['MATCHUP'].apply(convertMatchupToOpposingTeamID)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_single_matchup['SEASON_ID'] = df_single_matchup['SEASON_ID'].apply(lambda x: x[1:])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
A value is trying to be set on a copy of a slice from 

In [63]:
pd.set_option('display.max_columns', None)
df_games_1 = df_single_matchup.merge(right=df_all_team_stats_clean, on=['TEAM_ID', 'YEAR'])
df_games_2 = df_single_matchup.merge(right=df_all_team_stats_clean, left_on=['MATCHUP', 'YEAR'], right_on=['TEAM_ID', 'YEAR'])
df_total_games = pd.merge(df_games_1, df_games_2, on=['GAME_ID'])
print(df_total_games.head())

      TEAM_ID   MATCHUP_x WL_x  YEAR_x     GAME_ID  PLUS_MINUS_x TEAM_CITY_x  \
0  1610612739  1610612744    W    2015  0041500407           4.0   Cleveland   
1  1610612739  1610612744    W    2015  0041500405          15.0   Cleveland   
2  1610612739  1610612744    L    2015  0041500402         -33.0   Cleveland   
3  1610612739  1610612744    L    2015  0041500401         -15.0   Cleveland   
4  1610612739  1610612761    W    2015  0041500306          26.0   Cleveland   

  TEAM_NAME_x  GP_x  WINS_x  LOSSES_x  WIN_PCT_x  CONF_RANK_x  DIV_RANK_x  \
0   Cavaliers    82      57        25      0.695            1           1   
1   Cavaliers    82      57        25      0.695            1           1   
2   Cavaliers    82      57        25      0.695            1           1   
3   Cavaliers    82      57        25      0.695            1           1   
4   Cavaliers    82      57        25      0.695            1           1   

   PO_WINS_x  PO_LOSSES_x  CONF_COUNT_x  DIV_COUNT_x NBA

In [65]:
df_games_cleaned = df_total_games[['WL_x', 'AST_x', 'AST_y', 'BLK_x', 'BLK_y', 'DREB_x', 'DREB_y', 'FG3_PCT_x', 'FG3_PCT_y', 'FG_PCT_x', 'FG_PCT_y', 'FT_PCT_x', 'FT_PCT_y', 'FTA_x', 'FTA_y', 'OREB_x', 'OREB_y', 'STL_x', 'STL_y', 'TOV_x', 'TOV_y', 'PLUS_MINUS_x']]
df_games_cleaned['WL_x'] = df_games_cleaned['WL_x'].apply(lambda x: 1 if x == 'W' else 0)
print(df_games_cleaned.head())
corrGames = df_games_cleaned.corr()
corrGames.style.background_gradient(cmap='coolwarm')

   WL_x  AST_x  AST_y  BLK_x  BLK_y  DREB_x  DREB_y  FG3_PCT_x  FG3_PCT_y  \
0     1   22.7   28.9    3.9    6.1    33.9    36.2      0.362      0.416   
1     1   22.7   28.9    3.9    6.1    33.9    36.2      0.362      0.416   
2     0   22.7   28.9    3.9    6.1    33.9    36.2      0.362      0.416   
3     0   22.7   28.9    3.9    6.1    33.9    36.2      0.362      0.416   
4     1   22.7   18.7    3.9    5.5    33.9    33.2      0.362      0.370   

   FG_PCT_x  FG_PCT_y  FT_PCT_x  FT_PCT_y  FTA_x  FTA_y  OREB_x  OREB_y  \
0      0.46     0.487     0.748     0.763   21.7   21.8    10.6    10.0   
1      0.46     0.487     0.748     0.763   21.7   21.8    10.6    10.0   
2      0.46     0.487     0.748     0.763   21.7   21.8    10.6    10.0   
3      0.46     0.487     0.748     0.763   21.7   21.8    10.6    10.0   
4      0.46     0.451     0.748     0.777   21.7   26.7    10.6    10.2   

   STL_x  STL_y  TOV_x  TOV_y  PLUS_MINUS_x  
0    6.7    8.4   13.6   15.2           

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_games_cleaned['WL_x'] = df_games_cleaned['WL_x'].apply(lambda x: 1 if x == 'W' else 0)


Unnamed: 0,WL_x,AST_x,AST_y,BLK_x,BLK_y,DREB_x,DREB_y,FG3_PCT_x,FG3_PCT_y,FG_PCT_x,FG_PCT_y,FT_PCT_x,FT_PCT_y,FTA_x,FTA_y,OREB_x,OREB_y,STL_x,STL_y,TOV_x,TOV_y,PLUS_MINUS_x
WL_x,1.0,0.129275,-0.097561,0.009239,-0.095269,0.245216,-0.165852,0.113227,-0.179364,0.259562,-0.162915,0.15372,-0.02046,-0.205241,0.086258,0.061275,0.025489,-0.204107,0.07826,0.051018,-0.078043,0.794647
AST_x,0.129275,1.0,0.094722,0.301471,-0.18807,0.613759,-0.082346,0.632813,-0.111898,0.664415,-0.051105,0.15372,-0.017072,-0.558195,0.11267,-0.186334,0.404211,0.364124,0.091985,0.44756,0.306326,0.146046
AST_y,-0.097561,0.094722,1.0,-0.196244,0.33295,-0.104906,0.685516,-0.090941,0.679394,-0.036824,0.711564,-0.06062,0.127376,0.176614,-0.611808,0.45375,-0.196933,0.07409,0.405044,0.340752,0.522257,-0.180095
BLK_x,0.009239,0.301471,-0.196244,1.0,-0.193765,0.452633,-0.14514,0.258804,-0.160157,0.649257,-0.222943,0.176826,0.137668,0.034103,0.121248,-0.194981,0.129328,0.248404,-0.139842,0.374506,-0.144334,0.055848
BLK_y,-0.095269,-0.18807,0.33295,-0.193765,1.0,-0.096618,0.489156,-0.176609,0.308081,-0.218627,0.662959,0.138603,0.199076,0.133688,-0.01336,0.179613,-0.229568,-0.15054,0.251494,-0.082854,0.393786,-0.169831
DREB_x,0.245216,0.613759,-0.104906,0.452633,-0.096618,1.0,-0.063217,0.554342,-0.136675,0.697388,-0.064395,0.257866,-0.076141,-0.393386,0.211447,0.08449,0.225289,-0.22481,-0.139799,0.393897,0.095409,0.251438
DREB_y,-0.165852,-0.082346,0.685516,-0.14514,0.489156,-0.063217,1.0,-0.115311,0.609048,-0.065619,0.761775,-0.087732,0.186934,0.24018,-0.406601,0.277123,0.083646,-0.128682,-0.124609,0.142971,0.521471,-0.306396
FG3_PCT_x,0.113227,0.632813,-0.090941,0.258804,-0.176609,0.554342,-0.115311,1.0,-0.171636,0.551326,-0.010012,0.071223,-0.371076,-0.249191,0.213056,-0.206381,0.421595,0.130255,-0.11605,0.12611,0.334242,0.109501
FG3_PCT_y,-0.179364,-0.111898,0.679394,-0.160157,0.308081,-0.136675,0.609048,-0.171636,1.0,0.015685,0.606535,-0.386653,0.063095,0.276305,-0.274526,0.49337,-0.234016,-0.087768,0.205761,0.411514,0.203749,-0.161777
FG_PCT_x,0.259562,0.664415,-0.036824,0.649257,-0.218627,0.697388,-0.065619,0.551326,0.015685,1.0,-0.105402,0.098153,0.08928,-0.323687,0.221149,0.007648,0.355838,0.028349,-0.009101,0.465589,0.097346,0.287862


In [66]:
df_scaled = scaleColumns(df_games_cleaned, {'AST_x': 'AST_y', 'BLK_x': 'BLK_y', 'DREB_x': 'DREB_y', 
                                            'FG3_PCT_x': 'FG3_PCT_y', 'FG_PCT_x': 'FG_PCT_y', 'FTA_x': 'FTA_y',
                                            'FT_PCT_x': 'FT_PCT_y', 'OREB_x': 'OREB_y', 'STL_x': 'STL_y', 'TOV_x': 'TOV_y'})
print(df_scaled)

    WL_x     AST_x     BLK_x    DREB_x  FG3_PCT_x  FG_PCT_x  FT_PCT_x  \
0      1  0.785467  0.639344  0.936464   0.870192  0.944559  0.980341   
1      1  0.785467  0.639344  0.936464   0.870192  0.944559  0.980341   
2      0  0.785467  0.639344  0.936464   0.870192  0.944559  0.980341   
3      0  0.785467  0.639344  0.936464   0.870192  0.944559  0.980341   
4      1  1.213904  0.709091  1.021084   0.978378  1.019956  0.962677   
..   ...       ...       ...       ...        ...       ...       ...   
81     0  0.960870  0.627119  0.952247   0.985673  0.932773  1.015345   
82     0  0.854626  0.948718  1.000000   0.953039  0.954348  0.893048   
83     0  0.854626  0.948718  1.000000   0.953039  0.954348  0.893048   
84     0  0.844898  0.728814  0.884058   0.882667  0.909091  0.975093   
85     0  0.844898  0.728814  0.884058   0.882667  0.909091  0.975093   

       FTA_x    OREB_x     STL_x     TOV_x  PLUS_MINUS_x  
0   0.995413  1.060000  0.797619  0.894737           4.0  
1   0

In [67]:
corrGames = df_scaled.corr()
corrGames.style.background_gradient(cmap='coolwarm')

Unnamed: 0,WL_x,AST_x,BLK_x,DREB_x,FG3_PCT_x,FG_PCT_x,FT_PCT_x,FTA_x,OREB_x,STL_x,TOV_x,PLUS_MINUS_x
WL_x,1.0,0.169813,0.073992,0.285347,0.197222,0.286583,0.14875,-0.177306,0.036004,-0.207725,0.099764,0.794647
AST_x,0.169813,1.0,0.441108,0.745891,0.673895,0.723168,0.199383,-0.674583,-0.518286,0.329926,0.18709,0.258418
BLK_x,0.073992,0.441108,1.0,0.503472,0.379269,0.728018,0.076814,-0.076794,-0.289449,0.439896,0.43186,0.137537
DREB_x,0.285347,0.745891,0.503472,1.0,0.622781,0.727702,0.334007,-0.511703,-0.143442,-0.017653,0.313176,0.391127
FG3_PCT_x,0.197222,0.673895,0.379269,0.622781,1.0,0.486693,0.471583,-0.399424,-0.495661,0.297469,-0.198038,0.181205
FG_PCT_x,0.286583,0.723168,0.728018,0.727702,0.486693,1.0,0.032592,-0.483723,-0.308922,0.091349,0.352883,0.434009
FT_PCT_x,0.14875,0.199383,0.076814,0.334007,0.471583,0.032592,1.0,-0.311999,-0.133935,-0.04843,-0.1359,0.054773
FTA_x,-0.177306,-0.674583,-0.076794,-0.511703,-0.399424,-0.483723,-0.311999,1.0,0.546486,0.173809,-0.004572,-0.290715
OREB_x,0.036004,-0.518286,-0.289449,-0.143442,-0.495661,-0.308922,-0.133935,0.546486,1.0,-0.377782,0.409437,0.023828
STL_x,-0.207725,0.329926,0.439896,-0.017653,0.297469,0.091349,-0.04843,0.173809,-0.377782,1.0,0.055564,-0.213802


In [69]:
df_scaled.to_csv('playoffs_2015.csv')

In [70]:
celtics_stats = teamyearbyyearstats.TeamYearByYearStats(per_mode_simple='PerGame',team_id=1610612738)
nets_stats = teamyearbyyearstats.TeamYearByYearStats(per_mode_simple='PerGame',team_id=1610612751)


In [99]:
celtics_df = celtics_stats.get_data_frames()[0]
nets_df = nets_stats.get_data_frames()[0]
print(celtics_df)

       TEAM_ID TEAM_CITY TEAM_NAME     YEAR  GP  WINS  LOSSES  WIN_PCT  \
0   1610612738    Boston   Celtics  1946-47  60    22      38    0.367   
1   1610612738    Boston   Celtics  1947-48  48    20      28    0.417   
2   1610612738    Boston   Celtics  1948-49  60    25      35    0.417   
3   1610612738    Boston   Celtics  1949-50  68    22      46    0.324   
4   1610612738    Boston   Celtics  1950-51  69    39      30    0.565   
..         ...       ...       ...      ...  ..   ...     ...      ...   
71  1610612738    Boston   Celtics  2017-18  82    55      27    0.671   
72  1610612738    Boston   Celtics  2018-19  82    49      33    0.598   
73  1610612738    Boston   Celtics  2019-20  72    48      24    0.667   
74  1610612738    Boston   Celtics  2020-21  72    36      36    0.500   
75  1610612738    Boston   Celtics  2021-22  82    51      31    0.622   

    CONF_RANK  DIV_RANK  PO_WINS  PO_LOSSES  CONF_COUNT  DIV_COUNT  \
0           0         5        0         

In [100]:
celtics_df = celtics_df[celtics_df['YEAR'] == '2021-22']
nets_df = nets_df[nets_df['YEAR'] == '2021-22']
print(celtics_df)

       TEAM_ID TEAM_CITY TEAM_NAME     YEAR  GP  WINS  LOSSES  WIN_PCT  \
75  1610612738    Boston   Celtics  2021-22  82    51      31    0.622   

    CONF_RANK  DIV_RANK  PO_WINS  PO_LOSSES  CONF_COUNT  DIV_COUNT  \
75          2         0        2          0        15.0          5   

   NBA_FINALS_APPEARANCE   FGM   FGA  FG_PCT  FG3M  FG3A  FG3_PCT   FTM   FTA  \
75                   N/A  40.7  87.4   0.466  13.2  37.1    0.356  17.0  20.9   

    FT_PCT  OREB  DREB   REB   AST    PF  STL   TOV  BLK    PTS  PTS_RANK  
75   0.816  10.5  35.5  46.1  24.8  18.5  7.2  13.6  5.8  111.8        12  


In [101]:
celtics_df = celtics_df[['AST', 'BLK', 'DREB', 'FG3_PCT', 'FG_PCT', 'FT_PCT', 'FTA', 'OREB', 'STL', 'TOV', 'YEAR']]
nets_df = nets_df[['AST', 'BLK', 'DREB', 'FG3_PCT', 'FG_PCT', 'FT_PCT', 'FTA', 'OREB', 'STL', 'TOV', 'YEAR']]

print(celtics_df)
print(nets_df)


     AST  BLK  DREB  FG3_PCT  FG_PCT  FT_PCT   FTA  OREB  STL   TOV     YEAR
75  24.8  5.8  35.5    0.356   0.466   0.816  20.9  10.5  7.2  13.6  2021-22
     AST  BLK  DREB  FG3_PCT  FG_PCT  FT_PCT   FTA  OREB  STL   TOV     YEAR
45  25.3  5.5  34.1    0.361   0.475   0.805  21.7  10.3  7.1  14.1  2021-22


In [103]:
celtics_nets_df = pd.merge(celtics_df, nets_df, on=['YEAR'])
celtics_nets_df = scaleColumns(celtics_nets_df, {'AST_x': 'AST_y', 'BLK_x': 'BLK_y', 'DREB_x': 'DREB_y', 
                                            'FG3_PCT_x': 'FG3_PCT_y', 'FG_PCT_x': 'FG_PCT_y', 'FTA_x': 'FTA_y',
                                            'FT_PCT_x': 'FT_PCT_y', 'OREB_x': 'OREB_y', 'STL_x': 'STL_y', 'TOV_x': 'TOV_y'})

In [104]:
print(celtics_nets_df)

      AST_x     BLK_x    DREB_x  FG3_PCT_x  FG_PCT_x  FT_PCT_x     FTA_x  \
0  0.980237  1.054545  1.041056    0.98615  0.981053  1.013665  0.963134   

     OREB_x     STL_x     TOV_x     YEAR  
0  1.019417  1.014085  0.964539  2021-22  


In [105]:
celtics_nets_df.to_csv('celtics_nets.csv')