In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
from sklearn.preprocessing import LabelEncoder
from nba_api.stats.endpoints import leaguegamefinder, scoreboardv2, boxscorescoringv2
from nba_api.stats.static import teams
import matplotlib.pyplot as mtplot
import seaborn 
import datetime
import math


In [23]:
nba_teams = teams.get_teams()
team_abr_to_id = {i['abbreviation']: i['id'] for i in nba_teams}
allgames = pd.DataFrame()
games_from_17_on = pd.DataFrame()


for i in nba_teams:
    id = i['id']
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=id)
    g = gamefinder.get_data_frames()[0]
    allgames = pd.concat([allgames, g], ignore_index=True)

allgames['GAME_DATE'] = pd.to_datetime(allgames['GAME_DATE'])

games_from_17_on = allgames[allgames['GAME_DATE'] > datetime.datetime(2017, 5,5,0,0,0,0)]

print(games_from_17_on.head())
print(games_from_17_on['GAME_DATE'])

print(nba_teams)

  SEASON_ID     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME     GAME_ID  \
0     22024  1610612737               ATL  Atlanta Hawks  0022400612   
1     22024  1610612737               ATL  Atlanta Hawks  0022400602   
2     22024  1610612737               ATL  Atlanta Hawks  0022400587   
3     22024  1610612737               ATL  Atlanta Hawks  0022400563   
4     22024  1610612737               ATL  Atlanta Hawks  0022400556   

   GAME_DATE      MATCHUP WL  MIN  PTS  ...  FT_PCT  OREB  DREB   REB  AST  \
0 2025-01-22  ATL vs. DET  L  241  104  ...   0.769  15.0  32.0  47.0   25   
1 2025-01-20    ATL @ NYK  L  239  110  ...   0.773  16.0  25.0  41.0   23   
2 2025-01-18    ATL @ BOS  W  265  119  ...   0.786  13.0  42.0  55.0   27   
3 2025-01-15    ATL @ CHI  W  240  110  ...   0.682  14.0  37.0  51.0   28   
4 2025-01-14  ATL vs. PHX  W  241  122  ...   0.731  20.0  34.0  54.0   22   

    STL  BLK  TOV  PF  PLUS_MINUS  
0  14.0    3   13  14       -10.0  
1  10.0    6   23  21     

In [24]:

games_from_17_on['WIN'] = games_from_17_on['WL'].apply(lambda x: 1 if x == 'W' else 0) 
games_from_17_on['PTS'] = games_from_17_on['PTS']
games_from_17_on['Points_Per_Game'] = games_from_17_on.groupby('TEAM_ID')['PTS'].transform('mean')


def get_opponent(matchup, team_abbr_to_id, team_id):
    if '@' in matchup:
        opp_abbr = matchup.split(' @ ')[-1]
    else:
        opp_abbr = matchup.split(' vs ')[-1]
    return team_abbr_to_id.get(opp_abbr, team_id)

games_from_17_on['OPPONENT_TEAM_ID'] = games_from_17_on.apply(lambda row: get_opponent(row['MATCHUP'], team_abr_to_id, row['TEAM_ID']), axis=1)

games_from_17_on['HOME_GAME'] = games_from_17_on['MATCHUP'].apply(lambda i: 1 if 'vs.' in i else 0)
games_from_17_on['LAST_GAME_RESULT'] = games_from_17_on.groupby('TEAM_ID')['WIN'].shift(1).fillna(0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['WIN'] = games_from_17_on['WL'].apply(lambda x: 1 if x == 'W' else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['PTS'] = games_from_17_on['PTS']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['Points_Per_Game'] = games_from_17_on.groupby('TEAM

In [25]:
lable_encoder = LabelEncoder()

games_from_17_on['TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['TEAM_ID'])
games_from_17_on['OPPONENT_TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['OPPONENT_TEAM_ID'])

X = games_from_17_on[['TEAM_ID', 'OPPONENT_TEAM_ID', 'Points_Per_Game', 'HOME_GAME', 'LAST_GAME_RESULT']]
y = games_from_17_on['WIN']
X_train, X_test, y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['TEAM_ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['OPPONENT_TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['OPPONENT_TEAM_ID'])


Accuracy 0.5479007190906983
              precision    recall  f1-score   support

           0       0.57      0.46      0.51      2212
           1       0.53      0.64      0.58      2099

    accuracy                           0.55      4311
   macro avg       0.55      0.55      0.55      4311
weighted avg       0.55      0.55      0.54      4311



In [26]:
feature_importances = pd.DataFrame(model.feature_importances_, index=X_train.columns, columns=['importance']).sort_values('importance', ascending=False)

print('Feature Importance: \n ', feature_importances)

Feature Importance: 
                    importance
OPPONENT_TEAM_ID    0.606096
TEAM_ID             0.150547
Points_Per_Game     0.137661
HOME_GAME           0.063581
LAST_GAME_RESULT    0.042115


In [27]:
print(games_from_17_on.head())

print(games_from_17_on['PTS'])

  SEASON_ID  TEAM_ID TEAM_ABBREVIATION      TEAM_NAME     GAME_ID  GAME_DATE  \
0     22024        0               ATL  Atlanta Hawks  0022400612 2025-01-22   
1     22024        0               ATL  Atlanta Hawks  0022400602 2025-01-20   
2     22024        0               ATL  Atlanta Hawks  0022400587 2025-01-18   
3     22024        0               ATL  Atlanta Hawks  0022400563 2025-01-15   
4     22024        0               ATL  Atlanta Hawks  0022400556 2025-01-14   

       MATCHUP WL  MIN  PTS  ...   STL  BLK  TOV  PF  PLUS_MINUS  WIN  \
0  ATL vs. DET  L  241  104  ...  14.0    3   13  14       -10.0    0   
1    ATL @ NYK  L  239  110  ...  10.0    6   23  21        -9.0    0   
2    ATL @ BOS  W  265  119  ...   9.0   10   17  17         4.0    1   
3    ATL @ CHI  W  240  110  ...  11.0    5    9  15        16.0    1   
4  ATL vs. PHX  W  241  122  ...  10.0    3   12  21         5.0    1   

   Points_Per_Game  OPPONENT_TEAM_ID  HOME_GAME  LAST_GAME_RESULT  
0       110.

In [28]:
games_from_17_on.to_csv('games.csv')

In [29]:
games_from_17_on.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,STL,BLK,TOV,PF,PLUS_MINUS,WIN,Points_Per_Game,OPPONENT_TEAM_ID,HOME_GAME,LAST_GAME_RESULT
0,22024,0,ATL,Atlanta Hawks,22400612,2025-01-22,ATL vs. DET,L,241,104,...,14.0,3,13,14,-10.0,0,110.982906,0,1,0.0
1,22024,0,ATL,Atlanta Hawks,22400602,2025-01-20,ATL @ NYK,L,239,110,...,10.0,6,23,21,-9.0,0,110.982906,15,0,0.0
2,22024,0,ATL,Atlanta Hawks,22400587,2025-01-18,ATL @ BOS,W,265,119,...,9.0,10,17,17,4.0,1,110.982906,1,0,0.0
3,22024,0,ATL,Atlanta Hawks,22400563,2025-01-15,ATL @ CHI,W,240,110,...,11.0,5,9,15,16.0,1,110.982906,4,0,1.0
4,22024,0,ATL,Atlanta Hawks,22400556,2025-01-14,ATL vs. PHX,W,241,122,...,10.0,3,12,21,5.0,1,110.982906,0,1,1.0


In [30]:
list = []


for i, r in games_from_17_on.iterrows():
    if r['GAME_DATE'] > datetime.datetime(2017,5,5) and r['GAME_DATE'] < datetime.datetime(2018,5,5):
        list.append(r)

games1718 = pd.DataFrame(list)
games1718.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,STL,BLK,TOV,PF,PLUS_MINUS,WIN,Points_Per_Game,OPPONENT_TEAM_ID,HOME_GAME,LAST_GAME_RESULT
610,22017,0,ATL,Atlanta Hawks,21701214,2018-04-10,ATL vs. PHI,L,240,113,...,10.0,3,10,24,-8.0,0,110.982906,0,1,0.0
611,22017,0,ATL,Atlanta Hawks,21701202,2018-04-08,ATL @ BOS,W,239,112,...,7.0,4,13,19,6.0,1,110.982906,1,0,0.0
612,22017,0,ATL,Atlanta Hawks,21701184,2018-04-06,ATL @ WAS,W,239,103,...,5.0,5,18,22,6.0,1,110.982906,27,0,1.0
613,22017,0,ATL,Atlanta Hawks,21701172,2018-04-04,ATL vs. MIA,L,239,86,...,5.0,6,15,16,-29.0,0,110.982906,0,1,1.0
614,22017,0,ATL,Atlanta Hawks,21701158,2018-04-03,ATL @ MIA,L,240,98,...,10.0,1,15,15,-3.0,0,110.982906,11,0,0.0


In [31]:
print(games1718.columns)



Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS', 'WIN',
       'Points_Per_Game', 'OPPONENT_TEAM_ID', 'HOME_GAME', 'LAST_GAME_RESULT'],
      dtype='object')


In [64]:
def find_stats_from_both_teams(matchup):
  a =  matchup.split()
  return a
  
find_stats_from_both_teams('ATL @ NYK')
for i in games1718['MATCHUP']:
 matchup = find_stats_from_both_teams(i)
 match = i.split()
 

for i, r in games1718.iterrows():
    
     
 

ValueError: Can only compare identically-labeled Series objects

In [15]:
#features = ['Points_Per_Game', 'FGM', 'FGA', 'FG_PCT', 'FG3M','FG3A','FG3_PCT','FTM','FTA','FT_PCT','OREB','DREB','REB','AST','TOV','STL']



609       ATL vs. PHI
610         ATL @ BOS
611         ATL @ WAS
612       ATL vs. MIA
613         ATL @ MIA
             ...     
105039      CHA @ ORL
105040      CHA @ DET
105041      CHA @ OKC
105042    CHA vs. IND
105043      CHA @ MIA
Name: MATCHUP, Length: 2953, dtype: object


0           ATL @ LAC
1           ATL @ LAL
2           ATL @ DEN
3           ATL @ TOR
4         ATL vs. MIA
             ...     
104819      CHA @ ORL
104820      CHA @ DET
104821      CHA @ OKC
104822    CHA vs. IND
104823      CHA @ MIA
Name: MATCHUP, Length: 21305, dtype: object
