In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from nba_api.stats.endpoints import leaguegamefinder, scoreboardv2, boxscorescoringv2
from nba_api.stats.static import teams
import matplotlib.pyplot as mtplot
import seaborn 
import datetime


In [None]:
nba_teams = teams.get_teams()
team_abr_to_id = {i['abbreviation']: i['id'] for i in nba_teams}
allgames = pd.DataFrame()
games_from_17_on = pd.DataFrame()


for i in nba_teams:
    id = i['id']
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=id)
    g = gamefinder.get_data_frames()[0]
    allgames = pd.concat([allgames, g], ignore_index=True)

allgames['GAME_DATE'] = pd.to_datetime(allgames['GAME_DATE'])

games_from_17_on = allgames[allgames['GAME_DATE'] > datetime.datetime(2017, 5,5,0,0,0,0)]

print(games_from_17_on.head())
print(games_from_17_on['GAME_DATE'])

print(nba_teams)

  SEASON_ID     TEAM_ID TEAM_ABBREVIATION      TEAM_NAME     GAME_ID  \
0     22024  1610612737               ATL  Atlanta Hawks  0022400438   
1     22024  1610612737               ATL  Atlanta Hawks  0022400427   
2     22024  1610612737               ATL  Atlanta Hawks  0022400413   
3     22024  1610612737               ATL  Atlanta Hawks  0022400395   
4     22024  1610612737               ATL  Atlanta Hawks  0022400378   

   GAME_DATE      MATCHUP WL  MIN  PTS  ...  FT_PCT  OREB  DREB   REB  AST  \
0 2024-12-29    ATL @ TOR  W  240  136  ...   0.743  14.0  26.0  40.0   30   
1 2024-12-28  ATL vs. MIA  W  240  120  ...   0.773   9.0  38.0  47.0   34   
2 2024-12-26  ATL vs. CHI  W  240  141  ...   0.941  12.0  32.0  44.0   32   
3 2024-12-23  ATL vs. MIN  W  239  117  ...   0.808   6.0  32.0  38.0   27   
4 2024-12-21  ATL vs. MEM  L  239  112  ...   0.833   6.0  35.0  41.0   29   

    STL  BLK  TOV  PF  PLUS_MINUS  
0  22.0    8   14  19        29.0  
1   7.0    2   11  16     

In [30]:

games_from_17_on['WIN'] = games_from_17_on['WL'].apply(lambda x: 1 if x == 'W' else 0) 
games_from_17_on['PTS'] = games_from_17_on['PTS']
games_from_17_on['Points_Per_Game'] = games_from_17_on.groupby('TEAM_ID')['PTS'].transform('mean')


def get_opponent(matchup, team_abbr_to_id, team_id):
    if '@' in matchup:
        opp_abbr = matchup.split(' @ ')[-1]
    else:
        opp_abbr = matchup.split(' vs ')[-1]
    return team_abbr_to_id.get(opp_abbr, team_id)

games_from_17_on['OPPONENT_TEAM_ID'] = games_from_17_on.apply(lambda row: get_opponent(row['MATCHUP'], team_abr_to_id, row['TEAM_ID']), axis=1)

games_from_17_on['HOME_GAME'] = games_from_17_on['MATCHUP'].apply(lambda i: 1 if 'vs.' in i else 0)
games_from_17_on['LAST_GAME_RESULT'] = games_from_17_on.groupby('TEAM_ID')['WIN'].shift(1).fillna(0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['WIN'] = games_from_17_on['WL'].apply(lambda x: 1 if x == 'W' else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['PTS'] = games_from_17_on['PTS']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['Points_Per_Game'] = games_from_17_on.groupby('TEAM

In [31]:
lable_encoder = LabelEncoder()

games_from_17_on['TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['TEAM_ID'])
games_from_17_on['OPPONENT_TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['OPPONENT_TEAM_ID'])

X = games_from_17_on[['TEAM_ID', 'OPPONENT_TEAM_ID', 'Points_Per_Game', 'HOME_GAME', 'LAST_GAME_RESULT']]
y = games_from_17_on['WIN']
X_train, X_test, y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['TEAM_ID'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  games_from_17_on['OPPONENT_TEAM_ID'] = lable_encoder.fit_transform(games_from_17_on['OPPONENT_TEAM_ID'])


Accuracy 0.5551626591230552
              precision    recall  f1-score   support

           0       0.56      0.54      0.55      2120
           1       0.55      0.57      0.56      2122

    accuracy                           0.56      4242
   macro avg       0.56      0.56      0.56      4242
weighted avg       0.56      0.56      0.56      4242



In [32]:
feature_importances = pd.DataFrame(model.feature_importances_, index=X_train.columns, columns=['importance']).sort_values('importance', ascending=False)

print('Feature Importance: \n ', feature_importances)

Feature Importance: 
                    importance
OPPONENT_TEAM_ID    0.615211
Points_Per_Game     0.138756
TEAM_ID             0.136310
HOME_GAME           0.068632
LAST_GAME_RESULT    0.041091


In [34]:
print(games_from_17_on.head())

print(games_from_17_on['PTS'])

  SEASON_ID  TEAM_ID TEAM_ABBREVIATION      TEAM_NAME     GAME_ID  GAME_DATE  \
0     22024        0               ATL  Atlanta Hawks  0022400438 2024-12-29   
1     22024        0               ATL  Atlanta Hawks  0022400427 2024-12-28   
2     22024        0               ATL  Atlanta Hawks  0022400413 2024-12-26   
3     22024        0               ATL  Atlanta Hawks  0022400395 2024-12-23   
4     22024        0               ATL  Atlanta Hawks  0022400378 2024-12-21   

       MATCHUP WL  MIN  PTS  ...   STL  BLK  TOV  PF  PLUS_MINUS  WIN  \
0    ATL @ TOR  W  240  136  ...  22.0    8   14  19        29.0    1   
1  ATL vs. MIA  W  240  120  ...   7.0    2   11  16        10.0    1   
2  ATL vs. CHI  W  240  141  ...   9.0    5   12  13         8.0    1   
3  ATL vs. MIN  W  239  117  ...  15.0    6   21  14        13.0    1   
4  ATL vs. MEM  L  239  112  ...  11.0    6   25  14       -16.0    0   

   Points_Per_Game  OPPONENT_TEAM_ID  HOME_GAME  LAST_GAME_RESULT  
0       110.

In [36]:
games_from_17_on.to_csv('games.csv')