In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams
import matplotlib.pyplot as mtplot
import seaborn 


In [27]:
nba_teams = teams.get_teams()
team_abr_to_id = {i['abbreviation']: i['id'] for i in nba_teams}
allgames = pd.DataFrame()

for i in nba_teams:
    id = i['id']
    gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=id)
    g = gamefinder.get_data_frames()[0]
    allgames = pd.concat([allgames, g], ignore_index=True)

print(allgames)

       SEASON_ID     TEAM_ID TEAM_ABBREVIATION          TEAM_NAME     GAME_ID  \
0          22024  1610612737               ATL      Atlanta Hawks  0022400438   
1          22024  1610612737               ATL      Atlanta Hawks  0022400427   
2          22024  1610612737               ATL      Atlanta Hawks  0022400413   
3          22024  1610612737               ATL      Atlanta Hawks  0022400395   
4          22024  1610612737               ATL      Atlanta Hawks  0022400378   
...          ...         ...               ...                ...         ...   
107075     21988  1610612766               CHH  Charlotte Hornets  0028800062   
107076     21988  1610612766               CHH  Charlotte Hornets  0028800052   
107077     21988  1610612766               CHH  Charlotte Hornets  0028800024   
107078     21988  1610612766               CHH  Charlotte Hornets  0028800015   
107079     21988  1610612766               CHH  Charlotte Hornets  0028800008   

         GAME_DATE      MAT

In [33]:
allgames['GAME_DATE'] = pd.to_datetime(allgames['GAME_DATE'])
allgames['WIN'] = allgames['WL'].apply(lambda x: 1 if x == 'W' else 0) 
allgames['PTS'] = allgames['PTS']
allgames['Points_Per_Game'] = allgames.groupby('TEAM_ID')['PTS'].transform('mean')


def get_opponent(matchup, team_abbr_to_id, team_id):
    if '@' in matchup:
        opp_abbr = matchup.split(' @ ')[-1]
    else:
        opp_abbr = matchup.split(' vs ')[-1]
    return team_abbr_to_id.get(opp_abbr, team_id)

allgames['OPPONENT_TEAM_ID'] = allgames.apply(lambda row: get_opponent(row['MATCHUP'], team_abr_to_id, row['TEAM_ID']), axis=1)

allgames['HOME_GAME'] = allgames['MATCHUP'].apply(lambda i: 1 if 'vs.' in i else 0)
allgames['LAST_GAME_RESULT'] = allgames.groupby('TEAM_ID')['WIN'].shift(1).fillna(0)


In [34]:
lable_encoder = LabelEncoder()

allgames['TEAM_ID'] = lable_encoder.fit_transform(allgames['TEAM_ID'])
allgames['OPPONENT_TEAM_ID'] = lable_encoder.fit_transform(allgames['OPPONENT_TEAM_ID'])

X = allgames[['TEAM_ID', 'OPPONENT_TEAM_ID', 'Points_Per_Game', 'HOME_GAME', 'LAST_GAME_RESULT']]
y = allgames['WIN']
X_train, X_test, y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy 0.596843481509152
              precision    recall  f1-score   support

           0       0.60      0.56      0.58     10638
           1       0.59      0.63      0.61     10778

    accuracy                           0.60     21416
   macro avg       0.60      0.60      0.60     21416
weighted avg       0.60      0.60      0.60     21416



In [35]:
feature_importances = pd.DataFrame(model.feature_importances_, index=X_train.columns, columns=['importance']).sort_values('importance', ascending=False)

print('Feature Importance: \n ', feature_importances)

Feature Importance: 
                    importance
HOME_GAME           0.398567
OPPONENT_TEAM_ID    0.358809
Points_Per_Game     0.104804
TEAM_ID             0.080394
LAST_GAME_RESULT    0.057426
