In [63]:
import pandas as pd
from datetime import datetime
from nba_api.stats.endpoints import leaguegamefinder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import preprocessing 
from sklearn.metrics import classification_report

In [64]:
data = pd.read_csv('nbaHomeWinLossModelDataset.csv').drop(['Unnamed: 0'],axis=1)
data = data.dropna()
data.head(100)

Unnamed: 0,HOME_LAST_GAME_OE,HOME_LAST_GAME_HOME_WIN_PCTG,HOME_NUM_REST_DAYS,HOME_LAST_GAME_AWAY_WIN_PCTG,HOME_LAST_GAME_TOTAL_WIN_PCTG,HOME_LAST_GAME_ROLLING_SCORING_MARGIN,HOME_LAST_GAME_ROLLING_OE,HOME_W,SEASON,AWAY_LAST_GAME_OE,AWAY_LAST_GAME_HOME_WIN_PCTG,AWAY_NUM_REST_DAYS,AWAY_LAST_GAME_AWAY_WIN_PCTG,AWAY_LAST_GAME_TOTAL_WIN_PCTG,AWAY_LAST_GAME_ROLLING_SCORING_MARGIN,AWAY_LAST_GAME_ROLLING_OE
0,0.572650,0.275000,2.0,0.219512,0.246914,-7.333333,0.567699,0,2021-22,0.551402,0.658537,2.0,0.375000,0.518519,0.333333,0.582568
1,0.641667,0.700000,3.0,0.600000,0.650000,17.333333,0.624980,1,2021-22,0.589744,0.658537,2.0,0.384615,0.525000,4.000000,0.576985
2,0.606557,0.650000,1.0,0.384615,0.518987,7.000000,0.589099,1,2021-22,0.671429,0.525000,1.0,0.358974,0.443038,2.666667,0.626338
3,0.495413,0.552632,2.0,0.600000,0.576923,10.333333,0.538786,1,2021-22,0.534653,0.650000,3.0,0.394737,0.525641,16.333333,0.590617
4,0.626087,0.641026,2.0,0.394737,0.519481,17.000000,0.631844,1,2021-22,0.546763,0.447368,2.0,0.589744,0.519481,-1.000000,0.576689
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,0.577236,0.500000,1.0,0.470588,0.484848,5.333333,0.601310,0,2021-22,0.601852,0.676471,3.0,0.500000,0.590909,9.333333,0.612938
98,0.652893,0.666667,3.0,0.500000,0.584615,0.333333,0.599456,1,2021-22,0.557522,0.419355,3.0,0.575758,0.500000,-14.333333,0.534942
99,0.584071,0.656250,2.0,0.500000,0.578125,-1.000000,0.586307,1,2021-22,0.610169,0.677419,3.0,0.687500,0.682540,4.666667,0.578496
100,0.597015,0.437500,2.0,0.206897,0.327869,-2.666667,0.620029,1,2021-22,0.613445,0.645161,1.0,0.516129,0.580645,10.333333,0.607357


In [65]:
validation = data[data['SEASON'] == '2023-24']
modelData = data[data['SEASON'] != '2023-24'].sample(frac=1)

In [66]:
X = modelData.drop(['HOME_W','SEASON'],axis=1)
y = modelData['HOME_W']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=.33)

# Standard Scaling Prediction Variables
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
scaled_data_train = scaler.transform(X_train)

scaler.fit(X_test)
scaled_data_test = scaler.transform(X_test)

In [68]:
#Logistic Regression

model = LogisticRegression()
model.fit(scaled_data_train,y_train)
model.score(scaled_data_test,y_test)

0.6372924648786717

In [69]:
F1Score = cross_val_score(model,scaled_data_test,y_test,cv=12,scoring='f1_macro');
print("Logistic Model F1 Accuracy: %0.2f (+/- %0.2f)"%(F1Score.mean(), F1Score.std() *2))

Logistic Model F1 Accuracy: 0.60 (+/- 0.08)


In [70]:
# Test Set Review

y_pred = model.predict(scaled_data_test)
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.60      0.45      0.51       334
           1       0.65      0.78      0.71       449

    accuracy                           0.64       783
   macro avg       0.63      0.61      0.61       783
weighted avg       0.63      0.64      0.63       783



In [72]:
#Validation Set review

# Standard Scaling Prediction Variables
scaler = preprocessing.StandardScaler()
scaler.fit(validation.drop(['HOME_W','SEASON'],axis=1))
scaled_val_data = scaler.transform(validation.drop(['HOME_W','SEASON'],axis=1))

In [73]:
y_pred = model.predict(scaled_val_data)
print(classification_report(validation['HOME_W'],y_pred))

              precision    recall  f1-score   support

           0       0.61      0.47      0.53       217
           1       0.67      0.78      0.72       297

    accuracy                           0.65       514
   macro avg       0.64      0.62      0.62       514
weighted avg       0.64      0.65      0.64       514

