In [3]:
import numpy as np
import pandas as pd
from nba_api.stats.endpoints import teamgamelog
from sklearn.inspection import permutation_importance
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

def get_team_game_log(team_id, season):
    """Fetch one team's game log for one season.

    Queries nba_api's ``TeamGameLog`` endpoint with the given ``team_id`` and
    ``season`` and returns the first data frame of the response.
    """
    return teamgamelog.TeamGameLog(team_id=team_id, season=season).get_data_frames()[0]

# Teams and seasons whose game logs make up the training/evaluation data.
team_ids = ['1610612744', '1610612737', '1610612738']  # Warriors, Hawks, Celtics
seasons = ['2020-21', '2021-22', '2022-23']

# One game-log frame per (team, season) pair, each tagged with the team and
# season it was requested for.
all_data = [
    get_team_game_log(team_id, season).assign(TEAM_ID=team_id, SEASON=season)
    for team_id in team_ids
    for season in seasons
]

# Stack every log into a single frame with a fresh 0..n-1 index.
df = pd.concat(all_data, ignore_index=True)

# Game-log columns used as model inputs.
features = [
    'FGM', 'FGA', 'FG_PCT',
    'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
]

X = df[features]
y = df['WL']

# Turn the WL labels into integer class codes; label_encoder is kept so the
# codes can be mapped back to the original labels later.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_encoded = label_encoder.transform(y)

# Hold out 20% of the games for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Standardise using statistics learned from the training split only, then
# apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyperparameter tuning
# NOTE: per the scikit-learn docs, 'learning_rate' only has an effect when
# solver='sgd'; with the default solver the two settings below fit the same
# model, so that axis doubles the search cost without changing the result.
param_grid = {
    'hidden_layer_sizes': [(100,), (100, 50), (100, 100, 50)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
}

# MLP Classifier Model
mlp_model = MLPClassifier(max_iter=1000, random_state=42)

# Cross-validate a scaler+MLP pipeline on the *unscaled* training data so that
# every CV fold is standardised with statistics from its own training portion.
# Searching on features that were already scaled with the full training set
# would leak each validation fold into the preprocessing step.
cv_pipeline = Pipeline([('scaler', StandardScaler()), ('mlp', mlp_model)])
pipeline_grid = {f'mlp__{name}': values for name, values in param_grid.items()}
grid_search = GridSearchCV(cv_pipeline, pipeline_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# With the default refit=True the winning pipeline is refitted on all of
# X_train, so its scaler step learns the same statistics as a StandardScaler
# fitted on X_train. The MLP step can therefore be used on its own with the
# already-scaled arrays.
best_mlp = grid_search.best_estimator_.named_steps['mlp']
y_pred = best_mlp.predict(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Drop the 'mlp__' pipeline prefix so the reported names match param_grid.
best_params = {
    name.split('__', 1)[1]: value
    for name, value in grid_search.best_params_.items()
}

print(f"Best parameters: {best_params}")
print(f"Accuracy: {accuracy:.2f}")
print(report)

# Feature importance
# Permutation importance of each input on the held-out split, averaged over
# 10 shuffles per feature.
perm_importance = permutation_importance(
    best_mlp,
    X_test_scaled,
    y_test,
    n_repeats=10,
    random_state=42,
)

# Pair each feature name with its mean importance, most important first.
feature_importance = pd.DataFrame(
    {'feature': features, 'importance': perm_importance.importances_mean}
).sort_values('importance', ascending=False)

print("\nFeature Importance:")
print(feature_importance)

# Predict on new data (2023-24 season)
# NOTE(review): the model inputs are the box-score columns of the game log
# itself, so presumably only games that already have a box score can be
# scored here - confirm this matches the intended use.
new_season = '2023-24'
new_team_id = '1610612744'  # Warriors

new_game_data = get_team_game_log(new_team_id, new_season)

if new_game_data.empty:
    # Scaling/predicting on zero rows raises inside scikit-learn; report the
    # empty log instead of crashing.
    print(f"\nNo games found for team {new_team_id} in season {new_season}; nothing to predict.")
else:
    new_game_features = new_game_data[features]
    new_game_features_scaled = scaler.transform(new_game_features)

    predictions = best_mlp.predict(new_game_features_scaled)
    probabilities = best_mlp.predict_proba(new_game_features_scaled)

    # predict_proba columns follow best_mlp.classes_; look up the column that
    # corresponds to the 'W' label instead of assuming it is column 1.
    win_code = label_encoder.transform(['W'])[0]
    win_column = list(best_mlp.classes_).index(win_code)

    new_game_data['Predicted_Outcome'] = label_encoder.inverse_transform(predictions)
    new_game_data['Win_Probability'] = probabilities[:, win_column]

    print("\nPredictions for new games (2023-24 season):")
    print(new_game_data[['GAME_DATE', 'MATCHUP', 'Predicted_Outcome', 'Win_Probability']])


Best parameters: {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (100, 50), 'learning_rate': 'constant'}
Accuracy: 0.82
              precision    recall  f1-score   support

           0       0.78      0.78      0.78        59
           1       0.84      0.84      0.84        83

    accuracy                           0.82       142
   macro avg       0.81      0.81      0.81       142
weighted avg       0.82      0.82      0.82       142


Feature Importance:
    feature  importance
2    FG_PCT    0.073944
1       FGA    0.072535
15      TOV    0.071127
10     DREB    0.059155
11      REB    0.037324
5   FG3_PCT    0.034507
13      STL    0.032394
16       PF    0.022535
17      PTS    0.021127
3      FG3M    0.019718
12      AST    0.016901
8    FT_PCT    0.014085
9      OREB    0.012676
4      FG3A    0.011268
6       FTM    0.001408
0       FGM    0.001408
7       FTA   -0.002113
14      BLK   -0.008451

Predictions for new games (2023-24 season):
       GAME_DATE 