In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
import keras_tuner as kt

# For reproducibility
np.random.seed(42)

# Load the data
df = pd.read_csv("/home/swisnoski/nba_predictor_development/models/data/combined_data_2010-2023.csv")

# Replace win/loss indicators with binary values
df['TEAM_1_WIN/LOSS'] = df['TEAM_1_WIN/LOSS'].replace({100: 1, 0: 0})

# Drop rows where the target variable contains NaN
df = df.dropna(subset=['TEAM_1_WIN/LOSS'])

# Define features and target
X = df[[
        "TEAM_1_HOME/AWAY",
        "TEAM_1_PTS",
        "TEAM_1_POSS",
        "TEAM_1_PPP",
        "TEAM_1_DEF_PPP",
        "TEAM_1_TS%",
        "TEAM_1_eFG%",
        "TEAM_1_FGA",
        "TEAM_1_FGM",
        "TEAM_1_FG3M",
        "TEAM_1_FTA",
        "TEAM_1_FG_PCT",
        "TEAM_1_OREB",
        "TEAM_1_DREB",
        "TEAM_1_AST",
        "TEAM_1_TOV",
        "TEAM_1_WIN_PCT",
        "TEAM_2_HOME/AWAY",
        "TEAM_2_PTS",
        "TEAM_2_POSS",
        "TEAM_2_PPP",
        "TEAM_2_DEF_PPP",
        "TEAM_2_TS%",
        "TEAM_2_eFG%",
        "TEAM_2_FGA",
        "TEAM_2_FGM",
        "TEAM_2_FG3M",
        "TEAM_2_FTA",
        "TEAM_2_FG_PCT",
        "TEAM_2_OREB",
        "TEAM_2_DREB",
        "TEAM_2_AST",
        "TEAM_2_TOV",
        "TEAM_2_WIN_PCT",
    ]]  
y = df['TEAM_1_WIN/LOSS']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the target variable
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# Define the model
model = Sequential()
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))  

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Neural Network accuracy: {accuracy * 100:.2f}%")

# Make predictions
y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
y_test_true = np.argmax(y_test, axis=1)

# Evaluate the model
print('Neural Network classification report:')
print(classification_report(y_test_true, y_pred))

# Confusion matrix
conf_matrix_nn = confusion_matrix(y_test_true, y_pred)
sns.heatmap(conf_matrix_nn, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5671 - loss: 0.7110 - val_accuracy: 0.6489 - val_loss: 0.6347
Epoch 2/50
[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6205 - loss: 0.6485 - val_accuracy: 0.6714 - val_loss: 0.6156
Epoch 3/50
[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6429 - loss: 0.6268 - val_accuracy: 0.6725 - val_loss: 0.6076
Epoch 4/50
[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6640 - loss: 0.6245 - val_accuracy: 0.6789 - val_loss: 0.6042
Epoch 5/50
[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6664 - loss: 0.6206 - val_accuracy: 0.6770 - val_loss: 0.5999
Epoch 6/50
[1m334/334[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6657 - loss: 0.6209 - val_accuracy: 0.6759 - val_loss: 0.5949
Epoch 7/50
[1m334/334[0m [32m━━━━━━━