In [None]:
!pip install pymongo

In [None]:
# Initial imports
import os
from getpass import getpass
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn as skl
import tensorflow as tf
from pymongo import MongoClient
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Never store credentials in the notebook: read the connection string from the
# MONGODB_URI environment variable, and fall back to a hidden interactive prompt
# when it is not set.
# NOTE(review): the username/password that used to be hard-coded here should be
# treated as compromised and rotated.
uri = os.environ.get('MONGODB_URI') or getpass('MongoDB connection URI: ')
mongo = MongoClient(uri)

In [None]:
# Sanity check: list the databases visible through this connection
database_names = mongo.list_database_names()
print(database_names)

In [None]:
# Get a handle on the basketball database
db = mongo.get_database('basketball_data')

In [None]:
# List the collections stored in the selected database
collection_names = db.list_collection_names()
print(collection_names)

In [None]:
# Get a handle on the collection holding the team statistics
bball_stats = db.get_collection('basketball_stats')

In [None]:
# Fetch every document in the collection (no filter) and load it into a DataFrame
all_documents = bball_stats.find()
team_stats_final_df = pd.DataFrame(all_documents)

# Preview the first rows
team_stats_final_df.head()

In [None]:
# Build the modelling inputs: y is the outcome to predict, X holds the features.
# Columns excluded from X: the Mongo `_id`, the team name columns (presumably
# identifiers), and the game/win/loss totals, which include the target itself.
non_feature_columns = ['_id', 'Tm', 'TEAM', 'games_tot', 'wins_tot', 'losses_tot']

# Outcome variable: total wins
y = team_stats_final_df['wins_tot']

# Feature matrix: every remaining column
X = team_stats_final_df.drop(columns=non_feature_columns)
X.head()

In [None]:
# Hold out a test set; random_state is fixed so the split is reproducible
split_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Standardize the features. StandardScaler is imported explicitly: accessing it as
# `skl.preprocessing` only works when another import happens to load that submodule.
# The scaler is fitted on the training split only, so no statistics from the test
# split influence the transformation.
X_scaler = StandardScaler()
X_scaler.fit(X_train)

# Apply the same fitted scaling to both splits
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Take the input width from the data instead of hard-coding it, so the model keeps
# working if feature columns are added or removed upstream.
n_features = X_train_scaled.shape[1]

# Create the Keras Sequential model
nn_model = tf.keras.models.Sequential()

# Hidden layer with 15 ReLU units; input_dim also declares the input layer
nn_model.add(tf.keras.layers.Dense(units=15, activation="relu", input_dim=n_features))

# Output layer: a single linear unit, because the target (wins) is a numeric
# regression value rather than a probability
nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Check the structure of the Sequential model
nn_model.summary()

In [None]:
# Configure training: Adam optimizer, mean squared error as the loss,
# and mean absolute error reported as an additional metric
nn_model.compile(
    optimizer="adam",
    loss="mse",
    metrics=["mae"],
)

In [None]:
# Train on the scaled training split for 100 epochs; fit() returns the training history
fit_model = nn_model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
)

In [None]:
# Evaluate the model using the test data.
# evaluate() returns the loss followed by the compiled metrics; this model was
# compiled with loss="mse" and metrics=["mae"], so the second value is the mean
# absolute error. It is not an accuracy, which is undefined for regression.
model_loss, model_mae = nn_model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss (MSE): {model_loss}, MAE: {model_mae}")

In [None]:
# Use keras hyper tuner to fine-tune the model
# First import keras tuner
!pip install keras_tuner
import keras_tuner as kt

In [None]:
# Model-building function handed to the tuner: returns a new compiled Sequential
# model whose architecture is driven by the sampled hyperparameters
def create_model(hp, input_dim=18):
    """Build and compile a feed-forward regression network for KerasTuner.

    The tuner picks one activation shared by every hidden layer, the width of
    the first layer, how many further hidden layers to stack (1 to 6), and the
    width of each of those layers. The network ends in a single linear unit and
    is compiled with MSE loss, the Adam optimizer and an MAE metric.

    Args:
        hp: Hyperparameter container supplied by the tuner; it must provide
            ``Choice`` and ``Int``.
        input_dim: Number of input features. Defaults to 18, the value that was
            previously hard-coded, so calling ``create_model(hp)`` behaves
            exactly as before.

    Returns:
        The compiled Sequential model.
    """
    nn_model = tf.keras.models.Sequential()

    # One activation function, chosen by the tuner, is shared by all hidden layers
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

    # First layer: tuner-chosen width (1 to 20 in steps of 2); also declares the input size
    first_units = hp.Int('first_units', min_value=1, max_value=20, step=2)
    nn_model.add(tf.keras.layers.Dense(units=first_units,
                                       activation=activation,
                                       input_dim=input_dim))

    # Additional hidden layers: the tuner chooses how many, and a width for each
    for i in range(hp.Int('num_layers', 1, 6)):
        layer_units = hp.Int('units_' + str(i), min_value=1, max_value=20, step=2)
        nn_model.add(tf.keras.layers.Dense(units=layer_units, activation=activation))

    # Regression output: a single linear unit
    nn_model.add(tf.keras.layers.Dense(units=1, activation="linear"))

    # Compile the model
    nn_model.compile(loss="mse", optimizer='adam', metrics=["mae"])

    return nn_model

In [None]:
# Hyperband tuner that minimizes validation loss.
# overwrite=True discards any results left in the tuner's project directory by an
# earlier run; without it, re-running the notebook silently resumes from (and
# reports) stale trials instead of searching on the current data.
tuner = kt.Hyperband(
    create_model,
    objective="val_loss",
    max_epochs=20,
    hyperband_iterations=2,
    overwrite=True)

In [None]:
# Run the kerastuner search for best hyperparameters.
# Validation uses a held-out slice of the training data, not the test set: choosing
# hyperparameters against (X_test_scaled, y_test) leaks the test set into model
# selection and makes the later test-set evaluation optimistically biased.
# y_train is converted to a NumPy array because validation_split is documented for
# NumPy arrays and tensors.
tuner.search(
    X_train_scaled,
    np.asarray(y_train),
    epochs=20,
    validation_split=0.2,
)

In [None]:
# Retrieve the top-ranked hyperparameter set from the search and show its values
top_hyperparameter_sets = tuner.get_best_hyperparameters(num_trials=1)
best_hyper = top_hyperparameter_sets[0]
best_hyper.values

In [None]:
# Evaluate best model against full test data.
# The tuned models are compiled with loss="mse" and metrics=["mae"], so evaluate()
# returns the MSE loss followed by the mean absolute error. The second value is
# not an accuracy, which is undefined for regression.
best_model = tuner.get_best_models(1)[0]
model_loss, model_mae = best_model.evaluate(X_test_scaled, y_test, verbose=2)
print(f"Loss (MSE): {model_loss}, MAE: {model_mae}")

In [None]:
# Predict wins for the test split with the tuned model
predict_wins = best_model.predict(X_test_scaled)

# Put actual and predicted values side by side; the prediction array is
# flattened to 1-D so it can be used as a column
comparison_columns = {
    'Actual Wins': y_test,
    'Predicted Wins': predict_wins.flatten(),
}
predictions_df = pd.DataFrame(comparison_columns)
predictions_df.head()

In [None]:
# Scatter of actual vs. predicted wins, with a dashed red y = x reference line
# spanning the range of actual wins (points on the line are perfect predictions)
actual_wins = predictions_df['Actual Wins']
predicted_wins = predictions_df['Predicted Wins']
reference_range = [actual_wins.min(), actual_wins.max()]

fig, ax = plt.subplots()
ax.scatter(actual_wins, predicted_wins)
ax.plot(reference_range, reference_range, linestyle='--', color='red')
ax.set_xlabel('Actual Wins')
ax.set_ylabel('Predicted Wins Using All Stats')
ax.set_title('Actual Wins vs Predicted Wins Using All Stats')
plt.show()