In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Input
from keras.optimizers import Adam
import os

# Note: This notebook writes the normalization parameters to 'norm_params.csv' and saves the model under 'models/' (both relative to the working directory). Change those paths in the cells below if you want the files stored elsewhere.

In [4]:
# Load and combine datasets
dataset_path = "datasets/nba-shot-chart-dataset-2000-2024"

# os.listdir() returns directory entries in arbitrary order. Sorting the paths keeps
# the row order of combined_df stable between runs, which the seeded train/test
# split further down relies on to be reproducible.
file_paths = sorted(
    os.path.join(dataset_path, file_name)
    for file_name in os.listdir(dataset_path)
    if file_name.endswith('.csv')
)
print("Number of dataset files: ", len(file_paths))

# Read every CSV file and stack them into one frame with a fresh 0..n-1 index
dfs = [pd.read_csv(file_path) for file_path in file_paths]
combined_df = pd.concat(dfs, ignore_index=True)
print("Total number dataset rows: ", combined_df.shape[0])

Number of dataset files:  4449
Total number dataset rows:  4613003


In [5]:
# Data cleaning and preprocessing
# Drop rows that are missing the label or either shot coordinate. The explicit
# .copy() makes cleaned_df an independent frame, so the column assignment below
# cannot raise pandas' SettingWithCopyWarning.
cleaned_df = combined_df.dropna(subset=['made', 'shotX', 'shotY']).copy()
cleaned_df['made'] = cleaned_df['made'].astype(int)

# shotX / shotY are intentionally left in raw units here. They are standardised
# exactly once, in the "Global normalization" step, so that the mean/std saved to
# norm_params.csv describe the raw coordinate scale instead of already-normalised
# data (which would give a mean of ~0 and a std of ~1).


In [6]:
# Global normalization
# The statistics are computed from the raw (un-normalised) coordinates of the usable
# rows, so the values saved for the prediction script can be applied directly to raw
# shot locations.
raw_shots = combined_df.dropna(subset=['made', 'shotX', 'shotY'])
x_mean = raw_shots['shotX'].mean()
x_std = raw_shots['shotX'].std()
y_mean = raw_shots['shotY'].mean()
y_std = raw_shots['shotY'].std()

# cleaned_df is rebuilt from the raw rows rather than rescaled in place, so running
# this cell again always yields the same result and never normalises twice.
cleaned_df = raw_shots.assign(
    shotX=(raw_shots['shotX'] - x_mean) / x_std,
    shotY=(raw_shots['shotY'] - y_mean) / y_std,
    made=raw_shots['made'].astype(int),
)



In [7]:
# Persist the normalization statistics so the prediction script can reuse them.
# The CSV holds a single row with one column per statistic and no index column.
norm_params = dict(x_mean=x_mean, x_std=x_std, y_mean=y_mean, y_std=y_std)
norm_params_df = pd.DataFrame(data=norm_params, index=[0])
norm_params_df.to_csv('norm_params.csv', index=False)

In [8]:
# Separate the two coordinate features from the binary target, then hold out
# 20% of the rows as a test set (fixed seed so the split is repeatable).
feature_columns = ['shotX', 'shotY']
X = cleaned_df[feature_columns]
y = cleaned_df['made']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Model definition
from keras.layers import Dropout

# Fully connected binary classifier: the input width follows the number of feature
# columns, the first two hidden layers are each followed by 50% dropout, and the
# single sigmoid unit outputs a value in (0, 1).
n_features = X_train.shape[1]
layer_stack = [
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
]
model = Sequential(layer_stack)

# Model compilation
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)



In [31]:
# Model training
# Fit for 10 epochs in mini-batches of 32; validation_split=0.2 sets aside 20% of
# the training data for the per-epoch validation metrics. The returned History
# object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/10
[1m92261/92261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 665us/step - accuracy: 0.6149 - loss: 0.6650 - val_accuracy: 0.6172 - val_loss: 0.6622
Epoch 2/10
[1m92261/92261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 596us/step - accuracy: 0.6167 - loss: 0.6632 - val_accuracy: 0.6178 - val_loss: 0.6621
Epoch 3/10
[1m92261/92261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 608us/step - accuracy: 0.6173 - loss: 0.6627 - val_accuracy: 0.6178 - val_loss: 0.6620
Epoch 4/10
[1m92261/92261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 570us/step - accuracy: 0.6169 - loss: 0.6630 - val_accuracy: 0.6158 - val_loss: 0.6627
Epoch 5/10
[1m92261/92261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 586us/step - accuracy: 0.6172 - loss: 0.6629 - val_accuracy: 0.6169 - val_loss: 0.6622
Epoch 6/10
[1m92261/92261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 594us/step - accuracy: 0.6174 - loss: 0.6629 - val_accuracy: 0.6174 - val

In [20]:
# Model evaluation
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
evaluation = model.evaluate(X_test, y_test)
test_loss, test_accuracy = evaluation[0], evaluation[1]
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


[1m28832/28832[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 237us/step - accuracy: 0.6191 - loss: 0.6606
Test Loss: 0.660916805267334, Test Accuracy: 0.6183420419692993


In [29]:
import os
import re

def save_model_with_version_control(model, base_directory='models', base_name='nba_shot_chart_model'):
    """Save ``model`` as the current model file, archiving any previous one.

    The current model always lives at ``<base_directory>/<base_name>.keras``.
    If that file already exists it is first renamed to
    ``<base_name>_v<N>.keras``, where ``N`` is one more than the highest
    version number already present in ``base_directory``.

    Args:
        model: Object exposing ``save(path)``; it is called with the path of
            the current model file.
        base_directory: Directory holding the current and archived model
            files. Created if it does not exist.
        base_name: File-name stem (without extension) shared by the current
            and archived files. It is treated as literal text.
    """
    # Create the models directory if it doesn't exist
    os.makedirs(base_directory, exist_ok=True)

    base_model_path = os.path.join(base_directory, f"{base_name}.keras")

    # base_name is escaped so regex metacharacters in it (e.g. '+' or '.') are
    # matched literally. Without this, archived versions of such a name are not
    # recognised and the next archive step would reuse, and overwrite, "_v1".
    version_pattern = re.compile(rf"{re.escape(base_name)}_v(\d+)\.keras")

    # Highest archived version number currently on disk (0 when there is none)
    matches = map(version_pattern.fullmatch, os.listdir(base_directory))
    highest_version = max(
        (int(match.group(1)) for match in matches if match),
        default=0,
    )

    # Archive the existing current model under the next free version number
    if os.path.exists(base_model_path):
        new_version_path = os.path.join(
            base_directory, f"{base_name}_v{highest_version + 1}.keras"
        )
        os.rename(base_model_path, new_version_path)

    # Save the new current model
    model.save(base_model_path)

# Usage: store the trained model under the default directory and base name
save_model_with_version_control(model=model)
