In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import pandas as pd

#  Import nba data
# Load the curated game stats CSV and print a plain-text preview of its first rows.
nba_gamestats_df = pd.read_csv("Basketball_Data/curated_game_stats.csv")
print(nba_gamestats_df.head())

# Load the advanced game stats CSV; head() is the cell's last expression, so it is what the cell displays.
nba_advancedgame_df = pd.read_csv("Basketball_Data/Advanced_Game_Stats_2001-2003.csv")
nba_advancedgame_df.head()

In [None]:
#Check types for nba game stats
# Shows the dtype pandas assigned to each column of the game stats frame.
nba_gamestats_df.dtypes

In [None]:
#Check types for nba advanced stats
# Shows the dtype pandas assigned to each column of the advanced stats frame.
nba_advancedgame_df.dtypes

In [None]:
# Sanity check before merging: take the Date of the advanced-stats row labelled 0
# and list the game-stats rows that carry exactly the same Date value.
first_advanced_date = nba_advancedgame_df.loc[0, "Date"]
nba_gamestats_df.loc[nba_gamestats_df["Date"] == first_advanced_date]

In [None]:
# Remove the advanced-stats columns that are not needed and give the
# "Unnamed: 4" column the name "Home/Away".
unused_advanced_cols = [
    "Result", "Rk", "FTr", "3PAr", "eFG%.1", "FT/FGA",
    "ORtg.1", "FTr.1", "3PAr.1", "TS%.1", "eFG%.2", "FT/FGA.1",
]

nba_advancedgame_df = (
    nba_advancedgame_df
    .drop(columns=unused_advanced_cols)
    .rename(columns={"Unnamed: 4": "Home/Away"})
)

nba_advancedgame_df.head()

In [None]:
# Remove the "Unnamed: 0" column from the game stats frame, then preview the result.
nba_gamestats_df = nba_gamestats_df.drop(columns="Unnamed: 0")

nba_gamestats_df.head()

In [None]:
# Join game stats with advanced stats on the (Date, Team, Opp) key.
# No `how` is given, so this is pandas' default inner join.
complete_stats_df = nba_gamestats_df.merge(nba_advancedgame_df, on=["Date", "Team", "Opp"])

In [None]:
# Display the merged frame to inspect the result of the join.
complete_stats_df

In [None]:
# Cast the counting/score columns to float and the categorical text columns
# to pandas' string dtype, then show the resulting dtypes.
float_columns = ['PTS', 'Opponent_Points', 'FG', 'FGA', '2PA', '3P', '3PA']
string_columns = ['Home/Away', 'Team', 'Opp']

dtype_by_column = {
    **dict.fromkeys(float_columns, 'float'),
    **dict.fromkeys(string_columns, 'string'),
}
complete_stats_df = complete_stats_df.astype(dtype_by_column)

complete_stats_df.dtypes

In [None]:
#Add Column and convert home/away
# Point difference from the team's perspective: positive means the team outscored its opponent.
complete_stats_df["Pts Difference"] = complete_stats_df["PTS"] - complete_stats_df["Opponent_Points"]

# '@' is mapped to "Away"; an empty or missing marker is mapped to "Home".
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that chained in-place form
# operates on an intermediate object, emits a warning in pandas 2.x, and does
# not update the frame when copy-on-write is enabled.
complete_stats_df['Home/Away'] = (
    complete_stats_df['Home/Away']
    .replace(['@', ''], ['Away', 'Home'])
    .fillna("Home")
)

complete_stats_df.head()

In [None]:
# The Date column is dropped here so it does not reach the feature set built from this frame.
complete_stats_df = complete_stats_df.drop(columns=["Date"])

In [None]:
# Keep an independent copy that still holds the raw "Pts Difference" values;
# the cells that follow overwrite that column in complete_stats_df with 0/1 labels.
for_points_df = complete_stats_df.copy()

In [None]:
# Label wins (non-negative point difference) as 1.
# The mask is built from the raw differences preserved in for_points_df rather
# than from the column being overwritten, so re-running this cell after losses
# have been set to 0 cannot turn those 0s into wins (0 >= 0 would match).
# for_points_df is a copy of this frame, so the two share the same index.
raw_pts_difference = for_points_df["Pts Difference"]
complete_stats_df.loc[raw_pts_difference >= 0, "Pts Difference"] = 1

In [None]:
# Label losses: every row whose "Pts Difference" is negative gets the value 0.
complete_stats_df.loc[complete_stats_df["Pts Difference"] < 0, "Pts Difference"] = 0

In [None]:
# Preview the frame after the "Pts Difference" column has been rewritten.
complete_stats_df.head()

# Test Model for Win/Loss Prediction

In [None]:
# Target is the "Pts Difference" column; features are every remaining column
# except the Team / Opp identifiers.
label_col = 'Pts Difference'
y = complete_stats_df[label_col]
X = complete_stats_df.drop(columns=["Team", "Opp", label_col])


In [None]:
#One Hot Encoding
# Replace the "Home/Away" column with one indicator column per distinct value.
# Not re-runnable as-is: after the first run X no longer has a "Home/Away" column.
X = pd.get_dummies(X, columns=["Home/Away"])

In [None]:
# Preview the encoded feature frame.
X.head()

In [None]:
# Split the preprocessed data into a training and testing dataset
# random_state is fixed so the split is reproducible; no test_size is passed,
# so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42)

In [None]:
# Standardize the features. The scaler is fitted on the training split only,
# and those fitted statistics are then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
#Instantiate Neural Network Model
# Binary classifier: two ReLU hidden layers followed by a single sigmoid output unit.
nn_model = tf.keras.models.Sequential()

n_features = X_train_scaled.shape[1]
unit = 2 * n_features

#Dense Layers
nn_model.add(tf.keras.layers.Dense(units=unit, activation="relu", input_dim=n_features))
# Dense expects a positive integer for `units`; `unit/4` is a float, so use
# floor division (same value the float would truncate to) and never go below 1.
nn_model.add(tf.keras.layers.Dense(units=max(1, unit // 4), activation="relu"))

#Output Layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))


In [None]:
#Summary of Neural Network
# Prints the layers of nn_model with their output shapes and parameter counts.
nn_model.summary()

In [None]:
#Compile the model
# Binary cross-entropy matches the single sigmoid output unit and the 0/1 target.
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

#Train model
# Fit on the standardized training features: the model is evaluated on
# X_test_scaled, so training on the raw X_train would put the training and
# evaluation inputs on different scales.
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=200)

In [None]:


# Evaluate the classifier on the standardized held-out split; evaluate() returns
# the loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
#Old copy of using one hot encoding on teams/opp teams
# NOTE(review): this runs the same evaluation (same nn_model, X_test_scaled and y_test)
# as the evaluation cell before it. Presumably it was kept to preserve the output of an
# earlier experiment that one-hot encoded Team/Opp -- confirm whether it is still needed.
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Select the feature rows for SAC vs. LAC games.
# X has no "Team_SAC" / "Opp_LAC" columns (Team and Opp were dropped before
# encoding and only Home/Away was one-hot encoded), so build the mask from the
# Team / Opp columns of complete_stats_df instead. X was derived from that frame
# without reindexing, so the boolean mask lines up with X's rows.
# fillna(False) treats rows with a missing Team/Opp value as non-matching.
matchup_mask = (
    complete_stats_df["Team"].eq("SAC") & complete_stats_df["Opp"].eq("LAC")
).fillna(False).astype(bool)
test_predict = X.loc[matchup_mask].copy()

In [None]:
# Scale the rows to predict on with the SAME scaler that was fitted on the
# training split. Fitting a fresh scaler on test_predict would standardize with
# different means/variances than the model was trained against, and the rows
# that need transforming are test_predict itself, not X_train.
X_scaler_test = X_scaler

# Scale the data
X_predict = X_scaler_test.transform(test_predict)
X_predict_test = X_scaler_test.transform(X_test)

In [None]:
# Round the sigmoid outputs for X_predict to the nearest integer to get 0/1 class predictions.
nn_model.predict(X_predict).round()

In [None]:
# Count how many of the rounded predictions for X_predict fall into each class.
pd.DataFrame(nn_model.predict(X_predict, batch_size=128).round()).value_counts()

# Test Model to Predict Point Differential


In [None]:
# Display the copy that still carries the raw (non-binarized) "Pts Difference" values.
for_points_df

In [None]:
# Regression setup: the target is the raw point difference, the features are
# all other columns except the Team / Opp identifiers, with Home/Away one-hot encoded.
y_points = for_points_df['Pts Difference']
X_points = pd.get_dummies(
    for_points_df.drop(columns=['Pts Difference', "Team", "Opp"]),
    columns=["Home/Away"],
)

X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(
    X_points, y_points, random_state=42
)

# Fit the scaler on the training split only, then transform both splits with it.
scaler_p = StandardScaler()
X_scaler_p = scaler_p.fit(X_train_p)
X_train_scaled_p = X_scaler_p.transform(X_train_p)
X_test_scaled_p = X_scaler_p.transform(X_test_p)

# Two ReLU hidden layers, each as wide as the feature count, feeding a single
# output unit with no activation (linear output).
unit = X_train_scaled_p.shape[1]
points_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=unit, activation="relu", input_dim=X_train_scaled_p.shape[1]),
    tf.keras.layers.Dense(units=unit, activation="relu"),
    tf.keras.layers.Dense(units=1),
])

In [None]:
#Summary of Neural Network
# Prints the layers of points_model with their output shapes and parameter counts.
points_model.summary()

In [None]:
#Compile the model
points_model.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])

#Train model
fit_model_p = points_model.fit(X_train_p, y_train_p, epochs=200)

In [None]:
model_loss_p, model_accuracy_p = points_model.evaluate(X_test_p,y_test_p,verbose=2)
print(f"Loss: {model_loss_p}, Accuracy: {model_accuracy_p}")

In [None]:
# Display the encoded feature frame used for the point-difference model.
X_points
