In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

#  Import and read the charity_data.csv.
import pandas as pd 
nba_df = pd.read_csv("merged_df3.5.csv")
nba_df.head()

Unnamed: 0,game_date_est,game_id,home_team,away_team,home_team_wins,pts_home,pts_away,pts_comb_actual,book_name_over,total1_over,over_odds,book_name_under,total1_under,under_odds,over_successful,under_successful
0,9/30/2017,11700001,GSW,DEN,0,102,108,210,Intertops,223.0,-110,Pinnacle Sports,222.5,108,0,1
1,9/30/2017,11700002,LAL,MIN,0,99,108,207,Pinnacle Sports,214.5,-110,Bookmaker,216.0,-102,0,1
2,11/1/2017,21700111,NYK,HOU,0,97,119,216,5Dimes,214.0,-107,Pinnacle Sports,215.5,-102,1,0
3,11/1/2017,21700107,PHI,ATL,1,119,109,228,Pinnacle Sports,209.0,-104,5Dimes,209.0,-105,1,0
4,11/1/2017,21700108,WAS,PHX,0,116,122,238,Pinnacle Sports,227.5,-105,Heritage,228.0,-105,1,0


In [2]:
# Determine the number of unique values in each column.
nba_df.nunique()

game_date_est        198
game_id             1210
home_team             30
away_team             30
home_team_wins         2
pts_home              68
pts_away              68
pts_comb_actual      106
book_name_over         9
total1_over           92
over_odds             21
book_name_under        8
total1_under          89
under_odds            22
over_successful        2
under_successful       2
dtype: int64

In [3]:
# Look at over_odds value counts
over_counts = nba_df['over_odds'].value_counts()
over_counts

-105    555
-103    138
-102    116
-110     66
-104     55
-101     48
-107     43
-108     37
-100     37
 100     26
 102     22
-115     13
-106     13
 101     12
-109      8
 103      8
 104      6
 105      4
-111      1
 106      1
 109      1
Name: over_odds, dtype: int64

In [4]:
# Choose a cutoff value and create a list of over types to be replaced
# use the variable name `over_types_to_replace`
over_types_to_replace = list(over_counts[over_counts < 10].index)

# Replace in dataframe
for over in over_types_to_replace:
    nba_df['over_odds'] = nba_df['over_odds'].replace(
        over, "Other")

# Check to make sure binning was successful
nba_df['over_odds'].value_counts()

-105     555
-103     138
-102     116
-110      66
-104      55
-101      48
-107      43
-108      37
-100      37
Other     29
100       26
102       22
-106      13
-115      13
101       12
Name: over_odds, dtype: int64

In [5]:
# Look at under_odds value counts
under_counts = nba_df['under_odds'].value_counts()
under_counts

-105    655
-103    108
-102     77
-104     63
-100     59
-110     57
-101     35
-115     25
 102     21
-107     19
 100     16
-108     15
-106     15
-109     14
 101      8
 104      6
 105      5
 103      4
 109      3
-113      2
 108      2
-111      1
Name: under_odds, dtype: int64

In [6]:
# Choose a cutoff value and create a list of under_odd types to be replaced
# use the variable name `under_types_to_replace`
under_types_to_replace = list(under_counts[under_counts < 10].index)

# Replace in dataframe
for under in under_types_to_replace:
    nba_df['under_odds'] = nba_df['under_odds'].replace(
        under, "Other")

# Check to make sure binning was successful
nba_df['under_odds'].value_counts()

-105     655
-103     108
-102      77
-104      63
-100      59
-110      57
-101      35
Other     31
-115      25
102       21
-107      19
100       16
-108      15
-106      15
-109      14
Name: under_odds, dtype: int64

In [7]:
# Convert categorical data to numeric with `pd.get_dummies`
nba_numeric = pd.get_dummies(nba_df)
nba_numeric.head()

Unnamed: 0,game_id,home_team_wins,pts_home,pts_away,pts_comb_actual,total1_over,total1_under,over_successful,under_successful,game_date_est_1/1/2018,...,under_odds_-106,under_odds_-105,under_odds_-104,under_odds_-103,under_odds_-102,under_odds_-101,under_odds_-100,under_odds_100,under_odds_102,under_odds_Other
0,11700001,0,102,108,210,223.0,222.5,0,1,0,...,0,0,0,0,0,0,0,0,0,1
1,11700002,0,99,108,207,214.5,216.0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
2,21700111,0,97,119,216,214.0,215.5,1,0,0,...,0,0,0,0,1,0,0,0,0,0
3,21700107,1,119,109,228,209.0,209.0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
4,21700108,0,116,122,238,227.5,228.0,1,0,0,...,0,1,0,0,0,0,0,0,0,0


In [8]:
# Split our preprocessed data into our features and target arrays
X = nba_numeric.drop(['under_successful'], axis=1)
y = nba_numeric['over_successful']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=58)

In [9]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [10]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 10
hidden_nodes_layer2 = 8
hidden_nodes_layer3= 6

nba_model = tf.keras.models.Sequential()

# First hidden layer
nba_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1,
             input_dim=number_input_features, activation="sigmoid"))

# Second hidden layer
nba_model.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer2, activation="sigmoid"))

# Third hidden layer
nba_model.add(tf.keras.layers.Dense(
    units=hidden_nodes_layer3, activation="sigmoid"))

# Output layer
nba_model.add(tf.keras.layers.Dense(units=1, activation="relu"))


# Check the structure of the model
nba_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                3140      
                                                                 
 dense_1 (Dense)             (None, 8)                 88        
                                                                 
 dense_2 (Dense)             (None, 6)                 54        
                                                                 
 dense_3 (Dense)             (None, 1)                 7         
                                                                 
Total params: 3,289
Trainable params: 3,289
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Compile the model
nba_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [12]:
# Train the model
fit_model = nba_model.fit(X_train_scaled, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [13]:
# Evaluate the model using the test data
model_loss, model_accuracy = nba_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 7.5995 - accuracy: 0.5017 - 116ms/epoch - 12ms/step
Loss: 7.599456310272217, Accuracy: 0.5016501545906067
