In [2]:
from keras.layers import Dense, Dropout, LSTM, Bidirectional, Embedding
from keras.models import Sequential
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
import time

%matplotlib inline
import matplotlib.pyplot as plt
from scipy import stats

# use seaborn plotting defaults
import seaborn as sns; sns.set()

import warnings
warnings.filterwarnings('ignore')


In [3]:
df = pd.read_csv('../FeatureEngineering/MetaData/data6_&_odds.csv')
df.dropna(inplace=True)

In [4]:
train_data = df.loc[(df.season <= 2013) & (df.season >= 2007)]
valid_data = df.loc[(df.season > 2013) & (df.season < 2016)]
test_data = df.loc[df.season >= 2016]
full_train_data = pd.concat([train_data, valid_data], axis=0)

X, y = train_data.drop(columns=['home_team_wins']), train_data.home_team_wins
valid_X, valid_y = valid_data.drop(columns=['home_team_wins']), valid_data.home_team_wins
test_X, test_y = test_data.drop(columns=['home_team_wins']), test_data.home_team_wins

# Split our data
X_train, y_train = train_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), train_data.home_team_wins
X_val, y_val = valid_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), valid_data.home_team_wins
X_test, y_test = test_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), test_data.home_team_wins


In [5]:
arr_X_train = X_train.to_numpy()
arr_X_train = arr_X_train.reshape(arr_X_train.shape[0], arr_X_train.shape[1], 1)

arr_X_val = X_val.to_numpy()
arr_X_val = arr_X_val.reshape(arr_X_val.shape[0], arr_X_val.shape[1], 1)



### Making a LSTM model

### 3X sigmoid

In [17]:
model = Sequential()
model.add(LSTM(90, activation='sigmoid', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(90, activation='sigmoid', return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])




In [48]:
# model = get_model(input_shape=(100,1))

In [None]:
model.fit(arr_X_train, y_train, epochs=100, batch_size=32)


In [10]:
# evaluate the keras model
scores = model.evaluate(arr_X_val, y_val, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 68.12


We are testing different parameters for the LSTM model

### Tanh, relu, sigmoid

In [19]:
model = Sequential()
model.add(LSTM(100, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [None]:
model.fit(arr_X_train, y_train, epochs=20, batch_size=64)


In [21]:
print(model.summary())

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 129, 100)          40800     
                                                                 
 dropout_8 (Dropout)         (None, 129, 100)          0         
                                                                 
 lstm_9 (LSTM)               (None, 129, 100)          80400     
                                                                 
 dropout_9 (Dropout)         (None, 129, 100)          0         
                                                                 
 dense_4 (Dense)             (None, 129, 1)            101       
                                                                 
Total params: 121,301
Trainable params: 121,301
Non-trainable params: 0
_________________________________________________________________
None


In [22]:
# evaluate the keras model
scores = model.evaluate(arr_X_val, y_val, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 68.81


### Recurrent dropout

In [32]:
model = Sequential()
model.add(LSTM(50, activation='tanh', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, activation='relu', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [None]:
model.fit(arr_X_train, y_train, epochs=50, batch_size=32)
print(model.summary())

In [34]:
# evaluate the keras model
scores = model.evaluate(arr_X_val, y_val, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 68.90


### Tanh, relu, sigmoid with 100 epochs

In [36]:
model = Sequential()
model.add(LSTM(50, activation='tanh', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, activation='relu', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [None]:
model.fit(arr_X_train, y_train, epochs=100, batch_size=32)


In [38]:
# evaluate the keras model
scores = model.evaluate(arr_X_val, y_val, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 69.69


## Test the models

In [6]:
train_data = df.loc[(df.season < 2016) & (df.season >= 2007)]
test_data = df.loc[df.season >= 2016]

X, y = train_data.drop(columns=['home_team_wins']), train_data.home_team_wins
test_X, test_y = test_data.drop(columns=['home_team_wins']), test_data.home_team_wins

# Split our data
X_train, y_train = train_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), train_data.home_team_wins
X_test, y_test = test_data.drop(columns=["game_date_est","season","game_id","home_team","visitor_team","home_team_id","visitor_team_id","home_team_wins","conference","conference_visitor"]), test_data.home_team_wins


In [None]:
arr_X_test = X_test.to_numpy()
arr_X_test = arr_X_test.reshape(arr_X_test.shape[0], arr_X_test.shape[1], 1)

### 3X Sigmoid

In [None]:
model = Sequential()
model.add(LSTM(90, activation='sigmoid', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(90, activation='sigmoid', return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])

In [7]:
model.fit(arr_X_train, y_train, epochs=100, batch_size=32)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x244ce02ea60>

In [None]:
# evaluate the keras model
scores = model.evaluate(arr_X_test, y_test, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 65.53


### Tanh, relu, sigmoid

In [None]:
model = Sequential()
model.add(LSTM(100, activation='tanh', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(100, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [None]:
model.fit(arr_X_train, y_train, epochs=20, batch_size=64)


In [None]:
# evaluate the keras model
scores = model.evaluate(arr_X_test, y_test, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 65.95


### Recurrent dropout

In [None]:
model = Sequential()
model.add(LSTM(50, activation='tanh', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, activation='relu', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [None]:
model.fit(arr_X_train, y_train, epochs=50, batch_size=32)


In [None]:
# evaluate the keras model
scores = model.evaluate(arr_X_test, y_test, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 66.72


### Tanh, relu, sigmoid with 100 epochs

In [None]:
model = Sequential()
model.add(LSTM(50, activation='tanh', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50, activation='relu', recurrent_dropout=0.2, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

In [None]:
model.fit(arr_X_train, y_train, epochs=100, batch_size=32)


In [None]:
# evaluate the keras model
scores = model.evaluate(arr_X_test, y_test, verbose=0)
print('Accuracy: %.2f' % (scores[1]*100))

Accuracy: 66.98
