In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

In [3]:
# column names handed to read_csv through `names`
columns = [
    'date',
    'home', 'homeScore',
    'away', 'awayScore',
    'averageHomeScore', 'averageHomeDefence',
    'averageAwayScore', 'averageAwayDefence',
]
games_csv_path = './drive/MyDrive/Datasets/BasketballScores/clean_games.csv'
data = pd.read_csv(games_csv_path, names=columns)

In [4]:
# display the loaded frame to check how the columns were parsed
data

Unnamed: 0,date,home,homeScore,away,awayScore,averageHomeScore,averageHomeDefence,averageAwayScore,averageAwayDefence
0,2015-11-13,Hawaii,87,Montana State,76,87.000000,76.000000,76.000000,87.000000
1,2015-11-13,Eastern Michigan,70,Vermont,50,70.000000,50.000000,50.000000,70.000000
2,2015-11-13,Yale,70,Fairfield,57,70.000000,57.000000,57.000000,70.000000
3,2015-11-13,Central Michigan,89,Jacksonville State,83,89.000000,83.000000,83.000000,89.000000
4,2015-11-13,Iowa State,68,Colorado,62,68.000000,62.000000,62.000000,68.000000
...,...,...,...,...,...,...,...,...,...
20155,2019-03-21,Charleston Southern,68,Florida Atlantic,66,80.058824,63.529412,67.777778,70.666667
20156,2019-03-21,Louisiana-Monroe,87,Kent State,77,85.062500,74.562500,71.466667,75.200000
20157,2019-03-21,Cal State Fullerton,58,Cal State Bakersfield,66,71.437500,65.500000,67.388889,72.666667
20158,2019-03-22,Southern Utah,80,Drake,73,80.666667,72.333333,70.200000,70.066667


In [5]:
# create results from home - defence
# Target = averageHomeScore column minus averageHomeDefence column.
# NOTE(review): both columns are also kept as model inputs (the column drop
# further down retains all four average* columns), so the target is an exact
# linear function of the features. Confirm this is the intended label rather
# than e.g. the actual game margin (homeScore - awayScore).
labels = data['averageHomeScore'] - data['averageHomeDefence']

In [6]:
# inspect the computed target values
labels

0        11.000000
1        20.000000
2        13.000000
3         6.000000
4         6.000000
           ...    
20155    16.529412
20156    10.500000
20157     5.937500
20158     8.333333
20159    10.476190
Length: 20160, dtype: float64

In [7]:
# SPLIT DATA INTO TRAIN/TEST/VALIDATE

# Keep only the four average* columns as model inputs: date, team names and
# the raw scores are not used.
# The frame is rebound (no inplace mutation) and already-missing columns are
# ignored, so re-running this cell does not raise a KeyError.
data = data.drop(
    columns=['date', 'home', 'homeScore', 'away', 'awayScore'],
    errors='ignore',
)
data.head(5)

Unnamed: 0,averageHomeScore,averageHomeDefence,averageAwayScore,averageAwayDefence
0,87.0,76.0,76.0,87.0
1,70.0,50.0,50.0,70.0
2,70.0,57.0,57.0,70.0
3,89.0,83.0,83.0,89.0
4,68.0,62.0,62.0,68.0


In [8]:
# attach the target as a 'results' column so features and labels are sampled together
data = data.assign(results=labels)
data

Unnamed: 0,averageHomeScore,averageHomeDefence,averageAwayScore,averageAwayDefence,results
0,87.000000,76.000000,76.000000,87.000000,11.000000
1,70.000000,50.000000,50.000000,70.000000,20.000000
2,70.000000,57.000000,57.000000,70.000000,13.000000
3,89.000000,83.000000,83.000000,89.000000,6.000000
4,68.000000,62.000000,62.000000,68.000000,6.000000
...,...,...,...,...,...
20155,80.058824,63.529412,67.777778,70.666667,16.529412
20156,85.062500,74.562500,71.466667,75.200000,10.500000
20157,71.437500,65.500000,67.388889,72.666667,5.937500
20158,80.666667,72.333333,70.200000,70.066667,8.333333


In [9]:
# 70/30 random split; the fixed random_state makes the sample repeatable
train = data.sample(frac=0.7, random_state=0)
# every row that was not drawn for training goes to the test set
test = data.drop(index=train.index)

for message, subset in (
    ('Training set size: {}', train),
    ('Test set size: {}', test),
):
  print(message.format(len(subset)))

Training set size: 14112
Test set size: 6048


In [10]:
# pop() returns the 'results' column and removes it from the frame in place,
# so train/test are left with the feature columns only
train_labels, test_labels = (subset.pop('results') for subset in (train, test))

In [11]:
# double check: label counts should equal the split sizes
n_train_labels = len(train_labels)
n_test_labels = len(test_labels)
print('Training label size: {}'.format(n_train_labels))
print('Test label size: {}'.format(n_test_labels))

Training label size: 14112
Test label size: 6048


In [12]:
# z-score normalisation, column by column: (x - mean) / std

# the statistics are computed on the training features only ...
mean, std = train.mean(axis=0), train.std(axis=0)
train_data = train.sub(mean).div(std)

# ... and the same training statistics are applied to the test features
test_data = test.sub(mean).div(std)

In [13]:
# inspect the normalised training features
train_data

Unnamed: 0,averageHomeScore,averageHomeDefence,averageAwayScore,averageAwayDefence
5951,0.463526,-0.567488,-0.108204,0.659785
15481,-1.818566,-0.045032,0.682442,0.725867
16660,1.889523,0.348884,0.335668,-0.556114
4896,-0.543132,-0.328374,-0.998081,1.066440
12061,0.537168,-0.322092,-0.274656,-0.080327
...,...,...,...,...
19703,-0.570093,0.108158,-0.774011,0.428500
14487,-0.468473,-0.451387,1.375992,0.329378
1891,0.638976,0.514743,-0.465767,0.292666
13716,0.263190,0.709628,0.212370,-0.162562


In [24]:
def create_model(units=32, activation='relu', optimizer=None, n_features=4):
  """
  Create and compile a sequential regression network.

  The network has two hidden Dense layers of `units` neurons each, followed by
  a single-neuron output layer with no activation. Called without arguments it
  builds the 32-neuron ReLU model compiled with RMSprop.

  :param units: Number of neurons in each of the two hidden layers.
  :param activation: Activation used by both hidden layers.
  :param optimizer: Optimizer handed to compile(). When None, a new
    keras.optimizers.RMSprop() is created for this model.
  :param n_features: Number of input features per sample.
  :return model: Returns the generated (compiled) model.
  """
  model = keras.models.Sequential([
    keras.layers.Dense(units, activation=activation, input_shape=[n_features]),
    keras.layers.Dense(units, activation=activation),
    keras.layers.Dense(1)
  ])

  # set loss function
  loss = keras.losses.MeanSquaredError()

  # set optimizer; the default is built per call so models never share one
  # optimizer object
  opt = keras.optimizers.RMSprop() if optimizer is None else optimizer

  # set metrics for evaluation
  # NOTE: Accuracy is not informative for a regression target (it stays at ~0
  # in the training history); it is kept because callers unpack four values
  # (loss, MAE, accuracy, MSE) from evaluate().
  metrics = [
             keras.metrics.MeanAbsoluteError(),
             keras.metrics.Accuracy(),
             keras.metrics.MeanSquaredError()
  ]

  # compile model
  model.compile(loss=loss, optimizer=opt, metrics=metrics)

  return model

In [15]:
# simulate overfit model
# baseline network built by create_model(); later cells compare variants against it
bball_model = create_model()

In [16]:
# train model
# 100 epochs; validation_split=0.2 holds out 20% of the training rows for
# validation, verbose=False silences the per-epoch log.
# The returned History object is kept for the per-epoch metrics table below.
bball_history = bball_model.fit(train_data, train_labels, epochs=100, validation_split=0.2, verbose=False)

In [19]:
# check results
# gather the per-epoch metrics recorded by fit() into a DataFrame and append
# the epoch number as an extra column (the index is left as the default)
hist1 = pd.DataFrame(bball_history.history).assign(epoch=bball_history.epoch)
hist1

Unnamed: 0,loss,mean_absolute_error,accuracy,mean_squared_error,val_loss,val_mean_absolute_error,val_accuracy,val_mean_squared_error,epoch
0,46.995575,4.490172,0.000000,46.995575,1.728603,0.902160,0.000000,1.728603,0
1,0.786715,0.544775,0.000000,0.786715,0.153058,0.266264,0.000354,0.153058,1
2,0.112935,0.205596,0.000000,0.112935,0.067415,0.210884,0.000000,0.067415,2
3,0.042518,0.141030,0.000000,0.042518,0.020115,0.097078,0.000000,0.020115,3
4,0.028097,0.118342,0.000000,0.028097,0.046999,0.183230,0.000000,0.046999,4
...,...,...,...,...,...,...,...,...,...
95,0.006553,0.058413,0.000000,0.006553,0.002322,0.037448,0.000000,0.002322,95
96,0.006430,0.055267,0.000000,0.006430,0.003527,0.043827,0.000000,0.003527,96
97,0.006523,0.056552,0.000177,0.006523,0.000180,0.010038,0.000000,0.000180,97
98,0.006476,0.054479,0.000000,0.006476,0.007730,0.072803,0.000000,0.007730,98


In [22]:
# compare with test set
# evaluate() yields four values here: the loss followed by the compiled metrics
# in their declared order (MAE, accuracy, MSE); only the MAE is kept.
_, bball_test_mae, _, _ = bball_model.evaluate(test_data, test_labels)
print('Original bball model MAE: {}'.format(bball_test_mae))

Original bball model MAE: 0.02979070134460926


In [28]:
# COMPARE DIFFERENT NUMBER OF NEURONS

# 24 neurons
def create_model_24N():
  """
  Build and compile a fully connected regression network with 24-unit layers.

  Two Dense layers of 24 ReLU units (the first declared with input_shape=[4])
  feed a single-unit Dense output with no activation. The model is compiled
  with mean squared error loss, a default RMSprop optimizer and the MAE,
  Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  width = 24
  act = 'relu'

  layer_stack = [
    keras.layers.Dense(width, activation=act, input_shape=[4]),
    keras.layers.Dense(width, activation=act),
    keras.layers.Dense(1),
  ]
  network = keras.models.Sequential(layer_stack)

  # the metric order determines the order of the values evaluate() returns
  network.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.RMSprop(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return network

In [29]:
# 12 neurons
def create_model_12N():
  """
  Build and compile a fully connected regression network with 12-unit layers.

  Two Dense layers of 12 ReLU units (the first declared with input_shape=[4])
  feed a single-unit Dense output with no activation. The model is compiled
  with mean squared error loss, a default RMSprop optimizer and the MAE,
  Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  n_units = 12

  first_hidden = keras.layers.Dense(n_units, activation='relu', input_shape=[4])
  second_hidden = keras.layers.Dense(n_units, activation='relu')
  output = keras.layers.Dense(1)
  net = keras.models.Sequential([first_hidden, second_hidden, output])

  # loss / optimizer / metrics, passed straight to compile()
  net.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.RMSprop(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return net

In [30]:
# 8 neurons
def create_model_8N():
  """
  Build and compile a fully connected regression network with 8-unit layers.

  Two Dense layers of 8 ReLU units (the first declared with input_shape=[4])
  feed a single-unit Dense output with no activation. The model is compiled
  with mean squared error loss, a default RMSprop optimizer and the MAE,
  Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  hidden_size = 8

  dense_layers = [
    keras.layers.Dense(hidden_size, activation='relu', input_shape=[4]),
    keras.layers.Dense(hidden_size, activation='relu'),
    keras.layers.Dense(1),
  ]
  regressor = keras.models.Sequential(dense_layers)

  # compile settings: MSE loss, RMSprop, and three tracked metrics
  regressor.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.RMSprop(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return regressor

In [31]:
# train 24, 12, 8 models
# identical fit settings for all three, so only the layer width differs
width_fit_settings = dict(epochs=100, validation_split=0.2, verbose=False)

m24 = create_model_24N()
m24_history = m24.fit(train_data, train_labels, **width_fit_settings)

m12 = create_model_12N()
m12_history = m12.fit(train_data, train_labels, **width_fit_settings)

m8 = create_model_8N()
m8_history = m8.fit(train_data, train_labels, **width_fit_settings)

In [33]:
# get results and compare
# Each evaluate() call yields (loss, MAE, accuracy, MSE); only the test-set MAE
# is kept for the comparison.
_, m24_test_mae, _, _ = m24.evaluate(test_data, test_labels, verbose=False)
print('24N model MAE: {}'.format(m24_test_mae))

_, m12_test_mae, _, _ = m12.evaluate(test_data, test_labels, verbose=False)
print('12N model MAE: {}'.format(m12_test_mae))

_, m8_test_mae, _, _ = m8.evaluate(test_data, test_labels, verbose=False)
print('8N model MAE: {}'.format(m8_test_mae))

# baseline MAE computed in the earlier evaluation cell, repeated for reference
print('Original bball model MAE: {}'.format(bball_test_mae))

24N model MAE: 0.024212807416915894
12N model MAE: 0.01603236049413681
8N model MAE: 0.06296402961015701
Original bball model MAE: 0.02979070134460926


In [41]:
# 12N model has lowest MAE

# COMPARE ACTIVATION FUNCTIONS

# sigmoid act
def create_model_sigmoid():
  """
  Build and compile the 12-unit regression network with sigmoid activations.

  Two Dense layers of 12 sigmoid units (the first declared with
  input_shape=[4]) feed a single-unit Dense output with no activation. The
  model is compiled with mean squared error loss, a default RMSprop optimizer
  and the MAE, Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  hidden_activation = 'sigmoid'

  layer_stack = [
    keras.layers.Dense(12, activation=hidden_activation, input_shape=[4]),
    keras.layers.Dense(12, activation=hidden_activation),
    keras.layers.Dense(1),
  ]
  network = keras.models.Sequential(layer_stack)

  # same loss / optimizer / metrics as the ReLU variants; only the activation differs
  network.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.RMSprop(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return network

In [42]:
# softmax act
def create_model_softmax():
  """
  Build and compile the 12-unit regression network with softmax activations.

  Two Dense layers of 12 softmax units (the first declared with
  input_shape=[4]) feed a single-unit Dense output with no activation. The
  model is compiled with mean squared error loss, a default RMSprop optimizer
  and the MAE, Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  hidden_activation = 'softmax'

  first_hidden = keras.layers.Dense(12, activation=hidden_activation, input_shape=[4])
  second_hidden = keras.layers.Dense(12, activation=hidden_activation)
  output = keras.layers.Dense(1)
  net = keras.models.Sequential([first_hidden, second_hidden, output])

  # same loss / optimizer / metrics as the ReLU variants; only the activation differs
  net.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.RMSprop(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return net

In [43]:
# train models
# identical fit settings for both, so only the hidden activation differs
activation_fit_settings = dict(epochs=100, validation_split=0.2, verbose=False)

sigmoid_model = create_model_sigmoid()
sigmoid_history = sigmoid_model.fit(train_data, train_labels, **activation_fit_settings)

softmax_model = create_model_softmax()
softmax_history = softmax_model.fit(train_data, train_labels, **activation_fit_settings)


In [44]:
# get results and compare
# Each evaluate() call yields (loss, MAE, accuracy, MSE); only the test-set MAE
# is kept for the comparison.
_, sigmoid_test_mae, _, _ = sigmoid_model.evaluate(test_data, test_labels, verbose=False)
print('Sigmoid model with 12N MAE: {}'.format(sigmoid_test_mae))

_, softmax_test_mae, _, _ = softmax_model.evaluate(test_data, test_labels, verbose=False)
print('Softmax model with 12N MAE: {}'.format(softmax_test_mae))

# ReLU reference: the 12-neuron model evaluated in the layer-width comparison
print('Relu model with 12N MAE: {}'.format(m12_test_mae))

Sigmoid model with 12N MAE: 0.02881545200943947
Softmax model with 12N MAE: 0.18812431395053864
Relu model with 12N MAE: 0.01603236049413681


In [45]:
# relu with 12N seems the best

# COMPARE OPTIMIZERS

# SGD
def create_model_SGD():
  """
  Build and compile the 12-unit ReLU regression network trained with SGD.

  Two Dense layers of 12 ReLU units (the first declared with input_shape=[4])
  feed a single-unit Dense output with no activation. The model is compiled
  with mean squared error loss, a default-configured SGD optimizer and the
  MAE, Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  layer_stack = [
    keras.layers.Dense(12, activation='relu', input_shape=[4]),
    keras.layers.Dense(12, activation='relu'),
    keras.layers.Dense(1),
  ]
  network = keras.models.Sequential(layer_stack)

  # only the optimizer differs from the RMSprop-based 12-unit model
  network.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.SGD(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return network

In [46]:
# adam opt
def create_model_adam():
  """
  Build and compile the 12-unit ReLU regression network trained with Adam.

  Two Dense layers of 12 ReLU units (the first declared with input_shape=[4])
  feed a single-unit Dense output with no activation. The model is compiled
  with mean squared error loss, a default-configured Adam optimizer and the
  MAE, Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  first_hidden = keras.layers.Dense(12, activation='relu', input_shape=[4])
  second_hidden = keras.layers.Dense(12, activation='relu')
  output = keras.layers.Dense(1)
  net = keras.models.Sequential([first_hidden, second_hidden, output])

  # only the optimizer differs from the RMSprop-based 12-unit model
  net.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.Adam(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return net

In [47]:
# adamax opt
def create_model_adamax():
  """
  Build and compile the 12-unit ReLU regression network trained with Adamax.

  Two Dense layers of 12 ReLU units (the first declared with input_shape=[4])
  feed a single-unit Dense output with no activation. The model is compiled
  with mean squared error loss, a default-configured Adamax optimizer and the
  MAE, Accuracy and MSE metrics, in that order.

  :return model: The compiled keras Sequential model.
  """
  dense_layers = [
    keras.layers.Dense(12, activation='relu', input_shape=[4]),
    keras.layers.Dense(12, activation='relu'),
    keras.layers.Dense(1),
  ]
  regressor = keras.models.Sequential(dense_layers)

  # only the optimizer differs from the RMSprop-based 12-unit model
  regressor.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.Adamax(),
    metrics=[
      keras.metrics.MeanAbsoluteError(),
      keras.metrics.Accuracy(),
      keras.metrics.MeanSquaredError(),
    ],
  )

  return regressor

In [48]:
# train models
# identical fit settings for all three, so only the optimizer differs
optimizer_fit_settings = dict(epochs=100, validation_split=0.2, verbose=False)

sgd_model = create_model_SGD()
sgd_history = sgd_model.fit(train_data, train_labels, **optimizer_fit_settings)

adam_model = create_model_adam()
adam_history = adam_model.fit(train_data, train_labels, **optimizer_fit_settings)

adamax_model = create_model_adamax()
adamax_history = adamax_model.fit(train_data, train_labels, **optimizer_fit_settings)

In [51]:
# get results and compare
# Each evaluate() call yields (loss, MAE, accuracy, MSE); only the test-set MAE
# is kept for the comparison.
_, sgd_mae, _, _ = sgd_model.evaluate(test_data, test_labels, verbose=False)
print('SGD with 12N MAE: {}'.format(sgd_mae))

_, adam_mae, _, _ = adam_model.evaluate(test_data, test_labels, verbose=False)
print('Adam with 12N MAE: {}'.format(adam_mae))

_, adamax_mae, _, _ = adamax_model.evaluate(test_data, test_labels, verbose=False)
print('Adamax with 12N MAE: {}'.format(adamax_mae))

# RMSprop reference: the 12-neuron model evaluated in the layer-width comparison
print('RMSProp with 12N MAE: {}'.format(m12_test_mae))

SGD with 12N MAE: 0.044501692056655884
Adam with 12N MAE: 0.015705224126577377
Adamax with 12N MAE: 0.0027221147902309895
RMSProp with 12N MAE: 0.01603236049413681


In [52]:
# adamax with 12N and relu act is best

# export best model as h5 model
# (the file name is relative, so it is written to the current working directory)
adamax_model.save('bball_model.h5')

# to download
# NOTE(review): `files` is not imported anywhere in this notebook — presumably
# google.colab.files; confirm and import it before uncommenting.
# files.download('bball_model.h5')