# Bonus: Neural Network on Small Training Set

In [1]:
from IPython.display import HTML
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

Out of plain curiosity, we wondered how a simple neural network would perform on our problem (Everyone wants to try deep learning nowadays). Because our training set is so small, we do not believe that the neural network will outperform any of our models from the previous part. In fact, it might overfit to the training set and perform worse than our other models. This notebook is just to experiment around with neural networks and see its performance on a small dataset. We will only look at the outcome at 90 minutes and Softmax in the test set, since it would be pretty complicated to come up with a scheme for the WC Playoff Model configuration, especially since there are so few WC playoff matches in our dataset.

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

%matplotlib inline

import keras 
from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.utils import plot_model


Using TensorFlow backend.


In [3]:
train = pd.read_csv('../data/cleaned/train_final.csv')
test = pd.read_csv('../data/cleaned/test_final.csv')

In [4]:
columns = ['overall_diff', 'attack_away_defence_home_diff', 'attack_diff',
           'attack_home_defence_away_diff', 'defence_diff', 'midfield_diff',
           'prestige_diff', 'growth_diff', 'full_age_diff',
           'start_age_diff', 'value_euros_millions_diff',
           'wage_euros_thousands_diff', 'goalkeeper_overall_diff',
           'bup_dribbling_diff', 'bup_passing_diff', 'bup_speed_diff',
           'cc_crossing_diff', 'cc_passing_diff', 'cc_shooting_diff',
           'd_aggresion_diff', 'd_pressure_diff', 'd_width_diff', 
           'gdp_diff', 'is_home', 'raw_gdp_diff', 
           'win_momentum_past_1_games_diff', 'lose_momentum_past_1_games_diff',
           'win_momentum_past_2_games_diff', 'lose_momentum_past_2_games_diff',
           'win_momentum_past_3_games_diff', 'lose_momentum_past_3_games_diff',
           'win_momentum_past_4_games_diff', 'lose_momentum_past_4_games_diff',
           'win_momentum_past_5_games_diff', 'lose_momentum_past_5_games_diff',
           
]

train = train[columns + ['home_win']]
test = test[columns  + ['home_win', 'home_win_no_pk']]


In [5]:
#Same seed as baseline to ensure same train and validation sets so that comparisons are valid.
np.random.seed(14)
X_train, X_valid = train_test_split(train, test_size = 0.2)
y_train = X_train['home_win'].ravel()
X_train = X_train.drop(['home_win'], axis = 1).values
y_valid= X_valid['home_win'].ravel()
X_valid = X_valid.drop(['home_win'], axis = 1).values
y_test = test['home_win'].ravel()
y_test_no_pk = test['home_win_no_pk'].ravel()
X_test = test.drop(['home_win', 'home_win_no_pk'], axis = 1).values


X_train_mean = X_train.mean(axis = 0)
X_train_std = X_train.std(axis = 0)

X_train = (X_train - X_train_mean) / (X_train_std)
X_valid = (X_valid - X_train_mean) / (X_train_std)
X_test = (X_test - X_train_mean) / (X_train_std)

#keras.utils.to_categorical doesn't work with negative numbers
y_train = y_train + 1
y_valid = y_valid + 1
y_test_no_pk = y_test + 1

y_train = keras.utils.to_categorical(y_train)
y_valid = keras.utils.to_categorical(y_valid)
y_test_no_pk = keras.utils.to_categorical(y_test_no_pk)


Our simple neural network will have a total of 3 hidden node layers, with 15 nodes in each layer. We will add regularization on each layer and also add drop out layers to try to prevent overfitting.

In [6]:
model = Sequential()
model.add(Dense(15, activation="relu", input_dim = X_train.shape[1], kernel_regularizer=regularizers.l2(1e-6)))
model.add(Dropout(0.5))
model.add(Dense(15, activation="relu", kernel_regularizer = regularizers.l2(1e-6)))
model.add(Dropout(0.5))
model.add(Dense(15, activation="relu", activity_regularizer = regularizers.l2(1e-6)))
model.add(Dropout(0.5))
model.add(Dense(3, activation="sigmoid", kernel_regularizer = regularizers.l2(1e-6)))

print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 15)                540       
_________________________________________________________________
dropout_1 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 15)                240       
_________________________________________________________________
dropout_2 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 15)                240       
_________________________________________________________________
dropout_3 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 48        
Total para

In [7]:
adam = Adam(amsgrad= True)
model.compile(loss= 'categorical_crossentropy', optimizer = adam, metrics=['accuracy'])
model.fit(X_train, y_train, epochs = 200, batch_size = 64, validation_data = (X_valid, y_valid), verbose = False)

<keras.callbacks.History at 0x103368c18>

In [8]:
print("Neural Network Train Accuracy: {}".format(model.evaluate(X_train, y_train, verbose = 0)[1]))
print("Neural Network Validation Accuracy: {}".format(model.evaluate(X_valid, y_valid, verbose = 0)[1]))


Neural Network Train Accuracy: 0.5570204350888611
Neural Network Validation Accuracy: 0.5447368411641372


In [9]:
y_pred_softmax = [np.where(np.argsort(val) == 1)[0][0]-1 if (i >= 48) & (np.argmax(val) == 1)
                           else np.argmax(val)-1 for i, val in enumerate(model.predict(X_test))] 

print("Neural Network Test Accuracy (90 Minutes): {}".format(model.evaluate(X_test, y_test_no_pk, verbose = 0)[1]))
print("Neural Network Test Accuracy (Softmax): {}".format(accuracy_score(y_test, y_pred_softmax)))

Neural Network Test Accuracy (90 Minutes): 0.625
Neural Network Test Accuracy (Softmax): 0.625


Surprisngly, the neural network actually did better than we thought it would. This most likely stems from the regularization we added as well as the dropout layers. However, the neural network did not perform as well as our best model.

We have thus shown that neural networks do not really help improve accuracy in this small dataset; it shows that we do not really need that complex of a model in this problem.