# Solving FIFA 19 overall prediction problem implementing a **deep neural network with ReLU** and tf.Keras

In [1]:
# Tensorflow and tf.keras
import tensorflow as tf
from tensorflow import keras

#Helper libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import os
import json
import time

# Google Drive access (Colab only): mount the user's Drive under /gdrive so that
# the dataset and result paths used below (all rooted at "/gdrive/My Drive/...") resolve.
# NOTE(review): force_remount=True presumably re-mounts even when Drive is already mounted — confirm.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


In [2]:
# --- Locations of the prepared dataset and of the results folder (Google Drive) ---
ATT_FILE = "/gdrive/My Drive/ANNDP_Practicas/datasets/FootballPlayerPreparedCleanAttributes.csv"
LABEL_FILE = "/gdrive/My Drive/ANNDP_Practicas/datasets/FootballPlayerOneHotEncodedClasses.csv"
RESULT_PATH = "/gdrive/My Drive/ANNDP_Practicas/results/"

# Fraction of the rows used for training; half of the remaining rows form the dev set.
TRAIN_RATE = 0.8

attributes = pd.read_csv(ATT_FILE)
label = pd.read_csv(LABEL_FILE)

# Split sizes (rows are taken in file order, no shuffling is done here).
n_instances = len(attributes)
n_train = int(n_instances * TRAIN_RATE)
n_dev = (n_instances - n_train) // 2
dev_end = n_train + n_dev

# Training split: the first n_train rows.
x_train, t_train = attributes.values[:n_train], label.values[:n_train]

# Development split: the n_dev rows that immediately follow the training rows.
x_dev, t_dev = attributes.values[n_train:dev_end], label.values[n_train:dev_end]

# Report the shape of every split as a quick sanity check.
for caption, split in (("x_train:", x_train), ("t_train:", t_train),
                       ("x_dev:", x_dev), ("t_dev:", t_dev)):
    print(caption, split.shape)

x_train: (12897, 29)
t_train: (12897, 4)
x_dev: (1612, 29)
t_dev: (1612, 4)


## Initialization

In [3]:
# Problem dimensions derived from the already-split arrays.
NUM_TRAINING_EXAMPLES = x_train.shape[0]  # rows in the training split
INPUTS = x_train.shape[1]                 # number of input features per example
OUTPUTS = t_train.shape[1]                # number of one-hot encoded target classes
NUM_DEV_EXAMPLES = x_dev.shape[0]         # rows in the development split

Some data is displayed to test correctness:

In [4]:
# Display the first five rows of the training attributes.
x_train[:5]

array([[-0.01380291,  0.56406963, -0.17794853,  1.26290753,  1.57324563,
         1.15172677, -0.61107186,  0.43573095,  1.44480111,  1.27821484,
         1.19929225,  0.96009098,  1.18057395,  1.00314372,  1.18463574,
         0.1057117 ,  0.60530368,  2.02697734,  0.64211779,  0.35338874,
         0.30583756,  0.77525724,  0.76161254,  1.28027312,  0.70892989,
         1.34499199,  0.73687179,  0.57810618,  0.54073043],
       [ 0.83555   ,  0.50300826, -0.2641142 , -0.16788561,  0.35550729,
         0.43239082, -0.80916102,  0.90022362, -0.00821652, -0.11996544,
         0.60325077,  0.69839702,  0.93752643, -0.93584912,  1.75990762,
        -0.27226214,  1.81931034,  0.417103  ,  0.08645143,  0.60769425,
        -0.73252793, -0.66109335,  0.55717963,  0.35091223,  0.38919599,
        -0.82282273,  0.50454745, -0.20433476, -0.24178182],
       [ 0.26931473,  0.4419469 ,  0.7698739 ,  0.54751096,  0.96437646,
         0.27253838,  0.31334424,  0.70115533,  0.79901549,  0.37938466,
  

In [5]:
# Display the first ten training targets.
t_train[:10]

array([[0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 0., 1., 0.]])

In [6]:
# Display the first five rows of the development attributes.
x_dev[:5]

array([[-8.45823158e-02,  7.47253736e-01, -5.61717678e-03,
         3.43111939e-01,  6.26115809e-01,  2.72538385e-01,
        -2.14893527e-01,  3.69374856e-01,  3.14676285e-01,
         7.97746011e-02, -1.35517125e+00, -1.22069197e+00,
        -1.08786956e+00,  5.46910108e-01, -1.19863345e+00,
         7.10469856e-01,  3.45159391e-01, -1.63995866e+00,
         6.42117786e-01,  9.89152503e-01,  1.96722234e+00,
         8.28455415e-01, -4.64984898e-01, -1.91214957e-01,
        -1.04713743e-02,  8.52306829e-01,  7.94952871e-01,
         8.38919833e-01,  9.05902810e-01],
       [-5.80038179e-01, -1.57307825e+00, -2.64114201e-01,
        -7.81082670e-01, -1.33579597e+00, -5.26723783e-01,
        -1.73357712e+00, -1.55495191e+00, -7.34725338e-01,
        -5.19445520e-01,  5.18101985e-01,  6.98397022e-01,
        -6.82790363e-01, -1.50614113e+00, -7.87724964e-01,
        -1.63296799e+00,  3.45159391e-01,  5.93531461e-02,
         1.65832339e-01, -1.74463166e+00, -8.01752294e-01,
        -1.29

In [7]:
# Display the first five development targets.
t_dev[:5]

array([[0., 1., 0., 0.],
       [1., 0., 0., 0.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.]])

## Hyperparameters

The number of hidden layers and neurons per layer must be adjusted (hyperparameters). 

In [8]:
# Hyperparameters of the network and of the training run.
# Values after a '#' on the same line are alternative settings noted by the author.
n_epochs = 2000 # alternatives: 600, 700
learning_rate = 0.0001 # alternatives: 0.1, 0.01
batch_size = 1024 # alternatives: 128, 512
reg_rate = 0.002 # L2 regularization factor; alternatives: 0.001, 0.01
dropout_rate = .05 # rate passed to every Dropout layer
n_neurons_per_hlayer = [25,25,25,25,25] # one entry per hidden layer; any number of neurons may be used

## Build the deep neural model

First of all, a sequential model is created. This is one of the Keras model types, intended for fully connected feedforward neural networks in which the layers are connected sequentially. This is called the *sequential* API.

In [9]:
# Create an empty Sequential model; layers are appended to it in the next cell.
model = keras.Sequential(name="Feedforward_NN")

Define the deep neural network topology. The model takes as input matrix tensors with *INPUTS* columns and *batch size* rows. *InputLayer* creates a placeholder through which the data is fed to the network. Then the hidden layers with the *ReLU* activation function are created, each one L2-regularized and followed by a dropout layer. Finally, the output layer with the *softmax* activation function is added.

In [10]:
# Input: one row per example with INPUTS features; the batch dimension is left open.
model.add(keras.layers.InputLayer(input_shape=(INPUTS,), batch_size=None))

# Hidden stack: for every entry in n_neurons_per_hlayer, an L2-regularized ReLU
# Dense layer followed by a Dropout layer.
for hidden_units in n_neurons_per_hlayer:
    hidden_layer = keras.layers.Dense(
        units=hidden_units,
        activation="relu",
        kernel_regularizer=keras.regularizers.l2(reg_rate),
    )
    model.add(hidden_layer)
    model.add(keras.layers.Dropout(rate=dropout_rate))

# Output: one softmax unit per class, with the same L2 penalty on its kernel.
output_layer = keras.layers.Dense(
    units=OUTPUTS,
    activation="softmax",
    kernel_regularizer=keras.regularizers.l2(reg_rate),
)
model.add(output_layer)

model.summary()

Model: "Feedforward_NN"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 25)                750       
                                                                 
 dropout (Dropout)           (None, 25)                0         
                                                                 
 dense_1 (Dense)             (None, 25)                650       
                                                                 
 dropout_1 (Dropout)         (None, 25)                0         
                                                                 
 dense_2 (Dense)             (None, 25)                650       
                                                                 
 dropout_2 (Dropout)         (None, 25)                0         
                                                                 
 dense_3 (Dense)             (None, 25)             

In [11]:
# Display the list of layer objects that make up the model.
model.layers

[<keras.layers.core.dense.Dense at 0x7ff6087f4280>,
 <keras.layers.regularization.dropout.Dropout at 0x7ff6087f4520>,
 <keras.layers.core.dense.Dense at 0x7ff6003ca850>,
 <keras.layers.regularization.dropout.Dropout at 0x7ff6003ca9a0>,
 <keras.layers.core.dense.Dense at 0x7ff6003941c0>,
 <keras.layers.regularization.dropout.Dropout at 0x7ff60038edf0>,
 <keras.layers.core.dense.Dense at 0x7ff600394640>,
 <keras.layers.regularization.dropout.Dropout at 0x7ff600394f10>,
 <keras.layers.core.dense.Dense at 0x7ff60039f550>,
 <keras.layers.regularization.dropout.Dropout at 0x7ff60039f6a0>,
 <keras.layers.core.dense.Dense at 0x7ff60039f340>]

In [12]:
# Print the name of every layer, one per line.
for layer in model.layers:
    print(layer.name)

dense
dropout
dense_1
dropout_1
dense_2
dropout_2
dense_3
dropout_3
dense_4
dropout_4
dense_5


All the parameters of a layer can be accessed:

In [13]:
# Retrieve the parameters of the first layer in model.layers (a Dense layer):
# get_weights() is unpacked here into its weight matrix and its bias vector.
weights, biases = model.layers[0].get_weights()
# Display the shape of the weight matrix.
weights.shape

(29, 25)

In [14]:
# Display the bias vector retrieved from the first layer.
biases

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [15]:
# Display the shape of the first layer's bias vector.
biases.shape

(25,)

# Compiling the model

Compiling the model means specifying the *loss* function (here the categorical cross-entropy, i.e. the log-loss) and the *optimizer* to use (here Nadam, a variant of gradient descent). Optionally, you can also specify a list of extra *metrics* to compute during training and evaluation. In this case, the categorical accuracy is used.

In [16]:
# Optimizer used for training. Alternatives noted by the author:
# Adam, SGD with Nesterov momentum (momentum=0.9), and RMSprop.
nadam_optimizer = tf.keras.optimizers.Nadam(
    learning_rate=learning_rate,
    beta_1=0.9,
    beta_2=0.99,
)

# Categorical cross-entropy loss, with categorical accuracy tracked as an extra metric.
model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=nadam_optimizer,
    metrics=["categorical_accuracy"],
)


There are several loss functions, optimizers and metrics. Full lists are available at: https://keras.io/losses/, https://keras.io/optimizers/ and https://keras.io/metrics/.


## Training and validating the model with M-BGD

Note that an **epoch** is an iteration over the entire training dataset provided.

In [None]:
# Train on the training split in mini-batches of `batch_size` for `n_epochs` epochs,
# evaluating on the dev split after every epoch. The returned History object is
# used by the following cells to plot and report the learning curves.
# `time` is already imported in the notebook's first cell, so it is not re-imported here.
start = time.perf_counter()
history = model.fit(
    x_train,
    t_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=1,
    validation_data=(x_dev, t_dev),
)
# Elapsed wall-clock training time, in seconds.
print (time.perf_counter() - start)

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

In [None]:
# One row per epoch, one column per tracked quantity in the training history.
results = pd.DataFrame(history.history)

# Plot every column against the epoch index on a single set of axes.
curves_ax = results.plot(figsize=(8, 5))
curves_ax.grid(True)
curves_ax.set_xlabel("Epochs")
curves_ax.set_ylabel("Accuracy - Mean Log Loss")
curves_ax.set_ylim(0, 1)  # set the vertical range to [0-1]
plt.show()

Note how the learning curves now decrease. 

In [None]:
# Display the parameters recorded on the History object returned by model.fit.
history.params

In [None]:
# Display the last row of the results frame, i.e. the values of the final epoch.
results[-1:]

In [None]:
# Training accuracy recorded for the final epoch.
final_train_accuracy = results["categorical_accuracy"].values[-1]
print("Accuracy for the training set: ", final_train_accuracy)

In [None]:
# Development-set accuracy recorded for the final epoch.
final_dev_accuracy = results["val_categorical_accuracy"].values[-1]
print("Accuracy for the development test set: ", final_dev_accuracy)

In [None]:
# Class probabilities predicted for every dev example.
# They are stored at full precision: rounding them to two decimals before storing
# could turn nearly equal probabilities into exact ties, and a later argmax would
# then pick the first tied class instead of the truly most probable one.
dev_predictions = model.predict(x_dev)

# Show the first 20 predictions, rounded to two decimals for readability only.
dev_predictions[:20].round(2)


In [None]:
# Round every probability to 0 or 1, then force a 1 at each row's most probable
# class so that every row has its predicted class marked.
dev_rounded_predictions = np.round(dev_predictions)
indices = np.argmax(dev_predictions, axis=1)
dev_rounded_predictions[np.arange(len(indices)), indices] = 1

# Show the first 20 rounded predictions.
dev_rounded_predictions[:20]

In [None]:
# Display the first 20 dev targets, for comparison with the rounded predictions.
t_dev[:20] # target classes

In [None]:
# Compare the predicted class index with the target class index, row by row.
predicted_classes = np.argmax(dev_rounded_predictions, axis=1)
target_classes = np.argmax(t_dev, axis=1)
dev_correct_predictions = predicted_classes == target_classes

# Show the outcome (True = correct) for the first 30 dev examples.
print(dev_correct_predictions[:30])

In [None]:
from collections import Counter

# Tally correct (True) and incorrect (False) dev predictions.
outcome_counts = Counter(dev_correct_predictions)
print(outcome_counts)

# Fraction of dev examples that were classified correctly.
print(outcome_counts[True] / len(dev_correct_predictions))

This model makes fewer mistakes than the deep-tanh neural network.

In [None]:

# Fraction of dev examples classified correctly (same figure as printed above).
dev_accuracy = Counter(dev_correct_predictions)[True] / len(dev_correct_predictions)

# File stem: timestamp plus the dev accuracy multiplied by 1000 and truncated to an integer.
RESULT_FILENAME = "result_" + time.strftime("%d%m%Y_%H%M%S") + "_TA" + str(int(1000 * dev_accuracy))

# Make sure the target folder exists so a long training run is not lost on save.
os.makedirs(RESULT_PATH, exist_ok=True)

# Save the learning curves next to the JSON summary.
results.plot(figsize=(8, 5))
plt.grid(True)
plt.xlabel ("Epochs")
plt.ylabel ("Accuracy - Mean Log Loss")
plt.gca().set_ylim(0, 1) # set the vertical range to [0-1]
plt.savefig(RESULT_PATH + RESULT_FILENAME + ".png")

# Write the final-epoch metrics and the hyperparameters of this run.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(RESULT_PATH + RESULT_FILENAME + ".json", mode="w") as fp:
    json.dump({"Results":{
        "categorical_accuracy": float(results.categorical_accuracy.values[-1]),
        "loss": float(results.loss.values[-1]),
        # Previously this key duplicated "categorical_accuracy", so the validation
        # accuracy was never written to the file.
        "val_categorical_accuracy": float(results.val_categorical_accuracy.values[-1]),
        "val_loss": float(results.val_loss.values[-1]),
        # Key name kept for compatibility with earlier result files; the value is
        # the accuracy measured on the development set.
        "TEST_ACC": dev_accuracy
    },"Hyperparameters": {
        "n_epochs": n_epochs,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "reg_rate": reg_rate,
        "dropout_rate": dropout_rate,
        "n_neurons_per_hlayer": n_neurons_per_hlayer,
        "optimizer": str(model.optimizer),
        "model": json.loads(model.to_json())
    }}, fp, indent=4)
