# Measuring the performance of a DNN on preprocessed_data.csv

## Import all dependencies

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from keras import models, layers
import matplotlib.pyplot as plt

# Model imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D
from keras.optimizers import Adam, Nadam, RMSprop
from keras.losses import logcosh, binary_crossentropy
from keras.activations import relu, elu, sigmoid

import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers

## Retrieve and transform the data

In [None]:
# Relative path of the CSV that this section trains on.
ufc_data_location = "../ufcdata/preprocessed_data.csv"

# Load the whole CSV into a DataFrame; later cells read it as `ufc_data`.
ufc_data = pd.read_csv(ufc_data_location)

In [None]:
ufc_data

In [None]:
"""
In this version of the DNN I am just training using the numerical
columns
"""
# Partition the column names of `ufc_data` by dtype: float64/int64 columns go
# to `numerical_cols`, every other column goes to `categorical_cols`.
numerical_cols = []
categorical_cols = []
for name, dtype in ufc_data.dtypes.items():
    if dtype != "float64" and dtype != "int64":
        categorical_cols.append(name)
    else:
        numerical_cols.append(name)

print(numerical_cols)

In [None]:
# Feature matrix: the numerical columns only.
features = ufc_data[numerical_cols]

# Target vector: integer-encode the "Winner" column. The fitted encoder stays
# available as `le`.
le = LabelEncoder()
labels = le.fit_transform(ufc_data["Winner"])

In [None]:
labels

In [None]:
# Retrieving train and test splits (80% train / 20% test).
# random_state is pinned so the split, and therefore every metric reported
# below, is reproducible from one notebook run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, train_size=0.8, random_state=42
)
# Keras is fed plain NumPy arrays rather than DataFrames.
X_train = X_train.values
X_test = X_test.values
print("X_train length {}, y_train length {}".format(X_train.shape, y_train.shape))
print("X_test length {}, y_test length {}".format(X_test.shape, y_test.shape))

## Create a model

In [None]:
# Binary classifier: three 128-unit ReLU hidden layers followed by a single
# sigmoid output unit, trained with binary cross-entropy and Adam.
# The input width is read from the training matrix instead of being
# hard-coded, so the cell keeps working if the set of numerical columns changes.
model = models.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
model.summary()

## Train the model

In [None]:
# Train for 50 epochs with batches of 64; the returned History object is what
# plot_training_results() consumes.
# NOTE(review): the test split is passed as validation data, so there is no
# untouched hold-out set left for a final estimate — confirm this is intended.
history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test))

## Evaluate the model

In [None]:
plot_training_results(history)

## Summary of DNN on preprocessed_data.csv:

- This model was trained on 158 columns without any dimensionality reduction performed.
- With the raw preprocessed data, it seems the DNN is not strong enough to model all of these columns, because we can't achieve higher than 80% accuracy on the training data. Usually these models should be able to overfit the training data. We will need to perform some dimensionality reduction.

# Measuring the performance of a DNN on preprocessed_ratio_data.csv

## Retrieve and transform the data

In [None]:
# Relative path of the CSV that this section trains on.
ufc_data_location = "../ufcdata/preprocessed_ratio_data.csv"

ufc_data = pd.read_csv(ufc_data_location)
# Drop the first column of the loaded frame.
# NOTE(review): presumably an index column written out when the CSV was
# saved — confirm against the file.
ufc_data = ufc_data.drop(ufc_data.columns[0], axis=1)

In [None]:
ufc_data

In [None]:
"""
Retrieve all of the feature columns
"""
# Partition the column names of `ufc_data` by dtype: float64/int64 columns go
# to `numerical_cols`, every other column goes to `categorical_cols`.
numerical_cols = []
categorical_cols = []
for name, dtype in ufc_data.dtypes.items():
    if dtype != "float64" and dtype != "int64":
        categorical_cols.append(name)
    else:
        numerical_cols.append(name)

# Number of numerical columns found.
print(len(numerical_cols))

In [None]:
# Feature matrix: the numerical columns only.
features = ufc_data[numerical_cols]

# Target vector: integer-encode the "Winner" column. The fitted encoder stays
# available as `le`.
le = LabelEncoder()
labels = le.fit_transform(ufc_data["Winner"])

In [None]:
# Retrieving train and test splits (80% train / 20% test).
# random_state is pinned so the split, and therefore every metric reported
# below, is reproducible from one notebook run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, train_size=0.8, random_state=42
)
# Keras is fed plain NumPy arrays rather than DataFrames.
X_train = X_train.values
X_test = X_test.values

print("X_train length {}, y_train length {}".format(X_train.shape, y_train.shape))
print("X_test length {}, y_test length {}".format(X_test.shape, y_test.shape))

## Create a model

In [None]:
# Binary classifier: three 256-unit ReLU hidden layers followed by a single
# sigmoid output unit, trained with binary cross-entropy and Adam.
# The input width is read from the training matrix instead of being
# hard-coded, so the cell keeps working if the set of numerical columns changes.
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
model.summary()

## Train the model

In [None]:
# Train for 50 epochs with batches of 64; the returned History object is what
# plot_training_results() consumes.
# NOTE(review): the test split is passed as validation data, so there is no
# untouched hold-out set left for a final estimate — confirm this is intended.
history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test))

## Evaluate the model

In [None]:
plot_training_results(history)

## Model Builder

In [1]:
def get_dnn(x_train, y_train, x_val, y_val, params):
    """Build, compile and train a dense binary classifier from ``params``.

    The signature and the ``(history, model)`` return value follow the shape
    of a Talos model function (presumably this is passed to a Talos scan —
    confirm against the caller).

    Args:
        x_train: Training feature matrix; ``x_train.shape[1]`` sets the
            input width of the first layer.
        y_train: Training labels.
        x_val: Validation feature matrix.
        y_val: Validation labels.
        params: Hyper-parameter dict. Keys read here: ``first_neuron``,
            ``activation``, ``dropout``, ``last_activation``, ``losses``,
            ``optimizer`` (an optimizer class, instantiated below), ``lr``,
            ``batch_size`` and ``epochs``. The same dict is handed to
            ``hidden_layers``, which may read further keys.

    Returns:
        tuple: ``(history, model)`` — the object returned by ``model.fit``
        and the trained model.
    """
    model = Sequential()

    # Input layer. The width comes from the data passed in, not from the
    # notebook-level X_train, so the builder works with whatever split the
    # caller supplies.
    model.add(Dense(params["first_neuron"],
                    activation=params['activation'],
                    input_dim=x_train.shape[1]))

    model.add(Dropout(params['dropout']))

    # Hidden layers are appended by the Talos helper, driven by `params`.
    hidden_layers(model, params, 1)

    # Output layer: a single unit.
    model.add(Dense(1, activation=params['last_activation']))

    # params['optimizer'] is a class; lr_normalizer rescales params['lr']
    # for that optimizer before it is instantiated.
    model.compile(loss=params['losses'],
                  optimizer=params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])),
                  metrics=['accuracy'])

    # Keras documents validation_data as a tuple (x_val, y_val).
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    return history, model

# Helper functions

In [None]:
def plot_training_results(history_obj):
    """Plot training vs. validation accuracy and loss, side by side.

    Args:
        history_obj: Object returned by ``model.fit``. Its ``history`` dict
            must contain the keys ``accuracy``, ``val_accuracy``, ``loss``
            and ``val_loss``, each a per-epoch sequence.
    """
    # Read from the argument, not from a notebook-level `history` variable,
    # so the function plots whatever run it is given.
    metrics = history_obj.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Epochs are numbered from 1 on the x-axis.
    epochs = range(1, len(acc) + 1)

    fig, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize=(15, 5))

    accuracy_axis.plot(epochs, acc, 'bo', label='Training acc')
    accuracy_axis.plot(epochs, val_acc, 'b', label='Validation acc')
    accuracy_axis.set_title('Training and validation accuracy')
    accuracy_axis.set_ylabel('Accuracy')
    accuracy_axis.set_xlabel('Epoch')
    accuracy_axis.legend()

    loss_axis.plot(epochs, loss, 'bo', label='Training loss')
    loss_axis.plot(epochs, val_loss, 'b', label='Validation loss')
    loss_axis.set_title('Training and validation loss')
    # Fixed y-range of 0..3; presumably so large loss values do not flatten
    # the rest of the curve.
    loss_axis.set_ylim(0, 3)
    loss_axis.set_ylabel('Loss')
    loss_axis.set_xlabel('Epoch')
    loss_axis.legend()
    fig.show()