## How to use DNN:

1. Run the "Model Builder" and "Helper functions" cells (at the bottom of this notebook) first so that `get_dnn`, `run_model` and `plot_training_results` are defined
2. Pre-process your data into a numeric feature matrix
3. Encode the labels vector as 0s and 1s using LabelEncoder
4. Call run_model(features, labels)

## Import all dependencies

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from keras import models, layers
import matplotlib.pyplot as plt

# Model imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D
from keras.optimizers import Adam, Nadam, RMSprop
from keras.losses import logcosh, binary_crossentropy
from keras.activations import relu, elu, sigmoid

import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import precision_recall_fscore_support

Using TensorFlow backend.


# Measuring the performance of a DNN on combined_fight_data.csv

## Pre-process Data

In [2]:
# Alternative input file, kept for reference:
# ufc_data_location = '../ufcdata/preprocessed_ratio_data.csv'
ufc_data_location = '../combined_data/combined_fight_data.csv'

# Read the CSV and remove the listed columns in a single expression.
# (An earlier variant dropped the first column instead:
#  ufc_data = ufc_data.drop(ufc_data.columns[0], axis=1))
ufc_data = pd.read_csv(ufc_data_location).drop(
    columns=['date', 'R_fighter', 'B_fighter', 'Referee', 'city', 'country', 'end_how']
)

In [3]:
# Display the loaded frame; the rendered output elides middle rows and columns.
ufc_data

Unnamed: 0,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,...,R_Reach_cms,R_Weight_lbs,B_age,R_age,location_elevation,end_method,end_round,attendance,R_home_elevation,B_home_elevation
0,Red,False,Open Weight,1,0.0,1.0,0.0,4.00,3.00,9.00,...,,216.0,,34.0,1734.00,tko,,7800.0,1.0,146.0
1,Red,False,Open Weight,1,0.0,1.0,0.0,0.00,0.00,0.00,...,,175.0,29.0,26.0,1734.00,submission,,7800.0,27.0,1373.0
2,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,190.0,,24.0,1734.00,submission,,7800.0,89.0,
3,Red,True,Catch Weight,1,0.0,2.0,0.0,0.50,0.50,0.00,...,,175.0,34.0,26.0,1734.00,submission,,7800.0,27.0,1.0
4,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,216.0,24.0,34.0,1734.00,tko,,7800.0,1.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5057,Red,True,Bantamweight,5,0.0,4.0,0.0,9.20,6.00,0.20,...,162.56,135.0,31.0,32.0,182.02,tko,3.0,16083.0,331.0,
5058,Blue,False,Heavyweight,3,0.0,1.0,0.0,17.00,14.50,2.50,...,190.50,264.0,32.0,26.0,182.02,decision,3.0,16083.0,,2290.0
5059,Red,False,Bantamweight,3,0.0,0.0,0.0,,,,...,175.26,135.0,35.0,34.0,182.02,ko,2.0,16083.0,195.0,
5060,Blue,False,Featherweight,3,0.0,1.0,0.0,7.25,4.75,1.75,...,180.34,145.0,31.0,37.0,182.02,ko,1.0,16083.0,2.0,35.0


In [4]:
"""
Retrieve all of the feature columns
"""
# Columns whose dtype compares equal to one of these names count as numerical;
# every other column (object, bool, ...) is treated as categorical.
numeric_dtype_names = ('float64', 'int64')

numerical_cols = []
categorical_cols = []

for col, col_type in ufc_data.dtypes.items():
    is_numeric = any(col_type == dtype_name for dtype_name in numeric_dtype_names)
    target_list = numerical_cols if is_numeric else categorical_cols
    target_list.append(col)

print(len(numerical_cols), len(categorical_cols))

140 6


In [5]:
# Encode the categorical columns, then create the features matrix and labels vector.
for col_name in categorical_cols:
    if ufc_data[col_name].isnull().any():
        # Columns that contain nulls are one-hot encoded (the dummy columns are
        # appended at the end of the frame).
        ufc_data = pd.get_dummies(ufc_data, columns=[col_name])
    else:
        # Fully populated columns are replaced by integer codes.
        le = LabelEncoder()
        ufc_data[col_name] = le.fit_transform(ufc_data[col_name])

# Every remaining missing value becomes 0.
ufc_data = ufc_data.fillna(0)

# Sanity check: report any column that still contains nulls after the fill.
for col_name, null_count in ufc_data.isnull().sum().items():
    if null_count > 0:
        print('{} has {} nulls'.format(col_name, null_count))

label_column = 'Winner'
features = ufc_data.drop(columns=[label_column]).to_numpy()
labels = ufc_data[label_column].to_numpy()
print('Features shape {}, labels shape {}'.format(features.shape, labels.shape))

Features shape (5062, 158), labels shape (5062,)


## Train model and evaluate the results

In [11]:
# Run the grid search, then attach the validation scores that get_dnn collected
# in the module-level `scores` list to the results table, one row per round.
results_df = run_model(features, labels)
dnn_cols = list(results_df.columns)
score_cols = ['precision', 'recall', 'fbeta_score', 'support']

# scores[i] is matched to results row i purely by position. If `scores` still
# holds entries from an earlier run, the metrics would silently be attached to
# the wrong rows, so fail loudly instead.
if len(scores) != len(results_df):
    raise ValueError(
        'Expected one score tuple per grid-search round, got {} scores for {} rounds; '
        're-run the Model Builder cell to reset `scores` before calling run_model.'
        .format(len(scores), len(results_df))
    )

new_df_data = []

for index, row_data in results_df.iterrows():
    # Copy the hyperparameters/metrics of this round ...
    new_row = {col: row_data[col] for col in dnn_cols}
    # ... and add the four values stored for the same round.
    new_row.update(zip(score_cols, scores[index]))
    new_df_data.append(new_row)

combined_results_df = pd.DataFrame(new_df_data)

100%|██████████| 432/432 [23:42<00:00,  3.29s/it]


Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,activation,batch_size,dropout,epochs,first_neuron,hidden_layers,last_activation,losses,lr,optimizer,shapes,precision,recall,fbeta_score,support
0,10,0.612020,0.677196,0.625748,0.661645,<function relu at 0x7f34028daea0>,64,0.0,10,64,1,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.01,<class 'keras.optimizers.Adam'>,brick,0.717391,0.886846,0.793169,
1,10,0.702044,0.606120,0.699864,0.589281,<function relu at 0x7f34028daea0>,64,0.0,10,64,1,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.01,<class 'keras.optimizers.Adam'>,funnel,0.694444,0.777935,0.733823,
2,10,0.561994,0.700888,0.549824,0.707335,<function relu at 0x7f34028daea0>,64,0.0,10,64,1,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.10,<class 'keras.optimizers.Adam'>,brick,0.724444,0.922207,0.811450,
3,10,0.557733,0.697927,0.557859,0.682638,<function relu at 0x7f34028daea0>,64,0.0,10,64,1,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.10,<class 'keras.optimizers.Adam'>,funnel,0.697927,1.000000,0.822093,
4,10,0.595047,0.682132,0.383725,0.825883,<function relu at 0x7f34028daea0>,64,0.0,10,64,1,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,1.00,<class 'keras.optimizers.Adam'>,brick,0.771509,0.773692,0.772599,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,25,0.640354,0.690030,0.685650,0.637935,<function relu at 0x7f34028daea0>,128,0.3,25,128,2,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.01,<class 'keras.optimizers.Adam'>,funnel,0.699492,0.974540,0.814421,
428,25,0.553324,0.691017,0.567422,0.694739,<function relu at 0x7f34028daea0>,128,0.3,25,128,2,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.10,<class 'keras.optimizers.Adam'>,brick,0.716484,0.922207,0.806432,
429,25,0.564983,0.697927,0.597598,0.684120,<function relu at 0x7f34028daea0>,128,0.3,25,128,2,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,0.10,<class 'keras.optimizers.Adam'>,funnel,0.697927,1.000000,0.822093,
430,25,0.585805,0.681145,0.407623,0.807113,<function relu at 0x7f34028daea0>,128,0.3,25,128,2,<function sigmoid at 0x7f34028ec048>,<function binary_crossentropy at 0x7f3402906950>,1.00,<class 'keras.optimizers.Adam'>,brick,0.788288,0.742574,0.764749,


In [14]:
# Best rounds ranked by validation accuracy.
# NOTE: despite the `top_10` name, only the first 5 rows are kept.
top_10_val_accuracy = (
    combined_results_df
    .sort_values(by='val_accuracy', ascending=False)
    .iloc[:5]
)
# Leave out the columns that hold function/class objects and the empty 'support'
# column so the displayed table stays readable.
top_10_val_accuracy.drop(
    ['activation', 'last_activation', 'optimizer', 'support', 'losses'],
    axis=1,
)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
17,10,0.538036,0.722606,0.45972,0.777476,64,0.0,10,128,1,1.0,funnel,0.777344,0.844413,0.809492
134,25,0.542964,0.721619,0.477686,0.750803,64,0.1,25,128,1,0.1,brick,0.758202,0.882603,0.815686
335,15,0.561013,0.719645,0.436305,0.789331,128,0.1,15,128,2,1.0,funnel,0.778656,0.835926,0.806276
233,10,0.540647,0.718657,0.465869,0.768338,128,0.0,10,128,1,1.0,funnel,0.763092,0.865629,0.811133
316,15,0.556638,0.718657,0.424348,0.79674,128,0.1,15,64,1,1.0,brick,0.757317,0.878359,0.81336


In [15]:
# Best rounds ranked by *training* accuracy (the 'accuracy' column).
# NOTE: despite the `top_10_val` name, this is not sorted by a validation
# metric and only the first 5 rows are kept.
top_10_val = (
    combined_results_df
    .sort_values(by='accuracy', ascending=False)
    .iloc[:5]
)
# Leave out the columns that hold function/class objects and the empty 'support'
# column so the displayed table stays readable.
top_10_val.drop(
    ['activation', 'last_activation', 'optimizer', 'support', 'losses'],
    axis=1,
)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
286,25,1.757576,0.672261,0.024089,0.990121,128,0.0,25,128,2,1.0,brick,0.782805,0.734088,0.757664
70,25,1.810516,0.707799,0.024872,0.988886,64,0.0,25,128,2,1.0,brick,0.773635,0.821782,0.796982
58,25,1.616376,0.673248,0.037758,0.986663,64,0.0,25,64,2,1.0,brick,0.766289,0.765205,0.765747
280,25,0.97678,0.683119,0.078182,0.983947,128,0.0,25,128,1,1.0,brick,0.772599,0.773692,0.773145
274,25,1.349429,0.657453,0.068559,0.979748,128,0.0,25,64,2,1.0,brick,0.755682,0.752475,0.754075


## Model Builder

In [7]:
# One (precision, recall, fbeta_score, support) tuple is appended here for every
# model that get_dnn trains, in call order.
scores = []


def get_dnn(x_train, y_train, x_val, y_val, params):
    """Build, train and score one dense network for a single hyperparameter set.

    This is the model function handed to ``ta.Scan`` in ``run_model``.

    Parameters
    ----------
    x_train, y_train : training features (2-D, ``x_train.shape[1]`` is used as
        the input width) and training labels.
    x_val, y_val : validation features and labels.
    params : dict
        Must provide 'first_neuron', 'activation', 'dropout', 'last_activation',
        'losses', 'optimizer' (an optimizer class), 'lr', 'batch_size' and
        'epochs'. The dict is also forwarded to talos' ``hidden_layers`` helper.

    Returns
    -------
    (history, model)
        The object returned by ``model.fit`` and the trained model.

    Side effects
    ------------
    Appends the validation precision/recall/F-score/support tuple to the
    module-level ``scores`` list.
    """
    model = Sequential()

    # Input layer
    model.add(Dense(params['first_neuron'],
                    activation=params['activation'],
                    input_dim=x_train.shape[1]))
    model.add(Dropout(params['dropout']))

    # Hidden layers are added by the talos helper.
    # NOTE(review): the trailing 1 presumably mirrors the single output unit
    # below - confirm against the talos documentation.
    hidden_layers(model, params, 1)

    # Output layer: a single unit
    model.add(Dense(1, activation=params['last_activation']))

    optimizer_cls = params['optimizer']
    model.compile(
        loss=params['losses'],
        optimizer=optimizer_cls(lr=lr_normalizer(params['lr'], optimizer_cls)),
        metrics=['accuracy'],
    )

    # Keras documents validation_data as a tuple; a list is not accepted by
    # every release.
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    # Threshold the single output at 0.5 to obtain 0/1 class predictions. This
    # replaces Sequential.predict_classes, which is not available in current
    # Keras releases.
    predicted_classes = (model.predict(x_val, verbose=0) > 0.5).astype('int32').ravel()

    score = precision_recall_fscore_support(y_val, predicted_classes, average='binary')
    scores.append(score)

    return history, model

In [10]:
def run_model(features, labels, random_state=0):
    """Grid-search the dense network over a fixed hyperparameter grid with talos.

    The data is shuffled, split 80/20 into training and validation parts, and
    standardised with a scaler fitted on the training part only. ``get_dnn`` is
    then trained once per hyperparameter combination by ``ta.Scan``.

    Parameters
    ----------
    features : array-like feature matrix (converted with ``np.array``).
    labels : label vector aligned with ``features``.
    random_state : int, default 0
        Seed used for both the shuffle and the train/validation split so that
        repeated calls evaluate on the same split.

    Returns
    -------
    The ``data`` attribute of the finished ``ta.Scan`` object (one row per
    hyperparameter combination).

    Side effects
    ------------
    Empties the module-level ``scores`` list before the scan; ``get_dnn`` then
    appends one entry per trained model.
    """
    # Define hyperparameters to use in Grid Search
    dnn_params = {
         'lr': [0.01, 0.1, 1],
         'first_neuron': [64, 128],
         'hidden_layers': [1, 2],
         'batch_size': [64, 128],
         'epochs': [10, 15, 25],
         'dropout': [0, 0.1, 0.3],
         'optimizer': [Adam],
         'shapes': ['brick', 'funnel'],
         'losses': [binary_crossentropy],
         'activation': [relu],
         'last_activation': [sigmoid]
    }

    # Callers index `scores` by result-row position, so entries left over from
    # a previous call would misalign the metrics. Clear in place so get_dnn
    # keeps appending to the same list object.
    scores.clear()

    # Seed the shuffle as well; otherwise the seeded split below still sees a
    # different row order on every call.
    new_features, new_labels = shuffle(np.array(features), labels, random_state=random_state)
    X_train, X_test, y_train, y_test = train_test_split(
        new_features, new_labels, random_state=random_state, train_size=0.80)

    # Fit the scaler on the training part only and reuse it for validation.
    scaler = StandardScaler().fit(X_train)

    # Run the grid search
    dnn_model = ta.Scan(
        x=scaler.transform(X_train),
        y=y_train,
        model=get_dnn,
        params=dnn_params,
        experiment_name='Winner_Predictor',
        x_val=scaler.transform(X_test),
        y_val=y_test,
    )

    return dnn_model.data

# Helper functions

In [9]:
def plot_training_results(history_obj):
    """Plot training vs. validation accuracy and loss curves side by side.

    Parameters
    ----------
    history_obj : object exposing a ``history`` dict with the keys 'accuracy',
        'val_accuracy', 'loss' and 'val_loss', each holding one value per epoch
        (for example the object returned by ``model.fit`` in ``get_dnn``).

    Returns
    -------
    None. A figure with two panels is created and shown.
    """
    # Read from the argument; the previous version looked up an unrelated
    # global named `history` and ignored the parameter.
    metrics = history_obj.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc) + 1)

    fig, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize=(15, 5))

    accuracy_axis.plot(epochs, acc, 'bo', label='Training acc')
    accuracy_axis.plot(epochs, val_acc, 'b', label='Validation acc')
    accuracy_axis.set_title('Training and validation accuracy')
    accuracy_axis.set_ylabel('Accuracy')
    accuracy_axis.set_xlabel('Epoch')
    accuracy_axis.legend()

    loss_axis.plot(epochs, loss, 'bo', label='Training loss')
    loss_axis.plot(epochs, val_loss, 'b', label='Validation loss')
    loss_axis.set_title('Training and validation loss')
    # Fixed y range for the loss panel; larger losses are clipped from view.
    loss_axis.set_ylim(0, 3)
    loss_axis.set_ylabel('Loss')
    loss_axis.set_xlabel('Epoch')
    loss_axis.legend()
    fig.show()