## How to use DNN:

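1. Run the cells under **Model Builder** and **Helper functions** (near the end of this notebook) first, so that `get_dnn`, `run_model`, and `plot_training_results` are defined.
2. Pre-process your data into a numeric feature matrix (see **Pre-process Data** below for an example).
3. Encode the labels vector as 0s and 1s, e.g. with scikit-learn's `LabelEncoder`.
4. Call `run_model(features, labels)`.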

## Import all dependencies

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from keras import models, layers
import matplotlib.pyplot as plt

# Model imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D
from keras.optimizers import Adam, Nadam, RMSprop
from keras.losses import logcosh, binary_crossentropy
from keras.activations import relu, elu, sigmoid

import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import precision_recall_fscore_support

Using TensorFlow backend.


# Measuring the performance of a DNN on combined_fight_data.csv

## Pre-process Data

In [2]:
# Input CSV for this experiment.
# Alternative (currently disabled) input: '../ufcdata/preprocessed_ratio_data.csv'
ufc_data_location = '../combined_data/combined_fight_data.csv'

# NOTE(review): a disabled line here used to drop the first column
# (ufc_data.columns[0]); confirm whether the CSV carries a saved index column.

# Load the fights and discard these columns before any feature handling.
ufc_data = pd.read_csv(ufc_data_location).drop(
    columns=['date', 'R_fighter', 'B_fighter', 'Referee', 'city', 'country', 'end_how']
)

In [3]:
# Preview the frame after the column drop (pandas elides the middle rows/columns).
ufc_data

Unnamed: 0,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,...,R_Reach_cms,R_Weight_lbs,B_age,R_age,location_elevation,end_method,end_round,attendance,R_home_elevation,B_home_elevation
0,Red,False,Open Weight,1,0.0,1.0,0.0,4.00,3.00,9.00,...,,216.0,,34.0,1734.00,tko,,7800.0,1.0,146.0
1,Red,False,Open Weight,1,0.0,1.0,0.0,0.00,0.00,0.00,...,,175.0,29.0,26.0,1734.00,submission,,7800.0,27.0,1373.0
2,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,190.0,,24.0,1734.00,submission,,7800.0,89.0,
3,Red,True,Catch Weight,1,0.0,2.0,0.0,0.50,0.50,0.00,...,,175.0,34.0,26.0,1734.00,submission,,7800.0,27.0,1.0
4,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,216.0,24.0,34.0,1734.00,tko,,7800.0,1.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5057,Red,True,Bantamweight,5,0.0,4.0,0.0,9.20,6.00,0.20,...,162.56,135.0,31.0,32.0,182.02,tko,3.0,16083.0,331.0,
5058,Blue,False,Heavyweight,3,0.0,1.0,0.0,17.00,14.50,2.50,...,190.50,264.0,32.0,26.0,182.02,decision,3.0,16083.0,,2290.0
5059,Red,False,Bantamweight,3,0.0,0.0,0.0,,,,...,175.26,135.0,35.0,34.0,182.02,ko,2.0,16083.0,195.0,
5060,Blue,False,Featherweight,3,0.0,1.0,0.0,7.25,4.75,1.75,...,180.34,145.0,31.0,37.0,182.02,ko,1.0,16083.0,2.0,35.0


In [4]:
"""
Retrieve all of the feature columns
"""
# Columns whose dtype is float64/int64 are treated as numerical; every other
# dtype ends up in the categorical list.
numerical_cols = []
categorical_cols = []

for col, col_type in ufc_data.dtypes.items():
    is_numeric = col_type == 'float64' or col_type == 'int64'
    target_list = numerical_cols if is_numeric else categorical_cols
    target_list.append(col)

# Report how many columns landed in each group.
print(len(numerical_cols), len(categorical_cols))

140 6


In [5]:
# Create the features matrix and the labels column.
# Categorical columns that contain nulls are one-hot encoded; the others are
# replaced in place by their LabelEncoder integer codes.
for col_name in categorical_cols:
    if ufc_data[col_name].isnull().any():
        ufc_data = pd.get_dummies(ufc_data, columns=[col_name])
        continue
    ufc_data[col_name] = LabelEncoder().fit_transform(ufc_data[col_name])

# Every remaining missing value becomes 0.
ufc_data = ufc_data.fillna(0)

# Sanity check: report any column that still holds nulls after the fill.
for col_name in ufc_data.columns:
    null_count = ufc_data[col_name].isnull().sum()
    if null_count > 0:
        print('{} has {} nulls'.format(col_name, null_count))

# 'Winner' is the label; all other columns are features.
labels = ufc_data['Winner'].to_numpy()
features = ufc_data.drop(columns=['Winner']).to_numpy()
print('Features shape {}, labels shape {}'.format(features.shape, labels.shape))

Features shape (5062, 158), labels shape (5062,)


## Train model and evaluate the results

In [13]:
# Run the hyperparameter scan; the returned table has one row per trained model.
results_df = run_model(features, labels)
dnn_cols = list(results_df.columns)
score_cols = ['precision', 'recall', 'fbeta_score', 'support']

# `scores` is the module-level list that get_dnn appends to, one tuple per
# trained model. It is never emptied by this cell, so after a second scan it
# still starts with the previous run's entries. Pair the result rows with the
# most recent len(results_df) entries instead of indexing from the start.
if len(scores) < len(results_df):
    raise ValueError(
        'Expected at least {} score tuples but found {}'.format(len(results_df), len(scores))
    )
latest_scores = scores[len(scores) - len(results_df):]

# Attach the score columns in one step instead of rebuilding every row by hand.
scores_df = pd.DataFrame(latest_scores, columns=score_cols)
combined_results_df = pd.concat([results_df.reset_index(drop=True), scores_df], axis=1)
combined_results_df

100%|██████████| 288/288 [11:00<00:00,  2.29s/it]


Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,activation,batch_size,dropout,epochs,first_neuron,hidden_layers,last_activation,losses,lr,optimizer,shapes,precision,recall,fbeta_score,support
0,10,0.637733,0.630800,0.637765,0.629538,<function relu at 0x7ff6e4d70f28>,64,0.0,10,64,1,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.01,<class 'keras.optimizers.Adam'>,brick,0.712514,0.897727,0.794469,
1,10,0.653253,0.690030,0.647285,0.684613,<function relu at 0x7ff6e4d70f28>,64,0.0,10,64,1,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.01,<class 'keras.optimizers.Adam'>,funnel,0.717537,0.766810,0.741355,
2,10,0.564772,0.702863,0.541694,0.706347,<function relu at 0x7ff6e4d70f28>,64,0.0,10,64,1,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.10,<class 'keras.optimizers.Adam'>,brick,0.690030,1.000000,0.816589,
3,10,0.584973,0.690030,0.581955,0.684613,<function relu at 0x7ff6e4d70f28>,64,0.0,10,64,1,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.10,<class 'keras.optimizers.Adam'>,funnel,0.728211,0.908441,0.808402,
4,10,0.621157,0.685094,0.614709,0.680909,<function relu at 0x7ff6e4d70f28>,64,0.0,10,64,2,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.01,<class 'keras.optimizers.Adam'>,brick,0.690030,1.000000,0.816589,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,25,0.679755,0.692004,0.680670,0.685601,<function relu at 0x7ff6e4d70f28>,128,0.3,25,128,1,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.10,<class 'keras.optimizers.Adam'>,funnel,0.726467,0.938484,0.818976,
284,25,0.603529,0.689042,0.646342,0.654483,<function relu at 0x7ff6e4d70f28>,128,0.3,25,128,2,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.01,<class 'keras.optimizers.Adam'>,brick,0.691395,1.000000,0.817544,
285,25,0.677762,0.669299,0.706235,0.636947,<function relu at 0x7ff6e4d70f28>,128,0.3,25,128,2,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.01,<class 'keras.optimizers.Adam'>,funnel,0.691617,0.991416,0.814815,
286,25,0.553862,0.699901,0.560527,0.701902,<function relu at 0x7ff6e4d70f28>,128,0.3,25,128,2,<function sigmoid at 0x7ff6e4d810d0>,<function binary_crossentropy at 0x7ff6e4d9c9d8>,0.10,<class 'keras.optimizers.Adam'>,brick,0.697397,0.919886,0.793337,


In [22]:
# Best runs by validation accuracy (the name says 10, but only five rows are kept).
top_10_val_accuracy = (
    combined_results_df
    .sort_values(by='val_accuracy', ascending=False)
    .iloc[:5]
)
# Show the selection without these five columns.
top_10_val_accuracy.drop(['activation', 'last_activation', 'optimizer', 'support', 'losses'], axis=1)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
230,25,0.557207,0.721619,0.546991,0.712275,128,0.1,25,64,2,0.1,brick,0.69003,1.0,0.816589
58,10,0.550718,0.71767,0.532364,0.715238,64,0.1,10,128,1,0.1,brick,0.6958,0.876967,0.775949
282,25,0.551104,0.713722,0.55213,0.700667,128,0.3,25,128,1,0.1,brick,0.711656,0.829757,0.766182
166,15,0.556057,0.712734,0.535761,0.715238,128,0.0,15,64,2,0.1,brick,0.69003,1.0,0.816589
90,25,0.544086,0.712734,0.478464,0.755742,64,0.1,25,128,1,0.1,brick,0.69003,1.0,0.816589


In [21]:
# Best runs by training accuracy (the name says 10, but only five rows are kept).
top_10_val = (
    combined_results_df
    .sort_values(by='accuracy', ascending=False)
    .iloc[:5]
)
# Show the selection without these five columns.
top_10_val.drop(['activation', 'last_activation', 'optimizer', 'support', 'losses'], axis=1)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
46,25,0.594228,0.686081,0.36654,0.840454,64,0.0,25,128,2,0.1,brick,0.708134,0.846924,0.771336
190,25,0.557969,0.697927,0.428572,0.803408,128,0.0,25,128,2,0.1,brick,0.688488,0.872675,0.769716
42,25,0.560859,0.686081,0.436931,0.795258,64,0.0,25,128,1,0.1,brick,0.69003,1.0,0.816589
30,15,0.553661,0.700888,0.457369,0.782168,64,0.0,15,128,2,0.1,brick,0.689621,0.988555,0.812463
186,25,0.551564,0.707799,0.478606,0.767844,128,0.0,25,128,1,0.1,brick,0.69003,1.0,0.816589


## Model Builder

In [6]:
# Validation scores collected as a side effect of get_dnn: one
# (precision, recall, fbeta_score, support) tuple per trained model, appended
# in training order. Re-running this cell resets the list.
scores = []


def get_dnn(x_train, y_train, x_val, y_val, params):
    """Build, train and score one dense binary classifier.

    This is the model function handed to ``ta.Scan``; it is called with the
    training/validation arrays and one hyperparameter combination.

    Args:
        x_train: Training features; ``x_train.shape[1]`` sets the input width.
        y_train: Training labels.
        x_val: Validation features.
        y_val: Validation labels.
        params: Dict providing 'first_neuron', 'activation', 'dropout',
            'last_activation', 'losses', 'optimizer', 'lr', 'batch_size' and
            'epochs', plus whatever keys the Talos ``hidden_layers`` helper
            reads.

    Returns:
        ``(history, model)``: the object returned by ``model.fit`` and the
        trained model.

    Side effects:
        Appends the validation precision/recall/F-score/support tuple
        (``average='binary'``) to the module-level ``scores`` list.
    """
    model = Sequential()

    # Input layer followed by dropout.
    model.add(Dense(params['first_neuron'],
                    activation=params['activation'],
                    input_dim=x_train.shape[1]))
    model.add(Dropout(params['dropout']))

    # Hidden layers are added by the Talos helper from `params`.
    # NOTE(review): the third argument (1) is presumably the output width - confirm against Talos docs.
    hidden_layers(model, params, 1)

    # Single-unit output layer.
    model.add(Dense(1, activation=params['last_activation']))

    model.compile(
        loss=params['losses'],
        # lr_normalizer adjusts the raw 'lr' value for the chosen optimizer class.
        optimizer=params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])),
        metrics=['accuracy']
    )

    history = model.fit(x_train, y_train,
                        # Keras documents validation_data as a tuple, not a list.
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    # `predict_classes` is deprecated/removed in newer Keras releases; for a
    # single-unit output it is equivalent to thresholding `predict` at 0.5.
    predicted = (model.predict(x_val) > 0.5).astype('int32').ravel()
    score = precision_recall_fscore_support(y_val, predicted, average='binary')
    scores.append(score)

    return history, model

In [12]:
def run_model(features, labels, random_state=None):
    """Grid-search the dense classifier over a fixed hyperparameter grid.

    The data is shuffled, split 80/20 into training and validation parts,
    standardised with a scaler fitted on the training part only, and passed to
    ``ta.Scan`` with ``get_dnn`` as the model function.

    Args:
        features: 2-D array-like of numeric features.
        labels: Label vector aligned with ``features``.
        random_state: Optional seed for the initial shuffle. The default
            (None) keeps the shuffle random, as before; the train/validation
            split itself always uses ``random_state=0``.

    Returns:
        The ``data`` attribute of the finished Talos scan (the per-round
        results table).

    Side effects:
        Empties the module-level ``scores`` list before scanning, so that after
        the call ``scores[i]`` belongs to row ``i`` of the returned table.
    """
    # Hyperparameter grid: 2*2*2*2*3*3*2 = 288 combinations.
    dnn_params = {
         'lr': [0.01, 0.1],
         'first_neuron': [64, 128],
         'hidden_layers': [1, 2],
         'batch_size': [64, 128],
         'epochs': [10, 15, 25],
         'dropout': [0, 0.1, 0.3],
         'optimizer': [Adam],
         'shapes':['brick', 'funnel'],
         'losses': [binary_crossentropy],
         'activation': [relu],
         'last_activation': [sigmoid]
    }

    # get_dnn appends to the shared `scores` list; drop entries left over from
    # earlier scans so they cannot be paired with this run's rows.
    scores.clear()

    new_features, new_labels = shuffle(np.array(features), labels, random_state=random_state)
    X_train, X_test, y_train, y_test = train_test_split(new_features, new_labels, random_state=0, train_size=0.80)

    # Fit the scaler on the training part only, then apply it to both parts.
    scaler = StandardScaler().fit(X_train)

    # Train one network per hyperparameter combination.
    dnn_model = ta.Scan(
        x=scaler.transform(X_train),
        y=y_train,
        model=get_dnn,
        params=dnn_params,
        experiment_name='Winner_Predictor',
        x_val=scaler.transform(X_test),
        y_val=y_test,
    )

    return dnn_model.data

# Helper functions

In [8]:
def plot_training_results(history_obj):
    """Plot training vs. validation accuracy and loss, side by side.

    Args:
        history_obj: Object whose ``history`` attribute maps 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' to per-epoch sequences
            (e.g. the value returned by ``model.fit``).
    """
    # Read from the argument; the previous body referenced an unrelated
    # name `history`, which is not defined inside this function.
    metrics = history_obj.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(acc) + 1)

    fig, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize=(15, 5))

    accuracy_axis.plot(epochs, acc, 'bo', label='Training acc')
    accuracy_axis.plot(epochs, val_acc, 'b', label='Validation acc')
    accuracy_axis.set_title('Training and validation accuracy')
    accuracy_axis.set_ylabel('Accuracy')
    accuracy_axis.set_xlabel('Epoch')
    accuracy_axis.legend()

    loss_axis.plot(epochs, loss, 'bo', label='Training loss')
    loss_axis.plot(epochs, val_loss, 'b', label='Validation loss')
    loss_axis.set_title('Training and validation loss')
    # Fixed y range for the loss panel.
    loss_axis.set_ylim(0, 3)
    loss_axis.set_ylabel('Loss')
    loss_axis.set_xlabel('Epoch')
    loss_axis.legend()
    fig.show()