## Import all dependencies

In [8]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np

# Model imports
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from keras.activations import relu, sigmoid

import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import precision_recall_fscore_support

# Measuring the performance of a DNN on combined_fight_data.csv

## Pre-process Data

In [9]:
# Path to the combined fight dataset, relative to this notebook.
ufc_data_location = '../generated_data/combined_fight_data.csv'

# Columns removed straight after loading, before any feature engineering.
dropped_columns = ['date', 'R_fighter', 'B_fighter', 'Referee', 'city', 'country', 'end_how']

# Read the CSV and discard those columns in one chained expression.
ufc_data = pd.read_csv(ufc_data_location).drop(columns=dropped_columns)

In [10]:
# Preview the frame after the column drop; pandas abbreviates the middle rows and columns with "...".
ufc_data

Unnamed: 0,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,...,R_Reach_cms,R_Weight_lbs,B_age,R_age,location_elevation,end_method,end_round,attendance,R_home_elevation,B_home_elevation
0,Red,False,Open Weight,1,0.0,1.0,0.0,4.00,3.00,9.00,...,,216.0,,34.0,1734.00,tko,,7800.0,10.0,146.0
1,Red,False,Open Weight,1,0.0,1.0,0.0,0.00,0.00,0.00,...,,175.0,29.0,26.0,1734.00,submission,,7800.0,270.0,13730.0
2,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,190.0,,24.0,1734.00,submission,,7800.0,890.0,
3,Red,True,Catch Weight,1,0.0,2.0,0.0,0.50,0.50,0.00,...,,175.0,34.0,26.0,1734.00,submission,,7800.0,270.0,10.0
4,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,216.0,24.0,34.0,1734.00,tko,,7800.0,10.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5057,Red,True,Bantamweight,5,0.0,4.0,0.0,9.20,6.00,0.20,...,162.56,135.0,31.0,32.0,182.02,tko,3.0,16083.0,3310.0,
5058,Blue,False,Heavyweight,3,0.0,1.0,0.0,17.00,14.50,2.50,...,190.50,264.0,32.0,26.0,182.02,decision,3.0,16083.0,,22900.0
5059,Red,False,Bantamweight,3,0.0,0.0,0.0,,,,...,175.26,135.0,35.0,34.0,182.02,ko,2.0,16083.0,1950.0,
5060,Blue,False,Featherweight,3,0.0,1.0,0.0,7.25,4.75,1.75,...,180.34,145.0,31.0,37.0,182.02,ko,1.0,16083.0,20.0,350.0


In [11]:
# Split every column of ufc_data into one of two buckets by dtype:
# float64/int64 columns are treated as numerical, everything else
# (including the 'Winner' label if it is not float64/int64) as categorical.
numerical_cols, categorical_cols = [], []

for column_name, column_dtype in ufc_data.dtypes.items():
    is_numeric = any(column_dtype == dtype_name for dtype_name in ('float64', 'int64'))
    target_bucket = numerical_cols if is_numeric else categorical_cols
    target_bucket.append(column_name)

# Report how many columns landed in each bucket.
print(len(numerical_cols), len(categorical_cols))

140 6


In [12]:
# Encode the categorical columns, then build the feature matrix and label vector.
#  - a categorical column containing nulls is one-hot encoded with get_dummies
#  - a categorical column without nulls is integer-encoded with LabelEncoder
# NOTE: ufc_data is overwritten here, so this cell is not safe to re-run on its
# own; the one-hot encoded source columns no longer exist after the first run.
for categorical_name in categorical_cols:
    column_values = ufc_data[categorical_name]
    if column_values.isnull().any():
        ufc_data = pd.get_dummies(ufc_data, columns=[categorical_name])
        continue
    ufc_data[categorical_name] = LabelEncoder().fit_transform(column_values)

# Every remaining missing value is replaced by 0.
ufc_data = ufc_data.fillna(0)

# Sanity check: report any column that still contains nulls after the fill.
remaining_nulls = ufc_data.isnull().sum()
for column_name, missing_count in remaining_nulls[remaining_nulls > 0].items():
    print('{} has {} nulls'.format(column_name, missing_count))

# 'Winner' is the prediction target; all other columns are features.
label_column = 'Winner'
labels = ufc_data[label_column].to_numpy()
features = ufc_data.drop(columns=[label_column]).to_numpy()
print('Features shape {}, labels shape {}'.format(features.shape, labels.shape))

Features shape (5062, 158), labels shape (5062,)


## Model Builder and Runner

In [13]:
# Side channel for per-configuration metrics: get_dnn appends one
# precision_recall_fscore_support result per call, in call order.
scores = []

def get_dnn(x_train, y_train, x_val, y_val, params):
    """Build, train and score one dense network for a single hyperparameter set.

    Written as a Talos model function (it is passed as ``model=`` to ``ta.Scan``).

    Parameters
    ----------
    x_train, y_train : training features and labels.
    x_val, y_val : validation features and labels, used both as Keras
        validation data and for the precision/recall/F-score computation.
    params : dict holding one hyperparameter combination. This function reads
        'first_neuron', 'activation', 'dropout', 'last_activation', 'losses',
        'optimizer', 'lr', 'batch_size' and 'epochs'; the ``hidden_layers``
        helper receives the whole dict as well (presumably for
        'hidden_layers' and 'shapes' -- confirm against the Talos docs).

    Returns
    -------
    (history, model) : the Keras fit history and the trained model.

    Side effects
    ------------
    Appends the validation (precision, recall, fbeta_score, support) tuple to
    the module-level ``scores`` list.
    """
    model = Sequential()

    # Input layer followed by dropout.
    model.add(Dense(params['first_neuron'],
                    activation=params['activation'],
                    input_dim=x_train.shape[1]))
    model.add(Dropout(params['dropout']))

    # Hidden layers are added by the Talos helper from the params dict.
    hidden_layers(model, params, 1)

    # Single-unit output layer.
    model.add(Dense(1, activation=params['last_activation']))

    model.compile(
        loss=params['losses'],
        optimizer=params['optimizer'](lr=lr_normalizer(params['lr'],
                                                       params['optimizer'])),
        metrics=['accuracy']
    )

    # Keras documents validation_data as a tuple; a list is not accepted
    # consistently across versions.
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    # Sequential.predict_classes is deprecated and absent from recent
    # Keras/TensorFlow releases. For a single-unit output it thresholded the
    # predicted probability at 0.5, which is reproduced explicitly here.
    val_probabilities = model.predict(x_val, verbose=0)
    val_predictions = (val_probabilities > 0.5).astype('int32').ravel()

    score = precision_recall_fscore_support(y_val, val_predictions, average='binary')
    scores.append(score)

    return history, model


def run_model(features, labels, random_state=0):
    """Run a Talos hyperparameter scan of ``get_dnn`` over a fixed grid.

    The data is shuffled, split 80/20 into training and validation parts,
    standardised with a scaler fitted on the training part only, and handed
    to ``ta.Scan``.

    Parameters
    ----------
    features : array-like feature matrix (converted with ``np.array``).
    labels : label vector aligned with ``features``.
    random_state : seed used for both the shuffle and the train/validation
        split, so the same rows land in each part on every run. Defaults to 0,
        the seed the split already used. Network weight initialisation is not
        seeded here, so trained metrics can still vary between runs.

    Returns
    -------
    The ``data`` attribute of the finished ``ta.Scan`` object.
    """
    # Hyperparameter grid explored by the scan.
    dnn_params = {
         'lr': [0.01, 0.1, 1],
         'first_neuron': [64, 128],
         'hidden_layers': [1, 2],
         'batch_size': [64, 128],
         'epochs': [10, 15, 25],
         'dropout': [0, 0.1, 0.3],
         'optimizer': [Adam],
         'shapes':['brick', 'funnel'],
         'losses': [binary_crossentropy],
         'activation': [relu],
         'last_activation': [sigmoid]
    }

    # Seed the shuffle as well as the split: an unseeded shuffle in front of a
    # seeded split makes the split different on every run.
    shuffled_features, shuffled_labels = shuffle(
        np.array(features), labels, random_state=random_state)
    X_train, X_val, y_train, y_val = train_test_split(
        shuffled_features, shuffled_labels,
        random_state=random_state, train_size=0.80)

    # Fit the scaler on the training part only so that validation statistics
    # do not leak into training.
    scaler = StandardScaler().fit(X_train)

    # Run the scan; the held-out 20% is used as validation data for every
    # configuration.
    dnn_model = ta.Scan(
        x=scaler.transform(X_train),
        y=y_train,
        model=get_dnn,
        params=dnn_params,
        experiment_name='Winner_Predictor',
        x_val=scaler.transform(X_val),
        y_val=y_val,
    )

    return dnn_model.data

## Train model and evaluate the results

In [14]:
# get_dnn appends to the module-level `scores` list on every call. Empty it
# first so that re-running this cell does not pair the new scan rows with
# metrics left over from an earlier run.
scores.clear()

results_df = run_model(features, labels)
dnn_cols = list(results_df.columns)
score_cols = ['precision', 'recall', 'fbeta_score', 'support']

# The scan rows and the recorded metrics are matched purely by position, so
# fail loudly if the two ever disagree in length.
if len(scores) != len(results_df):
    raise RuntimeError(
        'Expected one score tuple per scan row, got {} scores for {} rows'.format(
            len(scores), len(results_df)))

# One row of metrics per scan row, in the order get_dnn was called.
scores_df = pd.DataFrame(scores, columns=score_cols)

# Scan columns first, then the metric columns, on a fresh 0..n-1 index.
combined_results_df = pd.concat(
    [results_df.reset_index(drop=True), scores_df], axis=1)


  0%|          | 0/432 [00:00<?, ?it/s][A
  0%|          | 1/432 [00:02<18:27,  2.57s/it][A
  0%|          | 2/432 [00:04<17:48,  2.48s/it][A
  1%|          | 3/432 [00:06<16:30,  2.31s/it][A
  1%|          | 4/432 [00:09<17:32,  2.46s/it][A
  1%|          | 5/432 [00:12<17:40,  2.48s/it][A
  1%|▏         | 6/432 [00:14<17:51,  2.52s/it][A
  2%|▏         | 7/432 [00:18<20:04,  2.83s/it][A
  2%|▏         | 8/432 [00:20<18:34,  2.63s/it][A
  2%|▏         | 9/432 [00:22<17:33,  2.49s/it][A
  2%|▏         | 10/432 [00:24<16:45,  2.38s/it][A
  3%|▎         | 11/432 [00:26<16:11,  2.31s/it][A
  3%|▎         | 12/432 [00:28<15:30,  2.22s/it][A
  3%|▎         | 13/432 [00:30<15:01,  2.15s/it][A
  3%|▎         | 14/432 [00:32<14:33,  2.09s/it][A
  3%|▎         | 15/432 [00:35<15:21,  2.21s/it][A
  4%|▎         | 16/432 [00:37<15:49,  2.28s/it][A
  4%|▍         | 17/432 [00:40<17:15,  2.50s/it][A
  4%|▍         | 18/432 [00:43<17:23,  2.52s/it][A
  4%|▍         | 19/432 [00:4

In [15]:
# Rank all scan rows by validation accuracy, best first, and keep the leaders.
# NOTE(review): the name says "top_10" but head(5) keeps only five rows.
ranked_by_val_accuracy = combined_results_df.sort_values(by=['val_accuracy'], ascending=False)
top_10_val_accuracy = ranked_by_val_accuracy.head(5)

# Hide the columns that have a single option in the grid, plus 'support'.
hidden_columns = ['activation', 'last_activation', 'optimizer', 'support', 'losses']
top_10_val_accuracy.drop(columns=hidden_columns)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
188,15,0.553277,0.720632,0.574968,0.680662,64,0.3,15,128,2,0.1,brick,0.738623,0.91474,0.817301
134,25,0.5291,0.716683,0.481343,0.757718,64,0.1,25,128,1,0.1,brick,0.748466,0.881503,0.809555
422,25,0.542733,0.715696,0.553883,0.705359,128,0.3,25,128,1,0.1,brick,0.729545,0.927746,0.816794
92,10,0.54462,0.714709,0.538246,0.714003,64,0.1,10,128,2,0.1,brick,0.72514,0.937861,0.817895
395,15,0.531167,0.713722,0.554812,0.71672,128,0.3,15,64,2,1.0,funnel,0.725843,0.933526,0.816688


In [16]:
# Rank all scan rows by *training* accuracy, best first, and keep the leaders.
# NOTE(review): despite the name, this is sorted by 'accuracy' (not a
# validation metric) and head(5) keeps five rows, not ten.
ranked_by_train_accuracy = combined_results_df.sort_values(by=['accuracy'], ascending=False)
top_10_val = ranked_by_train_accuracy.head(5)

# Hide the columns that have a single option in the grid, plus 'support'.
hidden_columns = ['activation', 'last_activation', 'optimizer', 'support', 'losses']
top_10_val.drop(columns=hidden_columns)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
286,25,1.738132,0.691017,0.022021,0.991603,128,0.0,25,128,2,1.0,brick,0.75166,0.817919,0.783391
64,25,1.217835,0.688055,0.04298,0.989627,64,0.0,25,128,1,1.0,brick,0.748021,0.819364,0.782069
70,25,1.978865,0.667325,0.028984,0.987651,64,0.0,25,128,2,1.0,brick,0.751773,0.765896,0.758769
280,25,1.092236,0.692991,0.070343,0.985675,128,0.0,25,128,1,1.0,brick,0.750329,0.825145,0.78596
58,25,1.753901,0.684107,0.043594,0.983947,64,0.0,25,64,2,1.0,brick,0.739691,0.82948,0.782016
