## Import all dependencies

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np

# Model imports
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.losses import binary_crossentropy
from keras.activations import relu, sigmoid

import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.metrics import precision_recall_fscore_support

Using TensorFlow backend.


# Measuring the performance of a DNN on combined_fight_data.csv

## Pre-process Data

In [2]:
# Path of the combined fight dataset, relative to this notebook.
ufc_data_location = '../generated_data/combined_fight_data.csv'

# Read the CSV and discard the listed columns in a single expression, so the
# cell yields the same frame every time it is executed.
ufc_data = pd.read_csv(ufc_data_location).drop(
    columns=['date', 'R_fighter', 'B_fighter', 'Referee', 'city', 'country', 'end_how']
)

In [3]:
# Display the frame that remains after the column drop (pandas truncates the
# rendering to the first and last rows/columns).
ufc_data

Unnamed: 0,Winner,title_bout,weight_class,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,...,R_Reach_cms,R_Weight_lbs,B_age,R_age,location_elevation,end_method,end_round,attendance,R_home_elevation,B_home_elevation
0,Red,False,Open Weight,1,0.0,1.0,0.0,4.00,3.00,9.00,...,,216.0,,34.0,1734.00,tko,,7800.0,1.0,146.0
1,Red,False,Open Weight,1,0.0,1.0,0.0,0.00,0.00,0.00,...,,175.0,29.0,26.0,1734.00,submission,,7800.0,27.0,1373.0
2,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,190.0,,24.0,1734.00,submission,,7800.0,89.0,
3,Red,True,Catch Weight,1,0.0,2.0,0.0,0.50,0.50,0.00,...,,175.0,34.0,26.0,1734.00,submission,,7800.0,27.0,1.0
4,Red,False,Open Weight,1,0.0,0.0,0.0,,,,...,,216.0,24.0,34.0,1734.00,tko,,7800.0,1.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5057,Red,True,Bantamweight,5,0.0,4.0,0.0,9.20,6.00,0.20,...,162.56,135.0,31.0,32.0,182.02,tko,3.0,16083.0,331.0,
5058,Blue,False,Heavyweight,3,0.0,1.0,0.0,17.00,14.50,2.50,...,190.50,264.0,32.0,26.0,182.02,decision,3.0,16083.0,,2290.0
5059,Red,False,Bantamweight,3,0.0,0.0,0.0,,,,...,175.26,135.0,35.0,34.0,182.02,ko,2.0,16083.0,195.0,
5060,Blue,False,Featherweight,3,0.0,1.0,0.0,7.25,4.75,1.75,...,180.34,145.0,31.0,37.0,182.02,ko,1.0,16083.0,2.0,35.0


In [4]:
# Retrieve all of the feature columns
numerical_cols = []
categorical_cols = []

for col, col_type in zip(ufc_data.dtypes.keys(), ufc_data.dtypes):
    if col_type == 'float64' or col_type == 'int64':
        numerical_cols.append(col)
    else:
        categorical_cols.append(col)
        
print(len(numerical_cols), len(categorical_cols))

140 6


In [5]:
# Create the features and labels columns 
for col_name in categorical_cols:
    null_count = ufc_data[ufc_data[col_name].isnull()].shape[0]
    if (null_count > 0):
        ufc_data = pd.get_dummies(ufc_data, columns=[col_name])
    else:
        col_data = ufc_data[col_name]
        le = LabelEncoder().fit(col_data)
        ufc_data[col_name] = le.transform(col_data)

ufc_data = ufc_data.fillna(0)

for col_name in ufc_data.columns:
    null_count = ufc_data[ufc_data[col_name].isnull()].shape[0]
    if null_count > 0:
        print('{} has {} nulls'.format(col_name, ufc_data[ufc_data[col_name].isnull()].shape[0]))

features = ufc_data.drop(columns=['Winner']).to_numpy()
labels = ufc_data['Winner'].to_numpy()
print('Features shape {}, labels shape {}'.format(features.shape, labels.shape))

Features shape (5062, 158), labels shape (5062,)


## Model Builder and Runner

In [6]:
# One (precision, recall, fbeta_score, support) tuple per get_dnn call,
# appended in call order.
scores = []

def get_dnn(x_train, y_train, x_val, y_val, params):
    """Build, train and score one dense binary classifier.

    The signature is the one passed to ``ta.Scan`` as its ``model`` argument.

    Parameters
    ----------
    x_train, y_train : training features and labels.
    x_val, y_val : validation features and labels; used both as Keras
        validation data and for the precision/recall/F-score computation.
    params : dict with the keys read here: 'first_neuron', 'activation',
        'dropout', 'last_activation', 'losses', 'optimizer', 'lr',
        'batch_size' and 'epochs'. The dict is also handed to Talos'
        ``hidden_layers`` helper, which presumably reads further keys such as
        'hidden_layers' and 'shapes' -- confirm against the Talos docs.

    Returns
    -------
    (history, model) : the object returned by ``model.fit`` and the trained
        model.

    Side effects
    ------------
    Appends the ``precision_recall_fscore_support`` result for the validation
    set to the module-level ``scores`` list.
    """
    model = Sequential()

    # Input layer followed by dropout
    model.add(Dense(params['first_neuron'],
                    activation=params['activation'],
                    input_dim=x_train.shape[1]))
    model.add(Dropout(params['dropout']))

    # Hidden layers (built by the Talos helper from params)
    hidden_layers(model, params, 1)

    # Output layer: a single unit
    model.add(Dense(1, activation=params['last_activation']))

    learning_rate = lr_normalizer(params['lr'], params['optimizer'])
    model.compile(
        loss=params['losses'],
        optimizer=params['optimizer'](lr=learning_rate),
        metrics=['accuracy']
    )

    # validation_data is passed as a tuple, the form documented by Keras.
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    # Threshold the single-unit output at 0.5. This is what
    # Sequential.predict_classes did for one output unit, without relying on
    # that method, which newer Keras releases no longer provide. ravel() hands
    # scikit-learn a 1-D prediction vector.
    predicted_classes = (model.predict(x_val) > 0.5).astype('int32').ravel()

    score = precision_recall_fscore_support(y_val, predicted_classes, average='binary')
    scores.append(score)

    return history, model


def run_model(features, labels, random_state=0):
    """Run a Talos grid search of ``get_dnn`` over a fixed hyperparameter grid.

    The data is shuffled, split 80/20, and standardised with a scaler fitted
    on the training part only. The 20% part is passed to Talos as the
    validation set for every configuration.

    NOTE(review): because the held-out part drives model selection, no
    untouched test set remains for a final estimate -- confirm this is
    intended.

    Parameters
    ----------
    features : array-like of feature rows.
    labels : array-like of labels, aligned with ``features``.
    random_state : seed used for both the shuffle and the train/validation
        split so that repeated runs see the same partition (default 0).

    Returns
    -------
    The ``data`` attribute of the finished ``ta.Scan`` object (one row per
    evaluated configuration).

    Side effects
    ------------
    Empties the module-level ``scores`` list before scanning, so that after
    the call it only holds entries produced by this run.
    """
    # Define hyperparameters to use in Grid Search (432 combinations)
    dnn_params = {
         'lr': [0.01, 0.1, 1],
         'first_neuron': [64, 128],
         'hidden_layers': [1, 2],
         'batch_size': [64, 128],
         'epochs': [10, 15, 25],
         'dropout': [0, 0.1, 0.3],
         'optimizer': [Adam],
         'shapes':['brick', 'funnel'],
         'losses': [binary_crossentropy],
         'activation': [relu],
         'last_activation': [sigmoid]
    }

    # get_dnn appends to `scores` on every call; without this reset a second
    # run would leave stale entries at the front of the list and the
    # positional pairing with the result rows would be wrong.
    scores.clear()

    # Seed the shuffle as well: an unseeded shuffle made the split differ
    # between runs even though train_test_split itself was seeded.
    new_features, new_labels = shuffle(np.array(features), labels,
                                       random_state=random_state)
    X_train, X_test, y_train, y_test = train_test_split(
        new_features, new_labels, random_state=random_state, train_size=0.80)

    # Fit the scaler on the training part only, then apply it to both parts.
    scaler = StandardScaler().fit(X_train)

    # Create the Neural Network
    dnn_model = ta.Scan(
        x=scaler.transform(X_train),
        y=y_train,
        model=get_dnn,
        params=dnn_params,
        experiment_name='Winner_Predictor',
        x_val=scaler.transform(X_test),
        y_val=y_test,
    )

    return dnn_model.data

## Train model and evaluate the results

In [7]:
results_df = run_model(features, labels)
dnn_cols = list(results_df.columns)
score_cols = ['precision', 'recall', 'fbeta_score', 'support']

# get_dnn appends one score tuple per call to the module-level `scores` list.
# Only the newest len(results_df) entries belong to this scan (assuming Talos
# calls get_dnn once per result row, in row order); anything before them is
# left over from an earlier run and must not be paired with these rows.
run_scores = scores[len(scores) - len(results_df):]
if len(run_scores) != len(results_df):
    raise ValueError(
        'Expected {} score entries for this scan, found {}'.format(
            len(results_df), len(run_scores)))

# Merge each Talos result row with its precision/recall/F-score/support tuple.
new_df_data = []

for (_, row_data), round_scores in zip(results_df.iterrows(), run_scores):
    new_row = {col: row_data[col] for col in dnn_cols}
    new_row.update(zip(score_cols, round_scores))
    new_df_data.append(new_row)

combined_results_df = pd.DataFrame(new_df_data)

100%|██████████| 432/432 [21:52<00:00,  3.04s/it]


In [8]:
# The five configurations with the highest validation accuracy, shown without
# the columns that hold the same value in every row of the grid.
# NOTE(review): the variable name says "top_10" but only five rows are kept.
top_10_val_accuracy = (
    combined_results_df
    .sort_values(by='val_accuracy', ascending=False)
    .iloc[:5]
)
top_10_val_accuracy.drop(
    ['activation', 'last_activation', 'optimizer', 'support', 'losses'], axis=1
)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
269,25,0.62503,0.716683,0.364006,0.827365,128,0.0,25,64,1,1.0,funnel,0.769333,0.835022,0.800833
122,25,0.56244,0.715696,0.508402,0.728822,64,0.1,25,64,1,0.1,brick,0.735673,0.910275,0.813713
140,25,0.564013,0.715696,0.477293,0.754013,64,0.1,25,128,2,0.1,brick,0.739027,0.901592,0.812256
356,25,0.560448,0.712734,0.497638,0.738701,128,0.1,25,128,2,0.1,brick,0.738663,0.895803,0.80968
69,25,0.558761,0.71076,0.460739,0.763892,64,0.0,25,128,2,0.1,funnel,0.73467,0.901592,0.809617


In [9]:
# The five configurations with the highest training accuracy.
# NOTE(review): despite the "val" in the name, the sort key is 'accuracy'
# (training), not 'val_accuracy'; and only five rows are kept, not ten.
top_10_val = (
    combined_results_df
    .sort_values(by='accuracy', ascending=False)
    .iloc[:5]
)
top_10_val.drop(
    ['activation', 'last_activation', 'optimizer', 'support', 'losses'], axis=1
)

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,batch_size,dropout,epochs,first_neuron,hidden_layers,lr,shapes,precision,recall,fbeta_score
286,25,1.615961,0.677196,0.030068,0.988392,128,0.0,25,128,2,1.0,brick,0.763006,0.76411,0.763557
64,25,1.129097,0.69003,0.059125,0.9837,64,0.0,25,128,1,1.0,brick,0.757162,0.803184,0.779494
70,25,1.858232,0.645607,0.038078,0.981724,64,0.0,25,128,2,1.0,brick,0.750755,0.719247,0.734664
280,25,0.966187,0.692991,0.085617,0.979007,128,0.0,25,128,1,1.0,brick,0.760274,0.803184,0.78114
274,25,1.385498,0.687068,0.067136,0.97876,128,0.0,25,64,2,1.0,brick,0.765625,0.780029,0.77276
