# In this notebook we attempt to predict whether a fighter will win their next fight based on how they performed in their last 5 fights

## Library Imports

In [93]:
import sys
sys.path.insert(1, '../combined_data')
sys.path.insert(1, '../predict_winner')
from make_career import make_career
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers

## Loading the dataset

In [2]:
# Build the career windows from the combined fight CSV: `features` holds the
# windows of past fights, `labels` the outcome of the fight that follows each window.
features, labels = make_career(pd.read_csv('../combined_data/combined_fight_data_zerod_nans.csv'))
# Flatten the labels from a single-column 2D array to a 1D vector.
labels = labels.reshape(-1,)

Creating careers using 5 fight intervals and predicting 1 future fights
Original fight data shape: (5062, 154)
Fights 2x shape: (10124, 96)

Features shape: (3220, 5)
Labels shape: (3220, 1)

Features is a 3D matrix with 3220 rows
Each row contains has 5 fights, and each fight has 96 cols

Labels is a 2D matrix with 3220 rows
Each row contains the the prediction for the next 1 fight(s)


In [5]:
# Sanity check: container shape and type of the feature windows.
print(features.shape)
print(type(features))

(3220, 5)
<class 'numpy.ndarray'>


In [6]:
# Sanity check: labels should now be a 1D array with one entry per window.
print(labels.shape)
print(type(labels))

(3220,)
<class 'numpy.ndarray'>


## Collapse each row
## Generates np.array of shape (3220, 5 * features)

In [14]:
def collapse_n_fights():
    """Flatten each fight window into a single row and split into train/test.

    Reads the notebook-level ``features`` and ``labels``. For every window the
    numerical columns (as reported by ``get_column_types``) of each fight are
    laid end to end, with NaNs replaced by zero.

    Returns:
        X_train, X_test, y_train, y_test from an 80/20 ``train_test_split``
        with ``random_state=0``.
    """
    numeric_keys, _categorical_keys = get_column_types()

    def numeric_vector(fight):
        # Keep only the numerical columns of one fight; zero out NaNs in place.
        vector = np.array([fight[key] for key in numeric_keys])
        np.nan_to_num(vector, copy=False)
        return vector

    flat_rows = []
    for window in features:
        # Start from an empty float array so the row dtype is promoted the
        # same way repeated np.append calls onto np.array([]) would.
        pieces = [np.array([])]
        pieces.extend(numeric_vector(fight) for fight in window.reshape(-1))
        flat_rows.append(np.concatenate(pieces))

    flat_features = np.array(flat_rows)
    split = train_test_split(flat_features, labels, random_state=0, train_size=0.80)
    X_train, X_test, y_train, y_test = split
    print("X_train length {}, y_train length {}".format(X_train.shape, y_train.shape))
    print("X_test length {}, y_test length {}".format(X_test.shape, y_test.shape))

    return X_train, X_test, y_train, y_test

## Generate np.array of shape (3220, 5, features)

In [157]:
def n_fights_to_array():
    """Stack each fight window as a (fights, columns) block and split train/test.

    Reads the notebook-level ``features`` and ``labels``. Unlike
    ``collapse_n_fights`` the fights of a window are kept as separate rows, so
    the resulting array has one axis for windows, one for fights and one for
    the numerical columns reported by ``get_column_types``.

    Returns:
        X_train, X_test, y_train, y_test from an 80/20 ``train_test_split``
        with ``random_state=0``.
    """
    numeric_keys, _categorical_keys = get_column_types()

    def numeric_vector(fight):
        # Keep only the numerical columns of one fight; zero out NaNs in place.
        vector = np.array([fight[key] for key in numeric_keys])
        np.nan_to_num(vector, copy=False)
        return vector

    stacked_windows = np.array([
        [numeric_vector(fight) for fight in window.reshape(-1)]
        for window in features
    ])

    split = train_test_split(stacked_windows, labels, random_state=0, train_size=0.80)
    X_train, X_test, y_train, y_test = split
    print("X_train length {}, y_train length {}".format(X_train.shape, y_train.shape))
    print("X_test length {}, y_test length {}".format(X_test.shape, y_test.shape))

    return X_train, X_test, y_train, y_test

## Building a DNN to predict the winner using last n fights

In [147]:
# Get the data: one flattened row per fight window, as the dense network expects 2D input
X_train, X_test, y_train, y_test = collapse_n_fights()

X_train length (2576, 425), y_train length (2576,)
X_test length (644, 425), y_test length (644,)


In [148]:
# Define hyperparameters to use in Grid Search
# Lists enumerate explicit choices. Tuples are ranges: in the recorded results
# (0.2, 1, 2) yields lr values 0.2 and 0.6, and (0, 0.5, 3) yields dropout
# values 0, 0.1667 and 0.3333, i.e. (start, stop-exclusive, number of steps).
# NOTE: Adam, binary_crossentropy, relu and sigmoid come from the "Model imports"
# cell further down the notebook; that cell must have been run first.
dnn_params = {'lr': (0.2, 1, 2),
     'first_neuron': [128, 256],
     'hidden_layers': [1, 2],
     'batch_size': [64, 128, 256],
     'epochs': [10, 20, 40],
     'dropout': (0, 0.5, 3),
     'optimizer': [Adam],
     'shapes':['brick', 'funnel'],
     'losses': [binary_crossentropy],
     'activation': [relu],
     'last_activation': [sigmoid]}

In [None]:
# Create the Neural Network
# Runs the Talos hyperparameter scan over dnn_params using the get_dnn builder
# (defined in the "Models" section below; that cell must have been run first).
# NOTE(review): no x_val/y_val is passed, so Talos presumably carves the
# validation split out of X_train itself — confirm against the Talos docs.
model = ta.Scan(x=X_train,
               y=y_train,
               model=get_dnn,
               params=dnn_params,
               experiment_name="UFC_5_Fight_Predictor")

In [152]:
# Per-round scan results (one row per hyperparameter combination tried).
results_df = model.data

In [153]:
# Keep the ten rounds with the highest validation accuracy.
top_10_dnn = results_df.sort_values(by=['val_accuracy'], ascending=False).head(10)

In [154]:
# Display the ten best DNN rounds.
top_10_dnn

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,activation,batch_size,dropout,epochs,first_neuron,hidden_layers,last_activation,losses,lr,optimizer,shapes
1,1,0.69087,0.605433,11.293909,0.559623,<function relu at 0x1370a5d90>,64,0.0,10,128,1,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
406,1,243.485651,0.60414,558.958144,0.496395,<function relu at 0x1370a5d90>,256,0.333333,20,128,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,brick
196,1,234.743301,0.60414,473.667471,0.484193,<function relu at 0x1370a5d90>,128,0.166667,10,128,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,brick
167,1,2.487747,0.60414,151.037975,0.536883,<function relu at 0x1370a5d90>,128,0.0,20,128,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,funnel
5,1,0.691875,0.60414,36.233989,0.542429,<function relu at 0x1370a5d90>,64,0.0,10,128,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
147,1,0.691528,0.60414,381.887882,0.503605,<function relu at 0x1370a5d90>,128,0.0,10,128,1,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,funnel
370,1,178.835083,0.60414,427.591131,0.523572,<function relu at 0x1370a5d90>,256,0.166667,40,128,1,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,brick
316,1,311.591751,0.60414,812.915545,0.459235,<function relu at 0x1370a5d90>,256,0.0,20,256,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,brick
121,1,0.846175,0.60414,626.760367,0.520799,<function relu at 0x1370a5d90>,64,0.333333,20,256,1,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
281,1,1.100348,0.60414,77.89518,0.528563,<function relu at 0x1370a5d90>,128,0.333333,40,256,1,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel


In [155]:
# Persist the ten best DNN rounds (the DataFrame index is written as the first CSV column).
top_10_dnn.to_csv("../model_results/UFC_Predict_5_Fights_DNN.csv")

## Building a CNN to predict the winner using last n fights

In [183]:
# Get the data: windows kept as (fights, columns) blocks, as Conv1D expects 3D input.
# This overwrites the flattened X_train/X_test/y_train/y_test used by the DNN section.
X_train, X_test, y_train, y_test = n_fights_to_array()

X_train length (2576, 5, 85), y_train length (2576,)
X_test length (644, 5, 85), y_test length (644,)


In [195]:
# Define hyperparameters to use in Grid Search
# Lists enumerate explicit choices. Tuples are ranges: in the recorded results
# (0.2, 1, 2) yields lr values 0.2 and 0.6, and (0, 0.5, 2) yields dropout
# values 0 and 0.25, i.e. (start, stop-exclusive, number of steps).
# NOTE: Adam, binary_crossentropy, relu and sigmoid come from the "Model imports"
# cell further down the notebook; that cell must have been run first.
cnn_params = {'lr': (0.2, 1, 2),
     'first_neuron': [64, 128],
     'kernel_size': [2],
     'batch_size': [64, 128],
     'epochs': [10, 20],
     'dropout': (0, 0.5, 2),
     'flatten_layer': [100, 150], 
     'optimizer': [Adam],
     'shapes':['brick', 'funnel'],
     'losses': [binary_crossentropy],
     'activation': [relu],
     'last_activation': [sigmoid]}

In [196]:
# Create the Neural Network
# Runs the Talos hyperparameter scan over cnn_params using the get_cnn builder
# (defined in the "Models" section below; that cell must have been run first).
# Rebinds `model`, so the DNN scan object from the previous section is no longer reachable.
model = ta.Scan(x=X_train,
                y=y_train,
                model=get_cnn,
                params=cnn_params,
                experiment_name="UFC_5_Fight_Predictor_CNN")














  0%|          | 0/128 [00:00<?, ?it/s][A[A[A[A[A[A[A[A[A[A[A[A[A












  1%|          | 1/128 [00:02<05:40,  2.68s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  2%|▏         | 2/128 [00:05<05:31,  2.63s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  2%|▏         | 3/128 [00:07<05:27,  2.62s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  3%|▎         | 4/128 [00:10<05:15,  2.54s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  4%|▍         | 5/128 [00:12<05:09,  2.51s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  5%|▍         | 6/128 [00:15<05:05,  2.50s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  5%|▌         | 7/128 [00:17<05:00,  2.48s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  6%|▋         | 8/128 [00:19<04:56,  2.47s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  7%|▋         | 9/128 [00:23<05:17,  2.66s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












  8%|

 64%|██████▍   | 82/128 [04:42<02:04,  2.70s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 65%|██████▍   | 83/128 [04:45<02:03,  2.74s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 66%|██████▌   | 84/128 [04:47<02:02,  2.78s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 66%|██████▋   | 85/128 [04:50<02:01,  2.82s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 67%|██████▋   | 86/128 [04:53<01:59,  2.84s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 68%|██████▊   | 87/128 [04:56<01:56,  2.85s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 69%|██████▉   | 88/128 [04:59<01:55,  2.88s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 70%|██████▉   | 89/128 [05:03<01:59,  3.07s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 70%|███████   | 90/128 [05:06<02:01,  3.21s/it][A[A[A[A[A[A[A[A[A[A[A[A[A












 71%|███████   | 91/128 [05:10<02:02,  3.30s/it][A[A[A[A[A[A[A[A[A[A[A[A[A














In [197]:
# Per-round results of the CNN scan (replaces the DNN results held in results_df).
results_df = model.data

In [198]:
# NOTE(review): despite the name, this now holds ALL CNN rounds sorted by
# validation accuracy (the top-10 cut is disabled) and overwrites the DNN top 10.
top_10_dnn = results_df.sort_values(by=['val_accuracy'], ascending=False) # .head(10) intentionally disabled

In [199]:
# Display the CNN scan results sorted by validation accuracy (variable name is reused from the DNN section).
top_10_dnn

Unnamed: 0,round_epochs,val_loss,val_accuracy,loss,accuracy,activation,batch_size,dropout,epochs,first_neuron,flatten_layer,kernel_size,last_activation,losses,lr,optimizer,shapes
73,10,20.194645,0.564036,14.986706,0.554631,<function relu at 0x1370a5d90>,128,0.00,10,128,100,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
89,20,48.209050,0.558862,10.964302,0.607321,<function relu at 0x1370a5d90>,128,0.00,20,128,100,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
72,10,17.260539,0.554981,14.494129,0.550749,<function relu at 0x1370a5d90>,128,0.00,10,128,100,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,brick
63,20,0.763766,0.553687,1.935268,0.588464,<function relu at 0x1370a5d90>,64,0.25,20,128,150,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,funnel
28,20,28.297188,0.553687,12.986392,0.585136,<function relu at 0x1370a5d90>,64,0.00,20,128,150,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,brick
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,10,24.471606,0.465718,49.191561,0.532446,<function relu at 0x1370a5d90>,64,0.25,10,64,100,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
10,10,18.548293,0.457956,11.492632,0.576816,<function relu at 0x1370a5d90>,64,0.00,10,128,100,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,brick
29,20,15.411590,0.454075,15.631584,0.604548,<function relu at 0x1370a5d90>,64,0.00,20,128,150,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.2,<class 'keras.optimizers.Adam'>,funnel
91,20,17.098046,0.452781,13.324951,0.565169,<function relu at 0x1370a5d90>,128,0.00,20,128,100,2,<function sigmoid at 0x1370a5ea0>,<function binary_crossentropy at 0x137054730>,0.6,<class 'keras.optimizers.Adam'>,funnel


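# Models

**Note:** the cells in this section (the Keras imports, `get_dnn`, `get_cnn`) and the helper functions below must be run before the data-preparation and grid-search cells above, which reference them.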

In [162]:
# Model imports
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D
from keras.optimizers import Adam, Nadam, RMSprop
from keras.losses import logcosh, binary_crossentropy
from keras.activations import relu, elu, sigmoid

Analyzing last n fights using a Dense Neural Network

In [146]:
def get_dnn(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a dense binary classifier for one scan round.

    Args:
        x_train: 2D training features (samples, flattened window columns).
        y_train: training labels.
        x_val: validation features.
        y_val: validation labels.
        params: hyperparameter dict for this round; reads 'first_neuron',
            'activation', 'dropout', 'last_activation', 'losses', 'optimizer',
            'lr', 'batch_size' and 'epochs' (the Talos ``hidden_layers`` helper
            reads further keys from it).

    Returns:
        (history, model): the Keras fit history and the trained model.
    """
    model = Sequential()

    # Input layer. Size it from the data passed in for this round rather than
    # from the notebook-global X_train, which may hold a different shape
    # (e.g. the 3D CNN data) by the time this function runs.
    model.add(Dense(params["first_neuron"],
                    activation=params['activation'],
                    input_dim=x_train.shape[1]))

    model.add(Dropout(params['dropout']))

    # Hidden layers are added by the Talos helper according to params.
    hidden_layers(model, params, 1)

    # Output layer: a single unit for the win/lose prediction.
    model.add(Dense(1, activation=params['last_activation']))

    # lr_normalizer rescales the scanned 'lr' value for the chosen optimizer.
    model.compile(loss=params['losses'],
                  optimizer=params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])),
                  metrics=['accuracy'])

    # Keras documents validation_data as a tuple (x_val, y_val).
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    return history, model

Analyzing last n fights using a Convolutional Neural Network

In [192]:
def get_cnn(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a 1D-convolutional binary classifier for one scan round.

    Args:
        x_train: 3D training features (samples, fights per window, columns).
        y_train: training labels.
        x_val: validation features.
        y_val: validation labels.
        params: hyperparameter dict for this round; reads 'first_neuron',
            'kernel_size', 'dropout', 'flatten_layer', 'losses', 'optimizer',
            'lr', 'batch_size' and 'epochs'. 'activation' and
            'last_activation' are honoured when present and default to
            'relu' and 'sigmoid' otherwise.

    Returns:
        (history, model): the Keras fit history and the trained model.
    """
    activation = params.get('activation', 'relu')
    last_activation = params.get('last_activation', 'sigmoid')

    # `Sequential` is what the notebook imports; the previous `models.Sequential`
    # relied on a `models` name that is never imported.
    model = Sequential()

    # Two stacked convolutions over the fight axis. The input shape is taken
    # from the data so other window lengths / column counts also work.
    model.add(Conv1D(filters=params["first_neuron"],
                     kernel_size=params["kernel_size"],
                     activation=activation,
                     input_shape=tuple(x_train.shape[1:])))
    model.add(Conv1D(filters=params["first_neuron"],
                     kernel_size=params["kernel_size"],
                     activation=activation))
    model.add(Dropout(params['dropout']))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(params["flatten_layer"], activation=activation))
    model.add(Dense(1, activation=last_activation))

    # lr_normalizer rescales the scanned 'lr' value for the chosen optimizer.
    model.compile(loss=params['losses'],
                  optimizer=params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])),
                  metrics=['accuracy'])

    # Keras documents validation_data as a tuple (x_val, y_val).
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    return history, model

In [181]:
# Define hyperparameters to use in Grid Search
# Wider CNN search space than cnn_params (more kernel sizes, epochs and dropout steps).
# NOTE(review): `p` is not referenced by any other cell shown in this notebook.
# Tuples are ranges given as (start, stop, number of steps); lists are explicit choices.
p = {'lr': (0.2, 1, 2),
     'first_neuron': [64, 128],
     'kernel_size': [1, 2, 3],
     'batch_size': [64, 128],
     'epochs': [10, 20, 40],
     'dropout': (0, 0.5, 3),
     'flatten_layer': [100, 150], 
     'optimizer': [Adam],
     'shapes':['brick', 'funnel'],
     'losses': [binary_crossentropy],
     'activation': [relu],
     'last_activation': [sigmoid]}

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_11 (Conv1D)           (None, 4, 128)            21888     
_________________________________________________________________
conv1d_12 (Conv1D)           (None, 3, 128)            32896     
_________________________________________________________________
flatten_6 (Flatten)          (None, 384)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 100)               38500     
_________________________________________________________________
dense_12 (Dense)             (None, 1)                 101       
Total params: 93,385
Trainable params: 93,385
Non-trainable params: 0
_________________________________________________________________


<keras.engine.sequential.Sequential at 0x13d9f8ac8>

## Helper Functions

In [12]:
def plot_training_results(history_obj):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history_obj: object whose ``history`` attribute is a dict with the
            per-epoch lists 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
            (e.g. the value returned by Keras ``model.fit``).
    """
    # Read from the argument; the previous version read an unrelated global
    # named `history` and ignored the parameter.
    metrics = history_obj.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    epochs = range(1, len(acc) + 1)

    fig, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize=(15, 5))

    accuracy_axis.plot(epochs, acc, 'bo', label='Training acc')
    accuracy_axis.plot(epochs, val_acc, 'b', label='Validation acc')
    accuracy_axis.set_title('Training and validation accuracy')
    accuracy_axis.set_ylabel('Accuracy')
    accuracy_axis.set_xlabel('Epoch')
    accuracy_axis.legend()

    loss_axis.plot(epochs, loss, 'bo', label='Training loss')
    loss_axis.plot(epochs, val_loss, 'b', label='Validation loss')
    loss_axis.set_title('Training and validation loss')
    # Clip the y-range so a few diverging epochs do not flatten the rest of the curve.
    loss_axis.set_ylim(0, 3)
    loss_axis.set_ylabel('Loss')
    loss_axis.set_xlabel('Epoch')
    loss_axis.legend()

    # plt.show() renders under the notebook's inline backend, where
    # Figure.show() only emits a non-GUI-backend warning.
    plt.show()

## Extract numerical and categorical columns

In [11]:
# Get the first fight window and the first fight in that window
def get_column_types(windows=None):
    """Split the columns of a fight record into numerical and categorical.

    The first fight of the first window is used as the reference record, both
    for the column names and for the value types. (Previously the names came
    from ``features[0][0]`` but the types from ``features[1][1]``, which fails
    when there is only one window or one fight per window.)

    Args:
        windows: optional indexable of fight windows, each an indexable of
            mapping-like fight records. Defaults to the notebook-level
            ``features``.

    Returns:
        (num_cols, cat_cols): column names whose reference value is exactly a
        Python ``float`` or ``int``, and all remaining column names, each in
        the reference record's iteration order.
    """
    if windows is None:
        windows = features

    reference_fight = windows[0][0]

    num_cols = []
    cat_cols = []
    for column in reference_fight:
        value_type = type(reference_fight[column])
        # Exact type match on purpose: bools, strings and any other type
        # (including subclasses of float/int) count as categorical.
        if value_type is float or value_type is int:
            num_cols.append(column)
        else:
            cat_cols.append(column)

    return num_cols, cat_cols