# In this notebook we attempt to predict the outcomes of a fighter's next N fights

## Library Imports

In [198]:
import sys
sys.path.insert(1, '../combined_data')
sys.path.insert(1, '../predict_winner')
from make_career import make_career
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import talos as ta
from talos.model.normalizers import lr_normalizer
from talos.model.hidden_layers import hidden_layers
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

## Loading the dataset

In [199]:
# Build fixed-length career windows from the combined fight data:
# each sample uses 10 past fights as features and the next 5 fights as labels.
features, labels = make_career(pd.read_csv('../combined_data/combined_fight_data_zerod_nans.csv'),
                               N_FIGHT_CAREER=10, 
                               N_FUTURE_LABELS=5)
# Convert the label outcomes to 0/1 integers.
# NOTE(review): encode_all_labels is defined in a later cell and reads the global
# `labels`; on a fresh kernel that cell has to be executed before this line.
labels = encode_all_labels()

Creating careers using 10 fight intervals and predicting 5 future fights
Original fight data shape: (5062, 154)
Fights 2x shape: (10124, 96)

Features shape: (600, 10)
Labels shape: (600, 5)

Features is a 2D matrix with 600 rows
Each row contains has 10 fights, and each fight has 96 keys

Labels is a 2D matrix with 600 rows
Each row contains the the prediction for the next 5 fight(s)


In [200]:
# Inspect the feature container returned by make_career
print(features.shape)
print(type(features))

(600, 10)
<class 'numpy.ndarray'>


In [201]:
# Inspect the encoded label container
print(labels.shape)
print(type(labels))

(600, 5)
<class 'numpy.ndarray'>


In [202]:
# Print the "Winner" value of every fight in the first career window
for i in features[0]:
    print(i["Winner"])

True
True
True
True
False
True
True
False
True
False


In [204]:
# Re-check the label matrix shape (same check as the earlier inspection cell)
print(labels.shape)

(600, 5)


In [205]:
# Preview the labels with an extra singleton axis. The result is only displayed;
# it is not assigned, so `labels` keeps its (600, 5) shape.
labels.reshape(600,-1,5)

array([[[1, 1, 1, 1, 1]],

       [[1, 1, 1, 1, 1]],

       [[1, 1, 1, 1, 1]],

       ...,

       [[1, 1, 1, 0, 0]],

       [[0, 1, 0, 0, 0]],

       [[0, 1, 0, 1, 1]]])

In [224]:
def convert_labels(y_train):
    """Split a 2-D label matrix into one 1-D target array per future fight.

    A multi-output Keras model takes one target array per output head, so a
    matrix of shape ``(n_samples, n_fights)`` is turned into a list of
    ``n_fights`` arrays, each of shape ``(n_samples,)``.

    The number of fights is taken from the input instead of being fixed at 5,
    so the helper keeps working if the number of future labels is changed.
    For a 5-column input the result is the same as before.

    Parameters
    ----------
    y_train : array-like, shape (n_samples, n_fights)
        Label matrix; column ``k`` holds the outcome of future fight ``k + 1``.

    Returns
    -------
    list of numpy.ndarray
        One independent copy per column, in column order.

    Raises
    ------
    IndexError
        If ``y_train`` has fewer than two dimensions.
    """
    label_matrix = np.asarray(y_train)
    n_fights = label_matrix.shape[1]

    # np.array(...) copies each column so the returned arrays do not alias the input.
    return [np.array(label_matrix[:, fight_idx]) for fight_idx in range(n_fights)]

In [134]:
def encode_all_labels(raw_labels=None):
    """Encode a 2-D collection of truthy/falsy outcomes as a 0/1 integer array.

    Parameters
    ----------
    raw_labels : iterable of iterables, optional
        Rows of outcome flags. When omitted, the notebook-level ``labels``
        variable is used, which keeps the zero-argument call
        ``encode_all_labels()`` working.

    Returns
    -------
    numpy.ndarray
        Array with the same row/column layout as the input, holding ``1``
        where the element is truthy and ``0`` otherwise.
    """
    if raw_labels is None:
        # Fall back to the notebook global for backward compatibility.
        raw_labels = labels

    # Plain truthiness is used on every element, exactly like `if elem:`.
    return np.array([[1 if outcome else 0 for outcome in row]
                     for row in raw_labels])

## Preprocess Features

## Generates np.array of shape (n_windows, n_fights * n_numeric_features) — here (600, 10 * 85)

In [170]:
from sklearn.utils import shuffle

def collapse_n_fights(train_size=0.95, random_state=0):
    """Flatten each career window to one numeric row, then shuffle, split and scale.

    Reads the notebook-level ``features`` and ``labels`` arrays. For every
    window in ``features`` the numeric columns (as reported by
    ``get_column_types``) of each fight are laid out one fight after another
    in a single 1-D float vector; NaN/inf values are replaced via
    ``np.nan_to_num``.

    The shuffle is seeded with the same ``random_state`` as the split, so
    repeated calls return the same partition (previously the shuffle was
    unseeded and every call produced a different train/test split).

    Parameters
    ----------
    train_size : float, default 0.95
        Fraction of windows placed in the training set.
    random_state : int, default 0
        Seed used for both the shuffle and the train/test split.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Flattened, standardized feature rows. The scaler is fitted on the
        training rows only and then applied to both sets.
    y_train, y_test : numpy.ndarray
        The matching rows of ``labels``.
    """
    num_cols, _ = get_column_types()

    # Collect only the numeric features: one flat row per career window,
    # ordered fight by fight and, within a fight, in num_cols order.
    flat_rows = []
    for window in features:
        row = np.array([fight[col] for fight in window.reshape(-1) for col in num_cols],
                       dtype=float)
        np.nan_to_num(row, copy=False)
        flat_rows.append(row)

    new_features, new_labels = shuffle(np.array(flat_rows), labels,
                                       random_state=random_state)
    X_train, X_test, y_train, y_test = train_test_split(new_features, new_labels,
                                                        random_state=random_state,
                                                        train_size=train_size)

    # Scale the data (fit on the training rows only to avoid leaking test statistics)
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    print("X_train length {}, y_train length {}".format(X_train.shape, y_train.shape))
    print("X_test length {}, y_test length {}".format(X_test.shape, y_test.shape))

    return X_train, X_test, y_train, y_test

## Building a CNN to predict the winner using last n fights

In [258]:
# Get the data
X_train, X_test, y_train, y_test = collapse_n_fights()
X_train = X_train.reshape(X_train.shape[0], 10, 85)
X_test = X_test.reshape(X_test.shape[0], 10, 85)

print("New: X_train length {}, y_train length {}".format(X_train.shape, y_train.shape))
print("New: X_test length {}, y_test length {}".format(X_test.shape, y_test.shape))

X_train length (570, 850), y_train length (570, 5)
X_test length (30, 850), y_test length (30, 5)
New: X_train length (570, 10, 85), y_train length (570, 5)
New: X_test length (30, 10, 85), y_test length (30, 5)


In [259]:
# Split each (n_samples, 5) label matrix into per-fight target arrays, stacked as
# (5, n_samples), so that y_train[k] holds the targets for output head k + 1.
# NOTE(review): this cell rebinds y_train / y_test and is not idempotent — re-running it
# without first re-running the data cell above converts already-converted labels.
y_train = np.array(convert_labels(y_train))
y_test = np.array(convert_labels(y_test))

In [266]:
# Targets for the first output head: one label per training sample
y_train[0].shape

(570,)

In [254]:
# NOTE(review): the saved output of this cell, (570, 5), comes from an execution that
# predates the label-conversion cell (execution count 254 < 259); after conversion
# y_train has shape (5, 570).
print(y_train.shape)

(570, 5)


In [255]:
# Define hyperparameters to use in Grid Search
cnn_params = {'lr': [0.01, 0.1, 1],
     'num_filters': [64, 128],
     'kernel_size': [2],
     'batch_size': [64, 128],
     'epochs': [5, 15],
     'dropout': [0.01],
     'flatten_layer': [100, 150], 
     'optimizer': [Adam],
     'losses': [binary_crossentropy],
     'activation': [relu],
     'last_activation': [sigmoid]}

In [267]:
# Build and compile the shared-trunk, multi-output model (get_ensemble also prints its summary).
# get_ensemble is defined in the "Model Builders" section further down.
model = get_ensemble()

Model: "model_29"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Input (InputLayer)              (None, 10, 85)       0                                            
__________________________________________________________________________________________________
Conv_1D (Conv1D)                (None, 9, 128)       21888       Input[0][0]                      
__________________________________________________________________________________________________
Dropout (Dropout)               (None, 9, 128)       0           Conv_1D[0][0]                    
__________________________________________________________________________________________________
Flatten (Flatten)               (None, 1152)         0           Dropout[0][0]                    
___________________________________________________________________________________________

In [269]:
# Train all five heads at once; the targets are keyed by output-layer name.
# No validation data is passed here, so only training metrics are recorded in `history`.
history = model.fit(X_train,
                    {'Fight_1': y_train[0], 
                     'Fight_2': y_train[1], 
                     'Fight_3': y_train[2], 
                     'Fight_4': y_train[3],
                     'Fight_5': y_train[4],},
                   batch_size=32,
                   epochs=10,
                   verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [275]:
# predictions = model.predict_classes(X_test)
# Sigmoid outputs of every head for the held-out windows (one entry per output head).
y_prob = model.predict(X_test) 

In [278]:
# Stack the per-head predictions to check their layout: (heads, test samples, 1)
np.array(y_prob).shape

(5, 30, 1)

In [None]:
# Keep the ten best rows by validation accuracy and by training accuracy.
# NOTE(review): results_df is not defined in any visible cell — presumably the results
# table of a hyperparameter scan; confirm where it is created before running this.
top_10_val_cnn = results_df.sort_values(by=['val_accuracy'], ascending=False).head(10)
top_10_acc_cnn = results_df.sort_values(by=['accuracy'], ascending=False).head(10)

In [None]:
# Display the ten runs with the highest validation accuracy
top_10_val_cnn

In [None]:
# Display the ten runs with the highest training accuracy
top_10_acc_cnn

In [None]:
# Persist the ten best runs ranked by validation accuracy.
# `top_10_cnn` was never defined in this notebook; the ranked tables that exist are
# top_10_val_cnn and top_10_acc_cnn. Swap in top_10_acc_cnn if the training ranking is wanted.
top_10_val_cnn.to_csv("../model_results/UFC_Predict_5_Fights_CNN_2.csv")

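# Model Builders

**Note:** the import and model-builder cells in this section are used by the training cells above (`get_ensemble`, `Adam`, `binary_crossentropy`, ...). On a fresh kernel, run this section and the helper sections below before running those cells.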

In [23]:
# Model imports
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D, LSTM, Input
from keras.optimizers import Adam, Nadam, RMSprop
from keras.losses import logcosh, binary_crossentropy
from keras.activations import relu, elu, sigmoid

### Analyzing last n fights using a Convolutional Neural Network

### Build a multi-output model
### Bottom is a shared CNN trunk; top is 5 single-unit sigmoid output heads (one per future fight)

In [218]:
def get_ensemble(input_shape=(10, 85), n_outputs=5):
    """Build and compile a shared-trunk Conv1D model with one sigmoid head per fight.

    Architecture: Input -> Conv1D(128 filters, kernel 2, relu) -> Dropout(0.01)
    -> Flatten -> Dense(128, relu), followed by ``n_outputs`` parallel
    ``Dense(1, sigmoid)`` heads named ``Fight_1`` ... ``Fight_<n_outputs>``.
    Every head is trained with binary cross-entropy, weight 1.0, and reports
    accuracy; the optimizer is RMSprop.

    Parameters
    ----------
    input_shape : tuple, default (10, 85)
        Shape of one sample, ``(fights per window, numeric features per fight)``.
    n_outputs : int, default 5
        Number of future fights to predict, i.e. number of output heads.

    Returns
    -------
    keras.models.Model
        The compiled model. Its summary is printed as a side effect.
    """
    fight_input = Input(shape=input_shape, name="Input")
    x = Conv1D(filters=128, kernel_size=2, activation='relu', name="Conv_1D")(fight_input)
    x = Dropout(0.01, name="Dropout")(x)
    x = Flatten(name="Flatten")(x)

    # Global Dense Network shared by every head
    x = Dense(128, activation='relu', name="Global_Dense")(x)

    # One output layer per future fight. The layer names double as the keys of
    # the loss / loss-weight / metric dicts below and of the target dict
    # passed to fit().
    head_names = ["Fight_{}".format(idx) for idx in range(1, n_outputs + 1)]
    heads = [Dense(1, activation='sigmoid', name=head_name)(x)
             for head_name in head_names]

    model = Model(inputs=fight_input, outputs=heads)

    model.compile(optimizer='rmsprop',
                  loss={head_name: "binary_crossentropy" for head_name in head_names},
                  loss_weights={head_name: 1. for head_name in head_names},
                  metrics={head_name: "accuracy" for head_name in head_names})

    model.summary()
    return model

In [None]:
def get_cnn(x_train, y_train, x_val, y_val, params, test_model=False):
    """Build, compile and train a single-output Conv1D classifier.

    Architecture: two Conv1D layers (relu) -> Dropout -> Flatten ->
    Dense(relu) -> Dense(1, sigmoid).

    Parameters
    ----------
    x_train, y_train : array-like
        Training samples and targets. The per-sample input shape is taken
        from ``x_train`` (everything after the first axis).
    x_val, y_val : array-like
        Validation samples and targets, evaluated after every epoch.
    params : dict
        Hyperparameters. Keys read here: ``num_filters``, ``kernel_size``,
        ``dropout``, ``flatten_layer``, ``losses``, ``optimizer`` (an optimizer
        class), ``lr``, ``batch_size`` and ``epochs``.
    test_model : bool, default False
        When true, plot the training curves and print the model summary.

    Returns
    -------
    (history, model)
        The Keras ``History`` object from ``fit`` and the trained model.

    Notes
    -----
    NOTE(review): the ``(x_train, y_train, x_val, y_val, params)`` signature looks
    like the model-function convention of the Talos library imported at the top
    of the notebook — confirm against the scan call before changing it.
    """
    model = Sequential()

    # Convolutional Layers (input shape derived from the data instead of hard-coded)
    model.add(Conv1D(filters=params["num_filters"], kernel_size=params["kernel_size"],
                     activation='relu', input_shape=np.shape(x_train)[1:]))
    model.add(Conv1D(filters=params["num_filters"], kernel_size=params["kernel_size"],
                     activation='relu'))
    model.add(Dropout(params['dropout']))

    # Flatten Layers
    model.add(Flatten())
    model.add(Dense(params["flatten_layer"], activation='relu'))

    # Output Layer
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss=params['losses'],
                  optimizer=params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])),
                  metrics=['accuracy'])

    # validation_data is documented as a tuple (x_val, y_val); a list can be
    # mistaken for a multi-input sample by some Keras versions.
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        batch_size=params['batch_size'],
                        epochs=params['epochs'],
                        verbose=0)

    if test_model:
        plot_training_results(history)
        model.summary()

    return history, model

## Helper Functions

In [None]:
def plot_training_results(history_obj):
    """Plot training vs. validation accuracy and loss side by side.

    Parameters
    ----------
    history_obj : object with a ``history`` dict
        Must contain the keys ``'accuracy'``, ``'val_accuracy'``, ``'loss'``
        and ``'val_loss'``, each a per-epoch sequence (as in the ``History``
        returned by ``model.fit`` when validation data is supplied).

    Raises
    ------
    KeyError
        If any of the four series is missing from ``history_obj.history``.
    """
    recorded = history_obj.history
    # Look up every series first so a missing key fails before any figure is created.
    series = {key: recorded[key]
              for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')}

    epochs = range(1, len(series['accuracy']) + 1)

    fig, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize=(20, 5))

    # (axis, history key, short legend name, y-axis label)
    panels = [(accuracy_axis, 'accuracy', 'acc', 'Accuracy'),
              (loss_axis, 'loss', 'loss', 'Loss')]
    for axis, key, short_name, y_label in panels:
        axis.plot(epochs, series[key], 'bo', label='Training ' + short_name)
        axis.plot(epochs, series['val_' + key], 'b', label='Validation ' + short_name)
        axis.set_title('Training and validation ' + key)
        axis.set_ylabel(y_label)
        axis.set_xlabel('Epoch')
        axis.legend()

    # Clamp the loss panel so early-epoch spikes do not flatten the rest of the curve.
    loss_axis.set_ylim(0, 3)

    # plt.show() rather than fig.show(): on the notebook's non-GUI inline backend
    # Figure.show() only emits a warning and does not display anything.
    plt.show()

## Extract numerical and categorical columns

In [12]:
# Classify columns using the first fight of the first career window
def get_column_types(sample_fight=None):
    """Split a fight's columns into numerical and categorical names.

    A column is numerical when the type of its value in the sample fight is
    exactly ``float`` or ``int``. The match is on the exact type, so ``bool``
    values and NumPy scalar types are classified as categorical.

    Column names and value types are now read from the same fight. Previously
    the names came from ``features[0][0]`` while the types were read from
    ``features[1][1]``, which raised an error when fewer than two windows existed.

    Parameters
    ----------
    sample_fight : mapping, optional
        Mapping of column name to value used for the classification. Defaults
        to ``features[0][0]``, the first fight of the first window of the
        notebook-level ``features`` array.

    Returns
    -------
    (num_cols, cat_cols) : tuple of list
        Numerical and categorical column names, each in iteration order of
        the sample fight.
    """
    if sample_fight is None:
        sample_fight = features[0][0]

    num_cols = []
    cat_cols = []
    for column in sample_fight:
        if type(sample_fight[column]) in (float, int):
            num_cols.append(column)
        else:
            cat_cols.append(column)

    return num_cols, cat_cols

In [13]:
# Smoke-test the helper: x receives the numerical column names, y the categorical ones
x, y = get_column_types()