In [None]:
# some_file.py
import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '../combined_data')
sys.path.insert(1, '../predict_winner')
from make_career import make_career
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras import models, layers
import matplotlib.pyplot as plt

# In this notebook we attempt to predict whether a fighter will win their next fight based on how they performed in their last 5 fights

## Loading the dataset

In [None]:
# Read the combined fight CSV and let the project helper make_career turn it
# into the (features, labels) pair used by the rest of the notebook.
features, labels = make_career(pd.read_csv('../combined_data/combined_fight_data_zerod_nans.csv'))
# Flatten labels to a 1-D array (presumably one label per fight window -- confirm against make_career).
labels = labels.reshape(-1,)

In [None]:
# Sanity check: shape of the feature array returned by make_career.
print(features.shape)

In [None]:
# Sanity check: labels should be 1-D after the reshape in the loading cell.
print(labels.shape)

## Collapse each row
## Generates np.array of shape (3220, 5 * features)

In [None]:
def collapse_n_fights():
    """Flatten each fight window into a single row and split into train/test.

    Reads the notebook-level ``features`` and ``labels``. For every window,
    the numeric columns of each fight (as reported by ``get_column_types``)
    are pulled out, passed through ``np.nan_to_num``, and joined end to end
    into one 1-D vector.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` from an 80/20
        ``train_test_split`` with ``random_state=0``.
    """
    numeric_keys, _ = get_column_types()

    rows = []
    for window in features:
        # Seed with an empty float array so the joined row gets the same dtype
        # promotion as growing it from np.array([]) one fight at a time.
        pieces = [np.array([])]
        for fight in window.reshape(1, -1)[0]:
            values = np.array([fight[key] for key in numeric_keys])
            np.nan_to_num(values, copy=False)
            pieces.append(np.ravel(values))
        rows.append(np.concatenate(pieces))

    split = train_test_split(np.array(rows), labels, random_state=0, train_size=0.80)
    X_train, X_test, y_train, y_test = split

    train_msg = "X_train length {}, y_train length {}"
    test_msg = "X_test length {}, y_test length {}"
    print(train_msg.format(X_train.shape, y_train.shape))
    print(test_msg.format(X_test.shape, y_test.shape))

    return X_train, X_test, y_train, y_test

## Generate np.array of shape (3220, 5, features)

In [None]:
def n_fights_to_array():
    """Stack each fight window as a 2-D block and split into train/test.

    Reads the notebook-level ``features`` and ``labels``. Unlike
    ``collapse_n_fights``, the per-fight numeric vectors are kept as separate
    rows, so the resulting array has one axis for windows, one for the fights
    inside a window, and one for the numeric columns.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)`` from an 80/20
        ``train_test_split`` with ``random_state=0``.
    """
    numeric_keys, _ = get_column_types()

    def fight_vector(fight):
        # Numeric columns of one fight, with NaNs replaced in place.
        vec = np.array([fight[key] for key in numeric_keys])
        np.nan_to_num(vec, copy=False)
        return vec

    stacked = np.array([
        [fight_vector(fight) for fight in window.reshape(1, -1)[0]]
        for window in features
    ])

    split = train_test_split(stacked, labels, random_state=0, train_size=0.80)
    X_train, X_test, y_train, y_test = split

    train_msg = "X_train length {}, y_train length {}"
    test_msg = "X_test length {}, y_test length {}"
    print(train_msg.format(X_train.shape, y_train.shape))
    print(test_msg.format(X_test.shape, y_test.shape))

    return X_train, X_test, y_train, y_test

## Building a DNN to predict the winner using last n fights

In [None]:
# Get the flattened train/test split (one row per fight window) for the dense network.
# NOTE: collapse_n_fights calls get_column_types, which is defined in the last
# cell of this notebook -- that cell must have been run before this one.
X_train, X_test, y_train, y_test = collapse_n_fights()

In [None]:
# Create the dense neural network.
# NOTE: get_dnn is defined further down, in the "Models" section; run that cell first.
model = get_dnn()

In [None]:
# Train the neural network for 50 epochs with mini-batches of 64.
# NOTE(review): the held-out test split is passed as validation_data, so the
# reported val_* metrics are computed on X_test / y_test.
history = model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test))

In [None]:
# Plot its accuracy and loss curves.
# NOTE: plot_training_results is defined further down, in the "Helper Functions" section; run that cell first.
plot_training_results(history)

## Building a CNN to predict the winner using last n fights

In [None]:
# Get the per-fight (non-flattened) train/test split for the convolutional network.
# NOTE: this rebinds X_train / X_test / y_train / y_test from the DNN section above.
X_train, X_test, y_train, y_test = n_fights_to_array()

In [None]:
# Create the convolutional neural network.
# NOTE: get_cnn is defined further down, in the "Models" section; run that cell first.
model = get_cnn()

In [None]:
# Train the Neural Network

In [None]:
# Plot its accuracy

# Models

Dense Neural Network

In [None]:
def get_dnn(input_dim=425):
    """Build and compile the dense binary classifier.

    Architecture: Dense(128, relu) -> Dropout(0.2) -> Dense(128, relu)
    -> Dense(1, sigmoid), compiled with binary cross-entropy, the Adam
    optimizer and an accuracy metric. The model summary is printed as a
    side effect.

    Args:
        input_dim: Length of each flattened input row. Defaults to 425, the
            width previously hard-coded here; pass ``X_train.shape[1]`` if the
            number of fights or numeric columns changes.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = models.Sequential()
    model.add(layers.Dense(128, activation='relu', input_shape=(input_dim,)))
    model.add(layers.Dropout(0.2))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.summary()

    return model

In [None]:
# Find optimal parameters here

Convolutional Neural Network

In [None]:
def get_cnn(n_fights=5, n_features=85):
    """Build and compile the 1-D convolutional binary classifier.

    Conv1D requires each sample to be 2-D, ``(steps, channels)``; the previous
    ``input_shape=(425,)`` had no channel axis, so the model could not be
    built. Here each fight is one step and its numeric columns are the
    channels, which is the layout ``n_fights_to_array`` produces.

    A ``kernel_size=1`` convolution applies the same 32 filters to every
    fight independently. This is the same computation the original
    ``kernel_size=85, strides=85`` expressed over a flattened 5 * 85 = 425
    vector. The per-fight activations are then flattened so the Dense head
    emits a single probability per sample instead of one per fight.

    Args:
        n_fights: Number of fights (steps) in each window. Defaults to 5.
        n_features: Number of numeric columns (channels) per fight.
            Defaults to 85.

    Returns:
        The compiled Keras ``Sequential`` model. The model summary is printed
        as a side effect.
    """
    model = models.Sequential()
    model.add(layers.Conv1D(filters=32, kernel_size=1, activation='relu',
                            input_shape=(n_fights, n_features)))
    # Collapse (n_fights, 32) to one vector so the output is one value per sample.
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.summary()

    return model

In [None]:
# Find optimal parameters here

## Helper Functions

In [None]:
def plot_training_results(history_obj):
    """Plot training/validation accuracy and loss side by side.

    Args:
        history_obj: The object returned by ``model.fit``. Its ``history``
            dict must contain the keys ``'accuracy'``, ``'val_accuracy'``,
            ``'loss'`` and ``'val_loss'``.
    """
    # Read from the argument, not from a notebook-level `history` variable,
    # so the function plots whatever run the caller passes in.
    metrics = history_obj.history
    acc = metrics['accuracy']
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    epochs = range(1, len(acc) + 1)

    fig, (accuracy_axis, loss_axis) = plt.subplots(1, 2, figsize=(15, 5))

    accuracy_axis.plot(epochs, acc, 'bo', label='Training acc')
    accuracy_axis.plot(epochs, val_acc, 'b', label='Validation acc')
    accuracy_axis.set_title('Training and validation accuracy')
    accuracy_axis.set_ylabel('Accuracy')
    accuracy_axis.set_xlabel('Epoch')
    accuracy_axis.legend()

    loss_axis.plot(epochs, loss, 'bo', label='Training loss')
    loss_axis.plot(epochs, val_loss, 'b', label='Validation loss')
    loss_axis.set_title('Training and validation loss')
    # Clip the y-range so a few very large loss values do not flatten the curve.
    loss_axis.set_ylim(0, 3)
    loss_axis.set_ylabel('Loss')
    loss_axis.set_xlabel('Epoch')
    loss_axis.legend()

    # plt.show() renders under the inline notebook backend; fig.show() only
    # emits a "non-GUI backend" warning there.
    plt.show()

## Extract numerical and categorical columns

In [None]:
# Classify columns using the first fight of the first fight window
def get_column_types(sample_fight=None):
    """Split a fight's columns into numerical and categorical names.

    A column is numerical when its value in the sample fight is exactly a
    Python ``float`` or ``int`` (so ``bool``, ``str``, ``None`` and any other
    type count as categorical); everything else is categorical.

    The same fight is used both to enumerate the columns and to inspect the
    value types. Previously names came from ``features[0][0]`` and types from
    ``features[1][1]``, which raised ``IndexError`` when there was only one
    window or one fight per window.

    Args:
        sample_fight: Mapping of column name to value for one fight. When
            omitted, the first fight of the first window in the notebook-level
            ``features`` is used (assumed to be dict-like -- confirm against
            make_career).

    Returns:
        tuple: ``(num_cols, cat_cols)``, two lists of column names in the
        order they appear in the sample fight.
    """
    if sample_fight is None:
        sample_fight = features[0][0]

    num_cols = []
    cat_cols = []
    for column in sample_fight:
        if type(sample_fight[column]) in (float, int):
            num_cols.append(column)
        else:
            cat_cols.append(column)

    return num_cols, cat_cols