In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from keras.models import Sequential
from keras.layers import Dense, Dropout, InputLayer

Using TensorFlow backend.


In [2]:
# Read the fight table and discard the "Unnamed: 0" column (presumably an index
# column left over from an earlier CSV export).
ufc_data = pd.read_csv("UFC-data/ufc-fights-model.csv").drop(columns=["Unnamed: 0"])

# Sanity check: gather every bout involving Conor McGregor, first the rows where
# he appears under "Name", then the rows where he appears under "Name.1".
mcg_fights = pd.concat(
    [ufc_data[ufc_data[name_column] == "Conor McGregor"] for name_column in ("Name", "Name.1")]
)

# The rows look sensible, so the data set is usable :)
mcg_fights

Unnamed: 0,Name,Height,Weight,Reach,Stance,DOB,SLpm,SAcc,SApm,StrDef,...,SLpm.1,SAcc.1,SApm.1,StrDef.1,TDAvg.1,TDAcc.1,TDDef.1,SubAvg.1,AvgTime2Win.1,Win
973,Conor McGregor,1.7526,155,74,Southpaw,1988,5.27,0.48,4.43,0.55,...,4.32,0.41,4.39,0.55,2.92,0.36,0.92,0.5,10.11,1
1042,Conor McGregor,1.7526,155,74,Southpaw,1988,5.27,0.48,4.43,0.55,...,4.62,0.45,3.78,0.53,1.19,0.3,0.42,1.4,8.24,1
1254,Conor McGregor,1.7526,155,74,Southpaw,1988,5.27,0.48,4.43,0.55,...,3.47,0.44,3.18,0.65,0.64,0.65,0.91,0.1,13.61,1
1380,Conor McGregor,1.7526,155,74,Southpaw,1988,5.27,0.48,4.43,0.55,...,2.78,0.49,2.35,0.67,4.13,0.55,1.0,0.3,8.61,1
1544,Conor McGregor,1.7526,155,74,Southpaw,1988,5.27,0.48,4.43,0.55,...,3.87,0.32,2.67,0.65,0.87,0.32,0.65,0.3,11.72,1
393,Khabib Nurmagomedov,1.778,155,70,Orthodox,1988,4.11,0.49,1.65,0.67,...,5.27,0.48,4.43,0.55,0.75,0.62,0.7,0.0,8.01,1
1186,Nate Diaz,1.8288,170,76,Southpaw,1985,4.62,0.45,3.78,0.53,...,5.27,0.48,4.43,0.55,0.75,0.62,0.7,0.0,8.01,1
1643,Dustin Poirier,1.7526,155,72,Southpaw,1989,5.51,0.49,3.97,0.56,...,5.27,0.48,4.43,0.55,0.75,0.62,0.7,0.0,8.01,0
2036,Max Holloway,1.8034,145,69,Orthodox,1991,6.66,0.44,4.45,0.62,...,5.27,0.48,4.43,0.55,0.75,0.62,0.7,0.0,8.01,0


In [None]:
# One-hot encode the stance of both fighters.
# There are 4 stances, so each fighter gets 4 indicator columns (8 in total):
#   stance_<open|orthodox|southpaw|switch>_0  <- built from "Stance"
#   stance_<open|orthodox|southpaw|switch>_1  <- built from "Stance.1"
#
# The comparison is vectorized instead of looping over rows. Besides being much
# faster, it always yields exactly one value per row: a missing or unrecognized
# stance becomes an all-zero encoding, whereas a row-by-row append would skip
# such rows and make the column assignment fail on a length mismatch.

# (column-name fragment, raw label in the CSV) -- order defines the column order.
stance_labels = (
    ("open", "Open Sta"),
    ("orthodox", "Orthodox"),
    ("southpaw", "Southpaw"),
    ("switch", "Switch"),
)

for fighter_suffix, stance_column in (("0", "Stance"), ("1", "Stance.1")):
    for stance_key, stance_label in stance_labels:
        indicator_name = "stance_{}_{}".format(stance_key, fighter_suffix)
        # Boolean mask -> 0/1 integers, matching the integer columns used before.
        ufc_data[indicator_name] = (ufc_data[stance_column] == stance_label).astype("int64")

In [None]:
# Min-max normalize every feature column to the [0, 1] range.
# The name, raw stance, DOB and "Win" label columns are excluded.
columns_to_normalize = ufc_data.columns.drop(
    ["Name", "Stance", "DOB", "Name.1", "Stance.1", "DOB.1", "Win"]
)
print(columns_to_normalize)

# NOTE(review): the min/max statistics are computed over the whole data set,
# i.e. before the train/test split that happens in a later cell.
feature_min = ufc_data[columns_to_normalize].min()
feature_range = ufc_data[columns_to_normalize].max() - feature_min

# A constant column has a zero range; dividing by it would turn the entire
# column into NaN (0 / 0). Dividing by 1 instead maps such a column to zeros.
feature_range = feature_range.replace(0, 1)

ufc_data[columns_to_normalize] = (ufc_data[columns_to_normalize] - feature_min) / feature_range

In [None]:
# Feature matrix from the selected columns; the "Win" label as an (n, 1) column vector.
x = ufc_data[columns_to_normalize].to_numpy()
y = ufc_data["Win"].to_numpy().reshape(-1, 1)

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=2020,
)

# One shape per line, in the order x_train, x_test, y_train, y_test.
print(*(split.shape for split in (x_train, x_test, y_train, y_test)), sep="\n")

In [None]:
# Building the model: a fully connected binary classifier
# (128 -> 64 -> 32 ReLU units, 20% dropout, single sigmoid output).
#
# The input width is read from the training matrix instead of being hard-coded,
# so the network keeps matching the data if feature columns are added or removed.
n_features = x_train.shape[1]

model = Sequential()
model.add(InputLayer(input_shape=(n_features,)))
model.add(Dense(128, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1, activation="sigmoid"))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (single sigmoid
# output), and accuracy tracked as an extra metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 100 epochs in mini-batches of 32 and keep the returned history object.
# The held-out test split is passed as validation data, so the validation curves
# are computed on the same rows that are later used for the final evaluation.
arr_metrics = model.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(x_test, y_test),
)

In [None]:
# Score the trained model on the held-out split and keep the result for inspection.
evaluation = model.evaluate(x=x_test, y=y_test)

In [None]:
# Persist the trained network as an HDF5 file. The target directory is created
# first so that saving also works on a fresh checkout where "models/" is absent.
os.makedirs("models", exist_ok=True)
model.save("models/attempt0.h5")

# Show which metric names were recorded during training.
print(arr_metrics.history.keys())

In [None]:
def _plot_training_curve(history, metric, ylabel):
    """Plot one recorded metric for the training and validation runs.

    Parameters
    ----------
    history : mapping
        Per-epoch values keyed by metric name; must contain both ``metric``
        and ``"val_" + metric``.
    metric : str
        Key of the training-set series (e.g. ``"loss"``).
    ylabel : str
        Human-readable metric name, used for the y-axis label and the title.
    """
    fig, ax = plt.subplots()
    ax.plot(history[metric])
    ax.plot(history["val_" + metric])
    ax.set_title("{} per epoch".format(ylabel))
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Epoch")
    # Legend order follows the plotting order above: training first, then validation.
    ax.legend(["Train", "Val"])
    plt.show()


# Accuracy and loss curves over the training epochs.
_plot_training_curve(arr_metrics.history, "accuracy", "Accuracy")
_plot_training_curve(arr_metrics.history, "loss", "Loss")