In [0]:
%matplotlib inline
from google.colab import drive
drive.mount('/content/drive')

import sys
!{sys.executable} -m pip install neupy


Loading data

In [0]:
import pandas as pd

pd.set_option('display.max_columns', None)

df = pd.read_csv("/content/drive/My Drive/AI/MOW/UFC_fight_predictions/ufc_data/data.csv")


Data type

In [0]:
df.info(verbose=True)

Static data analyze

In [0]:
df.describe()

Amount of data nulls

In [0]:
df.isnull().sum()

Histogram plots

In [0]:
df.select_dtypes(exclude=['object']).plot.hist(subplots=True, layout=(135,1), figsize=(100, 100), bins=100)

Filling null values

In [0]:
df.fillna(method='ffill', inplace=True)
df.isnull().sum()

Changing date to number columns

In [0]:
df['date']= pd.to_datetime(df['date'])

df['year'] = [d.year for d in df['date']]
df['month'] = [d.month for d in df['date']]
df['day'] = [d.day for d in df['date']]

In [0]:
df = df.drop('date', 1)
df.head()

Changing categoric data to value

In [0]:
from sklearn import preprocessing

columns = list(df.select_dtypes(include=['object']).columns)
for column in columns:
  le = preprocessing.LabelEncoder()
  le.fit(df[column])
  df[column] = le.transform(df[column])

df.head()



Data normalization to (0;1)

In [0]:
columns = list(df.columns)
for column in columns:
  x = df[[column]].values.astype(float)
  min_max_scaler =  preprocessing.MinMaxScaler()
  x_scaled = min_max_scaler.fit_transform(x)
  df[[column]] = pd.DataFrame(x_scaled)


df.head()


New histograms after normalization

In [0]:
df.plot.hist(subplots=True, layout=(147,1), figsize=(100, 100), bins = 100)


Genetic Algorithm - Feature selection

In [0]:
from sklearn.feature_selection import SelectKBest, f_regression

y = df['Winner']
X = df.drop(columns="Winner")


selector = SelectKBest(f_regression ,k=100)
selector = selector.fit(X, y)
cols = selector.get_support(indices=True)
df_new = df.iloc[:,cols]

df_new.info()


In [0]:
df_new.head()

Column reduction using PCA

In [0]:
from sklearn.decomposition import PCA
pca = PCA(.97)
df_new_components = pca.fit_transform(df)

pcaDf = pd.DataFrame(data = df_new_components)
pcaDf.head()


Cross-validation for 10 element groups. 9 - train, 1 - validation.



In [0]:
from sklearn.model_selection import train_test_split, cross_validate
X_train, X_test, y_train, y_test = train_test_split(pcaDf, y, test_size = 0.1, random_state = 42)

Changing data from contiunos, to int (as 0 or 1)

In [0]:
y_train = y_train.astype(int)
y_test = y_test.astype(int)

#SVC
Training and prediction

In [0]:
from sklearn import svm
clv = svm.NuSVC()

In [0]:
clv.fit(X_train, y_train )
pred_clv = clv.predict(X_test)

Cross validation

In [0]:
scores = cross_validate(clv, X_train, y_train, cv=10, scoring='f1_macro')

In [0]:
print(scores)

Metrics

In [0]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [0]:
print(classification_report(y_test, pred_clv))
print(confusion_matrix(y_test, pred_clv))

#Neural Network

In [0]:
from sklearn.neural_network import MLPClassifier
mlpc = MLPClassifier(hidden_layer_sizes=(11, 11, 11), max_iter = 500)

In [0]:
mlpc.fit(X_train, y_train)
mlpc_predict = mlpc.predict(X_test)

In [0]:
print(classification_report(y_test, mlpc_predict))
print(confusion_matrix(y_test, mlpc_predict))

#PNN

In [0]:
from neupy import algorithms
pnn = algorithms.PNN(std=10, verbose=False)
pnn.train(X_train, y_train)

In [0]:
pnn_pred = pnn.predict(X_test)

In [0]:
print(classification_report(y_test, pnn_pred))
print(confusion_matrix(y_test, mlpc_predict))

#GA Tuning
###We already reached 100% Accuracy, in consequence GA parameter tuning is redundant :)