In [77]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

In [78]:
df_original = pd.read_csv('../CSV Files/df_ufc_masters_w_reversed.csv')

In [79]:
df = df_original.copy()

In [80]:
df['finish'].value_counts()

finish
U-DEC         3556
KO/TKO        2990
SUB           1706
S-DEC          964
M-DEC           64
DQ              28
Overturned       4
Name: count, dtype: int64

In [81]:
df = df.dropna(subset=['finish'])
df = df[~df['finish'].isin(['DQ', 'Overturned'])]

In [82]:
finish_mapping = {
    'U-DEC': 'Decision',
    'S-DEC': 'Decision',
    'M-DEC': 'Decision',
    'KO/TKO': 'KO',
    'SUB': 'Submission'
}
df['finish'] = df['finish'].map(finish_mapping)


In [83]:
df_encoded = pd.get_dummies(df['finish'], prefix='finish')
df_encoded = df_encoded.astype(int)
df_encoded['finish_type'] = df_encoded['finish_Finish']
df_encoded = df_encoded.drop(['finish_Decision', 'finish_Finish'], axis=1)


In [84]:
#finish_mapping = {'finish_Submission': 0,'finish_KO': 1,'finish_Decision': 2}
#df_encoded['finish_type'] = df_encoded[['finish_Submission', 'finish_KO', 'finish_Decision']].idxmax(axis=1).map(finish_mapping)

In [85]:
df_result =pd.concat([df, df_encoded], axis=1)

In [86]:
df_result['B_win_by_Decision'] = df_result['B_win_by_Decision_Split'] + df_result['B_win_by_Decision_Majority'] + df_result['B_win_by_Decision_Unanimous']
df_result['R_win_by_Decision'] = df_result['R_win_by_Decision_Split'] + df_result['R_win_by_Decision_Majority'] + df_result['R_win_by_Decision_Unanimous']

features= ['B_avg_SIG_STR_landed',
    'B_avg_SIG_STR_pct',
    'B_avg_TD_pct',
    'R_avg_SIG_STR_pct',
    'R_avg_TD_pct',
    'B_avg_SUB_ATT',
    'B_avg_TD_landed',
    'R_avg_SIG_STR_landed',
    'R_avg_SUB_ATT',
    'R_avg_TD_landed',
    'B_Height_cms',
    'B_Reach_cms',
    'R_Height_cms',
    'R_Reach_cms', 
    'R_age',
    'B_age']
#features=['B_win_by_Decision', 'R_win_by_Decision','B_win_by_KO/TKO','R_win_by_KO/TKO','B_win_by_Submission', 'R_win_by_Submission']

In [87]:
# List of columns to apply np.log
columns_to_log = ['B_avg_SIG_STR_landed', 'B_avg_SUB_ATT', 'B_avg_TD_landed', 
                  'R_avg_SIG_STR_landed', 'R_avg_SUB_ATT', 'R_avg_TD_landed']

# Applying np.log to each column in the list
for col in columns_to_log:
    # Adding a small constant to avoid log(0) which is undefined
    df[col] = np.log(df[col] + 1e-5)

In [88]:
 #'B_win_by_Decision', 'R_win_by_Decision','B_win_by_KO/TKO','R_win_by_KO/TKO','B_win_by_Submission', 'R_win_by_Submission'

In [89]:
train_end_date = '2020-09-05'
test_start_date = '2020-09-06'
df_train = df_result[(df_result['date'] <= train_end_date)]
df_test = df_result[(df_result['date'] >= test_start_date)]

In [90]:
X_train =df_train[features]
y_train= df_train['finish_type']
X_test =df_test[features]
y_test= df_test['finish_type']

In [91]:
sScaler = StandardScaler()
X_train_scaled = sScaler.fit_transform(X_train)
X_test_scaled = sScaler.transform(X_test)

In [92]:
random_forest_model = RandomForestClassifier(n_estimators=50,max_depth=3,min_samples_split=5, random_state=42)
random_forest_model.fit(X_train_scaled, y_train)

In [93]:
y_pred_percent= random_forest_model.predict_proba(X_test_scaled)
y_pred_percent

array([[0.33338112, 0.66661888],
       [0.45944771, 0.54055229],
       [0.40101156, 0.59898844],
       ...,
       [0.41312218, 0.58687782],
       [0.43815675, 0.56184325],
       [0.56037321, 0.43962679]])

In [98]:
y_pred_proba_df = pd.DataFrame(y_pred_percent, columns=['Probability_Decision', 'Probability_Finish'])

In [95]:
y_pred_test = random_forest_model.predict(X_test_scaled)
y_pred_train = random_forest_model.predict(X_train_scaled)

In [96]:
accuracy = accuracy_score(y_train, y_pred_train)
conf_matrix = confusion_matrix(y_train, y_pred_train)

print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.5792
Confusion Matrix:
 [[2472 1586]
 [1874 2290]]


In [97]:
accuracy = accuracy_score(y_test, y_pred_test)
conf_matrix = confusion_matrix(y_test, y_pred_test)

print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.5406
Confusion Matrix:
 [[291 235]
 [251 281]]
