In [49]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np

In [50]:
# Read the source CSV (path is relative to the notebook's working directory).
csv_path = '../CSV Files/df_ufc_masters_w_reversed.csv'
df = pd.read_csv(csv_path)

In [51]:
# Columns used as model inputs. The order here is the column order of the
# feature matrix built from this list.
features = [
    'B_avg_SIG_STR_landed',
    'B_avg_SIG_STR_pct',
    'B_avg_SUB_ATT',
    'B_avg_TD_landed',
    'B_avg_TD_pct',
    'R_avg_SIG_STR_landed',
    'R_avg_SIG_STR_pct',
    'R_avg_SUB_ATT',
    'R_avg_TD_landed',
    'R_avg_TD_pct',
    'B_Height_cms',
    'B_Reach_cms',
    'R_Height_cms',
    'R_Reach_cms',
    'R_age',
    'B_age'
]

# Columns that are replaced by their natural log.
columns_to_log = ['B_avg_SIG_STR_landed', 'B_avg_SUB_ATT', 'B_avg_TD_landed',
                  'R_avg_SIG_STR_landed', 'R_avg_SUB_ATT', 'R_avg_TD_landed']

# Small constant added before the log so that a value of 0 stays finite
# (log(0) is undefined).
LOG_EPSILON = 1e-5

# The transform overwrites columns of `df` in place, so running this cell twice
# on the same frame would take the log of already-logged values (and produce
# NaN for anything that went negative on the first pass). Columns that have
# been transformed are recorded on the frame itself via `df.attrs`; a freshly
# loaded `df` starts with an empty record, so reloading resets the guard.
_already_logged = df.attrs.setdefault('log_transformed_columns', [])

for col in columns_to_log:
    if col in _already_logged:
        # This column of this frame was transformed by an earlier run.
        continue
    df[col] = np.log(df[col] + LOG_EPSILON)
    _already_logged.append(col)



In [52]:
# Feature matrix: the columns listed in `features`; target: the 'Winner' column.
X, y = df[features], df['Winner']

In [53]:
# Binary target: 1 where the winner is 'Red', 0 for every other value
# (NOTE(review): any label other than 'Red', including missing values, maps to 0).
y_encoded = y.eq('Red').astype('int64')

In [54]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [55]:
# Learn the scaling parameters from the training rows only, then apply that
# same fitted scaler to both splits.
sScaler = StandardScaler()
sScaler.fit(X_train)
X_train_scaled = sScaler.transform(X_train)
X_test_scaled = sScaler.transform(X_test)

In [56]:
# Gaussian Naive Bayes classifier with default settings, trained on the scaled
# training features. The fit call is left as the final expression so the cell
# still displays the fitted estimator.
naive_bayes_model = GaussianNB()
naive_bayes_model.fit(X=X_train_scaled, y=y_train)

In [57]:
# Predicted class probabilities for the scaled test rows.
y_pred_proba = naive_bayes_model.predict_proba(X=X_test_scaled)

In [58]:
# Express the probabilities as percentages and preview the first five rows.
y_pred_percent = 100 * y_pred_proba
y_pred_percent[:5]

array([[42.01371984, 57.98628016],
       [44.94222666, 55.05777334],
       [42.44770113, 57.55229887],
       [73.73256799, 26.26743201],
       [55.62035037, 44.37964963]])

In [59]:
# Hard class predictions for the scaled test rows.
y_pred = naive_bayes_model.predict(X=X_test_scaled)

In [60]:
# Compare the hard predictions with the held-out labels and report each
# metric right after it is computed.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")

conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix:\n", conf_matrix)

classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", classification_rep)

Accuracy: 0.5603
Confusion Matrix:
 [[558 407]
 [454 539]]
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.58      0.56       965
           1       0.57      0.54      0.56       993

    accuracy                           0.56      1958
   macro avg       0.56      0.56      0.56      1958
weighted avg       0.56      0.56      0.56      1958

