## Goal: Evaluate the predictive performance, calibration, and failure modes of the UFC fight outcome model

Import libraries

In [14]:
import pandas as pd
import numpy as np
import joblib

Import model and dataset

In [21]:
# Load the persisted model bundle and the processed fight-level dataset.
# Paths use forward slashes so the notebook runs on Windows, macOS and Linux;
# the previous backslash-separated strings only resolved on Windows.
# NOTE: joblib.load unpickles arbitrary Python objects — only load bundles that
# come from a trusted source.
bundle = joblib.load('models/ufc_lr_pipeline_2026-01-03_130730.joblib')
pipe = bundle['model']                 # fitted estimator stored under the 'model' key
features_col = bundle['feature_cols']  # feature column list stored alongside the model

df = pd.read_csv('data_processed/model_data/fight_model_v2b.csv')
# errors="coerce" turns unparseable dates into NaT instead of raising; such rows
# match neither side of a later date comparison, so they drop out of any
# date-based split.
df["event_date"] = pd.to_datetime(df["event_date"], errors="coerce")
df = df.sort_values("event_date")

Build training and test set

In [39]:
# Chronological hold-out: fights on or before the cutoff form the training
# window, fights strictly after it form the test window.
cutoff_date = pd.Timestamp("2022-01-01")
train_mask = df['event_date'].le(cutoff_date)
test_mask = df['event_date'].gt(cutoff_date)

df_train = df.loc[train_mask].copy()
df_test = df.loc[test_mask].copy()

# Feature matrices restricted to the columns recorded in the model bundle.
X_train = df_train[bundle["feature_cols"]].copy()
# Column means are taken from the training window only and used to fill
# missing test values, so no test-period statistics are used for imputation.
train_means = X_train.mean(numeric_only=True)
X_test = df_test[bundle["feature_cols"]].copy().fillna(train_means)

# Binary target: 1 when the red corner won.
y_train = df_train['red_win'].astype(int)
y_test = df_test['red_win'].astype(int)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(6405, 33) (6405,)
(2068, 33) (2068,)


Analysis of logistic regression

In [52]:
from sklearn.metrics import accuracy_score, log_loss

# Column 1 of predict_proba — presumably the red-win (label 1) class; confirm
# against pipe.classes_ if the label encoding ever changes.
test_proba = pipe.predict_proba(X_test)[:, 1]

# Computed once; reused for the printout here and for the baseline comparison
# in a later cell.
model_log_loss = log_loss(y_test, test_proba)

# Accuracy uses a 0.5 decision threshold on the predicted probability.
print("Accuracy:", accuracy_score(y_test, (test_proba >= 0.5)))
print("Log loss:", model_log_loss)




Accuracy: 0.5720502901353965
Log loss: 0.6720874293744729


Baseline: dummy classifier (uniform random guessing)

In [60]:

from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, log_loss

# Chance-level reference: the "uniform" strategy ignores the features and
# assigns every class the same probability; random_state pins the random picks.
# NOTE(review): a class-prior baseline (strategy="prior") may be a stricter
# reference if red_win is imbalanced — worth considering.
dummy = DummyClassifier(strategy="uniform", random_state=42).fit(X_train, y_train)

pred = dummy.predict(X_test)
proba = dummy.predict_proba(X_test)

# Score the baseline once and reuse the value below.
baseline_log_loss = log_loss(y_test, proba)

print("dummy accuracy:", accuracy_score(y_test, pred))
print("dummy log loss:", baseline_log_loss)
# Positive delta means the model's log loss is lower (better) than the baseline's.
print("Δ log loss:", baseline_log_loss - model_log_loss)

dummy accuracy: 0.5091876208897486
dummy log loss: 0.6931471805599452
Δ log loss: 0.02105975118547232


Baseline: experience heuristic (predict from the prior-fight differential)

In [59]:
# Experience-only baseline: predict a red-corner win whenever the prior-fight
# differential is positive.
# NOTE(review): assumes diff_prior_fights is (red - blue) — confirm against the
# feature-building code; if the sign is reversed this baseline is inverted.
diff = df_test['diff_prior_fights']
exp_pred = (diff > 0).astype(int)
print("Experience baseline accuracy:", accuracy_score(y_test, exp_pred))

# `diff > 0` is False for ties and for NaN, so those fights are silently scored
# as blue-corner picks in the number above. Report how many there are, and the
# accuracy on the fights where the rule actually makes a call.
decided = diff.notna() & (diff != 0)
print("Fights with no experience edge (tie or missing):",
      int((~decided).sum()), "of", len(diff))
if decided.any():  # accuracy is undefined on an empty subset
    print("Experience baseline accuracy (decided fights only):",
          accuracy_score(y_test[decided], exp_pred[decided]))

Experience baseline accuracy: 0.47050290135396516
