In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, f1_score

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

import matplotlib.pyplot as plt

In [4]:
data = pd.read_csv('./csv/df_all_fights.csv')
data.columns = data.columns.str.strip()

num_cols = [
    'strike_accuracy_diff',
    'total_strike_accuracy_diff',
    'knockdowns_diff',
    'takedown_accuracy_diff',
    'takedowns_diff',
    'submission_attempts_diff'
]
cat_cols = ['weight_class', 'method']

data_filtered = data[num_cols + cat_cols].dropna()

X1 = data_filtered.copy()
y1 = np.ones(len(X1))

X0 = data_filtered.copy()
X0[num_cols] = -X0[num_cols]
y0 = np.zeros(len(X0))

X_all = pd.concat([X1, X0], axis=0).reset_index(drop=True)
y_all = np.concatenate([y1, y0])


In [6]:
preprocess = ColumnTransformer([
    ('scale', StandardScaler(), num_cols),
    ('onehot', OneHotEncoder(handle_unknown='ignore'), cat_cols)
])

models_to_test = {
    'NB': GaussianNB(),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'RF': RandomForestClassifier(n_estimators=100, random_state=42),
    'LR': LogisticRegression(max_iter=1000, random_state=42)
}

In [8]:
plt.figure(figsize=(8, 5))
bars = plt.bar(model_scores.keys(), model_scores.values())
plt.ylim(0, 1)
plt.title("F1 Scores for Each Model")
plt.ylabel("F1 Score")

for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval + 0.02, f"{yval:.2f}", ha='center')

plt.tight_layout()
plt.show()


NameError: name 'model_scores' is not defined

<Figure size 800x500 with 0 Axes>

In [10]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_predict
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.metrics import classification_report, brier_score_loss
import matplotlib.pyplot as plt


In [12]:
df = pd.read_csv('df_all_fights.csv')
df.columns = df.columns.str.strip()

num_cols = [
    'strike_accuracy_diff',
    'total_strike_accuracy_diff',
    'knockdowns_diff',
    'takedown_accuracy_diff',
    'takedowns_diff',
    'submission_attempts_diff'
]

cat_cols = ['weight_class', 'method']

base_feats = df[num_cols + cat_cols].dropna()

y_win = np.ones(len(base_feats), dtype=int)

flipped_feats = base_feats.copy()
flipped_feats[num_cols] = -flipped_feats[num_cols]

y_lose = np.zeros(len(flipped_feats), dtype=int)

X_total = pd.concat([base_feats, flipped_feats], ignore_index=True)
y_total = np.concatenate([y_win, y_lose])

FileNotFoundError: [Errno 2] No such file or directory: 'df_all_fights.csv'

In [14]:
feat_transform = ColumnTransformer([
    ('std', StandardScaler(), num_cols),
    ('onehot', OneHotEncoder(handle_unknown='ignore'), cat_cols)
])

pipe_rf = Pipeline([
    ('xform', feat_transform),
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42))
])

cal_rf = CalibratedClassifierCV(estimator=pipe_rf, method='sigmoid', cv=5)

In [None]:
prob_preds = cross_val_predict(cal_rf, X_total, y_total, cv=5, method='predict_proba')[:, 1]
final_preds = (prob_preds >= 0.5).astype(int)

print("== Random Forest with Calibration ==")
print(classification_report(y_total, final_preds))
print("Brier Score:", brier_score_loss(y_total, prob_preds))


In [None]:
cal_y, cal_x = calibration_curve(y_total, prob_preds, n_bins=10)

plt.figure(figsize=(6, 6))
plt.plot(cal_x, cal_y, 'o-', label='RF Calibrated')
plt.plot([0, 1], [0, 1], '--', color='gray', label='Ideal')
plt.title("Calibrated Probability Curve")
plt.xlabel("Predicted Win Probability")
plt.ylabel("True Win Rate")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()