In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [2]:
# Load the raw fight records; each row describes one bout between FighterA and FighterB.
df = pd.read_csv("boxing_data.csv")

# Every model feature is (fighter A's stat) - (fighter B's stat).
# Keys are the engineered feature names, values are the (A, B) source columns.
FEATURE_SOURCES = {
    "ReachDiff": ("ReachA", "ReachB"),
    "AgeDiff": ("AgeA", "AgeB"),
    "HeightDiff": ("HeightA", "HeightB"),
    "WeightDiff": ("WeightA", "WeightB"),
    "KODiff": ("KOpercA", "KOpercB"),
    "WinDiff": ("WinsA", "WinsB"),
}

# Fail fast with one readable error listing *all* missing columns, instead of a
# bare KeyError on whichever column happens to be accessed first.
required_columns = {col for pair in FEATURE_SOURCES.values() for col in pair} | {"Result"}
missing_columns = sorted(required_columns - set(df.columns))
if missing_columns:
    raise KeyError(
        f"boxing_data.csv is missing required column(s): {missing_columns}. "
        f"Available columns: {list(df.columns)}"
    )

# The difference columns are added to `df` itself (as before) so later cells can
# still read both the raw and the engineered columns from the same frame.
for feature_name, (col_a, col_b) in FEATURE_SOURCES.items():
    df[feature_name] = df[col_a] - df[col_b]

X = df[list(FEATURE_SOURCES)]
y = df["Result"]   # 1 = FighterA wins, 0 = FighterB wins

KeyError: 'KOpercA'

In [3]:
# Classifier pipeline: standardise the difference features, then fit a random forest.
model = make_pipeline(
    StandardScaler(),
    RandomForestClassifier(
        n_estimators=200,         # number of trees
        max_depth=None,           # let each tree grow fully
        random_state=42,          # fixed seed so the forest is reproducible
        class_weight="balanced",  # handle class imbalance in the labels
    ),
)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model.fit(X_train, y_train)

# Report both scores with distinct labels: the second number is measured on the
# held-out split, so it must not be printed as "Train accuracy".
print("Train accuracy:", model.score(X_train, y_train))
print("Test accuracy:", model.score(X_test, y_test))

Train accuracy: 1.0
Train accuracy: 0.7868852459016393


In [4]:
# Lookup table: fighter name -> dict of that fighter's stats.
# Rows are visited in frame order and the A-side entry is written before the
# B-side entry, so a fighter who appears in several rows keeps the values from
# the last row that mentions them.
fighters_stats = {}

# Labels used as keys of each per-fighter stats dict.
stat_labels = ("Reach", "Age", "Height", "Weight", "KO%", "Wins")

# For each corner: the name column followed by the stat columns, in the same
# order as `stat_labels`.
corner_columns = (
    ("FighterA", "ReachA", "AgeA", "HeightA", "WeightA", "KOpercA", "WinsA"),
    ("FighterB", "ReachB", "AgeB", "HeightB", "WeightB", "KOpercB", "WinsB"),
)

for _, row in df.iterrows():
    for name_column, *stat_columns in corner_columns:
        fighters_stats[row[name_column]] = {
            label: row[column] for label, column in zip(stat_labels, stat_columns)
        }

In [5]:
def predict_fight(fighterA, fighterB, model, fighters_stats):
    statsA = fighters_stats[fighterA]
    statsB = fighters_stats[fighterB]

    features = {
        "ReachDiff": statsA["Reach"] - statsB["Reach"],
        "AgeDiff": statsA["Age"] - statsB["Age"],
        "HeightDiff": statsA["Height"] - statsB["Height"],
        "WeightDiff": statsA["Weight"] - statsB["Weight"],
        "KODiff": statsA["KO%"] - statsB["KO%"],
        "WinDiff": statsA["Wins"] - statsB["Wins"]
    }

    X_pred = pd.DataFrame([features])
    pred = model.predict(X_pred)[0]
    prob = model.predict_proba(X_pred)[0].max()

    winner = fighterA if pred == 1 else fighterB
    return winner, prob

In [7]:
# Example matchup: look both fighters up in the stats table and report the
# model's pick together with the probability it assigns to that pick.
winner, confidence = predict_fight(
    fighterA="Mike Tyson",
    fighterB="Muhammad Ali",
    model=model,
    fighters_stats=fighters_stats,
)
print(f"Predicted winner: {winner} (Confidence: {confidence:.2f})")

Predicted winner: Mike Tyson (Confidence: 0.83)
