In [2]:
import os
import sys

# Make the parent of the current working directory importable *before* the
# project import below. Appending to sys.path after that import (as was done
# previously) is too late to have any effect on it.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    # Guard against piling up duplicate entries when the cell is re-run.
    sys.path.append(PROJECT_ROOT)

import joblib  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
from ufc_predictor.utils import get_data_path, preprocess_data, get_fighter_stats  # noqa: E402

# Load model
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code -- only load model files that come from a trusted source.
model_path = os.path.join(PROJECT_ROOT, 'models', 'ufc_predictor_v2.pkl')
model = joblib.load(model_path)

# Hand-picked historical bouts used as a sanity check for the model.
# Each entry is a 5-tuple, in this order:
#     (red fighter, blue fighter, fight date "YYYY-MM-DD", actual winner, weight class)
# where the actual winner is the corner label "Red" or "Blue".
# NOTE: 11 of the 12 entries below are labelled "Red", so a model that always
# answers "Red" would already score about 92% on this list.
# Extend the list by appending more tuples of the same shape.
historical_fights = [
    # --- Title fights / headline bouts ---
    ("Islam Makhachev", "Dustin Poirier", "2024-06-01", "Red", "Lightweight"),
    ("Alex Pereira", "Jiří Procházka", "2023-11-11", "Red", "Light Heavyweight"),
    ("Leon Edwards", "Kamaru Usman", "2022-08-20", "Red", "Welterweight"),
    ("Charles Oliveira", "Justin Gaethje", "2022-05-07", "Red", "Lightweight"),
    ("Khabib Nurmagomedov", "Conor McGregor", "2018-10-06", "Red", "Lightweight"),
    ("Holly Holm", "Ronda Rousey", "2015-11-15", "Red", "Bantamweight"),
    ("Georges St-Pierre", "Michael Bisping", "2017-11-04", "Red", "Middleweight"),
    ("Anderson Silva", "Chael Sonnen", "2010-08-07", "Red", "Middleweight"),
    # --- Controversial results / upsets ---
    ("Julianna Peña", "Amanda Nunes", "2021-12-11", "Red", "Bantamweight"),
    ("Matt Serra", "Georges St-Pierre", "2007-04-07", "Red", "Welterweight"),
    # --- Rematches between the same two fighters ---
    ("Stipe Miocic", "Daniel Cormier", "2019-08-17", "Red", "Heavyweight"),
    ("Deiveson Figueiredo", "Brandon Moreno", "2021-06-12", "Blue", "Flyweight"),
]

def predict_historical_fight(red, blue, date, weight_class, red_odds=-150, blue_odds=130):
    """Predict the winner of a historical fight from the fighters' stored stats.

    Args:
        red: Name of the red-corner fighter, passed to ``get_fighter_stats``.
        blue: Name of the blue-corner fighter, passed to ``get_fighter_stats``.
        date: Fight date. Accepted so call sites can pass the full fight
            record, but currently unused: stats are looked up by name only,
            so nothing in this function restricts them to a given date.
        weight_class: Weight-class label. It is only inspected for the
            substring ``"Title"``, which marks the bout as a 5-round title
            fight; otherwise a 3-round non-title bout is assumed.
        red_odds: Value for the ``RedOdds`` feature. Defaults to the
            placeholder -150 that was previously hard-coded.
        blue_odds: Value for the ``BlueOdds`` feature. Defaults to the
            placeholder 130 that was previously hard-coded.

    Returns:
        A dict with ``red_win_prob``, ``blue_win_prob`` and
        ``predicted_winner`` (``'Red'`` or ``'Blue'``), or ``None`` when
        either fighter has no stats or any step raises (the error is printed).
    """
    try:
        # Get fighter stats
        red_stats = get_fighter_stats(red)
        blue_stats = get_fighter_stats(blue)

        if not red_stats or not blue_stats:
            return None

        def advantage(key, default):
            """Return red-minus-blue for one stat, using the same fallback for both."""
            return red_stats.get(key, default) - blue_stats.get(key, default)

        is_title_bout = "Title" in weight_class
        same_stance = (
            red_stats.get('stance', 'Orthodox') == blue_stats.get('stance', 'Orthodox')
        )

        # Create input DataFrame
        input_data = pd.DataFrame([{
            'RedOdds': red_odds,
            'BlueOdds': blue_odds,
            'WinStreakDif': advantage('win_streak', 0),
            'RedAge': red_stats.get('age', 30),
            'BlueAge': blue_stats.get('age', 30),
            'NumberOfRounds': 5 if is_title_bout else 3,
            'TitleBout': 1 if is_title_bout else 0,
            'HeightAdvRed': advantage('height', 180),
            'ReachAdvRed': advantage('reach', 180),
            'StanceMatch': 1 if same_stance else 0,
            # Both fighters compete in the same weight class, so the
            # advantage is zero. The previous code looked the class up in a
            # mapping and subtracted the result from itself, which always
            # evaluated to 0 as well.
            'WeightClassAdvRed': 0,
            'ExpAdvRed': advantage('total_fights', 0),
            'GrappleAdvRed': advantage('avg_sub_att', 0) + advantage('avg_td_pct', 0),
        }])

        # Create derived features
        input_data['OddsRatio'] = input_data['RedOdds'] / input_data['BlueOdds']
        input_data['SizeAdvRed'] = (input_data['HeightAdvRed'] + input_data['ReachAdvRed']) / 2

        # Select features for model (column order as listed here)
        model_features = [
            'RedOdds', 'BlueOdds', 'OddsRatio', 'WinStreakDif',
            'HeightAdvRed', 'ReachAdvRed', 'SizeAdvRed', 'StanceMatch',
            'RedAge', 'BlueAge', 'NumberOfRounds', 'TitleBout',
            'WeightClassAdvRed', 'ExpAdvRed', 'GrappleAdvRed'
        ]
        model_input = input_data[model_features]

        # Make prediction
        prediction = model.predict(model_input)[0]
        prediction_proba = model.predict_proba(model_input)[0]

        # NOTE(review): this assumes label 1 means "Red wins" and that the
        # probability columns are ordered [Blue, Red] -- confirm against the
        # trained model's class ordering.
        return {
            'red_win_prob': prediction_proba[1],
            'blue_win_prob': prediction_proba[0],
            'predicted_winner': 'Red' if prediction == 1 else 'Blue'
        }

    except Exception as e:
        # Best-effort: one bad fight must not abort a whole evaluation run.
        print(f"Error predicting {red} vs {blue}: {str(e)}")
        return None

# Test all historical fights
results = []
skipped = []  # fights for which no prediction was produced
for red, blue, date, actual_winner, weight_class in historical_fights:
    prediction = predict_historical_fight(red, blue, date, weight_class)
    if not prediction:
        # Record the fight instead of dropping it silently, so the accuracy
        # below is not mistaken for coverage of the whole list.
        skipped.append(f"{red} vs {blue} ({date})")
        continue

    # Compare the full corner labels rather than only their first letter.
    correct = prediction['predicted_winner'] == actual_winner
    results.append({
        'Red': red,
        'Blue': blue,
        'Date': date,
        'WeightClass': weight_class,
        'Actual': actual_winner,
        'Predicted': prediction['predicted_winner'],
        'Red Prob': f"{prediction['red_win_prob']*100:.1f}%",
        'Blue Prob': f"{prediction['blue_win_prob']*100:.1f}%",
        'Correct': '✓' if correct else '✗'
    })

if skipped:
    print(f"\nSkipped {len(skipped)} fight(s) with no prediction: " + "; ".join(skipped))

# Create results table
# Passing the columns explicitly keeps the frame well-formed (and the
# 'Correct' lookups below valid) even when no fight could be predicted.
result_columns = [
    'Red', 'Blue', 'Date', 'WeightClass', 'Actual',
    'Predicted', 'Red Prob', 'Blue Prob', 'Correct'
]
results_df = pd.DataFrame(results, columns=result_columns)
print("\nHistorical Fight Test Results:")
display(results_df)

# Calculate accuracy (over the fights that produced a prediction only)
if results_df.empty:
    accuracy = float('nan')
    print("\nOverall Accuracy: n/a (no fights could be predicted)")
else:
    accuracy = (results_df['Correct'] == '✓').mean()
    print(f"\nOverall Accuracy: {accuracy*100:.1f}%")

# Save problematic fights for analysis
problem_fights = results_df[results_df['Correct'] == '✗']
problem_fights.to_csv('../notebooks/problem_fights.csv', index=False)


Historical Fight Test Results:


Unnamed: 0,Red,Blue,Date,WeightClass,Actual,Predicted,Red Prob,Blue Prob,Correct
0,Islam Makhachev,Dustin Poirier,2024-06-01,Lightweight,Red,Red,56.6%,43.4%,✓
1,Leon Edwards,Kamaru Usman,2022-08-20,Welterweight,Red,Blue,49.4%,50.6%,✗
2,Charles Oliveira,Justin Gaethje,2022-05-07,Lightweight,Red,Red,63.8%,36.2%,✓
3,Khabib Nurmagomedov,Conor McGregor,2018-10-06,Lightweight,Red,Red,52.1%,47.9%,✓
4,Holly Holm,Ronda Rousey,2015-11-15,Bantamweight,Red,Blue,41.6%,58.4%,✗
5,Georges St-Pierre,Michael Bisping,2017-11-04,Middleweight,Red,Red,58.1%,41.9%,✓
6,Anderson Silva,Chael Sonnen,2010-08-07,Middleweight,Red,Red,61.9%,38.1%,✓
7,Matt Serra,Georges St-Pierre,2007-04-07,Welterweight,Red,Red,61.0%,39.0%,✓
8,Stipe Miocic,Daniel Cormier,2019-08-17,Heavyweight,Red,Red,66.3%,33.7%,✓
9,Deiveson Figueiredo,Brandon Moreno,2021-06-12,Flyweight,Blue,Blue,44.1%,55.9%,✓



Overall Accuracy: 80.0%
