In [9]:
# UFC Fight Predictor
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

print("UFC FIGHT PREDICTION SYSTEM")
print("="*50)

UFC FIGHT PREDICTION SYSTEM


In [10]:
# Cell 1: Setup and Load Models
import pandas as pd
import numpy as np
import pickle
import os
from datetime import datetime

class UFCFightPredictor:
    def __init__(self):
        self.models = {}
        self.fighter_data = None
        self.load_fighter_data()
        self.load_models()
    
    def load_fighter_data(self):
        """Load current fighter profiles"""
        try:
            self.fighter_data = pd.read_csv("build/fighter_profiles_clean.csv")
            print(f"✓ Loaded {len(self.fighter_data)} fighter profiles")
        except FileNotFoundError:
            print("❌ fighter_profiles_clean.csv not found!")
            return
    
    def load_models(self):
        """Load all trained models"""
        model_files = {
            'logistic': 'models/logistic_regression_model.pkl',
            'decision_tree': 'models/decision_tree_model.pkl', 
            'random_forest': 'models/random_forest_model.pkl'
        }
        
        for name, filepath in model_files.items():
            try:
                with open(filepath, 'rb') as f:
                    self.models[name] = pickle.load(f)
                print(f"✓ Loaded {name} model")
            except FileNotFoundError:
                print(f"⚠️  {name} model not found")
    
    def find_fighter(self, name):
        """Find fighter by name with fuzzy matching"""
        name_lower = name.lower().strip()
        
        # Exact match
        exact = self.fighter_data[
            self.fighter_data['fighter_name'].str.lower() == name_lower
        ]
        if not exact.empty:
            return exact.iloc[0]
        
        # Partial match
        partial = self.fighter_data[
            self.fighter_data['fighter_name'].str.lower().str.contains(name_lower, na=False)
        ]
        
        if partial.empty:
            print(f"❌ Fighter '{name}' not found!")
            return None
        elif len(partial) > 1:
            print(f"Multiple matches for '{name}':")
            for _, fighter in partial.head(3).iterrows():
                print(f"  - {fighter['fighter_name']} (ELO: {fighter['current_elo_rating']:.0f})")
        
        return partial.iloc[0]
    
    def create_fight_features(self, fighter_a, fighter_b):
        """Create feature vector for a fight"""
        features = {}
        
        # Core features that both models use
        features['current_elo_rating_A'] = fighter_a['current_elo_rating']
        features['current_elo_rating_B'] = fighter_b['current_elo_rating']
        features['current_elo_rating_diff'] = fighter_a['current_elo_rating'] - fighter_b['current_elo_rating']
        
        features['age_A'] = fighter_a.get('age', 30)  # Default age if missing
        features['age_B'] = fighter_b.get('age', 30)
        features['age_diff'] = features['age_A'] - features['age_B']
        
        features['reach_inches_A'] = fighter_a.get('reach_inches', 72)  # Default reach
        features['reach_inches_B'] = fighter_b.get('reach_inches', 72)
        features['reach_inches_diff'] = features['reach_inches_A'] - features['reach_inches_B']
        
        features['height_inches_A'] = fighter_a.get('height_inches', 70)
        features['height_inches_B'] = fighter_b.get('height_inches', 70)
        features['height_inches_diff'] = features['height_inches_A'] - features['height_inches_B']
        
        features['win_rate_A'] = fighter_a['win_rate']
        features['win_rate_B'] = fighter_b['win_rate']
        features['win_rate_diff'] = fighter_a['win_rate'] - fighter_b['win_rate']
        
        features['total_fights_A'] = fighter_a['total_fights']
        features['total_fights_B'] = fighter_b['total_fights']
        features['total_wins_A'] = fighter_a['total_wins']
        features['total_wins_B'] = fighter_b['total_wins']
        features['total_losses_A'] = fighter_a['total_losses']
        features['total_losses_B'] = fighter_b['total_losses']
        
        # Missing value indicators
        features['has_age_A'] = 1 if pd.notna(fighter_a.get('age')) else 0
        features['has_age_B'] = 1 if pd.notna(fighter_b.get('age')) else 0
        features['has_reach_A'] = 1 if pd.notna(fighter_a.get('reach_inches')) else 0
        features['has_reach_B'] = 1 if pd.notna(fighter_b.get('reach_inches')) else 0
        
        return features
    
    def predict_with_model(self, features, model_name):
        """Make prediction with specific model"""
        if model_name not in self.models:
            return None
        
        model_data = self.models[model_name]
        model = model_data['model']
        model_features = model_data['features']
        
        # Create feature vector in correct order
        feature_vector = []
        for feature in model_features:
            feature_vector.append(features.get(feature, 0))
        
        X = np.array(feature_vector).reshape(1, -1)
        
        # Apply scaling if logistic regression
        if model_name == 'logistic' and 'scaler' in model_data and model_data['scaler']:
            X = model_data['scaler'].transform(X)
        
        # Get prediction
        prediction = model.predict(X)[0]
        try:
            prob_a_wins = model.predict_proba(X)[0][1]
        except:
            # Fallback for models without predict_proba
            prob_a_wins = 0.7 if prediction == 1 else 0.3
        
        return {
            'prediction': prediction,
            'prob_a_wins': prob_a_wins,
            'prob_b_wins': 1 - prob_a_wins
        }
    
    def predict_fight(self, fighter_a_name, fighter_b_name):
        """Predict fight outcome using all available models"""
        print(f"\n{'='*60}")
        print(f"PREDICTING: {fighter_a_name} vs {fighter_b_name}")
        print(f"{'='*60}")
        
        # Find fighters
        fighter_a = self.find_fighter(fighter_a_name)
        fighter_b = self.find_fighter(fighter_b_name)
        
        if fighter_a is None or fighter_b is None:
            return None
        
        print(f"✓ {fighter_a['fighter_name']} (ELO: {fighter_a['current_elo_rating']:.0f})")
        print(f"✓ {fighter_b['fighter_name']} (ELO: {fighter_b['current_elo_rating']:.0f})")
        
        # Create features
        features = self.create_fight_features(fighter_a, fighter_b)
        
        # Get predictions from all models
        predictions = {}
        for model_name in self.models:
            result = self.predict_with_model(features, model_name)
            if result:
                predictions[model_name] = result
        
        # Display results
        print(f"\n📊 PREDICTIONS:")
        print("-" * 40)
        
        ensemble_prob = 0
        model_count = 0
        
        for model_name, pred in predictions.items():
            winner = fighter_a['fighter_name'] if pred['prediction'] == 1 else fighter_b['fighter_name']
            confidence = max(pred['prob_a_wins'], pred['prob_b_wins'])
            
            print(f"{model_name.upper():15}: {winner}")
            print(f"{'':15}  Confidence: {confidence:.1%}")
            print(f"{'':15}  {fighter_a['fighter_name']}: {pred['prob_a_wins']:.1%}")
            print(f"{'':15}  {fighter_b['fighter_name']}: {pred['prob_b_wins']:.1%}")
            print()
            
            ensemble_prob += pred['prob_a_wins']
            model_count += 1
        
        # Ensemble prediction
        if model_count > 0:
            ensemble_prob /= model_count
            ensemble_winner = fighter_a['fighter_name'] if ensemble_prob > 0.5 else fighter_b['fighter_name']
            ensemble_confidence = max(ensemble_prob, 1-ensemble_prob)
            
            print("🏆 ENSEMBLE PREDICTION:")
            print(f"   Winner: {ensemble_winner}")
            print(f"   Confidence: {ensemble_confidence:.1%}")
            print(f"   {fighter_a['fighter_name']}: {ensemble_prob:.1%}")
            print(f"   {fighter_b['fighter_name']}: {1-ensemble_prob:.1%}")
        
        # Key factors
        print(f"\n🔍 KEY FACTORS:")
        elo_diff = features['current_elo_rating_diff']
        print(f"   ELO Advantage: {elo_diff:+.0f} (favors {fighter_a['fighter_name'] if elo_diff > 0 else fighter_b['fighter_name']})")
        print(f"   Age Difference: {features['age_diff']:+.1f} years")
        print(f"   Reach Advantage: {features['reach_inches_diff']:+.1f} inches")
        print(f"   Win Rate: {fighter_a['win_rate']:.1%} vs {fighter_b['win_rate']:.1%}")
        
        return {
            'fighter_a': fighter_a['fighter_name'],
            'fighter_b': fighter_b['fighter_name'],
            'predictions': predictions,
            'ensemble_prob_a': ensemble_prob if model_count > 0 else 0.5,
            'features': features
        }

# Initialize predictor
predictor = UFCFightPredictor()

✓ Loaded 2624 fighter profiles
⚠️  logistic model not found
⚠️  decision_tree model not found
⚠️  random_forest model not found


In [11]:
# Cell 2: Make Predictions

# Single fight prediction
result = predictor.predict_fight("Jon Jones", "Tom Aspinall")

# Multiple fights
upcoming_fights = [
    ("Islam Makhachev", "Arman Tsarukyan"),
    ("Alex Pereira", "Magomed Ankalaev"),
    ("Sean O'Malley", "Merab Dvalishvili")
]

for fighter_a, fighter_b in upcoming_fights:
    predictor.predict_fight(fighter_a, fighter_b)


PREDICTING: Jon Jones vs Tom Aspinall
✓ Jon Jones (ELO: 1738)
✓ Tom Aspinall (ELO: 1605)

📊 PREDICTIONS:
----------------------------------------

🔍 KEY FACTORS:
   ELO Advantage: +133 (favors Jon Jones)
   Age Difference: +6.0 years
   Reach Advantage: +6.0 inches
   Win Rate: 95.7% vs 88.9%

PREDICTING: Islam Makhachev vs Arman Tsarukyan
✓ Islam Makhachev (ELO: 1686)
✓ Arman Tsarukyan (ELO: 1605)

📊 PREDICTIONS:
----------------------------------------

🔍 KEY FACTORS:
   ELO Advantage: +81 (favors Islam Makhachev)
   Age Difference: +5.0 years
   Reach Advantage: -2.0 inches
   Win Rate: 93.8% vs 81.8%

PREDICTING: Alex Pereira vs Magomed Ankalaev
✓ Alex Pereira (ELO: 1616)
✓ Magomed Ankalaev (ELO: 1630)

📊 PREDICTIONS:
----------------------------------------

🔍 KEY FACTORS:
   ELO Advantage: -14 (favors Magomed Ankalaev)
   Age Difference: +5.0 years
   Reach Advantage: +4.0 inches
   Win Rate: 83.3% vs 84.6%

PREDICTING: Sean O'Malley vs Merab Dvalishvili
✓ Sean O'Malley (ELO: 15

In [12]:
# Cell 3: Batch Predictions and Analysis

def predict_full_card(predictor, fight_card, event_name="UFC Event"):
    """Predict an entire fight card"""
    print(f"\n{'='*70}")
    print(f"PREDICTING FULL CARD: {event_name}")
    print(f"{'='*70}")
    
    results = []
    for i, (fighter_a, fighter_b) in enumerate(fight_card, 1):
        print(f"\nFIGHT {i}:")
        result = predictor.predict_fight(fighter_a, fighter_b)
        if result:
            results.append(result)
    
    return results

# Example usage
ufc_310_card = [
    ("Belal Muhammad", "Shavkat Rakhmonov"),
    ("Ciryl Gane", "Alexander Volkov"),
    ("Bryce Mitchell", "Kron Gracie"),
    ("Anthony Smith", "Dominick Reyes")
]

card_results = predict_full_card(predictor, ufc_310_card, "UFC 310")


PREDICTING FULL CARD: UFC 310

FIGHT 1:

PREDICTING: Belal Muhammad vs Shavkat Rakhmonov
✓ Belal Muhammad (ELO: 1643)
✓ Shavkat Rakhmonov (ELO: 1605)

📊 PREDICTIONS:
----------------------------------------

🔍 KEY FACTORS:
   ELO Advantage: +38 (favors Belal Muhammad)
   Age Difference: +7.0 years
   Reach Advantage: -5.0 inches
   Win Rate: 78.9% vs 100.0%

FIGHT 2:

PREDICTING: Ciryl Gane vs Alexander Volkov
✓ Ciryl Gane (ELO: 1618)
✓ Alexander Volkov (ELO: 1599)

📊 PREDICTIONS:
----------------------------------------

🔍 KEY FACTORS:
   ELO Advantage: +19 (favors Ciryl Gane)
   Age Difference: -1.0 years
   Reach Advantage: +1.0 inches
   Win Rate: 83.3% vs 70.6%

FIGHT 3:

PREDICTING: Bryce Mitchell vs Kron Gracie
✓ Bryce Mitchell (ELO: 1552)
✓ Kron Gracie (ELO: 1478)

📊 PREDICTIONS:
----------------------------------------

🔍 KEY FACTORS:
   ELO Advantage: +74 (favors Bryce Mitchell)
   Age Difference: -7.0 years
   Reach Advantage: +0.0 inches
   Win Rate: 69.2% vs 25.0%

FIGHT 