In [5]:
# notebooks/model_evaluation.ipynb

import os
import sys
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    roc_curve, auc, precision_recall_curve, average_precision_score
)

# Setup paths and directories
# os.path.abspath('') resolves to the current working directory, so despite its
# name NOTEBOOKS_DIR is the *parent* of the directory the kernel is running in.
NOTEBOOKS_DIR = os.path.dirname(os.path.abspath(''))
RESULTS_DIR = os.path.join(NOTEBOOKS_DIR, 'evaluation_results')
os.makedirs(RESULTS_DIR, exist_ok=True)

# Initialize results file. Mode 'w' truncates any report left by a previous run.
# The encoding is pinned so the report does not depend on the platform default.
results_file = os.path.join(RESULTS_DIR, 'evaluation_summary.txt')
with open(results_file, 'w', encoding='utf-8') as f:
    f.write("UFC Fight Predictor - Model Evaluation Report\n")
    f.write("="*60 + "\n\n")

def save_result(section_title, content, path=None):
    """Append one titled section to a plain-text report file.

    Args:
        section_title: Heading text; it is underlined with dashes of the same length.
        content: A pandas DataFrame (written via ``to_string()``) or any other
            object (written via ``str()``).
        path: File to append to. When omitted, the module-level ``results_file``
            is used, which keeps existing two-argument calls working unchanged.
    """
    target = results_file if path is None else path
    # Pin the encoding so non-ASCII content does not fail or get written
    # differently depending on the platform's default locale.
    with open(target, 'a', encoding='utf-8') as f:
        f.write(f"\n{section_title}\n")
        f.write("-"*len(section_title) + "\n\n")
        if isinstance(content, pd.DataFrame):
            f.write(content.to_string() + "\n\n")
        else:
            f.write(str(content) + "\n\n")

# Add parent directory to path to import modules.
# Guarded so that re-running this cell does not keep appending duplicate entries.
_parent_dir = os.path.abspath('..')
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Import utility functions
from ufc_predictor.utils import get_data_path, preprocess_data, get_fighter_stats

# %% [markdown]
# ## 1. Load Model and Data

# %%
# Load the trained model
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code - only load model artifacts that come from a trusted source.
model_path = os.path.join(os.path.abspath('..'), 'models', 'ufc_predictor_v1.pkl')
model = joblib.load(model_path)

# Load and preprocess data
df = pd.read_csv(get_data_path())
df_processed = preprocess_data(df)

# read_csv is called without date parsing, so min()/max() on the raw column
# could compare strings lexicographically. Parse into a separate Series (df is
# left untouched) so the reported period is chronological.
fight_dates = pd.to_datetime(df['Date'])

# Save dataset info
# "Features" is the processed column count minus the 'Target' column.
dataset_info = f"""
Dataset Information:
- Original shape: {df.shape}
- Processed shape: {df_processed.shape}
- Features: {len(df_processed.columns) - 1}
- Fights: {len(df_processed)}
- Time period: {fight_dates.min().date()} to {fight_dates.max().date()}
"""
save_result("DATASET INFORMATION", dataset_info)

# Separate features and target
X = df_processed.drop(columns=['Target'])
y = df_processed['Target']

# %% [markdown]
# ## 2. Make Predictions

# %%
# Hard class labels for every row of X.
# NOTE(review): X is the complete processed dataset; if the model was fitted on
# any of these rows, every metric derived below is optimistic - confirm.
y_pred = model.predict(X)

# Column 1 of predict_proba is used as the probability of Red winning.
class_probabilities = model.predict_proba(X)
y_pred_proba = class_probabilities[:, 1]

# %% [markdown]
# ## 3. Basic Performance Metrics

# %%
# Share of fights whose label was predicted correctly
accuracy = accuracy_score(y, y_pred)

# Per-class precision / recall / F1 as preformatted text
class_report = classification_report(y, y_pred, target_names=['Blue Win', 'Red Win'])

# Confusion matrix wrapped in a labelled frame (rows = actual, columns = predicted)
cm = confusion_matrix(y, y_pred)
cm_df = pd.DataFrame(
    cm,
    index=['Actual Blue', 'Actual Red'],
    columns=['Predicted Blue', 'Predicted Red'],
)

# Write the three sections to the report in a fixed order
for section_name, section_body in (
    ("ACCURACY", f"Overall Accuracy: {accuracy:.4f}"),
    ("CLASSIFICATION REPORT", class_report),
    ("CONFUSION MATRIX", cm_df),
):
    save_result(section_name, section_body)

# %% [markdown]
# ## 4. Advanced Metrics

# %%
# ROC curve points and the area under them
fpr, tpr, thresholds = roc_curve(y, y_pred_proba)
roc_auc = auc(fpr, tpr)

# Precision-recall curve points and the average precision summary
precision, recall, _ = precision_recall_curve(y, y_pred_proba)
avg_precision = average_precision_score(y, y_pred_proba)

# Map the AUC onto a coarse verbal rating
if roc_auc > 0.9:
    auc_rating = 'Excellent (>0.9)'
elif roc_auc > 0.8:
    auc_rating = 'Good (0.8-0.9)'
elif roc_auc > 0.7:
    auc_rating = 'Fair (0.7-0.8)'
else:
    auc_rating = 'Poor (<0.7)'

# Fraction of rows labelled 1, reported as the precision-recall baseline
positive_share = len(y[y == 1]) / len(y)

roc_info = f"""
ROC Curve Analysis:
- AUC Score: {roc_auc:.4f}
- Interpretation: {auc_rating}
"""
pr_info = f"""
Precision-Recall Analysis:
- Average Precision: {avg_precision:.4f}
- Baseline: {positive_share:.4f} (class distribution)
"""

# Persist both sections
save_result("ROC CURVE ANALYSIS", roc_info)
save_result("PRECISION-RECALL ANALYSIS", pr_info)

# %% [markdown]
# ## 5. Feature Importance Analysis

# %%
# Report the ten largest feature importances when the classifier exposes them.
# The section is best-effort: any failure is written to the report as text
# instead of aborting the evaluation.
feature_importances = ""
classifier = model.named_steps['classifier']
if hasattr(classifier, 'feature_importances_'):
    try:
        preprocessor = model.named_steps['preprocessor']

        # Get feature importances
        importances = classifier.feature_importances_

        # Get feature names. transformers_[0][2] is only the column list given to
        # the FIRST transformer; if the preprocessor has further transformers or
        # expands columns, its length will not match the importances. In that
        # case ask the fitted preprocessor for its output feature names instead.
        feature_names = list(preprocessor.transformers_[0][2])
        if len(feature_names) != len(importances):
            feature_names = list(preprocessor.get_feature_names_out())

        # Create importance DataFrame (top 10, largest first)
        importance_df = pd.DataFrame({
            'Feature': feature_names,
            'Importance': importances
        }).sort_values('Importance', ascending=False).head(10)

        feature_importances = importance_df
    except Exception as e:
        feature_importances = f"Error extracting feature importances: {str(e)}"
else:
    feature_importances = "Feature importances not available for this model type"

save_result("TOP 10 FEATURE IMPORTANCES", feature_importances)

# %% [markdown]
# ## 6. Prediction Distribution

# %%
# Analyze prediction distribution
prob_df = pd.DataFrame({
    'Predicted Probability': y_pred_proba,
    'Actual Outcome': y.map({0: 'Blue Win', 1: 'Red Win'})
})

# Calculate calibration: bucket predictions into ten equal-width probability bins
prob_df['Probability Bin'] = pd.cut(prob_df['Predicted Probability'],
                                    bins=np.arange(0, 1.1, 0.1),
                                    include_lowest=True)

# 'Probability Bin' is categorical. Passing observed=False explicitly keeps the
# existing behaviour (empty bins still appear in the table) and avoids the pandas
# FutureWarning about the changing default for categorical groupers.
calibration = (
    prob_df.groupby('Probability Bin', observed=False)['Actual Outcome']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
)
calibration['Total Predictions'] = prob_df.groupby('Probability Bin', observed=False).size()

save_result("PREDICTION DISTRIBUTION", prob_df['Predicted Probability'].describe())
save_result("CALIBRATION ANALYSIS", calibration)

# %% [markdown]
# ## 7. Performance Over Time

# %%
# Re-read the raw data to get the fight date back for the temporal analysis
df_full = pd.read_csv(get_data_path())
df_full['Date'] = pd.to_datetime(df_full['Date'])
df_full['Year'] = df_full['Date'].dt.year

# NOTE(review): y_pred is attached by position, which presumes preprocess_data
# kept every raw row in its original order - confirm.
df_full['Prediction'] = y_pred
actual_label = df_full['Winner'].map({'Red': 1, 'Blue': 0})
df_full['Correct'] = (actual_label == df_full['Prediction']).astype(int)

# Mean of the 0/1 'Correct' flag per calendar year
yearly_accuracy = (
    df_full.groupby('Year')['Correct']
    .mean()
    .reset_index()
    .set_axis(['Year', 'Accuracy'], axis=1)
)

# Number of fights per calendar year, in chronological order
yearly_counts = (
    df_full['Year']
    .value_counts()
    .sort_index()
    .reset_index()
    .set_axis(['Year', 'Fight Count'], axis=1)
)

# One row per year with both accuracy and volume
yearly_performance = pd.merge(yearly_accuracy, yearly_counts, on='Year')
save_result("YEARLY PERFORMANCE", yearly_performance)

# %% [markdown]
# ## 8. Historical Superfight Predictions

# %%
def predict_fight(red_fighter, blue_fighter, red_odds=-150, blue_odds=130,
                  number_of_rounds=3, title_bout=0):
    """Predict a fight between two fighters.

    Args:
        red_fighter: Name passed to ``get_fighter_stats`` for the red corner.
        blue_fighter: Name passed to ``get_fighter_stats`` for the blue corner.
        red_odds: Odds fed to the model for the red corner. The default (-150)
            is the placeholder that was previously hard-coded.
        blue_odds: Odds fed to the model for the blue corner (default 130).
        number_of_rounds: Value for the 'NumberOfRounds' feature (default 3).
        title_bout: Value for the 'TitleBout' feature (default 0).

    Returns:
        ``None`` when stats are unavailable for either fighter, otherwise a dict
        with 'red_win_prob', 'blue_win_prob' and 'predicted_winner'
        ('Red' or 'Blue').

    Note:
        With the default odds every matchup is presented to the model with the
        same red-corner price; pass the real odds when they are known.
    """
    # Get fighter stats
    red_stats = get_fighter_stats(red_fighter)
    blue_stats = get_fighter_stats(blue_fighter)

    if not red_stats or not blue_stats:
        return None

    # Create input DataFrame; missing stats fall back to fixed placeholder values
    input_data = pd.DataFrame([{
        'RedOdds': red_odds,
        'BlueOdds': blue_odds,
        'WinStreakDif': red_stats.get('win_streak', 0) - blue_stats.get('win_streak', 0),
        'RedAge': red_stats.get('age', 30),
        'BlueAge': blue_stats.get('age', 30),
        'NumberOfRounds': number_of_rounds,
        'TitleBout': title_bout,
        'RedHeightCms': red_stats.get('height', 180),
        'BlueHeightCms': blue_stats.get('height', 180),
        'RedReachCms': red_stats.get('reach', 180),
        'BlueReachCms': blue_stats.get('reach', 180),
        'RedStance': red_stats.get('stance', 'Orthodox'),
        'BlueStance': blue_stats.get('stance', 'Orthodox')
    }])

    # Create derived features
    input_data['OddsRatio'] = input_data['RedOdds'] / input_data['BlueOdds']
    input_data['HeightAdvRed'] = input_data['RedHeightCms'] - input_data['BlueHeightCms']
    input_data['ReachAdvRed'] = input_data['RedReachCms'] - input_data['BlueReachCms']
    input_data['SizeAdvRed'] = (input_data['HeightAdvRed'] + input_data['ReachAdvRed']) / 2
    input_data['StanceMatch'] = (input_data['RedStance'] == input_data['BlueStance']).astype(int)

    # Select features for model: the column list of the preprocessor's first
    # transformer. NOTE(review): assumes that list covers every column the
    # pipeline needs - confirm against the training code.
    model_features = model.named_steps['preprocessor'].transformers_[0][2]
    model_input = input_data[model_features]

    # Make prediction
    prediction = model.predict(model_input)[0]
    prediction_proba = model.predict_proba(model_input)[0]

    return {
        'red_win_prob': prediction_proba[1],
        'blue_win_prob': prediction_proba[0],
        'predicted_winner': 'Red' if prediction == 1 else 'Blue'
    }

# Define historical superfights
superfights = [
    {"red": "Conor McGregor", "blue": "Khabib Nurmagomedov", "actual": "Blue"},
    {"red": "Jon Jones", "blue": "Daniel Cormier", "actual": "Red"},
    {"red": "Israel Adesanya", "blue": "Alex Pereira", "actual": "Blue"},
    {"red": "Ronda Rousey", "blue": "Holly Holm", "actual": "Blue"},
    {"red": "Georges St-Pierre", "blue": "Michael Bisping", "actual": "Red"},
    {"red": "Anderson Silva", "blue": "Chael Sonnen", "actual": "Red"},
    {"red": "Nate Diaz", "blue": "Conor McGregor", "actual": "Blue"},
    {"red": "Amanda Nunes", "blue": "Ronda Rousey", "actual": "Red"}
]

# Test superfights
results = []
skipped_fights = []
for fight in superfights:
    result = predict_fight(fight["red"], fight["blue"])
    if not result:
        # predict_fight returns None when stats are missing for either fighter
        skipped_fights.append(f"{fight['red']} vs {fight['blue']}")
        continue
    # Compare the full corner names rather than only their first letters
    correct = fight['actual'] == result['predicted_winner']
    results.append({
        "Red Fighter": fight["red"],
        "Blue Fighter": fight["blue"],
        "Actual Winner": fight['actual'],
        "Predicted Winner": result['predicted_winner'],
        "Red Win Prob": f"{result['red_win_prob']*100:.1f}%",
        "Blue Win Prob": f"{result['blue_win_prob']*100:.1f}%",
        "Correct": correct
    })

# Make it visible when the accuracy below is computed on fewer fights than listed
if skipped_fights:
    print(f"Skipped {len(skipped_fights)} superfight(s) without fighter stats: "
          + ", ".join(skipped_fights))

# Create results table
results_df = pd.DataFrame(results)
# An empty table has no 'Correct' column; report NaN instead of raising KeyError
superfight_accuracy = results_df['Correct'].mean() if results else float('nan')

# Save results
save_result("HISTORICAL SUPERFIGHT PREDICTIONS", results_df)
save_result("SUPERFIGHT ACCURACY",
           f"Accuracy on historical superfights: {superfight_accuracy*100:.1f}%")

# %% [markdown]
# ## 9. Betting Strategy Simulation

# %%
# Re-read the raw data so its odds columns can be attached to the evaluation frame
df_full = pd.read_csv(get_data_path())
df_eval = df_processed.copy()
for corner in ('Red', 'Blue'):
    # Series assignment aligns on the index shared with the raw frame
    df_eval[f'{corner}OddsActual'] = df_full[f'{corner}Odds']
df_eval['PredictedWinProb'] = y_pred_proba

# Keep only fights where both corners have odds
odds_columns = ['RedOddsActual', 'BlueOddsActual']
df_eval = df_eval.dropna(subset=odds_columns)

# Calculate implied probabilities from odds
def odds_to_prob(odds):
    """Convert moneyline odds into the implied win probability.

    Positive odds are treated as "risk 100 to win `odds`"; zero or negative
    odds as "risk `-odds` to win 100". The result is risk / (risk + win).
    """
    if odds > 0:
        risk, win = 100, odds
    else:
        risk, win = -odds, 100
    return risk / (risk + win)

# Derive one implied-probability column per corner from its odds column
for corner in ('Red', 'Blue'):
    df_eval[f'{corner}ImpliedProb'] = df_eval[f'{corner}OddsActual'].apply(odds_to_prob)

# Define betting strategy
def betting_strategy(row, min_edge=0.05):
    """Value betting strategy.

    Bets on a corner only when the model's win probability for that corner
    exceeds the implied probability by more than ``min_edge``. Red is checked
    first, so it wins when both corners would qualify.

    Args:
        row: Mapping or Series with 'PredictedWinProb' (model probability for
            Red), 'RedImpliedProb' and 'BlueImpliedProb'.
        min_edge: Required margin over the implied probability. Defaults to
            0.05, the value that was previously hard-coded.

    Returns:
        'Red', 'Blue' or 'No Bet'.
    """
    if row['PredictedWinProb'] > row['RedImpliedProb'] + min_edge:
        return 'Red'
    elif (1 - row['PredictedWinProb']) > row['BlueImpliedProb'] + min_edge:
        return 'Blue'
    else:
        return 'No Bet'

# Decide row by row which corner (if any) the strategy bets on
bet_choices = df_eval.apply(betting_strategy, axis=1)
df_eval['BetOn'] = bet_choices

# Calculate returns
def calculate_return(row):
    """Profit or loss of one simulated bet.

    Returns 0 when no bet was placed, -100 when the backed corner did not win,
    and otherwise the winnings: the odds value itself for non-negative odds, or
    ``100 / implied_prob - 100`` for negative odds.
    """
    side = row['BetOn']
    if side == 'No Bet':
        return 0

    # Target value that means the backed corner won
    winning_target = {'Red': 1, 'Blue': 0}
    if side not in winning_target or not (row['Target'] == winning_target[side]):
        return -100

    odds = row[f'{side}OddsActual']
    if odds < 0:
        return (100 / row[f'{side}ImpliedProb']) - 100
    return odds

df_eval['Return'] = df_eval.apply(calculate_return, axis=1)

# Analyze results
# Every placed bet stakes 100, matching the -100 loss used in calculate_return.
bet_placed = df_eval['BetOn'] != 'No Bet'
total_bets = int(bet_placed.sum())
total_stake = total_bets * 100
total_return = df_eval['Return'].sum()  # net profit/loss across all rows
winning_bets = int((bet_placed & (df_eval['Return'] > 0)).sum())

# Guard both ratios: when the strategy places no bets there is nothing to
# divide by, and the win rate used to raise ZeroDivisionError.
if total_bets > 0:
    roi = (total_return / total_stake) * 100
    win_percentage = winning_bets / total_bets
else:
    roi = 0
    win_percentage = 0

# Save results
betting_results = f"""
BETTING STRATEGY RESULTS:
- Total fights with odds: {len(df_eval)}
- Bets placed: {total_bets}
- Win rate: {win_percentage*100:.1f}%
- Total return: ${total_return:.2f}
- Total stake: ${total_stake:.2f}
- ROI: {roi:.1f}%
"""
save_result("BETTING SIMULATION RESULTS", betting_results)

# %% [markdown]
# ## 10. Final Summary

# %%
# Create final summary
# The recommendation is driven solely by the simulated ROI.
# NOTE(review): the simulation runs on the same rows the model was scored on;
# if those overlap with the training data the ROI is optimistic - confirm
# before acting on the "ready for deployment" verdict.
if roi > 10:
    recommendation = "✅ STRONG MODEL: Ready for deployment"
elif roi > 0:
    recommendation = "⚠️ MODERATE MODEL: Needs minor improvements"
else:
    recommendation = "❌ WEAK MODEL: Requires significant improvements"

final_summary = f"""
FINAL MODEL EVALUATION SUMMARY:

1. MODEL PERFORMANCE:
   - Accuracy: {accuracy:.4f}
   - ROC AUC: {roc_auc:.4f}
   - Avg Precision: {avg_precision:.4f}
   - Superfight Accuracy: {superfight_accuracy:.4f}

2. BETTING PERFORMANCE:
   - ROI: {roi:.1f}%
   - Win Rate: {win_percentage*100:.1f}%
   - Total Return: ${total_return:.2f} (on ${total_stake:.2f} stake)

3. RECOMMENDATIONS:
   - {recommendation}
"""

# Save final summary. The text contains emoji, so the encoding must be explicit:
# with a non-UTF-8 platform default the write raises UnicodeEncodeError.
with open(os.path.join(RESULTS_DIR, 'FINAL_SUMMARY.txt'), 'w', encoding='utf-8') as f:
    f.write(final_summary)

# Print completion message
print("✅ Evaluation complete!")
print(f"📝 Results saved to: {RESULTS_DIR}")
print(final_summary)

  calibration = prob_df.groupby('Probability Bin')['Actual Outcome'].value_counts(
  calibration['Total Predictions'] = prob_df.groupby('Probability Bin').size()


✅ Evaluation complete!
📝 Results saved to: /Users/sukhmandeep/PycharmProjects/ufc-fight-predictor/evaluation_results

FINAL MODEL EVALUATION SUMMARY:

1. MODEL PERFORMANCE:
   - Accuracy: 0.6578
   - ROC AUC: 0.7328
   - Avg Precision: 0.7919
   - Superfight Accuracy: 0.3750

2. BETTING PERFORMANCE:
   - ROI: 26.4%
   - Win Rate: 53.1%
   - Total Return: $48500.34 (on $183900.00 stake)

3. RECOMMENDATIONS:
   - ✅ STRONG MODEL: Ready for deployment

