In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

print("=== HORIZON EUROPE MODEL RESULTS ANALYSIS ===")

# 1. Colour Settings
# Three-colour palette passed as `color=` to the bar charts further down.
SOFT_COLORS = ['#7B9DB8', '#A8C5B0', '#D4B896']

# Font sizes applied on top of matplotlib's 'default' style, keyed by rcParam.
_font_size_overrides = {
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 10,
    'figure.titlesize': 14,
}

# Reset to the default style first so the overrides start from a known baseline.
plt.style.use('default')
plt.rcParams.update(_font_size_overrides)

# 2. File Location
# Directory that receives every figure written by this analysis.
# The literal is a raw string, so each separator is written as a single
# backslash: inside r"..." a doubled backslash is NOT collapsed and would end
# up as two separators in the path (and in every message that prints it).
plots_dir = r"D:\KU Leuven\Stats\MDA\horizon-funding\data\plots"
if not os.path.exists(plots_dir):
    # exist_ok=True keeps the call safe if the directory is created by
    # something else between the existence check and makedirs().
    os.makedirs(plots_dir, exist_ok=True)
    print(f" Created directory: {plots_dir}")

# 3. Load the Models Results
# `catalog` is not defined in this file; it must already exist in the running
# session (presumably a Kedro data catalog injected into the notebook -- confirm).
# Each loaded object is later indexed by metric name ('accuracy', 'f1', ...).
try:
    rf_results = catalog.load("rf_test_metrics")
    xgb_results = catalog.load("xgb_test_metrics")
    logistic_results = catalog.load("logistic_test_metrics")
except Exception as e:
    print(f" Error loading results: {e}")
    # Re-raise instead of carrying on: every later step reads `results`, so
    # continuing would only fail further down with a NameError that hides the
    # real cause of the problem.
    raise
else:
    # Display name -> metrics object, in the order the models are plotted.
    results = {
        'Random Forest': rf_results,
        'XGBoost': xgb_results,
        'Logistic Regression': logistic_results
    }
    print(" All model results loaded successfully")

# 4. Show Performance Summary
print("\n=== MODEL PERFORMANCE SUMMARY ===")

# One row per model; every metric is pre-formatted to four decimal places so
# the printed table shows uniform strings rather than raw floats.
summary_data = [
    {
        'Model': name,
        'Accuracy': format(metrics['accuracy'], '.4f'),
        'F1 Score': format(metrics['f1'], '.4f'),
        'Precision': format(metrics['precision'], '.4f'),
        'Recall': format(metrics['recall'], '.4f'),
    }
    for name, metrics in results.items()
]

summary_df = pd.DataFrame(summary_data)
# index=False drops the numeric row index from the printed table.
print(summary_df.to_string(index=False))

# 5. Generate performance comparison chart
# Model names in dict order, plus the two metric series plotted against them.
models = list(results)
accuracies = [results[name]['accuracy'] for name in models]
f1_scores = [results[name]['f1'] for name in models]

fig, axes = plt.subplots(1, 2, figsize=(16, 8))
fig.suptitle('Model Performance Comparison - Horizon Europe',
             fontsize=16, fontweight='bold', y=0.95)

# Both panels share the same layout, so they are drawn by one loop.
# Each entry: (target axes, bar heights, panel title, y-axis label).
panels = (
    (axes[0], accuracies, 'Accuracy Comparison', 'Accuracy'),
    (axes[1], f1_scores, 'F1 Score Comparison', 'F1 Score'),
)
for panel_ax, scores, panel_title, y_label in panels:
    bars = panel_ax.bar(models, scores, color=SOFT_COLORS, alpha=0.8,
                        edgecolor='white', linewidth=2)
    panel_ax.set_title(panel_title, fontweight='bold', pad=20)
    panel_ax.set_ylabel(y_label, fontweight='bold')
    panel_ax.set_ylim(0, 1.1)
    panel_ax.grid(True, alpha=0.3)
    panel_ax.tick_params(axis='x', rotation=15)

    # Print each value, to three decimals, just above the top of its bar.
    for bar, score in zip(bars, scores):
        label_x = bar.get_x() + bar.get_width() / 2
        label_y = bar.get_height() + 0.02
        panel_ax.text(label_x, label_y, f'{score:.3f}',
                      ha='center', va='bottom', fontweight='bold')

# Layout
plt.subplots_adjust(left=0.08, right=0.95, top=0.85, bottom=0.15, wspace=0.25)
performance_png = f"{plots_dir}/model_performance_simple.png"
plt.savefig(performance_png, dpi=300, bbox_inches='tight')
print(f" Performance plot saved to: {performance_png}")
plt.show()

# 6. Generate confusion matrix
# Tick labels used on both heatmap axes (rows = true class, columns = predicted).
class_labels = ['Small\n(≤€2M)', 'Medium\n(€2-4M)', 'Large\n(>€4M)']


# One panel per model, laid out side by side.
fig, axes = plt.subplots(1, 3, figsize=(20, 7))
fig.suptitle('Confusion Matrices', fontsize=16, fontweight='bold', y=0.95)

for i, (model_name, model_results) in enumerate(results.items()):
    ax = axes[i]

    if 'confusion_matrix' not in model_results:
        # Without this, the panel would be left as an empty framed axes with
        # default ticks and no title. Blank it and say which model is missing.
        ax.set_axis_off()
        ax.set_title(f'{model_name}', fontweight='bold', pad=20)
        ax.text(0.5, 0.5, 'No confusion matrix available',
                ha='center', va='center', transform=ax.transAxes)
        continue

    cm = np.array(model_results['confusion_matrix'])

    # fmt='d' renders the cell annotations as integers.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels, yticklabels=class_labels,
                ax=ax, cbar_kws={'shrink': 0.8},
                linewidths=1, linecolor='white',
                annot_kws={'fontsize': 11, 'fontweight': 'bold'})

    ax.set_title(f'{model_name}', fontweight='bold', pad=20)
    ax.set_xlabel('Predicted Class', fontweight='bold', labelpad=10)
    ax.set_ylabel('True Class', fontweight='bold', labelpad=10)

    # Keep the two-line class labels horizontal on both axes.
    ax.tick_params(axis='x', labelsize=9, rotation=0)
    ax.tick_params(axis='y', labelsize=9, rotation=0)


plt.subplots_adjust(left=0.06, right=0.98, top=0.85, bottom=0.15, wspace=0.3)
plt.savefig(f"{plots_dir}/confusion_matrices_simple.png", dpi=300, bbox_inches='tight')
print(f" Confusion matrices saved to: {plots_dir}/confusion_matrices_simple.png")
plt.show()

# 7. Select the best
# Rank models by F1 score; on a tie, max() keeps the first one in dict order.
best_name = max(results, key=lambda name: results[name]['f1'])
# Kept as a (name, metrics) pair.
best_model = (best_name, results[best_name])
best_metrics = best_model[1]

print(f"\n BEST PERFORMING MODEL: {best_name}")
print(f"   F1 Score: {best_metrics['f1']:.4f}")
print(f"   Accuracy: {best_metrics['accuracy']:.4f}")

print(f"\n All files saved to: {plots_dir}")
print(" Analysis complete!")