# Model Comparison: XGBoost vs Logistic Regression

This notebook builds a comprehensive comparison figure contrasting the best-performing XGBoost model with a baseline Logistic Regression model for predicting insufficient pain management.

In [None]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    roc_auc_score, roc_curve, precision_recall_curve, auc,
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)
import xgboost as xgb
import warnings
warnings.filterwarnings('ignore')

# Add the parent directory to the path
sys.path.append('/Users/jk1/icu_research/PreHosp')

from analgesia.prediction_of_insufficient_pain_management.data_preprocessing import load_and_preprocess_data

# Publication-quality figure defaults: start from matplotlib's stock style,
# switch seaborn to the "husl" palette, then apply the resolution and font
# sizes in a single batch update of rcParams.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({
    'figure.dpi': 300,
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
})

In [None]:
# Read the source spreadsheet through the project's preprocessing helper and
# obtain the train/test split used by every model below.
print("📊 Loading and preprocessing data for model comparison...")
data_path = (
    '/Users/jk1/Library/CloudStorage/OneDrive-unige.ch/icu_research/prehospital/'
    'analgesia/data/trauma_categories_Rega Pain Study15.09.2025_v2.xlsx'
)
processed_data, processor = load_and_preprocess_data(data_path)

# The processor returns the four pieces of the split in this order.
# NOTE(review): how the split is made (ratio, stratification, seed) is defined
# in the project module, not here.
X_train, X_test, y_train, y_test = processor.prepare_modeling_data()

# Report the size of each split, the feature count, and the positive-class rate.
print("✅ Data loaded:")
for split_label, split_features in (("Training set", X_train), ("Test set", X_test)):
    print(f"   {split_label}: {split_features.shape[0]} samples")
print(f"   Features: {X_train.shape[1]}")
print(f"   Target balance - Training: {y_train.mean():.1%}, Test: {y_test.mean():.1%}")

In [None]:
# Preview only the first rows of the training features instead of rendering
# the whole matrix; the full shape is already printed by the loading cell.
# Row slicing works whether X_train is a DataFrame or an array.
X_train[:5]

In [None]:
# Fit the XGBoost classifier. Per the original note, these hyperparameters
# were carried over from previous testing.
print("🚀 Training XGBoost model...")
xgb_params = {
    'n_estimators': 100,
    'max_depth': 6,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'eval_metric': 'logloss',
    'random_state': 42,  # fixed seed so the row/column subsampling is repeatable
}
xgb_model = xgb.XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

# Test-set outputs: positive-class probabilities (second column of
# predict_proba) and the hard class labels.
xgb_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
xgb_pred = xgb_model.predict(X_test)

print("✅ XGBoost model trained")

In [None]:
# Fit the baseline Logistic Regression classifier.
print("📈 Training Logistic Regression model...")
lr_params = {
    'class_weight': 'balanced',  # Handle class imbalance
    'max_iter': 1000,
    'random_state': 42,
}
lr_model = LogisticRegression(**lr_params)
lr_model.fit(X_train, y_train)

# Test-set outputs: positive-class probabilities (second column of
# predict_proba) and the hard class labels.
lr_pred_proba = lr_model.predict_proba(X_test)[:, 1]
lr_pred = lr_model.predict(X_test)

print("✅ Logistic Regression model trained")

In [None]:
# Calculate performance metrics for both models
def calculate_metrics(y_true, y_pred, y_pred_proba, model_name):
    """Collect classification metrics for one model into a flat dict.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Hard class predictions, scored by accuracy, precision,
            recall and F1.
        y_pred_proba: Positive-class scores, used for ROC-AUC and for the
            precision-recall curve.
        model_name: Label stored under the 'Model' key.

    Returns:
        dict with keys 'Model', 'Accuracy', 'Precision', 'Recall',
        'F1-Score', 'ROC-AUC' and 'PR-AUC', in that order.
    """
    # Scorers that operate on hard labels, in the order they are reported.
    label_scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )

    results = {'Model': model_name}
    for metric_name, scorer in label_scorers:
        results[metric_name] = scorer(y_true, y_pred)

    # Ranking metrics are computed from the predicted probabilities.
    results['ROC-AUC'] = roc_auc_score(y_true, y_pred_proba)

    # PR-AUC is the area under the precision-recall curve, integrated with
    # recall on the x-axis.
    pr_precision, pr_recall, _ = precision_recall_curve(y_true, y_pred_proba)
    results['PR-AUC'] = auc(pr_recall, pr_precision)

    return results

# Score both fitted models against the test split.
xgb_metrics = calculate_metrics(
    y_true=y_test, y_pred=xgb_pred, y_pred_proba=xgb_pred_proba, model_name='XGBoost'
)
lr_metrics = calculate_metrics(
    y_true=y_test, y_pred=lr_pred, y_pred_proba=lr_pred_proba, model_name='Logistic Regression'
)

# One row per model, one column per metric.
comparison_df = pd.DataFrame([xgb_metrics, lr_metrics])

# Side-by-side console table, one metric per line.
print("📊 Model Performance Comparison:")
print("=" * 60)
reported_metrics = ('Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'PR-AUC')
for metric_name in reported_metrics:
    xgb_score = xgb_metrics[metric_name]
    lr_score = lr_metrics[metric_name]
    print(f"{metric_name:12} | XGBoost: {xgb_score:.3f} | LogReg: {lr_score:.3f}")

print("\n✅ Performance metrics calculated")

In [None]:
# Create comprehensive comparison figure (2 x 3 grid of panels).
#
# Every panel is drawn through its own Axes object instead of pyplot's
# implicit "current axes", so panels can be reordered or re-run without
# accidentally drawing into the wrong subplot.
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3, figsize=(16, 12))

# Define colors for consistency
xgb_color = '#2E86AB'  # Blue
lr_color = '#A23B72'   # Magenta

# Tick labels shared by both confusion matrices (class 0, class 1).
class_labels = ['Adequate', 'Insufficient']

# 1. ROC Curves (top left)
fpr_xgb, tpr_xgb, _ = roc_curve(y_test, xgb_pred_proba)
ax1.plot(fpr_xgb, tpr_xgb, color=xgb_color, linewidth=2,
         label=f'XGBoost (AUC = {xgb_metrics["ROC-AUC"]:.3f})')

fpr_lr, tpr_lr, _ = roc_curve(y_test, lr_pred_proba)
ax1.plot(fpr_lr, tpr_lr, color=lr_color, linewidth=2,
         label=f'Logistic Regression (AUC = {lr_metrics["ROC-AUC"]:.3f})')

# Diagonal reference line (TPR == FPR).
ax1.plot([0, 1], [0, 1], 'k--', alpha=0.5, linewidth=1)
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate')
ax1.set_title('ROC Curves')
ax1.legend()
ax1.grid(True, alpha=0.3)

# 2. Precision-Recall Curves (top middle)
precision_xgb, recall_xgb, _ = precision_recall_curve(y_test, xgb_pred_proba)
ax2.plot(recall_xgb, precision_xgb, color=xgb_color, linewidth=2,
         label=f'XGBoost (AUC = {xgb_metrics["PR-AUC"]:.3f})')

precision_lr, recall_lr, _ = precision_recall_curve(y_test, lr_pred_proba)
ax2.plot(recall_lr, precision_lr, color=lr_color, linewidth=2,
         label=f'Logistic Regression (AUC = {lr_metrics["PR-AUC"]:.3f})')

# Baseline (proportion of positive class)
baseline = y_test.mean()
ax2.axhline(y=baseline, color='k', linestyle='--', alpha=0.5,
            label=f'Baseline ({baseline:.3f})')
ax2.set_xlabel('Recall')
ax2.set_ylabel('Precision')
ax2.set_title('Precision-Recall Curves')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Performance Metrics Bar Chart (top right)
metrics_to_plot = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
x_pos = np.arange(len(metrics_to_plot))
width = 0.35

xgb_values = [xgb_metrics[m] for m in metrics_to_plot]
lr_values = [lr_metrics[m] for m in metrics_to_plot]

# Grouped bars: XGBoost to the left of each tick, Logistic Regression to the right.
bars1 = ax3.bar(x_pos - width/2, xgb_values, width, label='XGBoost',
                color=xgb_color, alpha=0.8)
bars2 = ax3.bar(x_pos + width/2, lr_values, width, label='Logistic Regression',
                color=lr_color, alpha=0.8)

# Add value labels just above each bar.
for bars, values in ((bars1, xgb_values), (bars2, lr_values)):
    for bar, value in zip(bars, values):
        ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                 f'{value:.3f}', ha='center', va='bottom', fontsize=8)

ax3.set_xlabel('Metrics')
ax3.set_ylabel('Score')
ax3.set_title('Performance Metrics Comparison')
ax3.set_xticks(x_pos)
ax3.set_xticklabels(metrics_to_plot, rotation=45)
ax3.legend()
ax3.set_ylim(0, 1.1)  # headroom above 1.0 for the value labels
ax3.grid(True, alpha=0.3, axis='y')

# 4. Confusion Matrices (bottom left and middle)
cm_xgb = confusion_matrix(y_test, xgb_pred)
sns.heatmap(cm_xgb, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax4)
ax4.set_title('XGBoost Confusion Matrix')
ax4.set_ylabel('True Label')
ax4.set_xlabel('Predicted Label')

cm_lr = confusion_matrix(y_test, lr_pred)
sns.heatmap(cm_lr, annot=True, fmt='d', cmap='Reds',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax5)
ax5.set_title('Logistic Regression Confusion Matrix')
ax5.set_ylabel('True Label')
ax5.set_xlabel('Predicted Label')

# 6. Model Summary Table (bottom right)
ax6.axis('tight')
ax6.axis('off')

# Build the table rows from one list of metric names so values and differences
# cannot drift apart; the difference is XGBoost minus Logistic Regression.
summary_metrics = ['Accuracy', 'ROC-AUC', 'PR-AUC', 'F1-Score', 'Precision', 'Recall']
summary_diffs = [xgb_metrics[m] - lr_metrics[m] for m in summary_metrics]
summary_data = [['Metric', 'XGBoost', 'Logistic Regression', 'Difference']] + [
    [m, f'{xgb_metrics[m]:.3f}', f'{lr_metrics[m]:.3f}', f'{diff:+.3f}']
    for m, diff in zip(summary_metrics, summary_diffs)
]

table = ax6.table(cellText=summary_data[1:], colLabels=summary_data[0],
                  cellLoc='center', loc='center')
table.auto_set_font_size(False)
table.set_fontsize(9)
table.scale(1.2, 1.5)

# Color code the difference column. Table row 0 is the header, so data rows
# start at 1. The sign is taken from the value rounded to the three decimals
# shown in the cell, so the colour always agrees with the displayed text; a
# difference that displays as zero keeps the neutral background instead of
# being painted as a loss.
for row_idx, diff in enumerate(summary_diffs, start=1):
    shown_diff = round(float(diff), 3)
    if shown_diff > 0:
        table[(row_idx, 3)].set_facecolor('#E8F5E8')  # Light green for positive
    elif shown_diff < 0:
        table[(row_idx, 3)].set_facecolor('#FFE8E8')  # Light red for negative

ax6.set_title('Performance Summary', pad=20)

fig.tight_layout()
plt.show()

print("✅ Comprehensive model comparison figure created")

In [None]:
# Save the figure.
#
# Write through the `fig` handle rather than `plt.savefig`: this cell runs
# after the figure was shown in an earlier cell, and with the notebook inline
# backend the shown figure is no longer pyplot's current figure, so
# `plt.savefig` here would write a blank canvas.
output_path = "/Users/jk1/icu_research/PreHosp/analgesia/prediction_of_insufficient_pain_management/model_comparison_xgboost_vs_logistic.png"
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor='white')
print(f"💾 Figure saved to: {output_path}")

# Also save as PDF for publication
pdf_path = output_path.replace('.png', '.pdf')
fig.savefig(pdf_path, bbox_inches='tight', facecolor='white')
print(f"📄 PDF version saved to: {pdf_path}")

In [None]:
# Print detailed comparison summary
print("\n" + "="*80)
print("🏆 DETAILED MODEL COMPARISON SUMMARY")
print("="*80)

print("\n📊 Dataset Information:")
print(f"   • Training samples: {X_train.shape[0]:,}")
print(f"   • Test samples: {X_test.shape[0]:,}")
print(f"   • Features: {X_train.shape[1]}")
print(f"   • Insufficient pain management rate: {y_test.mean():.1%}")

# Metrics used both for the difference report and for picking the winner.
key_metrics = ['Accuracy', 'ROC-AUC', 'PR-AUC', 'F1-Score']

# Differences are XGBoost minus Logistic Regression, in percentage points.
print("\n🔍 Key Performance Differences:")
for metric in key_metrics:
    delta_pp = (xgb_metrics[metric] - lr_metrics[metric]) * 100
    print(f"   • {metric} improvement: {delta_pp:+.1f} percentage points")

print("\n⚖️ Trade-offs Analysis:")
print(f"   • XGBoost Precision: {xgb_metrics['Precision']:.3f} vs LogReg: {lr_metrics['Precision']:.3f}")
print(f"   • XGBoost Recall: {xgb_metrics['Recall']:.3f} vs LogReg: {lr_metrics['Recall']:.3f}")

# Determine which model is better overall.
# Count strict wins for each side separately so that an even split (or
# exactly equal metrics) is reported as a tie instead of being credited to
# Logistic Regression by default.
xgb_better = int(sum(xgb_metrics[m] > lr_metrics[m] for m in key_metrics))
lr_better = int(sum(lr_metrics[m] > xgb_metrics[m] for m in key_metrics))

if xgb_better > lr_better:
    overall_winner = 'XGBoost'
elif lr_better > xgb_better:
    overall_winner = 'Logistic Regression'
else:
    overall_winner = 'Tie'

print(f"\n🏅 Overall Winner: {overall_winner}")
print(f"   • XGBoost wins in {xgb_better}/{len(key_metrics)} key metrics")
print(f"   • Logistic Regression wins in {lr_better}/{len(key_metrics)} key metrics")

print("\n📈 Clinical Interpretation:")
if xgb_metrics['ROC-AUC'] > lr_metrics['ROC-AUC']:
    print(f"   • XGBoost shows better discriminative ability (ROC-AUC: {xgb_metrics['ROC-AUC']:.3f})")
if xgb_metrics['PR-AUC'] > lr_metrics['PR-AUC']:
    print("   • XGBoost is better at identifying insufficient pain management cases")
# Recall (sensitivity): distinguish the equal case so neither model is
# credited with an advantage it does not have.
if xgb_metrics['Recall'] > lr_metrics['Recall']:
    print("   • XGBoost catches more true insufficient pain management cases")
elif lr_metrics['Recall'] > xgb_metrics['Recall']:
    print(f"   • Logistic Regression has higher sensitivity ({lr_metrics['Recall']:.3f})")
else:
    print(f"   • Both models have the same sensitivity ({lr_metrics['Recall']:.3f})")

print("\n✅ Model comparison analysis completed!")

In [None]:
# Persist the per-model metrics (one row per model) as CSV for further
# analysis, then show the same table rounded to four decimals.
csv_path = (
    "/Users/jk1/icu_research/PreHosp/analgesia/"
    "prediction_of_insufficient_pain_management/model_comparison_metrics.csv"
)
metric_rows = [xgb_metrics, lr_metrics]
comparison_df_detailed = pd.DataFrame(metric_rows)
comparison_df_detailed.to_csv(csv_path, index=False)
print(f"💾 Detailed metrics saved to: {csv_path}")

# Rich display of the table in the notebook output.
print("\n📋 DETAILED METRICS TABLE:")
print("=" * 50)
rounded_metrics = comparison_df_detailed.round(4)
display(rounded_metrics)