In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys

# Set style for professional look: apply matplotlib's 'ggplot' style sheet,
# which affects every figure created after this call.
plt.style.use('ggplot')

# ======================================================
# 1. DATA INPUT (Final & Consistent Metrics from Tables)
# ======================================================

# Display names double as dictionary keys; the embedded "\n" wraps the longer
# names when they are used as x-axis tick labels.
models = ["Linear\nReg.", "Ridge", "Lasso", "Random\nForest", "Neural\nNetwork", "XGBoost"]

# Each tuple below lists one value per model, in exactly the order of `models`:
#   Linear Reg., Ridge, Lasso, Random Forest, Neural Network, XGBoost
#
# NOTE(review): within a scenario, some R2/RMSE pairs do not rank the same way
# (e.g. Scenario 2: Linear Reg. and Neural Network both report R2 = 0.78 but
# RMSE 161760 vs 124968). Confirm both metrics come from the same evaluation
# split before drawing conclusions from the charts.

# --- SCENARIO 1: NO GEO, VIBE (Original high-dimensional, low-R2 base) ---
r2_no_geo_vibe = dict(zip(models, (0.54, 0.64, 0.65, 0.64, 0.67, 0.82)))
rmse_no_geo_vibe = dict(zip(models, (231058, 204410, 201592, 140409, 151792, 141009)))

# --- SCENARIO 2: WITH GEO, NON-VIBE (Strong Geo Baseline) ---
r2_geo_baseline = dict(zip(models, (0.78, 0.69, 0.72, 0.81, 0.78, 0.81)))
rmse_geo_baseline = dict(zip(models, (161760, 189438, 178670, 152033, 124968, 146499)))

# --- SCENARIO 3: WITH GEO + VIBE (Final Enhanced Performance) ---
r2_geo_vibe = dict(zip(models, (0.38, 0.70, 0.76, 0.77, 0.78, 0.91)))
rmse_geo_vibe = dict(zip(models, (176170, 172529, 156543, 164019, 123918, 100608)))

# ======================================================
# 2. Plotting Helper
# ======================================================

def compute_improvement(models, before_dict, after_dict, metric="r2"):
    """Return the per-model improvement between two metric tables.

    The sign is normalised so that a positive value always means "better":
    for ``metric == "r2"`` the result is ``after - before`` (a gain), and for
    any other ``metric`` value (treated as RMSE) it is ``before - after``
    (a reduction).

    Args:
        models: Iterable of keys to look up, in output order.
        before_dict: Mapping of model key to the metric before the change.
        after_dict: Mapping of model key to the metric after the change.
        metric: ``"r2"`` for higher-is-better; anything else is handled as
            lower-is-better.

    Returns:
        A NumPy array with one improvement value per entry of ``models``.

    Raises:
        KeyError: If a model key is missing from either mapping.
    """
    # Look up "before" first, then "after", for each model in turn.
    pairs = ((before_dict[name], after_dict[name]) for name in models)
    if metric == "r2":
        gains = [new - old for old, new in pairs]
    else:  # rmse
        gains = [old - new for old, new in pairs]
    return np.array(gains)

def create_metric_plot(models, deltas, title, ylabel, metric_type, filename):
    """Render one bar chart of per-model metric improvements and save it.

    Bars with a positive delta are drawn in the metric's "gain" colour (green
    for R2, blue for RMSE); bars at or below zero are drawn in red. Each bar
    is annotated with its signed value.

    Args:
        models: Model display names, used as x tick labels (one per bar).
        deltas: Improvement per model, in the same order as ``models``.
        title: Chart title.
        ylabel: Y-axis label.
        metric_type: ``'R2'`` selects the R2 colours and three-decimal labels;
            any other value is treated as RMSE (labels in thousands of
            dollars).
        filename: Output path handed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    # Close this specific figure even if drawing or saving fails, so a failed
    # chart does not leave an open figure behind.
    try:
        x = np.arange(len(models))

        # Colour, label format and label offset depend on the metric type.
        if metric_type == 'R2':
            gain_color = '#4CAF50'
            annotation_template = "{:+.3f}"
            annotation_scale = 1
            annotation_offset = 0.005
        else:  # RMSE
            gain_color = '#2196F3'
            annotation_template = "${:+.1f}K"
            annotation_scale = 1000
            annotation_offset = 5000
        colors = [gain_color if d > 0 else '#F44336' for d in deltas]

        ax.bar(x, deltas, color=colors)
        ax.set_title(title, fontsize=14)
        ax.set_ylabel(ylabel)
        ax.set_xticks(x)
        ax.set_xticklabels(models, rotation=0)
        ax.axhline(0, color='black', linewidth=0.8)
        ax.grid(axis='y', linestyle='--', alpha=0.5)
        # Labels sit outside the bar ends, so reserve extra vertical room to
        # keep them inside the axes and clear of the title.
        ax.margins(y=0.15)

        for xi, delta in zip(x, deltas):
            # Anchor the label by the edge facing the bar so the text never
            # overlaps the bar, whatever the y-axis scale is.
            if delta > 0:
                label_y, label_va = delta + annotation_offset, 'bottom'
            else:
                label_y, label_va = delta - annotation_offset, 'top'
            ax.text(
                xi,
                label_y,
                annotation_template.format(delta / annotation_scale),
                ha='center', va=label_va, fontsize=10,
                color='black'
            )

        fig.tight_layout()
        fig.savefig(filename)
    finally:
        plt.close(fig)

# ======================================================
# 3. EXECUTION: GENERATE FOUR SEPARATE CHARTS
# ======================================================

# --- Comparison A: Geo Baseline vs. Geo VIBE (Final VIBE Lift) ---
# Both scenarios include Geo features; the difference is the VIBE features.
r2_final_lift = compute_improvement(models, r2_geo_baseline, r2_geo_vibe, metric="r2")
rmse_final_lift = compute_improvement(models, rmse_geo_baseline, rmse_geo_vibe, metric="rmse")

# --- Comparison B: No Geo VIBE vs. Final Geo VIBE (Impact of Adding Geo to VIBE Model) ---
# Both scenarios include VIBE features; the difference is the Geo features.
r2_geo_effect_on_vibe = compute_improvement(models, r2_no_geo_vibe, r2_geo_vibe, metric="r2")
rmse_geo_effect_on_vibe = compute_improvement(models, rmse_no_geo_vibe, rmse_geo_vibe, metric="rmse")

# One chart per (comparison, metric), written in the order 1A, 1B, 2A, 2B.
# Each entry is (deltas, title, ylabel, metric_type, filename).
for plot_args in (
    (
        r2_final_lift,
        "Final R² Change: Geo-Baseline vs. Geo-VIBE (Hypothesis Test)",
        "Δ R² (Coefficient of Determination)",
        "R2",
        "chart_a_final_r2_lift.png",
    ),
    (
        rmse_final_lift,
        "Final RMSE Change: Geo-Baseline vs. Geo-VIBE",
        "Δ RMSE (Dollar Reduction, ↑ is better)",
        "RMSE",
        "chart_b_final_rmse_lift.png",
    ),
    (
        r2_geo_effect_on_vibe,
        "R² Change: Raw VIBE Model vs. Geo-VIBE Model",
        "Δ R² (Coefficient of Determination)",
        "R2",
        "chart_c_vibe_geo_effect_r2.png",
    ),
    (
        rmse_geo_effect_on_vibe,
        "RMSE Change: Raw VIBE Model vs. Geo-VIBE Model",
        "Δ RMSE (Dollar Reduction, ↑ is better)",
        "RMSE",
        "chart_d_vibe_geo_effect_rmse.png",
    ),
):
    create_metric_plot(models, *plot_args)

print("Generated four comparison plots corresponding to your two required analyses.")

Generated four comparison plots corresponding to your two required analyses.
