XGBOOST CODE

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from xgboost import XGBRegressor

# ==========================================
# 0. CONFIG
# ==========================================
INPUT_CSV = "final_ml_features_normalized - final_ml_features_normalized.csv"
TARGET_COL = "Median_Housing_Value_2023"  # regression target
RANDOM_STATE = 1738                       # shared seed for the split, CV folds and the model
TEST_SIZE = 0.2                           # fraction of rows held out for testing
N_SPLITS = 5                              # number of cross-validation folds

# ==========================================
# 1. LOAD DATA + BASIC CLEANUP
# ==========================================
df = pd.read_csv(INPUT_CSV)
print("Original shape:", df.shape)

# Rows without a target value cannot be used for fitting or scoring.
df = df.dropna(subset=[TARGET_COL])
print("After dropping missing target:", df.shape)

# Build city/state from Merge_Key if needed. Only the columns that are
# actually missing are derived, so an existing 'city' or 'state' column is
# never overwritten.
missing_geo = [c for c in ("city", "state") if c not in df.columns]
if missing_geo:
    if "Merge_Key" not in df.columns:
        raise ValueError("Need 'state' or 'Merge_Key' to create geographic features.")

    # Split "<city>, <state>" on the first comma only.
    city_state = df["Merge_Key"].astype(str).str.split(",", n=1, expand=True)

    if "city" in missing_geo:
        df["city"] = city_state[0].str.strip()
    if "state" in missing_geo:
        # With expand=True the second column only exists when at least one
        # key contains a comma; fail with a clear message instead of KeyError.
        if 1 not in city_state.columns:
            raise ValueError(
                "Merge_Key values contain no ',' separator; cannot derive 'state'."
            )
        df["state"] = city_state[1].str.strip()

    derived = " and ".join(repr(c) for c in missing_geo)
    print(f"Derived {derived} from Merge_Key.")

# ==========================================
# 2. ADD REGION COLUMN (COARSE GEOGRAPHIC GROUPING)
# ==========================================
# Regions are listed once, then inverted into the state -> region lookup.
region_to_states = {
    "Northeast": ["ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA"],
    "Midwest": ["OH", "IN", "IL", "MI", "WI", "MN", "IA", "MO",
                "ND", "SD", "NE", "KS"],
    "South": ["DE", "MD", "DC", "VA", "WV", "NC", "SC", "GA", "FL",
              "KY", "TN", "MS", "AL", "OK", "TX", "AR", "LA"],
    "West": ["MT", "ID", "WY", "CO", "NM", "AZ", "UT", "NV", "WA", "OR",
             "CA", "AK", "HI"],
}
state_to_region = {
    abbr: region_name
    for region_name, abbrs in region_to_states.items()
    for abbr in abbrs
}

# Any state value that is not one of the keys above falls into "Other".
mapped_region = df["state"].map(state_to_region)
df["region"] = mapped_region.fillna("Other")

print("\nSample of city/state/region:")
print(df[["city", "state", "region"]].head())

# ==========================================
# 3. STRICT 2021-2022 BASELINE FEATURES
#    (NO MEDIAN HOUSING VALUES, NO 2023 COLUMNS)
# ==========================================
# Median_Housing_Value_2021 and Median_Housing_Value_2022 are deliberately
# left out of this list.
baseline_21_22_raw = [
    "Total_Population_2021", "Median_Household_Income_2021",
    "Owner_Occupied_Units_2021",
    "Bachelors_Degree_Count_2021", "Masters_Degree_Count_2021",
    "Unemployed_Count_2021", "Unemployment_Rate_2021",
    "Bachelors_Or_Higher_Rate_2021",

    "Total_Population_2022", "Median_Household_Income_2022",
    "Owner_Occupied_Units_2022",
    "Bachelors_Degree_Count_2022", "Masters_Degree_Count_2022",
    "Unemployed_Count_2022", "Unemployment_Rate_2022",
    "Bachelors_Or_Higher_Rate_2022",
]

# Report requested columns that the data does not contain, so a schema change
# cannot silently shrink the baseline feature set.
missing_baseline = [c for c in baseline_21_22_raw if c not in df.columns]
if missing_baseline:
    print("\nWARNING: baseline columns not found in data (skipped):", missing_baseline)

# Keep only existing columns and enforce "no 2023" + "no housing values".
# The name checks are a guard in case the raw list above is edited later.
baseline_21_22 = [
    c for c in baseline_21_22_raw
    if c in df.columns
    and "2023" not in c
    and "Median_Housing_Value" not in c
]

print("\nStrict baseline (2021–22 only, no medians, no 2023):")
print(baseline_21_22)

# ==========================================
# 4. IDENTIFY VIBRANCY FEATURES
#    (numeric, not baseline, not target, not 2023, not housing values, no events)
# ==========================================
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()

# Everything that must never be treated as a vibrancy feature:
# the baseline columns and the target ...
exclude_from_vibrancy = set(baseline_21_22) | {TARGET_COL}

# ... any numeric column that mentions 2023 or is a housing value ...
exclude_from_vibrancy.update(
    name for name in numeric_cols
    if "2023" in name or "Median_Housing_Value" in name
)

# ... and id/geo columns, in case any of them happens to be numeric.
id_and_geo = ["city", "state", "region", "Merge_Key"]
exclude_from_vibrancy.update(name for name in id_and_geo if name in numeric_cols)

# Remaining numeric columns, minus event-based features, in original column order.
candidate_vibrancy = [
    name for name in numeric_cols
    if name not in exclude_from_vibrancy and "_events" not in name
]

# 'fortune' and 'sbs' count as vibrancy whenever the data has them.
for extra in ("fortune", "sbs"):
    if extra in df.columns and extra not in candidate_vibrancy:
        candidate_vibrancy.append(extra)

vibrancy_cols = candidate_vibrancy

print("\nNumber of vibrancy features (no events, no 2023, no medians):", len(vibrancy_cols))
print("First 20 vibrancy cols:", vibrancy_cols[:20])

# Final sanity checks on the combined numeric feature set.
all_features_for_check = baseline_21_22 + vibrancy_cols
assert not any("2023" in c for c in all_features_for_check), "Found 2023 column in feature set!"
assert not any("Median_Housing_Value" in c for c in all_features_for_check), "Found housing value column in feature set!"

# Categorical geography columns (one-hot encoded later in the pipeline).
geo_cols = ["state", "region"]

# ==========================================
# 5. BUILD FEATURE MATRICES FOR MODELS
# ==========================================
# Model A: baseline + geo. Model B: baseline + vibrancy + geo.
# Columns absent from the frame are dropped from both lists.
feature_cols_A = [c for c in baseline_21_22 + geo_cols if c in df.columns]
feature_cols_B = [
    c for c in baseline_21_22 + vibrancy_cols + geo_cols if c in df.columns
]

print("\nFeature counts:")
print("Model A (no vibrancy):", len(feature_cols_A))
print("Model B (with vibrancy):", len(feature_cols_B))

X_A = df[feature_cols_A].copy()
X_B = df[feature_cols_B].copy()
y = df[TARGET_COL].values

# Split row positions once so both models see exactly the same train/test rows.
indices = np.arange(len(df))
train_idx, test_idx = train_test_split(
    indices,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

X_A_train = X_A.iloc[train_idx]
X_A_test = X_A.iloc[test_idx]
X_B_train = X_B.iloc[train_idx]
X_B_test = X_B.iloc[test_idx]
y_train = y[train_idx]
y_test = y[test_idx]

print("\nTrain/Test sizes:")
print("X_A:", X_A_train.shape, X_A_test.shape)
print("X_B:", X_B_train.shape, X_B_test.shape)

# ==========================================
# 6. ROBUST ONE-HOT ENCODER
# ==========================================
def make_ohe():
    """Return a dense-output OneHotEncoder that ignores unseen categories.

    The ``sparse_output`` keyword is tried first; if the constructor rejects
    it with ``TypeError`` the ``sparse`` keyword is used instead (presumably
    for scikit-learn releases that predate ``sparse_output``).
    """
    try:
        encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
    except TypeError:
        encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)
    return encoder

# ==========================================
# 7. BUILD XGBOOST PIPELINE
# ==========================================
def build_xgb_strict(baseline_cols, vibrancy_cols, geo_cols, use_vibrancy=True):
    """Build an unfitted preprocessing + XGBRegressor pipeline.

    Parameters
    ----------
    baseline_cols : list of str
        Numeric baseline columns (median-imputed).
    vibrancy_cols : list of str
        Numeric vibrancy columns (median-imputed); ignored when
        ``use_vibrancy`` is False.
    geo_cols : list of str
        Categorical columns, one-hot encoded via ``make_ohe()``.
    use_vibrancy : bool
        Whether the vibrancy group is included.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Steps ``"pre"`` (ColumnTransformer) and ``"model"`` (XGBRegressor).

    Notes
    -----
    Columns are filtered against the module-level ``df``; names that are not
    in ``df.columns`` are left out, and a group with no remaining columns
    gets no transformer. Transformer order is baseline, vibrancy, geo.
    """
    def _present(columns):
        # Keep only names that exist in the global frame.
        return [name for name in columns if name in df.columns]

    def _median_imputer():
        return Pipeline(steps=[
            ("imputer", SimpleImputer(strategy="median"))
        ])

    numeric_groups = [("baseline", _present(baseline_cols))]
    if use_vibrancy:
        numeric_groups.append(("vibrancy", _present(vibrancy_cols)))

    transformers = [
        (label, _median_imputer(), columns)
        for label, columns in numeric_groups
        if columns
    ]

    geo_present = _present(geo_cols)
    if geo_present:
        transformers.append(("geo", make_ohe(), geo_present))

    # Columns not listed in any transformer are dropped.
    preprocessor = ColumnTransformer(transformers=transformers, remainder="drop")

    xgb_params = dict(
        n_estimators=500,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        reg_lambda=1.0,
        objective="reg:squarederror",
        random_state=RANDOM_STATE,
        n_jobs=-1,
    )
    regressor = XGBRegressor(**xgb_params)

    return Pipeline(steps=[
        ("pre", preprocessor),
        ("model", regressor)
    ])

# Model A: baseline + geo only. Model B: baseline + vibrancy + geo.
pipe_A = build_xgb_strict(baseline_21_22, [], geo_cols, use_vibrancy=False)
pipe_B = build_xgb_strict(baseline_21_22, vibrancy_cols, geo_cols, use_vibrancy=True)

# ==========================================
# 8. HOLDOUT EVAL
# ==========================================
def eval_holdout(name, pipe, X_tr, X_te, y_tr, y_te):
    """Fit ``pipe`` on the training split and report train/test metrics.

    Parameters
    ----------
    name : str
        Label used in the printed output and in the returned record.
    pipe : estimator
        Object with ``fit`` and ``predict``; it is fitted in place.
    X_tr, X_te : feature matrices for the train and test splits.
    y_tr, y_te : target values for the train and test splits.

    Returns
    -------
    dict
        Keys ``model``, ``rmse_train``, ``rmse_test``, ``mae_train``,
        ``mae_test``, ``r2_train``, ``r2_test`` and ``fitted_pipeline``
        (the same ``pipe`` object, now fitted).
    """
    print(f"\n🧠 Training {name} (holdout) ...")
    pipe.fit(X_tr, y_tr)

    def _metrics(features, target):
        # RMSE is taken as the square root of the mean squared error.
        predicted = pipe.predict(features)
        return (
            mean_squared_error(target, predicted) ** 0.5,
            mean_absolute_error(target, predicted),
            r2_score(target, predicted),
        )

    rmse_tr, mae_tr, r2_tr = _metrics(X_tr, y_tr)
    rmse_te, mae_te, r2_te = _metrics(X_te, y_te)

    print(f"  Train -> RMSE: {rmse_tr:,.2f}, MAE: {mae_tr:,.2f}, R²: {r2_tr:.3f}")
    print(f"  Test  -> RMSE: {rmse_te:,.2f}, MAE: {mae_te:,.2f}, R²: {r2_te:.3f}")

    return {
        "model": name,
        "rmse_train": rmse_tr,
        "rmse_test": rmse_te,
        "mae_train": mae_tr,
        "mae_test": mae_te,
        "r2_train": r2_tr,
        "r2_test": r2_te,
        "fitted_pipeline": pipe,
    }

# ==========================================
# 9. K-FOLD CV EVAL
# ==========================================
def eval_cv(name, pipe, X, y, n_splits=N_SPLITS):
    """Score ``pipe`` with shuffled K-fold cross-validation.

    All three metrics (RMSE, MAE, R²) are computed from a single
    ``cross_validate`` pass, so each fold is fitted once rather than once
    per metric.

    Parameters
    ----------
    name : str
        Label used in the printed output and in the returned record.
    pipe : estimator
        Unfitted (or fitted) estimator to evaluate.
    X, y : full feature matrix and target.
    n_splits : int
        Number of folds; defaults to the module-level ``N_SPLITS``.

    Returns
    -------
    dict
        Keys ``model`` (``name`` + " (CV)"), and the mean/std of each metric:
        ``rmse_mean``, ``rmse_std``, ``mae_mean``, ``mae_std``, ``r2_mean``,
        ``r2_std``.
    """
    # Local import: the file's top-level imports do not include cross_validate.
    from sklearn.model_selection import cross_validate

    print(f"\n🔁 {name} – {n_splits}-fold cross-validation ...")
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_STATE)

    scores = cross_validate(
        pipe, X, y,
        cv=cv,
        scoring={
            "mse": "neg_mean_squared_error",
            "mae": "neg_mean_absolute_error",
            "r2": "r2",
        },
        n_jobs=-1,
    )

    # The error scorers are negated ("higher is better"), so flip the sign back.
    rmse_scores = np.sqrt(-scores["test_mse"])
    mae_scores = -scores["test_mae"]
    r2_scores = scores["test_r2"]

    print(f"  CV RMSE: mean={rmse_scores.mean():,.2f}, std={rmse_scores.std():.2f}")
    print(f"  CV MAE : mean={mae_scores.mean():,.2f}, std={mae_scores.std():.2f}")
    print(f"  CV R²  : mean={r2_scores.mean():.3f}, std={r2_scores.std():.3f}")

    return {
        "model": name + " (CV)",
        "rmse_mean": rmse_scores.mean(),
        "rmse_std": rmse_scores.std(),
        "mae_mean": mae_scores.mean(),
        "mae_std": mae_scores.std(),
        "r2_mean": r2_scores.mean(),
        "r2_std": r2_scores.std(),
    }

# ==========================================
# 10. RUN - HOLDOUT + CV
# ==========================================
results_holdout = []
results_cv = []

# Each label is defined once so the holdout and CV records cannot drift apart.
name_A = "XGB_A (21–22 ACS + geo, no vibrancy)"
name_B = "XGB_B (21–22 ACS + geo + vibrancy)"

res_A_hold = eval_holdout(name_A, pipe_A, X_A_train, X_A_test, y_train, y_test)
results_holdout.append(res_A_hold)

res_A_cv = eval_cv(name_A, pipe_A, X_A, y)
results_cv.append(res_A_cv)

res_B_hold = eval_holdout(name_B, pipe_B, X_B_train, X_B_test, y_train, y_test)
results_holdout.append(res_B_hold)

res_B_cv = eval_cv(name_B, pipe_B, X_B, y)
results_cv.append(res_B_cv)

# The holdout records also carry the fitted pipeline; only the scalar metric
# columns belong in the printed summary.
holdout_summary_cols = [
    "model",
    "rmse_train", "rmse_test",
    "mae_train", "mae_test",
    "r2_train", "r2_test",
]

print("\n\n================ HOLDOUT SUMMARY ================")
print(pd.DataFrame([
    {col: record[col] for col in holdout_summary_cols}
    for record in results_holdout
]))

print("\n================ CV SUMMARY ================")
print(pd.DataFrame(results_cv))

# ==========================================
# 11. FEATURE IMPORTANCES
# ==========================================
def extract_importances(pipe, baseline_cols, vibrancy_cols):
    """Label the model's feature importances with baseline/vibrancy names.

    Importances are read from ``pipe.named_steps["model"].feature_importances_``
    and matched by position: the first ``len(baseline_cols)`` entries are
    labelled as baseline columns and the following ``len(vibrancy_cols)``
    entries as vibrancy columns. This assumes the preprocessor emits the
    baseline block first and the vibrancy block second, one output column per
    input column. Any importances beyond those positions (e.g. one-hot geo
    columns) are not reported.

    Parameters
    ----------
    pipe : fitted pipeline exposing ``named_steps["model"]``.
    baseline_cols : list of str
    vibrancy_cols : list of str

    Returns
    -------
    pandas.DataFrame
        Columns ``feature``, ``group`` ("baseline" or "vibrancy") and
        ``importance``, sorted by importance descending. Empty (with the same
        columns) when no feature can be matched.
    """
    importances = pipe.named_steps["model"].feature_importances_

    labelled = [(col, "baseline") for col in baseline_cols]
    labelled += [(col, "vibrancy") for col in vibrancy_cols]

    # zip() stops at the shorter input, so names without a matching
    # importance entry are skipped.
    rows = [
        {"feature": col, "group": group, "importance": score}
        for (col, group), score in zip(labelled, importances)
    ]

    # Passing the column names explicitly keeps sort_values() valid even when
    # there are no rows.
    frame = pd.DataFrame(rows, columns=["feature", "group", "importance"])
    return frame.sort_values("importance", ascending=False)

# Refit both models on the full data set so the importances reflect all rows
# rather than a single train split.
pipe_A_full = build_xgb_strict(baseline_21_22, [], geo_cols, use_vibrancy=False)
pipe_A_full.fit(X_A, y)

pipe_B_full = build_xgb_strict(baseline_21_22, vibrancy_cols, geo_cols, use_vibrancy=True)
pipe_B_full.fit(X_B, y)

# Model A has no vibrancy block, hence the empty list.
imp_A = extract_importances(pipe_A_full, baseline_21_22, [])
imp_B = extract_importances(pipe_B_full, baseline_21_22, vibrancy_cols)

print("\n🔍 Top 10 feature importances – Model A (no vibrancy):")
print(imp_A.head(10))

print("\n🔍 Top 25 feature importances – Model B (with vibrancy):")
print(imp_B.head(25))

Slide deck charts

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# ======================================================
# 1. Hard-code your metrics from the four tables
#    (numbers are from your slides)
# ======================================================

models = ["Linear\nRegression", "Ridge", "Lasso", "Random\nForest", "Neural\nNetwork", "XGBoost"]


def _metric_by_model(values):
    """Pair each entry of ``values`` with the model label at the same position."""
    return dict(zip(models, values))


# Values below are listed in the same order as ``models``.

# --- WITHOUT VIBE (no geo vs geo) ---
r2_no_vibe_no_geo = _metric_by_model([0.40, 0.49, 0.03, 0.60, 0.70, 0.81])
rmse_no_vibe_no_geo = _metric_by_model([181948, 167559, 230898, 147905, 143621, 159899])

r2_no_vibe_geo = _metric_by_model([0.78, 0.69, 0.72, 0.81, 0.78, 0.81])
rmse_no_vibe_geo = _metric_by_model([161760, 189438, 178670, 152033, 124968, 146499])

# --- WITH VIBE (no geo vs geo) ---
r2_vibe_no_geo = _metric_by_model([0.12, 0.51, 0.54, 0.61, 0.67, 0.82])
rmse_vibe_no_geo = _metric_by_model([220837, 164122, 159371, 146109, 151792, 153570])

r2_vibe_geo = _metric_by_model([0.38, 0.70, 0.76, 0.77, 0.78, 0.91])
rmse_vibe_geo = _metric_by_model([176170, 172529, 156543, 164019, 123918, 100608])

# ======================================================
# 2. Helper - build "improvement" arrays for all models
# ======================================================
def compute_improvement(models, before_dict, after_dict, metric="r2"):
    """
    Return one improvement value per model, in the order given by ``models``.

    metric='r2'  -> improvement = after - before  (higher is better)
    any other    -> improvement = before - after  (treated as RMSE: a
                    reduction, so positive is better)

    ``before_dict`` and ``after_dict`` map model label -> metric value; a
    label missing from either raises KeyError. The result is a NumPy array.
    """
    pairs = [(before_dict[label], after_dict[label]) for label in models]

    if metric == "r2":
        gains = [after - before for before, after in pairs]
    else:  # rmse
        gains = [before - after for before, after in pairs]

    return np.array(gains)

# Improvement from adding geo features, per model, for each feature set.
# "r2" deltas are after - before; "rmse" deltas are before - after.
delta_r2_no_vibe = compute_improvement(
    models, before_dict=r2_no_vibe_no_geo, after_dict=r2_no_vibe_geo, metric="r2"
)
delta_rmse_no_vibe = compute_improvement(
    models, before_dict=rmse_no_vibe_no_geo, after_dict=rmse_no_vibe_geo, metric="rmse"
)

delta_r2_vibe = compute_improvement(
    models, before_dict=r2_vibe_no_geo, after_dict=r2_vibe_geo, metric="r2"
)
delta_rmse_vibe = compute_improvement(
    models, before_dict=rmse_vibe_no_geo, after_dict=rmse_vibe_geo, metric="rmse"
)

# ======================================================
# 3. Generic plotting helper (one chart)
# ======================================================
def _format_delta(value, is_money):
    """Format a bar value with an explicit sign, e.g. '+0.26' or '-$21,879'."""
    sign = "-" if value < 0 else "+"
    magnitude = abs(value)
    if is_money:
        return f"{sign}${magnitude:,.0f}"
    return f"{sign}{magnitude:.2f}"


def plot_improvement_bar(models, deltas, title, ylabel, is_money=False):
    """Draw one bar chart of per-model improvements and show it.

    Parameters
    ----------
    models : sequence of str
        Tick labels, one per bar.
    deltas : sequence of float
        Bar heights; may be negative (the model got worse).
    title, ylabel : str
        Chart title and y-axis label.
    is_money : bool
        If True, annotate bars as dollar amounts with thousands separators;
        otherwise with two decimals.

    Each bar is annotated with its signed value. Labels for positive bars
    sit just above the bar, labels for negative bars just below it.
    """
    deltas = np.asarray(deltas, dtype=float)
    x = np.arange(len(models))
    fig, ax = plt.subplots(figsize=(7, 4))

    bars = ax.bar(x, deltas)

    # Label offset scales with the largest magnitude so it stays positive
    # even when every delta is negative.
    pad = 0.02 * float(np.max(np.abs(deltas))) if deltas.size else 0.0

    # annotate bars
    for bar in bars:
        height = bar.get_height()
        below_axis = height < 0
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            height - pad if below_axis else height + pad,
            _format_delta(height, is_money),
            ha="center",
            va="top" if below_axis else "bottom",
            fontsize=9,
        )

    ax.set_xticks(x)
    ax.set_xticklabels(models, rotation=0)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.axhline(0, color="black", linewidth=0.8)
    ax.grid(axis="y", linestyle="--", alpha=0.3)

    plt.tight_layout()
    plt.show()

# ======================================================
# 4. Plot the four requested charts
# ======================================================
# (deltas, title, y-axis label, annotate as money?)
# The RMSE deltas are reductions (before - after), so larger positive bars
# are better; the axis label states that explicitly.
_geo_gain_charts = [
    # 1) No Vibe - R^2 gain from adding geo
    (delta_r2_no_vibe,
     "R² Improvement from Adding Geo (No Vibe)",
     "Δ R²",
     False),
    # 2) No Vibe - RMSE reduction from adding geo
    (delta_rmse_no_vibe,
     "RMSE Reduction from Adding Geo (No Vibe)",
     "RMSE reduction (↑ is better)",
     True),
    # 3) With Vibe - R^2 gain from adding geo
    (delta_r2_vibe,
     "R² Improvement from Adding Geo (With Vibe)",
     "Δ R²",
     False),
    # 4) With Vibe - RMSE reduction from adding geo
    (delta_rmse_vibe,
     "RMSE Reduction from Adding Geo (With Vibe)",
     "RMSE reduction (↑ is better)",
     True),
]

for _deltas, _title, _ylabel, _is_money in _geo_gain_charts:
    plot_improvement_bar(
        models,
        _deltas,
        title=_title,
        ylabel=_ylabel,
        is_money=_is_money,
    )


Feature importance XGBoost

In [None]:
import matplotlib.pyplot as plt
import numpy as np

from matplotlib.patches import Patch

# -----------------------------
# 1. Prepare top-N importance
# -----------------------------
top_n = 25
# imp_B is sorted by importance descending; reverse the slice so the most
# important feature ends up at the top of the horizontal bar chart.
topB = imp_B.head(top_n).iloc[::-1]

features = topB["feature"].values
importances = topB["importance"].values
groups = topB["group"].values  # "baseline" or "vibrancy"

y_pos = np.arange(len(features))

# -----------------------------
# 2. Build hatch patterns so we can see
#    baseline vs vibrancy without colors
# -----------------------------
# Baseline bars get slanted stripes; everything else stays solid.
hatches = ["///" if group == "baseline" else "" for group in groups]

# -----------------------------
# 3. Plot horizontal bar chart
# -----------------------------
fig, ax = plt.subplots(figsize=(8, 7))

bars = ax.barh(y_pos, importances)

for bar, hatch in zip(bars, hatches):
    bar.set_hatch(hatch)

# Legend proxy artists (the bars themselves carry no labels).
legend_handles = [
    Patch(hatch="///", label="Baseline (ACS)"),
    Patch(hatch="", label="Vibrancy (food, venues, etc.)"),
]

ax.legend(handles=legend_handles, loc="lower right", frameon=False)

ax.set_yticks(y_pos)
ax.set_yticklabels(features)
ax.set_xlabel("Feature Importance (XGBoost gain)")
# Use the number of bars actually drawn, which can be below top_n.
ax.set_title(f"Top {len(features)} Feature Importances – Model B (with Vibrancy)")

plt.tight_layout()
plt.show()


More slide deck visualizations

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd  # this cell builds DataFrames; import here so it runs on its own

# ============================================
# 1. DATA TABLES (CLEANED & READY)
# ============================================
# NOTE(review): the "no geo" figures here differ from the hard-coded
# dictionaries in the earlier slide-chart cell - confirm which set is current.

_TABLE_MODELS = ["Linear Regression", "Ridge", "Lasso", "Random Forest", "Neural Network", "XGBoost"]


def _metrics_table(r2, rmse):
    """Build a Model / R2 / RMSE table; values follow the order of _TABLE_MODELS."""
    return pd.DataFrame({
        "Model": list(_TABLE_MODELS),
        "R2": r2,
        "RMSE": rmse,
    })


# ---- No Vibe, No Geo ----
no_vibe_no_geo = _metrics_table(
    r2=[0.52, 0.50, 0.48, 0.68, 0.70, 0.68],
    rmse=[235814, 240616, 245513, 192728, 143621, 191111],
)

# ---- No Vibe, With Geo ----
no_vibe_with_geo = _metrics_table(
    r2=[0.78, 0.69, 0.72, 0.81, 0.78, 0.81],
    rmse=[161760, 189438, 178670, 152033, 124968, 146499],
)

# ---- With Vibe, No Geo ----
vibe_no_geo = _metrics_table(
    r2=[0.54, 0.64, 0.65, 0.64, 0.67, 0.82],
    rmse=[231058, 204410, 201592, 140049, 151792, 141009],
)

# ---- With Vibe, With Geo ----
vibe_with_geo = _metrics_table(
    r2=[0.38, 0.70, 0.76, 0.77, 0.78, 0.91],
    rmse=[176170, 172529, 156543, 164019, 123918, 100608],
)


# ============================================
# 2. UNIVERSAL BAR COMPARISON FUNCTION
# ============================================

def bar_compare(df_before, df_after, metric, title):
    """Show a grouped bar chart of ``metric`` before vs. after adding geo.

    ``df_before`` and ``df_after`` each need a "Model" column and a column
    named ``metric``. Bars are paired by row position and labelled with
    ``df_before["Model"]``, so both frames are expected to list the models
    in the same order.
    """
    labels = df_before["Model"]
    series_before = df_before[metric]
    series_after = df_after[metric]

    positions = np.arange(len(labels))
    width = 0.35
    half = width / 2

    plt.figure(figsize=(12, 6))
    # "Before" bars sit left of each tick, "After" bars right of it.
    for centers, heights, legend_label in (
        (positions - half, series_before, "Before Geo"),
        (positions + half, series_after, "After Geo"),
    ):
        plt.bar(centers, heights, width, label=legend_label, alpha=0.8)

    plt.xticks(positions, labels, rotation=45, ha="right")
    plt.ylabel(metric)
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()


# ============================================
# 3. GENERATE ALL 4 PLOTS
# ============================================
# (table without geo, table with geo, metric column, chart title)
_comparison_charts = [
    # ---- 1. No Vibe: R² ----
    (no_vibe_no_geo, no_vibe_with_geo, "R2",
     "R² Improvement from Adding Geographic Features (No Vibrancy)"),
    # ---- 2. No Vibe: RMSE ----
    (no_vibe_no_geo, no_vibe_with_geo, "RMSE",
     "RMSE Reduction from Adding Geographic Features (No Vibrancy)"),
    # ---- 3. With Vibe: R² ----
    (vibe_no_geo, vibe_with_geo, "R2",
     "R² Improvement from Adding Geographic Features (With Vibrancy)"),
    # ---- 4. With Vibe: RMSE ----
    (vibe_no_geo, vibe_with_geo, "RMSE",
     "RMSE Reduction from Adding Geographic Features (With Vibrancy)"),
]

for _before, _after, _metric, _title in _comparison_charts:
    bar_compare(_before, _after, metric=_metric, title=_title)
