# Displacement Risk Model Evaluation

This notebook trains and evaluates a displacement risk prediction model using:
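- **Study Area**: Downtown Atlanta (pilot subset of 8 census tracts)
- **Validation**: Leave-One-Out Cross-Validation, holding out one tract per fold (labelled "Spatial Leave-One-Out" in the outputs; note that no spatial buffer is applied around the held-out tract)
- **Model**: Random Forest Classifier
- **Metrics**: PR-AUC, ROC-AUC, Recall, Precision, Accuracy, Confusion Matrix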


In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict, LeaveOneOut
from sklearn.metrics import (
    confusion_matrix, 
    precision_recall_curve,
    roc_curve,
    auc
)
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os

# Seed NumPy's legacy global RNG once, up front, so anything that later draws
# from np.random repeats across Restart & Run All. The classifier defined
# further down passes its own random_state and does not rely on this seed.
SEED = 42
np.random.seed(SEED)
print("Libraries loaded successfully")


In [None]:
# Pilot dataset for the Downtown Atlanta subset: eight hand-entered rows.
# Every column list is positional, so element i of each list describes the
# tract named at element i of `tract_name`.
pilot_data = dict(
    tract_name=[
        'Downtown', 'Midtown', 'Old Fourth Ward', 'West End',
        'Vine City', 'Westside BeltLine', 'Pittsburgh', 'Buckhead',
    ],
    transit_access_score=[0.92, 0.88, 0.75, 0.58, 0.52, 0.65, 0.48, 0.95],
    median_income_pct=[1.2, 1.1, 0.85, 0.55, 0.45, 0.60, 0.40, 1.5],
    housing_age=[45, 50, 80, 70, 85, 75, 90, 30],
    beltline_dist_km=[0.5, 0.8, 0.2, 1.5, 1.2, 0.1, 2.0, 3.5],
    rental_pct=[0.65, 0.60, 0.72, 0.75, 0.80, 0.70, 0.82, 0.45],
    investor_activity=[0.35, 0.40, 0.55, 0.45, 0.50, 0.60, 0.38, 0.25],
    # Binary target (UDP typology); 1 marks a tract counted as at-risk below.
    displacement_risk=[0, 0, 1, 1, 1, 1, 1, 0],
    lat=[33.755, 33.776, 33.771, 33.739, 33.755, 33.762, 33.722, 33.848],
    lon=[-84.388, -84.388, -84.362, -84.410, -84.410, -84.428, -84.410, -84.365],
)
df = pd.DataFrame(pilot_data)

# Quick class-balance summary before modelling.
risk_flags = df['displacement_risk']
at_risk_count = risk_flags.sum()
at_risk_pct = risk_flags.mean() * 100
print(f"Pilot dataset: {len(df)} census tracts (Downtown Atlanta subset)")
print(f"At-risk tracts: {at_risk_count} ({at_risk_pct:.0f}%)")
df


In [None]:
# Prepare features and train model with Spatial CV
feature_cols = ['transit_access_score', 'median_income_pct', 'housing_age',
                'beltline_dist_km', 'rental_pct', 'investor_activity']
X = df[feature_cols].values
y = df['displacement_risk'].values

model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42, class_weight='balanced')
# One fold per tract: each tract is predicted by a forest fitted on the others.
# NOTE(review): LeaveOneOut does not look at lat/lon, so no spatial buffer is
# applied and neighbouring tracts stay in the training fold; confirm that
# describing this as "spatial" CV is acceptable for the report.
cv = LeaveOneOut()  # Spatial Leave-One-Out for pilot

# Run cross-validation once and derive both outputs from the same out-of-fold
# probabilities. Fitting a second time only to get hard labels doubles the
# work and lets labels and probabilities drift apart if the estimator is ever
# made non-deterministic.
# cross_val_predict fits clones, so `model` itself remains unfitted afterwards.
cv_proba = cross_val_predict(model, X, y, cv=cv, method='predict_proba')
y_prob = cv_proba[:, 1]  # columns follow the sorted class labels (0, 1)
# Same rule the forest's predict() applies: highest-probability class, with
# ties resolved to the first (lowest) label.
y_pred = np.unique(y)[np.argmax(cv_proba, axis=1)]
print("Model trained with Spatial Leave-One-Out CV")


In [None]:
# Calculate all metrics
# Pin the label order so the matrix is always 2x2 laid out as
# [[tn, fp], [fn, tp]]; without `labels` the shape follows whichever classes
# happen to appear, and the four-way unpack below fails on a 1x1 matrix.
cm = confusion_matrix(y, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

# Guard the ratios whose denominator can be zero (no actual / no predicted
# positives); fall back to a float so downstream formatting sees one type.
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
accuracy = (tp + tn) / (tp + tn + fp + fn)

# Threshold-free summaries computed from the out-of-fold probabilities.
# NOTE(review): auc() over the PR curve uses trapezoidal (linear)
# interpolation, which scikit-learn's documentation describes as potentially
# optimistic; consider also reporting average_precision_score.
precision_curve, recall_curve, _ = precision_recall_curve(y, y_prob)
pr_auc = auc(recall_curve, precision_curve)
fpr, tpr, _ = roc_curve(y, y_prob)
roc_auc = auc(fpr, tpr)

print("=== Model Performance (Downtown Atlanta Pilot, n=8 tracts) ===")
print(f"Recall (At-Risk): {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Accuracy: {accuracy:.2f}")
print(f"PR-AUC: {pr_auc:.2f}")
print(f"ROC-AUC: {roc_auc:.2f}")
print(f"\nConfusion Matrix:\n{cm}")


In [None]:
# Save metrics and generate visualizations
# Create both output locations up front; the figure cell writes into them too.
os.makedirs('../outputs', exist_ok=True)
os.makedirs('../public/report/figures', exist_ok=True)

# Round to two decimals, then cast to built-in float so json.dump never
# depends on how a NumPy scalar type happens to serialise.
rounded_scores = {
    name: float(round(value, 2))
    for name, value in [
        ('recall', recall),
        ('precision', precision),
        ('accuracy', accuracy),
        ('pr_auc', pr_auc),
        ('roc_auc', roc_auc),
    ]
}

metrics = {
    'scope': 'Downtown Atlanta pilot (8 census tracts)',
    'model': 'Random Forest Classifier',
    'validation': 'Spatial Leave-One-Out CV',
    # Read the hyperparameters from the estimator that was evaluated, so the
    # saved metadata cannot drift from the model definition.
    'n_estimators': model.n_estimators, 'max_depth': model.max_depth,
    'metrics': rounded_scores,
    'confusion_matrix': {'tn': int(tn), 'fp': int(fp), 'fn': int(fn), 'tp': int(tp)}
}
with open('../outputs/model_metrics.json', 'w', encoding='utf-8') as f:
    json.dump(metrics, f, indent=2)
print("Saved: outputs/model_metrics.json")


In [None]:
# Confusion Matrix Visualization
# Rows are actual classes and columns are predicted classes, so both axes
# share the same tick labels.
class_labels = ['Stable/Exclusive', 'At-Risk/Gentrifying']
title_text = (
    'Displacement Risk Model - Confusion Matrix\n'
    '(Downtown Atlanta Pilot, n=8 tracts)\n'
    f'Recall: {recall:.2f} | PR-AUC: {pr_auc:.2f}'
)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_xlabel('Predicted', fontsize=12)
ax.set_ylabel('Actual', fontsize=12)
ax.set_title(title_text, fontsize=12)
fig.tight_layout()

# Write the same figure to the report folder and to the outputs folder; both
# directories are created by the metrics-saving cell.
for png_path in ('../public/report/figures/confusion-matrix.png',
                 '../outputs/confusion_matrix.png'):
    fig.savefig(png_path, dpi=150, bbox_inches='tight')
print("Saved: confusion-matrix.png")
plt.show()
