# CommuteSync AI — Demo Visualization Notebook

This notebook walks through all 4 AI models with rich visualizations for the ideathon demo.

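**Run `python run_all.py` first to generate outputs: the cells below load pre-generated files from `../outputs/` and will either fail or silently show nothing if those files are missing.**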

In [None]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns

# Put the parent of the working directory at the front of sys.path so the
# project packages imported further down (models.*, utils.*) can be resolved.
ROOT = os.path.abspath(os.pardir)
sys.path.insert(0, ROOT)

# Matplotlib defaults applied to every figure drawn in this notebook.
plt.rcParams['figure.dpi'] = 120
plt.rcParams['font.size'] = 11
print('✅ Imports OK')

## 0. Dataset Overview

In [None]:
# Read the commute dataset (path is relative to the notebook's working
# directory), report its dimensions, then preview the first rows.
df = pd.read_csv(
    '../data/dummy_commute_data.csv'
)
print(f'Shape: {df.shape}')
df.head()

In [None]:
# 2x3 grid of plots summarising the main columns of `df`.
fig, axes = plt.subplots(2, 3, figsize=(16, 9))
fig.suptitle('CommuteSync Dataset — Feature Distributions', fontsize=14, fontweight='bold')

# Row 0: commute time histogram, home-location scatter, overlap-score histogram.
axes[0,0].hist(df['commute_time_minutes'], bins=50, color='steelblue', edgecolor='white')
axes[0,0].set_title('Commute Times'); axes[0,0].set_xlabel('Minutes since midnight')

# Tiny, mostly transparent markers keep dense areas readable.
axes[0,1].scatter(df['home_lon'], df['home_lat'], s=1, alpha=0.2, c='royalblue')
axes[0,1].set_title('Home Locations (Delhi)'); axes[0,1].set_xlabel('Longitude'); axes[0,1].set_ylabel('Latitude')

axes[0,2].hist(df['overlap_score'], bins=40, color='seagreen', edgecolor='white')
axes[0,2].set_title('Overlap Score Distribution')

# Row 1: home→office distance, acceptance counts, response lag.
axes[1,0].hist(df['dist_home_office_km'], bins=50, color='coral', edgecolor='white')
axes[1,0].set_title('Home→Office Distance (km)')

# value_counts() orders rows by frequency, not by class value, so the bar order
# would flip whenever acceptances outnumber declines. Sorting by index pins the
# order so the fixed colors and tick labels below always match their bars.
# Assumes `accepted` is coded 0/1 (or False/True) with 0 = declined — TODO confirm.
accepted_counts = df['accepted'].value_counts().sort_index()
accepted_counts.plot(kind='bar', ax=axes[1,1], color=['salmon', 'seagreen'], edgecolor='white')
axes[1,1].set_title('Acceptance Distribution'); axes[1,1].set_xticklabels(['Declined','Accepted'], rotation=0)

axes[1,2].hist(df['response_time_lag_min'], bins=40, color='orchid', edgecolor='white')
axes[1,2].set_title('Response Time Lag (min)')

plt.tight_layout()
plt.show()

## 1. Model 1 — Commute Overlap Clustering

In [None]:
# Model 1 helpers; resolvable because the setup cell put the project root on sys.path.
from models.commute_overlap_model import (
    load_data,
    build_feature_matrix,
    run_dbscan,
    evaluate_clustering,
    extract_matched_pairs,
)

# Load data -> build features -> DBSCAN labels -> clustering metrics.
# NOTE(review): sample_n=500 presumably caps the number of rows loaded — confirm in load_data.
df_sample = load_data(sample_n=500)
X, scaler = build_feature_matrix(df_sample)
labels = run_dbscan(X, eps=0.4, min_samples=5)
metrics = evaluate_clustering(X, labels)

# One indented line per metric returned by evaluate_clustering.
print('Cluster Metrics:')
for metric_name, metric_value in metrics.items():
    print(f'  {metric_name}: {metric_value}')

In [None]:
# Show the pre-rendered cluster map PNG as a borderless image.
img = mpimg.imread('../outputs/cluster_visuals/cluster_map.png')
img_ax = plt.figure(figsize=(13, 9)).gca()
img_ax.imshow(img)
img_ax.axis('off')  # the PNG is already a finished chart; hide the outer axes
img_ax.set_title('Model 1 — Commute Clusters in Delhi', fontsize=14)
plt.show()

In [None]:
# Show the pre-rendered matched-pairs PNG as a borderless image.
img2 = mpimg.imread('../outputs/cluster_visuals/matched_pairs.png')
img_ax = plt.figure(figsize=(13, 9)).gca()
img_ax.imshow(img2)
img_ax.axis('off')  # the PNG is already a finished chart; hide the outer axes
img_ax.set_title('Model 1 — Matched Carpool Pairs', fontsize=14)
plt.show()

In [None]:
# Load the matched-pairs table, report how many rows it has, and preview
# the first ten.
pairs = pd.read_csv(
    '../outputs/cluster_visuals/matched_pairs.csv'
)
print(f'Total matched pairs: {len(pairs)}')
pairs.head(10)

## 2. Model 2 — Optimal Meeting Points

In [None]:
# Display the meeting-point map for groups 0, 1 and 2. A group whose PNG is
# not on disk is skipped without any message.
for g in range(3):
    path = f'../outputs/meeting_point_maps/meeting_point_group_{g}.png'
    if not os.path.exists(path):
        continue
    img = mpimg.imread(path)
    img_ax = plt.figure(figsize=(9, 7)).gca()
    img_ax.imshow(img)
    img_ax.axis('off')
    img_ax.set_title(f'Model 2 — Meeting Point Group {g}', fontsize=13)
    plt.show()

## 3. Model 3 — Acceptance Prediction

In [None]:
# Load the Model 3 comparison table and render it as the cell output.
report = pd.read_csv(
    '../outputs/model_reports/acceptance_model_comparison.csv'
)
report

In [None]:
# Render each Model 3 report image that exists on disk; missing files are
# skipped without any message.
for fname in [
    'acceptance_roc_curves.png',
    'acceptance_feature_importance.png',
    'acceptance_model_comparison_chart.png',
]:
    path = f'../outputs/model_reports/{fname}'
    if not os.path.exists(path):
        continue
    img = mpimg.imread(path)
    img_ax = plt.figure(figsize=(11, 7)).gca()
    img_ax.imshow(img)
    img_ax.axis('off')
    plt.show()

## 4. Model 4 — Notification Timing

In [None]:
# Load the Model 4 timing report and render it as the cell output.
report4 = pd.read_csv(
    '../outputs/model_reports/notification_timing_report.csv'
)
report4

In [None]:
# Render each Model 4 report image that exists on disk; missing files are
# skipped without any message.
for fname in [
    'notification_timing_residuals.png',
    'notification_timing_predictions.png',
    'notification_feature_importance.png',
]:
    path = f'../outputs/model_reports/{fname}'
    if not os.path.exists(path):
        continue
    img = mpimg.imread(path)
    img_ax = plt.figure(figsize=(13, 6)).gca()
    img_ax.imshow(img)
    img_ax.axis('off')
    plt.show()

## 5. Live Inference Demo

Use the saved best models to make real-time predictions.

In [None]:
import joblib
import pandas as pd

# Load best acceptance model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts produced by your own pipeline run.
clf = joblib.load('../outputs/model_reports/acceptance_model_best.joblib')

# A single hypothetical request, wrapped in a one-row frame for the model.
# NOTE(review): column names presumably must match the training features —
# confirm against the training code.
new_user_features = {
    'overlap_score': 0.85,
    'time_diff_minutes': 5,
    'dist_home_office_km': 8.0,
    'past_acceptance_rate': 0.70,
    'commute_duration_min': 35,
    'day_of_week': 1,
}
new_user = pd.DataFrame([new_user_features])

# Row 0, column 1 of predict_proba is reported as the acceptance probability;
# predict() supplies the hard label that is compared against 1 below.
proba = clf.predict_proba(new_user)
prob = proba[0, 1]
pred = clf.predict(new_user)[0]

verdict = "✅ Accept" if pred == 1 else "❌ Decline"
print(f'Acceptance probability: {prob:.2%}')
print(f'Prediction: {verdict}')

In [None]:
# Load best notification model
reg = joblib.load('../outputs/model_reports/notification_model_best.joblib')

from utils.geo_utils import minutes_to_time

new_user_reg = pd.DataFrame([{
    'commute_time_minutes': 510,   # 8:30
    'day_of_week': 0,
    'past_acceptance_rate': 0.65,
    'response_time_lag_min': 7.5,
    'commute_duration_min': 40,
    'overlap_score': 0.72
}])

opt_min = reg.predict(new_user_reg)[0]
print(f'User departs at 08:30')
print(f'Optimal notification: {minutes_to_time(int(round(opt_min)))}')