In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import NearestNeighbors, KernelDensity


# Load cleaned dataset
# geopandas hands CSV parsing to GDAL/OGR, whose CSV driver by default returns
# every field as text and does not build geometry from plain x/y columns.
# Cast the coordinate columns to float and construct point geometry from them
# so that both gdf.plot(...) and the numeric analysis below operate on real
# coordinates.
# NOTE(review): assumes the file has 'x' and 'y' columns holding the projected
# coordinates (the sections below read exactly these columns) -- confirm, and
# set an explicit CRS here if the projection is known.
raw = gpd.read_file('../data/chipotle_clean.csv')
raw[['x', 'y']] = raw[['x', 'y']].astype(float)
gdf = gpd.GeoDataFrame(
    raw,
    geometry=gpd.points_from_xy(raw['x'], raw['y']),
    crs=getattr(raw, 'crs', None),  # keep a CRS if the reader supplied one
)
print(f"Loaded {len(gdf)} locations.")


# Static scatter map
# Draw every location as a small dark-red marker on a single axes, write the
# figure to disk, then display it.
scatter_path = '../outputs/figures/all_chipotles_projected.png'
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)
gdf.plot(ax=ax, color='darkred', markersize=6)
ax.set(title='Chipotle Locations (Projected)')
fig.savefig(scatter_path, bbox_inches='tight', dpi=150)
plt.show()


# k-distance plot for DBSCAN eps selection
# Plots, in ascending order, the distance from every location to its k-th
# nearest *other* location.
k_neighbors = 4
coords = gdf[['x','y']].values
nbrs = NearestNeighbors(n_neighbors=k_neighbors).fit(coords)
# Query with no argument: scikit-learn then returns neighbours of the fitted
# points and does not count a point as its own neighbour. Passing `coords`
# back in would list each point first in its own result at distance 0, so the
# last column would be the (k-1)-th neighbour instead of the k-th that the
# axis label and title announce.
distances, _ = nbrs.kneighbors()
k_dist = np.sort(distances[:, -1])
plt.figure(figsize=(8, 4))
plt.plot(k_dist)
plt.ylabel(f'{k_neighbors}-distance (m)')
plt.xlabel('Sorted Points')
plt.title(f'K-distance plot (k={k_neighbors})')
plt.savefig(f'../outputs/figures/k{k_neighbors}_distance.png', dpi=150, bbox_inches='tight')
plt.show()


# KDE heatmap
# Evaluation lattice: 200 x 200 points spanning the bounding box of the data.
xs = gdf['x'].values
ys = gdf['y'].values
xmin, xmax = xs.min(), xs.max()
ymin, ymax = ys.min(), ys.max()
xx, yy = np.meshgrid(
    np.linspace(xmin, xmax, 200),
    np.linspace(ymin, ymax, 200),
)
grid = np.column_stack((xx.ravel(), yy.ravel()))

# Fit on the observed points, then evaluate on the lattice. score_samples
# yields log-density, so exponentiate before reshaping back to the grid.
# NOTE(review): bandwidth is in the units of x/y; the title below implies
# metres (50000 -> 50 km) -- confirm against the data's projection.
kde = KernelDensity(bandwidth=50000).fit(np.column_stack((xs, ys)))
log_density = kde.score_samples(grid)
Z = np.exp(log_density).reshape(xx.shape)

fig, ax = plt.subplots(figsize=(10, 6))
# Rows of Z run from ymin upward (meshgrid order) while imshow draws row 0 at
# the top by default, so the rows are reversed before display.
ax.imshow(Z[::-1], cmap='Reds', extent=(xmin, xmax, ymin, ymax))
gdf.plot(ax=ax, color='black', markersize=2)
ax.set_title('Chipotle Density (KDE, bandwidth=50km)')
fig.savefig('../outputs/figures/kde_heatmap.png', bbox_inches='tight', dpi=150)
plt.show()