In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point


# Read the raw store listing from disk and report its (rows, columns) size
# so the effect of the later cleaning step can be compared against it.
raw_csv = '../data/chipotle_stores.csv'
df = pd.read_csv(raw_csv)
print(f"Original shape: {df.shape}")


# Cleaning, one step per line:
#   1. discard rows where either coordinate is missing,
#   2. discard rows that are exact duplicates of an earlier row,
#   3. renumber the index from 0 so it is contiguous again.
lat_col = 'latitude'
lon_col = 'longitude'
df = df.dropna(subset=[lat_col, lon_col])
df = df.drop_duplicates()
df = df.reset_index(drop=True)
print(f"After cleaning: {df.shape}")


# Turn the coordinate columns into point geometries (x = longitude,
# y = latitude) and attach them to the table, tagged as EPSG:4326.
store_points = gpd.points_from_xy(x=df[lon_col], y=df[lat_col])
gdf = gpd.GeoDataFrame(df, geometry=store_points, crs='EPSG:4326')


# Reproject to a metric CRS (US National Atlas Equal Area, EPSG:2163), then
# expose the projected point coordinates as plain 'x' / 'y' columns.
# NOTE(review): EPSG:2163 appears to be deprecated in the EPSG registry in
# favour of EPSG:9311 — confirm before switching, as outputs would change.
gdf = gdf.to_crs(epsg=2163)
projected_points = gdf.geometry
gdf['x'], gdf['y'] = projected_points.x, projected_points.y


# Write the cleaned, projected table to disk without the index column,
# then confirm where it was written.
clean_csv = '../data/chipotle_clean.csv'
gdf.to_csv(clean_csv, index=False)
print('Saved cleaned data to ../data/chipotle_clean.csv')


# Preview the first five rows of the cleaned table (as the final expression,
# this is what a notebook cell would render as its output).
gdf.head(n=5)