In [None]:
import pandas as pd
import json
from pathlib import Path

# 1. Load the raw restaurant JSON.
# Read the whole file as UTF-8 text, then parse it in one go.
with open("../data/raw/restaurants_nyc.json", mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Hand the parsed payload to pandas unchanged.
# NOTE(review): presumably a list of per-restaurant dicts — confirm.
df = pd.DataFrame(data=data)

# 2. + 3. Narrow the frame to the essential fields, then discard every row
# that has no name or is missing either coordinate.
cols = ["id", "name", "lat", "lon", "cuisine"]
df = df[cols].dropna(subset=["name", "lat", "lon"])

# 4. normalize cuisine field
def normalize_cuisine(x):
    """Return the cuisine value *x* as a list of clean, lowercase tags.

    Strings are split on both ``;`` and ``,``; each piece is stripped of
    surrounding whitespace and lowercased, and empty pieces are dropped.
    Lists and tuples are flattened by normalizing each element in turn.
    Missing values (``None``, ``NaN``, ``pd.NA``) and other falsy values
    (such as the empty string) yield an empty list. Any other value is
    converted with ``str()`` before splitting.
    """
    # Sequence input: normalize every element and concatenate the results.
    # This must come first because pd.isna() on a list returns an array,
    # which cannot be used directly as an ``if`` condition.
    if isinstance(x, (list, tuple)):
        tags = []
        for item in x:
            tags.extend(normalize_cuisine(item))
        return tags

    # Test for missingness before truthiness: ``not pd.NA`` raises TypeError.
    if pd.isna(x) or not x:
        return []

    pieces = str(x).replace(",", ";").split(";")
    return [c.strip().lower() for c in pieces if c.strip()]

# Derive a list-valued "cuisines" column from the raw "cuisine" values;
# the original "cuisine" column is left in place next to it.
df["cuisines"] = df["cuisine"].apply(normalize_cuisine)

# 5. save cleaned version
out_path = Path("../data/processed/restaurants_nyc_clean.csv")
# to_csv does not create missing directories, so make sure the target
# folder exists first (a no-op when it is already there).
out_path.parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): the list values in "cuisines" will presumably be written as
# their Python string form — confirm downstream readers expect that.
df.to_csv(out_path, index=False)
print("Saved cleaned dataset:", out_path)
