In [4]:
import os
from getpass import getpass

# SECURITY: never store the Yelp API key in the notebook source; notebooks are
# routinely committed and shared, and anything written here leaks with them.
# The key is taken from the environment when already exported, otherwise it is
# prompted for without echo, so it never lands in the saved file or its output.
# NOTE(review): a literal key was previously committed on this line; treat it
# as compromised and revoke/rotate it in the Yelp developer console.
if not os.environ.get("YELP_API_KEY"):
    os.environ["YELP_API_KEY"] = getpass("Yelp API key: ").strip()

In [5]:
import os, time, pandas as pd, requests

# --- STEP 1: load geocoded hotels ---
hotels_path = "../data/hotels_nyc_geocoded.csv"
hotels = pd.read_csv(hotels_path)
print("Loaded", len(hotels), "hotels from:", hotels_path)

# Fail fast: the collection loop indexes these columns on every row, so a
# missing one would otherwise only surface as a KeyError part-way through,
# after API calls have already been spent.
missing_cols = {"hotel_name", "lat", "lon"} - set(hotels.columns)
if missing_cols:
    raise KeyError(f"{hotels_path} is missing required columns: {sorted(missing_cols)}")

# --- STEP 2: Yelp API ---
# The key is read from the environment only. Surrounding whitespace (e.g. a
# trailing newline from a copy/paste or an .env file) is stripped because it
# would otherwise be sent verbatim inside the Authorization header.
API_KEY = (os.getenv("YELP_API_KEY") or "").strip()
if not API_KEY:
    raise RuntimeError("❌ Yelp API key not found")
headers = {"Authorization": f"Bearer {API_KEY}"}
SEARCH_URL = "https://api.yelp.com/v3/businesses/search"

# --- STEP 3: simple radius helper (meters) ---
# Search radius in meters per borough, keyed by the normalized (case-folded,
# single-spaced) borough name. Built once instead of on every call.
_BOROUGH_RADIUS_M = {
    "manhattan": 1000,
    "brooklyn": 1400,
    "queens": 1800,
    "bronx": 1800,
    "staten island": 2200,
}
_DEFAULT_RADIUS_M = 1400


def borough_radius(borough: str) -> int:
    """Return the Yelp search radius in meters to use for a borough.

    The lookup ignores letter case and leading/trailing/repeated whitespace,
    so values such as " manhattan " resolve the same way as "Manhattan"
    instead of silently falling back to the default.

    Args:
        borough: Borough name. Any value is accepted; it is converted with
            ``str()`` first, so ``None`` or a NaN from a missing CSV cell
            simply matches nothing.

    Returns:
        The radius for the borough, or 1400 when the name is not recognized.
    """
    # split()/join collapses internal runs of whitespace and trims both ends.
    key = " ".join(str(borough).split()).casefold()
    return _BOROUGH_RADIUS_M.get(key, _DEFAULT_RADIUS_M)

# --- STEP 4: single-call fetch (no pagination) ---
def fetch_50(lat, lon, radius_m, sort_by="distance"):
    """Fetch up to 50 restaurants around a point with one Yelp search call.

    This is deliberately best-effort: any failure (network error, HTTP error
    status, unparseable body) is reported with ``print`` and yields an empty
    list so that the caller can move on to the next hotel.

    Args:
        lat: Latitude of the search center; converted with ``float()``.
        lon: Longitude of the search center; converted with ``float()``.
        radius_m: Search radius in meters; converted with ``int()``.
        sort_by: Value passed through as Yelp's ``sort_by`` parameter.

    Returns:
        The list under the ``"businesses"`` key of the JSON response, or
        ``[]`` when the request failed or the key is absent/empty.
    """
    params = {
        "latitude": float(lat),
        "longitude": float(lon),
        "radius": int(radius_m),
        "categories": "restaurants",
        "limit": 50,            # <= 50, no offset
        "sort_by": sort_by
    }
    try:
        r = requests.get(SEARCH_URL, headers=headers, params=params, timeout=20)
    except requests.RequestException as exc:
        # Timeouts / connection errors used to propagate and abort the whole
        # collection loop; treat them like any other per-hotel failure.
        print("Yelp request failed:", exc)
        return []

    if r.status_code >= 400:
        try:
            print("Yelp error:", r.status_code, r.json())
        except ValueError:
            # Error body was not JSON; show a short prefix of the raw text.
            print("Yelp error:", r.status_code, r.text[:200])
        return []               # skip this hotel, keep going

    try:
        payload = r.json()
    except ValueError:
        # A success status with a non-JSON body (e.g. a proxy HTML page).
        print("Yelp error: non-JSON response body:", r.text[:200])
        return []

    if not isinstance(payload, dict):
        return []
    return payload.get("businesses") or []

# --- STEP 5: collect rows ---
records = []
# Yelp business ids already emitted. A business returned for several hotels is
# recorded only once, under the first hotel that returned it.
seen_ids = set()

for _, hotel_row in hotels.iterrows():
    hotel_name = hotel_row["hotel_name"]
    lat = hotel_row["lat"]
    lon = hotel_row["lon"]

    # Rows without coordinates cannot be searched; no API call, no pause.
    if pd.isna(lat) or pd.isna(lon):
        print(f"⚠️ Skipping {hotel_name}: missing lat/lon")
        continue

    radius_m = borough_radius(hotel_row.get("borough", ""))
    businesses = fetch_50(lat, lon, radius_m=radius_m, sort_by="distance")
    print(f"✅ {hotel_name}: fetched {len(businesses)} (radius={radius_m} m)")

    for biz in businesses:
        biz_id = biz.get("id")
        if biz_id and biz_id not in seen_ids:
            seen_ids.add(biz_id)

            # Prefer each category's alias, falling back to its title.
            category_labels = []
            for category in biz.get("categories") or []:
                category_labels.append(category.get("alias") or category.get("title"))

            point = biz.get("coordinates") or {}
            row = {
                "hotel_name": hotel_name,
                "yelp_business_id": biz_id,
                "restaurant_name": biz.get("name"),
                "rating": biz.get("rating"),
                "price": biz.get("price"),
                "categories": category_labels,
                "restaurant_lat": point.get("latitude"),
                "restaurant_lon": point.get("longitude"),
                "distance_m": biz.get("distance"),
                "url": biz.get("url"),
            }
            records.append(row)

    # Short pause after each hotel's request before issuing the next one.
    time.sleep(0.25)

# --- STEP 6: save (always write headers even if empty) ---
COLS = [
    "hotel_name", "yelp_business_id", "restaurant_name", "rating", "price",
    "categories", "restaurant_lat", "restaurant_lon", "distance_m", "url",
]
# Passing columns= fixes the column set, so the CSV still gets its header row
# when no records were collected.
restaurants_df = pd.DataFrame(records, columns=COLS)

out_path = "../data/restaurants_near_hotels.csv"
restaurants_df.to_csv(out_path, index=False)
print(f"\n✅ Saved {len(restaurants_df)} rows to {out_path}")
print("Hotels total:", hotels["hotel_name"].nunique())
# Counts hotels that own at least one saved row. Because each business is kept
# only under the first hotel that returned it, a hotel whose nearby restaurants
# were all claimed by earlier hotels is not counted here.
print("Hotels with ≥1 restaurant:", restaurants_df["hotel_name"].nunique())

# Per-borough coverage: attach each row's hotel borough, then count distinct
# businesses per borough. Skipped when there is no borough column or no rows.
has_borough = "borough" in hotels.columns
if has_borough and not restaurants_df.empty:
    hotel_boroughs = hotels[["hotel_name", "borough"]]
    with_borough = restaurants_df.merge(hotel_boroughs, on="hotel_name", how="left")
    cov = with_borough.groupby("borough")["yelp_business_id"].nunique()
    print("\nUnique restaurants by borough:\n", cov)

Loaded 36 hotels from: ../data/hotels_nyc_geocoded.csv
✅ New York Marriott Marquis: fetched 50 (radius=1000 m)
✅ Courtyard New York Manhattan/Midtown East: fetched 50 (radius=1000 m)
✅ New York Marriott Downtown: fetched 50 (radius=1000 m)
✅ Moxy NYC Times Square: fetched 50 (radius=1000 m)
✅ Residence Inn New York Manhattan/Central Park: fetched 50 (radius=1000 m)
✅ Courtyard New York Manhattan/Times Square West: fetched 50 (radius=1000 m)
✅ Fairfield Inn & Suites New York Manhattan/Times Square South: fetched 50 (radius=1000 m)
✅ JW Marriott Essex House New York: fetched 50 (radius=1000 m)
✅ Renaissance New York Midtown Hotel: fetched 50 (radius=1000 m)
✅ Courtyard New York Manhattan/SoHo: fetched 50 (radius=1000 m)
✅ Moxy NYC East Village: fetched 50 (radius=1000 m)
✅ Residence Inn New York Manhattan/Times Square: fetched 50 (radius=1000 m)
✅ Fairfield Inn & Suites New York Midtown Manhattan/Penn Station: fetched 50 (radius=1000 m)
✅ SpringHill Suites New York Midtown Manhattan/Fift