In [None]:
import os
import time

import pandas as pd
import requests

In [None]:
# Load the ZIP-code table; the columns used in this notebook are
# "postal code", "latitude" and "longitude".
zipcode = pd.read_csv("USZipsWithLatLon_20231227.csv")

# ZIP codes to crawl (presumably the Chicago area, going by the `chi_zip`
# name -- confirm against the project notes).
postal_codes = [
    60602, 60611, 60610, 60605, 60616, 60607, 60614, 60622,
    60612, 60624, 60644, 60651, 60647, 60639, 60707, 60675,
    60613, 60618, 60641, 60640, 60625, 60630, 60660, 60659,
    60646, 60626, 60645, 60608, 60623, 60653, 60615, 60609,
    60632, 60637, 60621, 60636, 60629, 60638, 60649, 60619,
    60620, 60617, 60628, 60643, 60655, 60633, 60827,
]

# Boolean mask: True where the row's postal code (cast to int) is in the list.
is_selected_zip = zipcode["postal code"].astype(int).isin(postal_codes)
chi_zip = zipcode.loc[is_selected_zip]

In [None]:
# Yelp API credentials. The key is read from the YELP_API_KEY environment
# variable so the real secret never has to be saved inside the notebook;
# "****" is only the placeholder used when that variable is not set.
API_KEY = os.environ.get("YELP_API_KEY", "****")
headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {API_KEY}"
}
url = "https://api.yelp.com/v3/businesses/search"

# Further category aliases that are NOT part of CATEGORIES below:
# fashion
# hotelstravel
# popuprestaurants
# outlet_stores
# deptstores
# Comma-separated category aliases sent as the `categories` query parameter.
CATEGORIES = (
    "restaurants,nightlife,food,"
    "arts,shoppingcenters,"
    "publicmarkets"
)

RADIUS     = 850   # value of the `radius` query parameter (presumably metres -- confirm in the Yelp docs)
LIMIT      = 50    # page size requested per call
MAX_RESULTS = 240  # upper bound on results collected per query point
MAX_OFFSET  = MAX_RESULTS - LIMIT  # largest offset at which a full LIMIT-sized page still fits


In [None]:
# Crawl the business-search endpoint around the coordinates of every selected
# ZIP code. The raw business dicts of all pages are collected in `all_rows`,
# each tagged with the ZIP / coordinates of the query that returned it.
all_rows = []

# Iterate only the three columns that are needed, as plain tuples.
# `iterrows()` builds one Series per row and does not preserve dtypes, so an
# integer ZIP could be turned into a float there.
zip_points = chi_zip[["postal code", "latitude", "longitude"]].itertuples(
    index=False, name=None
)

for zc, lat, lng in zip_points:
    print(f"ZIP {zc}  (lat={lat}, lng={lng})")

    # Page through the results until MAX_RESULTS is reached. The last page is
    # shortened so that offset + limit never exceeds MAX_RESULTS; stepping only
    # up to MAX_OFFSET in LIMIT-sized steps stopped at offset 150 and therefore
    # never requested results 200..MAX_RESULTS-1.
    for offset in range(0, MAX_RESULTS, LIMIT):
        page_size = min(LIMIT, MAX_RESULTS - offset)
        params = {
            "latitude":  lat,
            "longitude": lng,
            "radius":    RADIUS,
            "categories": CATEGORIES,
            "sort_by": "review_count",
            "limit":     page_size,
            "offset":    offset
        }
        # Without a timeout a stalled connection would block the crawl forever.
        r = requests.get(url, headers=headers, params=params, timeout=30)
        # Any HTTP error status aborts the crawl with an exception.
        r.raise_for_status()
        js = r.json()
        biz = js.get("businesses", [])

        # An empty page means there is nothing more for this location.
        if not biz:
            break

        # Remember which query produced each business.
        for b in biz:
            b["query_zip"]       = zc
            b["query_latitude"]  = lat
            b["query_longitude"] = lng

        all_rows.extend(biz)

        # Short pause between consecutive requests.
        time.sleep(0.4)
    

In [None]:
# Flatten the nested business dicts into columns, then keep one row per
# business "id" (the first occurrence wins) and renumber the index.
businesses_flat = pd.json_normalize(all_rows)
result_df = businesses_flat.drop_duplicates(subset="id").reset_index(drop=True)

# Report the number of unique businesses, preview the table, and save it.
n_businesses = len(result_df)
print(f"Total of {n_businesses} businesses")
print(result_df.head())
result_df.to_csv("result_df_allzip.csv", index=False)