In [1]:
from calendar import month

import requests, json
url = "https://gbfs.lyft.com/gbfs/1.1/bay/gbfs.json"
root = requests.get(url, timeout=15).json()
feeds = {f["name"]: f["url"] for f in root["data"]["en"]["feeds"]}
print(json.dumps(feeds, indent=2))


{
  "gbfs": "https://gbfs.lyft.com/gbfs/1.1/bay/gbfs.json",
  "ebikes_at_stations": "https://gbfs.lyft.com/gbfs/1.1/bay/en/ebikes_at_stations.json",
  "system_information": "https://gbfs.lyft.com/gbfs/1.1/bay/en/system_information.json",
  "station_information": "https://gbfs.lyft.com/gbfs/1.1/bay/en/station_information.json",
  "station_status": "https://gbfs.lyft.com/gbfs/1.1/bay/en/station_status.json",
  "free_bike_status": "https://gbfs.lyft.com/gbfs/1.1/bay/en/free_bike_status.json",
  "system_hours": "https://gbfs.lyft.com/gbfs/1.1/bay/en/system_hours.json",
  "system_calendar": "https://gbfs.lyft.com/gbfs/1.1/bay/en/system_calendar.json",
  "system_regions": "https://gbfs.lyft.com/gbfs/1.1/bay/en/system_regions.json",
  "system_pricing_plans": "https://gbfs.lyft.com/gbfs/1.1/bay/en/system_pricing_plans.json",
  "system_alerts": "https://gbfs.lyft.com/gbfs/1.1/bay/en/system_alerts.json",
  "gbfs_versions": "https://gbfs.lyft.com/gbfs/1.1/bay/en/gbfs_versions.json"
}


# Static Data

In [2]:
import requests, json, os, datetime,glob
os.makedirs("data/raw", exist_ok=True)

url = "https://gbfs.lyft.com/gbfs/1.1/bay/en/station_information.json"
resp = requests.get(url, timeout=15)
resp.raise_for_status()
payload = resp.json()

payload = resp.json()
payload

{'data': {'stations': [{'rental_methods': ['KEY', 'CREDITCARD'],
    'lat': 37.329732,
    'lon': -121.901782,
    'eightd_station_services': [],
    'electric_bike_surcharge_waiver': False,
    'external_id': 'dc2f7685-c3e3-4536-b78b-740479cbb207',
    'region_id': '5',
    'eightd_has_key_dispenser': False,
    'capacity': 35,
    'short_name': 'SJ-M7-1',
    'station_type': 'classic',
    'name': 'San Jose Diridon Station',
    'station_id': 'dc2f7685-c3e3-4536-b78b-740479cbb207',
    'has_kiosk': True,
    'rental_uris': {'ios': 'https://sfo.lft.to/lastmile_qr_scan',
     'android': 'https://sfo.lft.to/lastmile_qr_scan'}},
   {'rental_methods': ['KEY', 'CREDITCARD'],
    'lat': 37.339301,
    'lon': -121.889937,
    'eightd_station_services': [],
    'electric_bike_surcharge_waiver': False,
    'external_id': 'bae9be55-04d4-4641-9781-3d1c4b6950f1',
    'region_id': '5',
    'eightd_has_key_dispenser': False,
    'capacity': 15,
    'short_name': 'SJ-L10',
    'station_type': 'class

In [3]:
import requests, json, os, datetime,glob


# load latest station_information
stations = payload["data"]["stations"]

# load regions
import requests
regions = requests.get("https://gbfs.lyft.com/gbfs/1.1/bay/en/system_regions.json", timeout=15).json()["data"]["regions"]

print("Regions:", [ (r["region_id"], r["name"]) for r in regions ])

sj_region_ids = { r["region_id"] for r in regions if "san" in r["name"].lower() and "jose" in r["name"].lower() }
sj = [s for s in stations if s.get("region_id") in sj_region_ids]
print(sj_region_ids)
print("San José stations:", len(sj))
print("Sample:", [{k:s.get(k) for k in ["station_id","name","lat","lon","capacity"]} for s in sj[:2]])


Regions: [('3', 'San Francisco'), ('5', 'San Jose'), ('12', 'Oakland'), ('13', 'Emeryville'), ('14', 'Berkeley'), ('23', '8D'), ('3', 'San Francisco'), ('5', 'San Jose'), ('12', 'Oakland'), ('13', 'Emeryville'), ('14', 'Berkeley'), ('23', '8D')]
{'5'}
San José stations: 79
Sample: [{'station_id': 'dc2f7685-c3e3-4536-b78b-740479cbb207', 'name': 'San Jose Diridon Station', 'lat': 37.329732, 'lon': -121.901782, 'capacity': 35}, {'station_id': 'bae9be55-04d4-4641-9781-3d1c4b6950f1', 'name': 'Saint James Park', 'lat': 37.339301, 'lon': -121.889937, 'capacity': 15}]


In [4]:
len(stations)

581

In [8]:
# Method for the static feed

#Import Dependencies
import requests, json

def static_station_data() -> list:
    url = "https://gbfs.lyft.com/gbfs/1.1/bay/en/station_information.json"
    resp = requests.get(url, timeout=15)
    resp.raise_for_status()
    payload = resp.json()

    # load latest station_information
    stations = payload["data"]["stations"]

    ## hardcode region, San Jose = 5
    san_jose_region_id="5"

    sj_stations = []
    for station in stations:

        if station.get("region_id") == san_jose_region_id:
            sj_stations.append(station)



    return sj_stations

sj_s = static_station_data()

len(sj_s)

sj_s[0]



{'rental_methods': ['KEY', 'CREDITCARD'],
 'lat': 37.3509643,
 'lon': -121.9020161,
 'eightd_station_services': [],
 'electric_bike_surcharge_waiver': False,
 'external_id': '0d730ac1-7ce6-45cf-aca6-b412ea46709d',
 'region_id': '5',
 'eightd_has_key_dispenser': False,
 'capacity': 27,
 'short_name': 'SJ-H10',
 'station_type': 'classic',
 'name': 'Mission St at 1st St',
 'station_id': '0d730ac1-7ce6-45cf-aca6-b412ea46709d',
 'has_kiosk': True,
 'rental_uris': {'ios': 'https://sfo.lft.to/lastmile_qr_scan',
  'android': 'https://sfo.lft.to/lastmile_qr_scan'}}

In [10]:
import pandas as pd

df = pd.DataFrame(sj)
df = df.drop(['has_kiosk','rental_uris','eightd_has_key_dispenser','electric_bike_surcharge_waiver','external_id','external_id','eightd_station_services'],axis =1)
df


Unnamed: 0,rental_methods,lat,lon,region_id,capacity,short_name,station_type,name,station_id
0,"[KEY, CREDITCARD]",37.329732,-121.901782,5,35,SJ-M7-1,classic,San Jose Diridon Station,dc2f7685-c3e3-4536-b78b-740479cbb207
1,"[KEY, CREDITCARD]",37.339301,-121.889937,5,15,SJ-L10,classic,Saint James Park,bae9be55-04d4-4641-9781-3d1c4b6950f1
2,"[KEY, CREDITCARD]",37.343985,-121.874385,5,19,SJ-L13,classic,17th St at Santa Clara St,30429aed-9a47-4fd9-87fa-0db835aa8265
3,"[KEY, CREDITCARD]",37.360001,-121.878778,5,19,SJ-I14,classic,23rd St at Taylor St,7d59176c-49dd-4b07-a5ab-bcc109974db3
4,"[KEY, CREDITCARD]",37.350964,-121.902016,5,27,SJ-H10,classic,Mission St at 1st St,0d730ac1-7ce6-45cf-aca6-b412ea46709d
...,...,...,...,...,...,...,...,...,...
74,"[KEY, CREDITCARD]",37.324126,-121.899720,5,23,SJ-N7,classic,Columbia Ave at Bird Ave,eb283c26-5d6b-4da4-a2ac-f0bfe3329856
75,"[KEY, CREDITCARD]",37.341132,-121.892844,5,23,SJ-K10,classic,2nd St at Julian St,ccf416f8-ccc0-4f82-a659-e5d7851d2bcb
76,"[KEY, CREDITCARD]",37.315158,-121.897833,5,15,SJ-P6,classic,Bird Ave at Coe Ave,b09fd293-4364-4ce6-9341-0ced58486c5a
77,"[KEY, CREDITCARD]",37.342663,-121.877292,5,23,SJ-L12,classic,N 14th St at E Santa Clara St,b6d76c7a-2082-430b-8403-da5db00becaf


In [6]:
df.head()

Unnamed: 0,station_id,rental_methods,name,capacity,lon,short_name,region_id,station_type,lat
0,c53990d7-f965-40f4-b305-3435e1c95a71,"[KEY, CREDITCARD]",23rd St at Santa Clara St,19,-121.86857,SJ-M14,5,classic,37.34648
1,30429aed-9a47-4fd9-87fa-0db835aa8265,"[KEY, CREDITCARD]",17th St at Santa Clara St,19,-121.874385,SJ-L13,5,classic,37.343985
2,0d730ac1-7ce6-45cf-aca6-b412ea46709d,"[KEY, CREDITCARD]",Mission St at 1st St,27,-121.902016,SJ-H10,5,classic,37.350964
3,bae9be55-04d4-4641-9781-3d1c4b6950f1,"[KEY, CREDITCARD]",Saint James Park,15,-121.889937,SJ-L10,5,classic,37.339301
4,ed707a89-a68d-4921-a4cb-16c268e45a5b,"[KEY, CREDITCARD]",San Fernando St at 7th St,23,-121.883215,SJ-M11-2,5,classic,37.337122


In [50]:
# Temp solution
station_id_list ={}

for i in df["station_id"]:
    station_id_list[i] = "SJ"

len(station_id_list)

NameError: name 'df' is not defined

## Live Data

In [45]:
import requests,os
url ='https://gbfs.lyft.com/gbfs/1.1/bay/en/station_status.json'
resp = requests.get(url, timeout=15)
resp.raise_for_status()
payload = resp.json()

payload = resp.json()


In [47]:
stations = payload["data"]["stations"]

len(stations)

len(stations)

581

In [49]:
region5_stations = []

for station in stations:
    if station["station_id"] in station_id_list:
        region5_stations.append(station)


print(len(region5_stations))

len(region5_stations)


NameError: name 'station_id_list' is not defined

In [64]:
# Method for the Live dock feed

#Import Dependencies
import requests, json

def live_station_data() -> list:
  #get station Ids for San Jose stations
  region5_station_ids = {}
  ## hardcode region, San Jose = 5
  san_jose_region_id="5"

  url = "https://gbfs.lyft.com/gbfs/1.1/bay/en/station_information.json"
  resp = requests.get(url, timeout=15)
  resp.raise_for_status()
  payload = resp.json()

  # load latest station_information
  stations = payload["data"]["stations"]

  for station in stations:

        if station.get("region_id") == san_jose_region_id:
            region5_station_ids[station.get("station_id")] = "5"

  url = "https://gbfs.lyft.com/gbfs/1.1/bay/en/station_status.json"
  resp = requests.get(url, timeout=15)
  resp.raise_for_status()
  payload = resp.json()

  stations_live_status_sj = []
  # load latest station_information
  stations_live_status = payload["data"]["stations"]
  for station in stations_live_status:
      if station.get("station_id") in region5_station_ids:
            stations_live_status_sj.append(station)

  return stations_live_status_sj



d =live_station_data()





79

In [11]:
# create the
df_live_feed = pd.DataFrame(region5_stations)
df_live_feed = df_live_feed.drop(["num_docks_disabled","eightd_has_available_keys","legacy_id"],axis=1)
df_live_feed.shape

(79, 11)

In [12]:
df_live_feed.head(10)

Unnamed: 0,num_bikes_available,station_id,num_bikes_disabled,num_scooters_available,is_returning,last_reported,num_scooters_unavailable,is_renting,num_ebikes_available,num_docks_available,is_installed
0,8,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,0,0,1,1754908120,0,1,2,7,1
1,7,ab8cc22e-0f34-4476-bf81-293cbbb2e69c,0,0,1,1754908118,0,1,1,20,1
2,12,68c89d1f-407a-4550-a2b7-ecf0ad7ee422,2,0,1,1754908123,0,1,7,5,1
3,11,0d48fc9e-6798-46f0-bbb6-67a168800e0b,0,0,1,1754908137,0,1,4,4,1
4,7,ed707a89-a68d-4921-a4cb-16c268e45a5b,1,0,1,1754908192,0,1,5,15,1
5,10,6994e1e6-bb3c-4309-8207-52e6004a7302,0,0,1,1754908150,0,1,2,13,1
6,14,dc2f7685-c3e3-4536-b78b-740479cbb207,3,0,1,1754908173,0,1,14,18,1
7,17,c53990d7-f965-40f4-b305-3435e1c95a71,2,0,1,1754908202,0,1,16,0,1
8,9,30429aed-9a47-4fd9-87fa-0db835aa8265,0,0,1,1754908203,0,1,3,9,1
9,15,7d59176c-49dd-4b07-a5ab-bcc109974db3,0,0,1,1754908159,0,1,6,4,1


## Weather Data

In [67]:
from dotenv import load_dotenv
load_dotenv()
ninja_api = os.getenv("NINJAAPI")


# Latitude & Longitude values
lat=37.3382
lon=-121.8863


# API Ninjas endpoint with lat/lon
api_url = f'https://api.api-ninjas.com/v1/weather?lat={lat}&lon={lon}'

# Make the request
response = requests.get(api_url, headers={'X-Api-Key': ninja_api})

if response.status_code == requests.codes.ok:
    final_response = response.json()
    print(final_response)  # use .json() for parsed output
else:
    print("Error:", response.status_code, response.text)

def get_weather(lat,lon):
    api_url = f'https://api.api-ninjas.com/v1/weather?lat={lat}&lon={lon}'
    # Make the request
    response = requests.get(api_url, headers={'X-Api-Key': ninja_api})

    if response.status_code == requests.codes.ok:

        return response.json()

    else:
        return "error"


#need to add an extraction timestamp




{'cloud_pct': 100, 'temp': 18, 'feels_like': 18, 'humidity': 83, 'min_temp': 16, 'max_temp': 19, 'wind_speed': 2.57, 'wind_degrees': 350, 'sunrise': 1755177803, 'sunset': 1755226874}


In [71]:
from dotenv import load_dotenv
load_dotenv()
ninja_api = os.getenv("NINJAAPI")
def get_weather_data_san_jose():
    # Latitude & Longitude values
    lat=37.3382
    lon=-121.8863
    api_url = f'https://api.api-ninjas.com/v1/weather?lat={lat}&lon={lon}'
    # Make the request
    response = requests.get(api_url, headers={'X-Api-Key': ninja_api})

    if response.status_code == requests.codes.ok:
        return response.json()

    else:
        return "error"

w = get_weather_data_san_jose()
w


{'cloud_pct': 100,
 'temp': 17,
 'feels_like': 17,
 'humidity': 84,
 'min_temp': 16,
 'max_temp': 18,
 'wind_speed': 2.57,
 'wind_degrees': 350,
 'sunrise': 1755177802,
 'sunset': 1755226872}

In [14]:
weather_df = pd.DataFrame([final_response])
weather_df = weather_df.drop(['feels_like','wind_degrees'], axis= 1)

In [15]:
weather_df

Unnamed: 0,cloud_pct,temp,humidity,min_temp,max_temp,wind_speed,sunrise,sunset
0,100,17,84,16,18,1.54,1754918447,1754967886


## Holiday Data

In [16]:
# Convert Unix timestamp (seconds) to local time + calendar features

import holidays

# Do it in the etl pipe

from datetime import datetime
import holidays

def date_converter(unix_ts: int):
    dt_local = datetime.fromtimestamp(unix_ts)  # no need for datetime.datetime
    is_weekday = dt_local.isoweekday()
    month = dt_local.month
    hour = dt_local.hour

    us_holidays = holidays.US()
    is_holiday = dt_local.date() in us_holidays

    return {
        "raw_time": unix_ts,
        "dt_local": dt_local,
        "is_weekday": is_weekday,
        "month": month,
        "hour": hour,
        "is_holiday": is_holiday
    }


d = date_converter(1754893182)
print(d)

df_datetime = pd.DataFrame([d])
df_datetime


{'raw_time': 1754893182, 'dt_local': datetime.datetime(2025, 8, 10, 23, 19, 42), 'is_weekday': 7, 'month': 8, 'hour': 23, 'is_holiday': False}


Unnamed: 0,raw_time,dt_local,is_weekday,month,hour,is_holiday
0,1754893182,2025-08-10 23:19:42,7,8,23,False


## Event Data

In [72]:
ticket_master_api = os.getenv('TICKETMASTER')

print(ticket_master_api)

EdEXhPQmgIagBHtdx19kFqcr3fC7oSe5


In [73]:
import os, time, requests
from datetime import datetime, timedelta, timezone

API_KEY = os.getenv("TICKETMASTER")
BASE_URL = "https://app.ticketmaster.com/discovery/v2/events.json"

def iso_utc(dt: datetime) -> str:
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

def fetch_events_ticketmaster_miles(
    lat: float,
    lon: float,
    start_utc: datetime,
    end_utc: datetime,
    radius_miles: int = 1,     # send as INT, not float
    page_size: int = 100,      # smaller page while debugging
    max_pages: int = 10,
    sleep_sec: float = 0.2,
    apikey: str | None = None,
):
    apikey = apikey or API_KEY
    if not apikey:
        raise RuntimeError("Set TICKETMASTER_API_KEY env var or pass apikey=...")

    params = {
        "apikey": apikey,
        "latlong": f"{lat},{lon}",
        "radius": radius_miles,       # integer
        "unit": "miles",              # miles
        "startDateTime": iso_utc(start_utc),
        "endDateTime": iso_utc(end_utc),
        "countryCode": "US",
        "locale": "en-us",
        "size": page_size,
        "sort": "date,asc",
        "page": 0,
    }

    events = []
    for page in range(max_pages):
        params["page"] = page
        r = requests.get(BASE_URL, params=params, timeout=20)

        # retry on rate limit
        if r.status_code == 429:
            time.sleep(1.0)
            r = requests.get(BASE_URL, params=params, timeout=20)

        if not r.ok:
            # PRINT SERVER ERROR BODY to see exact reason
            try:
                print("Error body:", r.json())
            except Exception:
                print("Error body (text):", r.text)
            r.raise_for_status()

        data = r.json()
        page_events = (data.get("_embedded") or {}).get("events") or []
        if not page_events:
            break

        fetched_at = iso_utc(datetime.utcnow())
        for ev in page_events:
            classes = ev.get("classifications") or []
            seg = (classes[0].get("segment") or {}).get("name") if classes else None
            genr = (classes[0].get("genre") or {}).get("name") if classes else None

            venues = (ev.get("_embedded") or {}).get("venues") or []
            v = venues[0] if venues else {}
            loc = v.get("location") or {}
            try:
                lat_v = float(loc.get("latitude")) if loc.get("latitude") is not None else None
                lon_v = float(loc.get("longitude")) if loc.get("longitude") is not None else None
            except (TypeError, ValueError):
                lat_v = lon_v = None

            public_sales = (ev.get("sales") or {}).get("public") or {}

            events.append({
                #"event_id": ev.get("id"),
                #"name": ev.get("name"),
                "start_datetime": (ev.get("dates") or {}).get("start", {}).get("dateTime"),
                "timezone": (ev.get("dates") or {}).get("timezone"),
                #"venue_id": v.get("id"),
                #"venue_name": v.get("name"),
                #"venue_city": (v.get("city") or {}).get("name"),
                #"venue_state": (v.get("state") or {}).get("stateCode"),
                "venue_lat": lat_v,
                "venue_lon": lon_v,
                #"fetched_at": fetched_at,
                "source": "ticketmaster",
                "query_radius_miles": radius_miles,
                "query_lat": lat,
                "query_lon": lon,
            })

        page_info = data.get("page", {})
        if page >= page_info.get("totalPages", 1) - 1:
            break
        time.sleep(sleep_sec)

    count = {"events":events, "metadata":{"number_events": len(events), "radius_events": radius_miles} }

    return count

lat, lon = 37.323345, -121.913497
start = datetime.utcnow()
end = start + timedelta(days=2)

events = fetch_events_ticketmaster_miles(lat, lon, start, end, radius_miles=2)

print(f"Fetched {events} events")



  start = datetime.utcnow()


Fetched {'events': [{'start_datetime': '2025-08-15T03:00:00Z', 'timezone': 'America/Los_Angeles', 'venue_lat': 37.335634, 'venue_lon': -121.887954, 'source': 'ticketmaster', 'query_radius_miles': 2, 'query_lat': 37.323345, 'query_lon': -121.913497}, {'start_datetime': '2025-08-16T02:30:00Z', 'timezone': 'America/Los_Angeles', 'venue_lat': 37.3329688, 'venue_lon': -121.9045755, 'source': 'ticketmaster', 'query_radius_miles': 2, 'query_lat': 37.323345, 'query_lon': -121.913497}, {'start_datetime': '2025-08-16T02:30:00Z', 'timezone': 'America/Los_Angeles', 'venue_lat': 37.3329688, 'venue_lon': -121.9045755, 'source': 'ticketmaster', 'query_radius_miles': 2, 'query_lat': 37.323345, 'query_lon': -121.913497}, {'start_datetime': '2025-08-16T02:30:00Z', 'timezone': 'America/Los_Angeles', 'venue_lat': 37.335634, 'venue_lon': -121.887954, 'source': 'ticketmaster', 'query_radius_miles': 2, 'query_lat': 37.323345, 'query_lon': -121.913497}, {'start_datetime': '2025-08-16T03:00:00Z', 'timezone': '

  fetched_at = iso_utc(datetime.utcnow())


In [85]:
import os, time, requests
from datetime import datetime, timezone

def fetch_events_ticketmaster_miles(radius_miles=30, page_size=100, max_pages=10, sleep_sec=0.2, apikey=None):


    lat=37.3382
    lon=-121.8863
    start_utc = datetime.utcnow()
    end_utc = start + timedelta(days=1)

    def iso_utc(dt):
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    apikey = apikey or os.getenv("TICKETMASTER")
    if not apikey:
        raise RuntimeError("Set TICKETMASTER env var or pass apikey=...")

    base_url = "https://app.ticketmaster.com/discovery/v2/events.json"
    params = {
        "apikey": apikey,
        "latlong": f"{lat},{lon}",
        "radius": int(radius_miles),
        "unit": "miles",
        "startDateTime": iso_utc(start_utc),
        "endDateTime": iso_utc(end_utc),
        "countryCode": "US",
        "locale": "en-us",
        "size": int(page_size),
        "sort": "date,asc",
        "page": 0,
    }

    events = []
    for page in range(int(max_pages)):
        params["page"] = page
        r = requests.get(base_url, params=params, timeout=20)
        if r.status_code == 429:
            time.sleep(1.0)
            r = requests.get(base_url, params=params, timeout=20)
        if not r.ok:
            try:
                print("Error body:", r.json())
            except Exception:
                print("Error body (text):", r.text)
            r.raise_for_status()

        data = r.json()
        page_events = (data.get("_embedded") or {}).get("events") or []
        if not page_events:
            break

        fetched_at = iso_utc(datetime.utcnow())
        for ev in page_events:
            classes = ev.get("classifications") or []
            seg = (classes[0].get("segment") or {}).get("name") if classes else None
            genr = (classes[0].get("genre") or {}).get("name") if classes else None

            venues = (ev.get("_embedded") or {}).get("venues") or []
            v = venues[0] if venues else {}
            loc = v.get("location") or {}
            try:
                lat_v = float(loc.get("latitude")) if loc.get("latitude") is not None else None
                lon_v = float(loc.get("longitude")) if loc.get("longitude") is not None else None
            except (TypeError, ValueError):
                lat_v = lon_v = None

            public_sales = (ev.get("sales") or {}).get("public") or {}
            public_start = public_sales.get("startDateTime")
            public_end = public_sales.get("endDateTime")

            events.append({
                "event_id": ev.get("id"),
                "name": ev.get("name"),
                "segment": seg,
                "genre": genr,
                "start_datetime": (ev.get("dates") or {}).get("start", {}).get("dateTime"),
                "timezone": (ev.get("dates") or {}).get("timezone"),
                "venue_id": v.get("id"),
                "venue_name": v.get("name"),
                "venue_city": (v.get("city") or {}).get("name"),
                "venue_state": (v.get("state") or {}).get("stateCode"),
                "venue_lat": lat_v,
                "venue_lon": lon_v,
                "public_sale_start": public_start,
                "public_sale_end": public_end,
                "fetched_at": fetched_at,
                "source": "ticketmaster",
                "query_radius_miles": radius_miles,
                "query_lat": lat,
                "query_lon": lon,
            })

        page_info = data.get("page", {})
        if page >= page_info.get("totalPages", 1) - 1:
            break
        time.sleep(float(sleep_sec))

    return {"events": events, "metadata": {"number_events": len(events), "radius_events": radius_miles}}







  start = datetime.utcnow()
  start_utc = datetime.utcnow()


Fetched {'events': [{'event_id': 'Z7r9jZ1A7bg-9', 'name': 'Stanford Cardinal Womens Soccer vs. USF Dons Womens Soccer', 'segment': 'Undefined', 'genre': None, 'start_datetime': '2025-08-15T02:00:00Z', 'timezone': None, 'venue_id': 'Z7r9jZa7jH', 'venue_name': 'Laird Q. Cagan Stadium', 'venue_city': 'Stanford', 'venue_state': 'CA', 'venue_lat': 37.416199, 'venue_lon': -122.172203, 'public_sale_start': '2025-07-15T17:00:00Z', 'public_sale_end': '2025-08-15T02:00:00Z', 'fetched_at': '2025-08-14T09:11:25Z', 'source': 'ticketmaster', 'query_radius_miles': 30, 'query_lat': 37.3382, 'query_lon': -121.8863}, {'event_id': 'rZ7HnEZ1AfC7FN', 'name': 'Damian "Jr. Gong" & Stephen Marley with Special Guest J Boog', 'segment': 'Music', 'genre': 'Reggae', 'start_datetime': '2025-08-15T02:00:00Z', 'timezone': 'America/Los_Angeles', 'venue_id': 'rZ7HnEZ17Qa1d', 'venue_name': 'Quarry Amphitheater', 'venue_city': 'Santa Cruz', 'venue_state': 'CA', 'venue_lat': 36.99865, 'venue_lon': -122.05636, 'public_sal

  fetched_at = iso_utc(datetime.utcnow())


In [86]:
events["metadata"]

{'number_events': 4, 'radius_events': 30}

In [21]:
df.columns

Index(['station_id', 'rental_methods', 'name', 'capacity', 'lon', 'short_name',
       'region_id', 'station_type', 'lat'],
      dtype='object')

In [22]:
df_live_feed.columns

Index(['num_bikes_available', 'station_id', 'num_bikes_disabled',
       'num_scooters_available', 'is_returning', 'last_reported',
       'num_scooters_unavailable', 'is_renting', 'num_ebikes_available',
       'num_docks_available', 'is_installed'],
      dtype='object')

In [23]:
import pandas as pd

# Merge live feed with static station info
df_combined = pd.merge(
    df_live_feed,
    df,
    on="station_id",   # common key
    how="left"         # keep all live feed rows
)

df_combined


Unnamed: 0,num_bikes_available,station_id,num_bikes_disabled,num_scooters_available,is_returning,last_reported,num_scooters_unavailable,is_renting,num_ebikes_available,num_docks_available,is_installed,rental_methods,name,capacity,lon,short_name,region_id,station_type,lat
0,8,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,0,0,1,1754908120,0,1,2,7,1,"[KEY, CREDITCARD]",Bestor Art Park,15,-121.874119,SJ-Q11,5,classic,37.323678
1,7,ab8cc22e-0f34-4476-bf81-293cbbb2e69c,0,0,1,1754908118,0,1,1,20,1,"[KEY, CREDITCARD]",Kerley Dr at Rosemary St,27,-121.906834,SJ-F10,5,classic,37.360854
2,12,68c89d1f-407a-4550-a2b7-ecf0ad7ee422,2,0,1,1754908123,0,1,7,5,1,"[KEY, CREDITCARD]",San Carlos St at Meridian Ave,19,-121.913497,SJ-M4,5,classic,37.323345
3,11,0d48fc9e-6798-46f0-bbb6-67a168800e0b,0,0,1,1754908137,0,1,4,4,1,"[KEY, CREDITCARD]",Julian St at 6th St,15,-121.888889,SJ-K11,5,classic,37.342997
4,7,ed707a89-a68d-4921-a4cb-16c268e45a5b,1,0,1,1754908192,0,1,5,15,1,"[KEY, CREDITCARD]",San Fernando St at 7th St,23,-121.883215,SJ-M11-2,5,classic,37.337122
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,25,94c877d2-f064-4373-b372-3f8997a07f30,1,0,1,1754908195,0,1,14,5,1,"[KEY, CREDITCARD]",South San Jose State,31,-121.870099,SJ-R11,5,classic,37.320316
75,10,ccf416f8-ccc0-4f82-a659-e5d7851d2bcb,1,0,1,1754908208,0,1,10,12,1,"[KEY, CREDITCARD]",2nd St at Julian St,23,-121.892844,SJ-K10,5,classic,37.341132
76,22,eb283c26-5d6b-4da4-a2ac-f0bfe3329856,0,0,1,1754908219,0,1,8,1,1,"[KEY, CREDITCARD]",Columbia Ave at Bird Ave,23,-121.899720,SJ-N7,5,classic,37.324126
77,10,6200707a-ffec-4dde-b142-4df69780264f,0,0,1,1754908224,0,1,3,13,1,"[KEY, CREDITCARD]",10th St at Mission St,23,-121.892251,SJ-H12,5,classic,37.355693


In [35]:
shape = df_combined.shape

for row in range(shape[0]):

        lat = df_combined.iloc[row]["lat"]
        lon = df_combined.iloc[row]["lon"]
        start = datetime.utcnow()
        end = start + timedelta(days=4)
        weather_data = get_weather(lat,lon)  # Json
        holiday_data = date_converter(df_combined.iloc[row]["last_reported"])
        event_data = fetch_events_ticketmaster_miles(lat, lon, start, end, radius_miles=2)

        final_json = weather_data|holiday_data|event_data
        temp_df = pd.DataFrame([final_json])







        print(temp_df)



  start = datetime.utcnow()
  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918445  1754967881  1754908120 2025-08-11 03:28:40   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  
   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        85        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918451  1754967894  1754908118 2025-08-11 03:28:38   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              0              2  


  start = datetime.utcnow()
  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        85        15        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918454  1754967890  1754908123 2025-08-11 03:28:43   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918446  1754967887  1754908137 2025-08-11 03:28:57   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918446  1754967885  1754908192 2025-08-11 03:29:52   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  
   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918439  1754967884  1754908150 2025-08-11 03:29:10   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              0              2  


  start = datetime.utcnow()
  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918451  1754967888  1754908173 2025-08-11 03:29:33   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918441  1754967882  1754908202 2025-08-11 03:30:02   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918443  1754967883  1754908203 2025-08-11 03:30:03   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918442  1754967886  1754908159 2025-08-11 03:29:19   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        85        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918449  1754967891  1754908180 2025-08-11 03:29:40   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918446  1754967887  1754908183 2025-08-11 03:29:43   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        85        15        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918452  1754967891  1754908219 2025-08-11 03:30:19   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918446  1754967887  1754908139 2025-08-11 03:28:59   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918451  1754967888  1754908141 2025-08-11 03:29:01   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918446  1754967887  1754908142 2025-08-11 03:29:02   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918446  1754967885  1754908166 2025-08-11 03:29:26   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        85        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918449  1754967891  1754908173 2025-08-11 03:29:33   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918447  1754967884  1754908181 2025-08-11 03:29:41   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()


   cloud_pct  temp  feels_like  humidity  min_temp  max_temp  wind_speed  \
0        100    17          17        84        16        18        2.06   

   wind_degrees     sunrise      sunset    raw_time            dt_local  \
0           330  1754918451  1754967888  1754908219 2025-08-11 03:30:19   

   is_weekday  month  hour  is_holiday  number_events  radius_events  
0           1      8     3       False              2              2  


KeyboardInterrupt: 

In [36]:
shape = df_combined.shape

for row in range(shape[0]):
    lat = df_combined.iloc[row]["lat"]
    lon = df_combined.iloc[row]["lon"]

    start = datetime.utcnow()
    end = start + timedelta(days=4)

    weather_data = get_weather(lat, lon)                          # dict
    holiday_data = date_converter(df_combined.iloc[row]["last_reported"])  # dict
    event_data = fetch_events_ticketmaster_miles(lat, lon, start, end, radius_miles=2)  # dict

    final_json = weather_data | holiday_data | event_data          # single dict for this row

    # 1) Make sure all keys exist as columns
    for k in final_json.keys():
        if k not in df_combined.columns:
            df_combined[k] = pd.NA

    # 2) Write values into THIS ROW (in place)
    for k, v in final_json.items():
        df_combined.at[row, k] = v



  start = datetime.utcnow()
  fetched_at = iso_utc(datetime.utcnow())
  start = datetime.utcnow()
  fetched_at = iso_utc(datetime.utcnow())


In [37]:
df_combined.shape

(79, 37)

In [38]:
df_combined

Unnamed: 0,num_bikes_available,station_id,num_bikes_disabled,num_scooters_available,is_returning,last_reported,num_scooters_unavailable,is_renting,num_ebikes_available,num_docks_available,...,sunrise,sunset,raw_time,dt_local,is_weekday,month,hour,is_holiday,number_events,radius_events
0,8,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,0,0,1,1754908120,0,1,2,7,...,1754918445,1754967881,1754908120,2025-08-11 03:28:40,1,8,3,False,2,2
1,7,ab8cc22e-0f34-4476-bf81-293cbbb2e69c,0,0,1,1754908118,0,1,1,20,...,1754918449,1754967893,1754908118,2025-08-11 03:28:38,1,8,3,False,0,2
2,12,68c89d1f-407a-4550-a2b7-ecf0ad7ee422,2,0,1,1754908123,0,1,7,5,...,1754918454,1754967890,1754908123,2025-08-11 03:28:43,1,8,3,False,2,2
3,11,0d48fc9e-6798-46f0-bbb6-67a168800e0b,0,0,1,1754908137,0,1,4,4,...,1754918447,1754967886,1754908137,2025-08-11 03:28:57,1,8,3,False,2,2
4,7,ed707a89-a68d-4921-a4cb-16c268e45a5b,1,0,1,1754908192,0,1,5,15,...,1754918446,1754967885,1754908192,2025-08-11 03:29:52,1,8,3,False,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,25,94c877d2-f064-4373-b372-3f8997a07f30,1,0,1,1754908195,0,1,14,5,...,1754918445,1754967881,1754908195,2025-08-11 03:29:55,1,8,3,False,2,2
75,10,ccf416f8-ccc0-4f82-a659-e5d7851d2bcb,1,0,1,1754908208,0,1,10,12,...,1754918447,1754967886,1754908208,2025-08-11 03:30:08,1,8,3,False,2,2
76,22,eb283c26-5d6b-4da4-a2ac-f0bfe3329856,0,0,1,1754908219,0,1,8,1,...,1754918452,1754967888,1754908219,2025-08-11 03:30:19,1,8,3,False,2,2
77,10,6200707a-ffec-4dde-b142-4df69780264f,0,0,1,1754908224,0,1,3,13,...,1754918446,1754967889,1754908224,2025-08-11 03:30:24,1,8,3,False,2,2
