In [117]:
import json
import os
from datetime import datetime
from google.cloud import storage
from dotenv import load_dotenv
load_dotenv()
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

def get_dock_feed():

    bucket_name,start_date =  "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)

    # Broad prefix to get all GBFS data
    prefix = "raw/gbfs/"
    data =[]

    for blob in bucket.list_blobs(prefix=prefix):
        # Try parsing date from blob name: raw/gbfs/year=YYYY/month=MM/day=DD/...
        try:
            parts = blob.name.split("/")
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue  # Skip if format doesn't match

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())
            data.append(data_json)


    flattened_data = []
    for sublist in data:
        flattened_data.extend(sublist)

    df1 = pd.DataFrame(flattened_data)

    return df1

# Example usage:
d =get_dock_feed()
d

Unnamed: 0,station_id,last_reported,num_ebikes_available,legacy_id,num_docks_disabled,num_scooters_unavailable,eightd_has_available_keys,num_bikes_disabled,is_renting,num_docks_available,num_bikes_available,num_scooters_available,is_installed,is_returning
0,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
1,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
2,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
3,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
4,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,94c877d2-f064-4373-b372-3f8997a07f30,1755168355,17,423,0,0,False,0,1,2,29,0,1,1
162,7c086aa7-c33f-47e5-ace9-74c40807c409,1755168359,6,394,0,0,False,1,1,0,22,0,1,1
163,b6d76c7a-2082-430b-8403-da5db00becaf,1755168354,1,470,0,0,False,0,1,4,19,0,1,1
164,ccf416f8-ccc0-4f82-a659-e5d7851d2bcb,1755168360,2,357,0,0,False,0,1,19,4,0,1,1


In [118]:
def get_dock_feed_static():

    bucket_name,start_date =  "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)

    # Broad prefix to get all GBFS data
    prefix = "raw/static/"
    data =[]

    for blob in bucket.list_blobs(prefix=prefix):
        try:
            parts = blob.name.split("/")
            # Case 1: folder-style year/month/day
            filename = parts[-1]
            # Extract 20250814 from "stations_20250814T105313Z.json"
            date_str = filename.split("_")[1][:8]
            blob_date = datetime.strptime(date_str, "%Y%m%d").date()

        except Exception as e:
            continue

    if blob_date >= start_date:
        data_json = json.loads(blob.download_as_text())
        data.append(data_json)



    flattened_data = []
    for sublist in data:
        flattened_data.extend(sublist)

    df1 = pd.DataFrame(flattened_data)

    return df1


C = get_dock_feed_static()
C


Unnamed: 0,rental_methods,short_name,external_id,eightd_has_key_dispenser,lat,has_kiosk,rental_uris,station_type,electric_bike_surcharge_waiver,name,region_id,capacity,eightd_station_services,station_id,lon
0,"[KEY, CREDITCARD]",SJ-K11,0d48fc9e-6798-46f0-bbb6-67a168800e0b,False,37.342997,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,Julian St at 6th St,5,15,[],0d48fc9e-6798-46f0-bbb6-67a168800e0b,-121.888889
1,"[KEY, CREDITCARD]",SJ-M4,68c89d1f-407a-4550-a2b7-ecf0ad7ee422,False,37.323345,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,San Carlos St at Meridian Ave,5,19,[],68c89d1f-407a-4550-a2b7-ecf0ad7ee422,-121.913497
2,"[KEY, CREDITCARD]",SJ-Q11,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,False,37.323678,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,Bestor Art Park,5,15,[],46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,-121.874119
3,"[KEY, CREDITCARD]",SJ-M11-2,ed707a89-a68d-4921-a4cb-16c268e45a5b,False,37.337122,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,San Fernando St at 7th St,5,23,[],ed707a89-a68d-4921-a4cb-16c268e45a5b,-121.883215
4,"[KEY, CREDITCARD]",SJ-F10,ab8cc22e-0f34-4476-bf81-293cbbb2e69c,False,37.360854,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,Kerley Dr at Rosemary St,5,27,[],ab8cc22e-0f34-4476-bf81-293cbbb2e69c,-121.906834
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,"[KEY, CREDITCARD]",SJ-N6,46f36a3a-f550-4fe0-bfe8-b94fddaf95d8,False,37.321182,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,Auzerais Ave at Los Gatos Creek Trail,5,23,[],46f36a3a-f550-4fe0-bfe8-b94fddaf95d8,-121.903090
75,"[KEY, CREDITCARD]",SJ-N10-3,bd217291-f4ad-4e1d-8c13-e27d39a35fef,False,37.332039,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,5th St at San Salvador St,5,19,[],bd217291-f4ad-4e1d-8c13-e27d39a35fef,-121.881766
76,"[KEY, CREDITCARD]",SJ-N7,eb283c26-5d6b-4da4-a2ac-f0bfe3329856,False,37.324126,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,Columbia Ave at Bird Ave,5,23,[],eb283c26-5d6b-4da4-a2ac-f0bfe3329856,-121.899720
77,"[KEY, CREDITCARD]",SJ-J12-1,7c086aa7-c33f-47e5-ace9-74c40807c409,False,37.349426,True,"{'ios': 'https://sfo.lft.to/lastmile_qr_scan',...",classic,False,10th St at Empire St,5,23,[],7c086aa7-c33f-47e5-ace9-74c40807c409,-121.887619


In [113]:
import pandas as pd

def get_event_metadata_table():
    bucket_name, start_date = "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)
    prefix = "raw/events/"

    rows = []

    for blob in bucket.list_blobs(prefix=prefix):
        try:
            parts = blob.name.split("/")
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())
            meta = data_json.get("metadata", {})

            row = {
                "date_stamp": str(blob_date),
                "radius_events": meta.get("radius_events"),
                "number_events": meta.get("number_events")
            }
            rows.append(row)

    df = pd.DataFrame(rows)
    return df


C = get_event_metadata_table()
C

Unnamed: 0,date_stamp,radius_events,number_events
0,2025-08-14,30,4


In [115]:
import pandas as pd

def get_weather_feed(local_dir="downloads"):
    bucket_name, start_date = "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)
    prefix = "raw/weather/"

    rows = []

    for blob in bucket.list_blobs(prefix=prefix):
        try:
            parts = blob.name.split("/")
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())

            # Add blob timestamp
            row = data_json.copy()
            row["collected_at"] = blob.time_created.isoformat()
            rows.append(row)

    df1 = pd.DataFrame(rows)
    return df1
d =get_weather_feed()
d

Unnamed: 0,cloud_pct,temp,feels_like,humidity,min_temp,max_temp,wind_speed,wind_degrees,sunrise,sunset,collected_at
0,100,17,17,84,16,18,3.6,60,1755177803,1755226874,2025-08-14T11:22:16.025000+00:00
1,100,17,17,84,16,18,3.09,50,1755177801,1755226871,2025-08-14T11:30:03.894000+00:00


## Done


In [94]:
import json
import os
from datetime import datetime
from google.cloud import storage


from dotenv import load_dotenv
load_dotenv()
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")


def get_event_data():


    bucket_name,start_date =  "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)

    # Broad prefix to get all GBFS data
    prefix = "raw/events/"

    data =[]
    print("h")
    print(bucket.list_blobs)
    for blob in bucket.list_blobs(prefix=prefix):
        # Try parsing date from blob name: raw/gbfs/year=YYYY/month=MM/day=DD/...
        try:
            parts = blob.name.split("/")

            print(parts)
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue  # Skip if format doesn't match

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())
            data.append(data_json)


    print(data)
    flattened_data = []
    for sublist in data:
        if "events" in sublist:
            flattened_data.extend(sublist["events"])


    df1 = pd.DataFrame(flattened_data)

    return df1



# Example usage:
d =get_event_data()
d


h
<bound method Bucket.list_blobs of <Bucket: dockflow>>
['raw', 'events', '']
['raw', 'events', 'year=2025', 'month=08', 'day=14', 'events_20250814T112034Z.json']
[{'events': [{'event_id': 'Z7r9jZ1A7bg-9', 'name': 'Stanford Cardinal Womens Soccer vs. USF Dons Womens Soccer', 'segment': 'Undefined', 'genre': None, 'start_datetime': '2025-08-15T02:00:00Z', 'timezone': None, 'venue_id': 'Z7r9jZa7jH', 'venue_name': 'Laird Q. Cagan Stadium', 'venue_city': 'Stanford', 'venue_state': 'CA', 'venue_lat': 37.416199, 'venue_lon': -122.172203, 'public_sale_start': '2025-07-15T17:00:00Z', 'public_sale_end': '2025-08-15T02:00:00Z', 'fetched_at': '2025-08-14T11:20:34Z', 'source': 'ticketmaster', 'query_radius_miles': 30, 'query_lat': 37.3382, 'query_lon': -121.8863}, {'event_id': 'rZ7HnEZ1AfC7FN', 'name': 'Damian "Jr. Gong" & Stephen Marley with Special Guest J Boog', 'segment': 'Music', 'genre': 'Reggae', 'start_datetime': '2025-08-15T02:00:00Z', 'timezone': 'America/Los_Angeles', 'venue_id': 'rZ7H

Unnamed: 0,event_id,name,segment,genre,start_datetime,timezone,venue_id,venue_name,venue_city,venue_state,venue_lat,venue_lon,public_sale_start,public_sale_end,fetched_at,source,query_radius_miles,query_lat,query_lon
0,Z7r9jZ1A7bg-9,Stanford Cardinal Womens Soccer vs. USF Dons W...,Undefined,,2025-08-15T02:00:00Z,,Z7r9jZa7jH,Laird Q. Cagan Stadium,Stanford,CA,37.416199,-122.172203,2025-07-15T17:00:00Z,2025-08-15T02:00:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863
1,rZ7HnEZ1AfC7FN,"Damian ""Jr. Gong"" & Stephen Marley with Specia...",Music,Reggae,2025-08-15T02:00:00Z,America/Los_Angeles,rZ7HnEZ17Qa1d,Quarry Amphitheater,Santa Cruz,CA,36.99865,-122.05636,2025-04-30T17:00:00Z,2025-08-15T02:00:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863
2,G5vYZbfBToNOu,RÜFÜS DU SOL Inhale / Exhale World Tour 2025,Music,Dance/Electronic,2025-08-15T02:30:00Z,America/Los_Angeles,KovZpZA6ta1A,Shoreline Amphitheatre,Mountain View,CA,37.426718,-122.080722,2024-10-24T17:00:00Z,2025-08-15T04:30:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863
3,rZ7HnEZ1AfuIod,D'Lai,Arts & Theatre,Comedy,2025-08-15T03:00:00Z,America/Los_Angeles,rZ7HnEZ178E04,San Jose Improv,San Jose,CA,37.335634,-121.887954,2025-04-08T17:00:00Z,2025-08-15T03:00:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863


In [95]:
import pandas as pd

def get_event_data():
    bucket_name, start_date = "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)
    prefix = "raw/events/"

    all_events = []

    for blob in bucket.list_blobs(prefix=prefix):
        try:
            parts = blob.name.split("/")
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())

            radius = data_json.get("metadata", {}).get("radius_events")
            events = data_json.get("events", [])

            for ev in events:
                ev["radius_events"] = radius
                ev["date_stamp"] = str(blob_date)   # attach date
                all_events.append(ev)

    df = pd.DataFrame(all_events)
    return df

D =get_event_data()
D


Unnamed: 0,event_id,name,segment,genre,start_datetime,timezone,venue_id,venue_name,venue_city,venue_state,...,venue_lon,public_sale_start,public_sale_end,fetched_at,source,query_radius_miles,query_lat,query_lon,radius_events,date_stamp
0,Z7r9jZ1A7bg-9,Stanford Cardinal Womens Soccer vs. USF Dons W...,Undefined,,2025-08-15T02:00:00Z,,Z7r9jZa7jH,Laird Q. Cagan Stadium,Stanford,CA,...,-122.172203,2025-07-15T17:00:00Z,2025-08-15T02:00:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863,30,2025-08-14
1,rZ7HnEZ1AfC7FN,"Damian ""Jr. Gong"" & Stephen Marley with Specia...",Music,Reggae,2025-08-15T02:00:00Z,America/Los_Angeles,rZ7HnEZ17Qa1d,Quarry Amphitheater,Santa Cruz,CA,...,-122.05636,2025-04-30T17:00:00Z,2025-08-15T02:00:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863,30,2025-08-14
2,G5vYZbfBToNOu,RÜFÜS DU SOL Inhale / Exhale World Tour 2025,Music,Dance/Electronic,2025-08-15T02:30:00Z,America/Los_Angeles,KovZpZA6ta1A,Shoreline Amphitheatre,Mountain View,CA,...,-122.080722,2024-10-24T17:00:00Z,2025-08-15T04:30:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863,30,2025-08-14
3,rZ7HnEZ1AfuIod,D'Lai,Arts & Theatre,Comedy,2025-08-15T03:00:00Z,America/Los_Angeles,rZ7HnEZ178E04,San Jose Improv,San Jose,CA,...,-121.887954,2025-04-08T17:00:00Z,2025-08-15T03:00:00Z,2025-08-14T11:20:34Z,ticketmaster,30,37.3382,-121.8863,30,2025-08-14


In [96]:
import pandas as pd

def get_event_metadata_table():
    bucket_name, start_date = "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)
    prefix = "raw/events/"

    rows = []

    for blob in bucket.list_blobs(prefix=prefix):
        try:
            parts = blob.name.split("/")
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())
            meta = data_json.get("metadata", {})

            row = {
                "date_stamp": str(blob_date),
                "radius_events": meta.get("radius_events"),
                "number_events": meta.get("number_events")
            }
            rows.append(row)

    df = pd.DataFrame(rows)
    return df


C = get_event_metadata_table()
C

Unnamed: 0,date_stamp,radius_events,number_events
0,2025-08-14,30,4


In [97]:
import pandas as pd

def get_weather_feed(local_dir="downloads"):
    bucket_name, start_date = "dockflow", "2025-08-10"
    if isinstance(start_date, str):
        start_date = datetime.strptime(start_date, "%Y-%m-%d").date()

    client = storage.Client()
    bucket = client.bucket(bucket_name)
    prefix = "raw/weather/"

    rows = []

    for blob in bucket.list_blobs(prefix=prefix):
        try:
            parts = blob.name.split("/")
            year = int(parts[2].split("=")[1])
            month = int(parts[3].split("=")[1])
            day = int(parts[4].split("=")[1])
            blob_date = datetime(year, month, day).date()
        except Exception:
            continue

        if blob_date >= start_date:
            data_json = json.loads(blob.download_as_text())

            # Add blob timestamp
            row = data_json.copy()
            row["collected_at"] = blob.time_created.isoformat()
            rows.append(row)

    df1 = pd.DataFrame(rows)
    return df1
d =get_weather_feed()

In [98]:
d.head()

Unnamed: 0,cloud_pct,temp,feels_like,humidity,min_temp,max_temp,wind_speed,wind_degrees,sunrise,sunset,collected_at
0,100,17,17,84,16,18,3.6,60,1755177803,1755226874,2025-08-14T11:22:16.025000+00:00
1,100,17,17,84,16,18,3.09,50,1755177801,1755226871,2025-08-14T11:30:03.894000+00:00


In [22]:
ls

[[{'station_id': '46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f',
   'last_reported': 1754985922,
   'num_ebikes_available': 2,
   'legacy_id': '299',
   'num_docks_disabled': 0,
   'num_scooters_unavailable': 0,
   'eightd_has_available_keys': False,
   'num_bikes_disabled': 0,
   'is_renting': 1,
   'num_docks_available': 7,
   'num_bikes_available': 8,
   'num_scooters_available': 0,
   'is_installed': 1,
   'is_returning': 1}],
 [{'station_id': '46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f',
   'last_reported': 1754985922,
   'num_ebikes_available': 2,
   'legacy_id': '299',
   'num_docks_disabled': 0,
   'num_scooters_unavailable': 0,
   'eightd_has_available_keys': False,
   'num_bikes_disabled': 0,
   'is_renting': 1,
   'num_docks_available': 7,
   'num_bikes_available': 8,
   'num_scooters_available': 0,
   'is_installed': 1,
   'is_returning': 1}]]

In [27]:
df = pd.DataFrame([[ls]])

In [28]:
df

Unnamed: 0,0
0,[[{'station_id': '46b4ef45-b06b-40eb-9fdf-9bc8...


In [36]:
import pandas as pd



data = d

# Method 1: Flatten the nested structure first, then create DataFrame
# This assumes each inner list contains dictionaries you want as separate rows
flattened_data = []
for sublist in data:
    flattened_data.extend(sublist)

df1 = pd.DataFrame(flattened_data)
print("Method 1 - Flattened:")
print(df1)
print(f"Shape: {df1.shape}")
print("\n" + "="*50 + "\n")

# Method 2: If you want to preserve the grouping, add a group identifier
flattened_with_groups = []
for group_idx, sublist in enumerate(data):
    for item in sublist:
        item_with_group = item.copy()
        item_with_group['group_id'] = group_idx
        flattened_with_groups.append(item_with_group)

df2 = pd.DataFrame(flattened_with_groups)
print("Method 2 - With group identifier:")
print(df2[['group_id', 'station_id', 'num_bikes_available', 'num_docks_available']])
print(f"Shape: {df2.shape}")
print("\n" + "="*50 + "\n")

# Method 3: Using pd.concat if you want more control
dfs = []
for i, sublist in enumerate(data):
    temp_df = pd.DataFrame(sublist)
    temp_df['source_list'] = i  # Add identifier for which list it came from
    dfs.append(temp_df)

df3 = pd.concat(dfs, ignore_index=True)
print("Method 3 - Using concat:")
print(df3[['source_list', 'station_id', 'num_bikes_available', 'num_docks_available']])
print(f"Shape: {df3.shape}")

# Display full column info
print("\n" + "="*50 + "\n")
print("All columns in the DataFrame:")
print(df1.columns.tolist())
print(f"\nData types:\n{df1.dtypes}")

Method 1 - Flattened:
                               station_id  last_reported  \
0    46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f     1754985922   
1    46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f     1754985922   
2    46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f     1754985922   
3    46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f     1754985922   
4    46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f     1754985922   
..                                    ...            ...   
161  94c877d2-f064-4373-b372-3f8997a07f30     1755168355   
162  7c086aa7-c33f-47e5-ace9-74c40807c409     1755168359   
163  b6d76c7a-2082-430b-8403-da5db00becaf     1755168354   
164  ccf416f8-ccc0-4f82-a659-e5d7851d2bcb     1755168360   
165  6c176d42-fea5-4df1-80eb-66a99cc73a21     1755168365   

     num_ebikes_available legacy_id  num_docks_disabled  \
0                       2       299                   0   
1                       2       299                   0   
2                       2       299                   0   
3                    

In [39]:
df2.shape

(166, 15)

In [40]:
df1

Unnamed: 0,station_id,last_reported,num_ebikes_available,legacy_id,num_docks_disabled,num_scooters_unavailable,eightd_has_available_keys,num_bikes_disabled,is_renting,num_docks_available,num_bikes_available,num_scooters_available,is_installed,is_returning
0,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
1,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
2,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
3,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
4,46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f,1754985922,2,299,0,0,False,0,1,7,8,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,94c877d2-f064-4373-b372-3f8997a07f30,1755168355,17,423,0,0,False,0,1,2,29,0,1,1
162,7c086aa7-c33f-47e5-ace9-74c40807c409,1755168359,6,394,0,0,False,1,1,0,22,0,1,1
163,b6d76c7a-2082-430b-8403-da5db00becaf,1755168354,1,470,0,0,False,0,1,4,19,0,1,1
164,ccf416f8-ccc0-4f82-a659-e5d7851d2bcb,1755168360,2,357,0,0,False,0,1,19,4,0,1,1


In [41]:
data = d

# Method 1: Flatten the nested structure first, then create DataFrame
# This assumes each inner list contains dictionaries you want as separate rows
flattened_data = []
for sublist in data:
    flattened_data.extend(sublist)

df1 = pd.DataFrame(flattened_data)
print(flattened_data)
print("Method 1 - Flattened:")
print(df1)
print(f"Shape: {df1.shape}")
print("\n" + "="*50 + "\n")

[{'station_id': '46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f', 'last_reported': 1754985922, 'num_ebikes_available': 2, 'legacy_id': '299', 'num_docks_disabled': 0, 'num_scooters_unavailable': 0, 'eightd_has_available_keys': False, 'num_bikes_disabled': 0, 'is_renting': 1, 'num_docks_available': 7, 'num_bikes_available': 8, 'num_scooters_available': 0, 'is_installed': 1, 'is_returning': 1}, {'station_id': '46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f', 'last_reported': 1754985922, 'num_ebikes_available': 2, 'legacy_id': '299', 'num_docks_disabled': 0, 'num_scooters_unavailable': 0, 'eightd_has_available_keys': False, 'num_bikes_disabled': 0, 'is_renting': 1, 'num_docks_available': 7, 'num_bikes_available': 8, 'num_scooters_available': 0, 'is_installed': 1, 'is_returning': 1}, {'station_id': '46b4ef45-b06b-40eb-9fdf-9bc8ff104a4f', 'last_reported': 1754985922, 'num_ebikes_available': 2, 'legacy_id': '299', 'num_docks_disabled': 0, 'num_scooters_unavailable': 0, 'eightd_has_available_keys': False, 'num_b

In [43]:
len(flattened_data)

166