In [3]:
import json
import pandas as pd
from pathlib import Path
from datetime import datetime

# --------------------------------------
# 1. Paths
# --------------------------------------
# Input (Bronze) and output (Silver) directories, given relative to the
# current working directory.
bronze_path, silver_path = Path("../data/bronze"), Path("../data/silver")

# Create the Silver directory (and any missing parents); no error if it
# already exists.
silver_path.mkdir(exist_ok=True, parents=True)

# --------------------------------------
# 2. Find the newest Bronze JSON file
# --------------------------------------
# Collect every raw_*.json under the Bronze directory and order the paths
# ascending, so the last entry is the one that sorts highest by name.
bronze_files = list(bronze_path.glob("raw_*.json"))
bronze_files.sort()

if len(bronze_files) == 0:
    raise FileNotFoundError("No Bronze files found. Run 01_fetch_initial_data.ipynb first.")

latest_file = bronze_files[-1]
print(f"Using Bronze file: {latest_file.name}")

# --------------------------------------
# 3. Load JSON
# --------------------------------------
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's default locale encoding (e.g. a Windows code page).
with open(latest_file, "r", encoding="utf-8") as f:
    raw = json.load(f)

# Fail with a message that names the offending file when the payload has no
# hourly section, instead of a bare KeyError('hourly').
if "hourly" not in raw:
    raise KeyError(f"'hourly' section not found in {latest_file.name}")

hourly = raw["hourly"]

# Pull metadata (each value is None when its key is absent from the payload)
lat = raw.get("latitude")
lon = raw.get("longitude")
tz = raw.get("timezone")

# --------------------------------------
# 4. Build Hourly DataFrame
# --------------------------------------
# Build the hourly frame in one chain:
#   1. one column per key of the `hourly` mapping,
#   2. parse the "time" column into real datetimes,
#   3. append lineage/metadata columns (the same value on every row).
df = (
    pd.DataFrame(hourly)
    .assign(time=lambda frame: pd.to_datetime(frame["time"]))
    .assign(
        ingestion_ts=datetime.now(),
        source_file=latest_file.name,
        latitude=lat,
        longitude=lon,
        timezone=tz,
    )
)

# Preview the first rows as the cell output.
df.head()

Using Bronze file: raw_20251202_135323.json


Unnamed: 0,time,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation_probability,precipitation,cloud_cover,surface_pressure,wind_speed_10m,wind_gusts_10m,wind_direction_10m,ingestion_ts,source_file,latitude,longitude,timezone
0,2025-12-01 00:00:00,6.5,93,5.4,0,0.0,82,988.1,2.0,3.0,267,2025-12-03 08:34:20.391182,raw_20251202_135323.json,34.174866,-83.56782,America/New_York
1,2025-12-01 01:00:00,5.4,89,3.8,0,0.0,100,987.8,2.4,2.9,272,2025-12-03 08:34:20.391182,raw_20251202_135323.json,34.174866,-83.56782,America/New_York
2,2025-12-01 02:00:00,5.3,93,4.3,0,0.0,100,988.1,1.6,3.4,274,2025-12-03 08:34:20.391182,raw_20251202_135323.json,34.174866,-83.56782,America/New_York
3,2025-12-01 03:00:00,4.5,91,3.2,0,0.0,48,988.5,1.84,3.9,299,2025-12-03 08:34:20.391182,raw_20251202_135323.json,34.174866,-83.56782,America/New_York
4,2025-12-01 04:00:00,3.6,87,1.7,0,0.0,97,988.4,1.25,3.5,299,2025-12-03 08:34:20.391182,raw_20251202_135323.json,34.174866,-83.56782,America/New_York


In [4]:
# Name the output after the moment this cell runs (second resolution).
timestamp = format(datetime.now(), "%Y%m%d_%H%M%S")
silver_file = silver_path.joinpath(f"hourly_{timestamp}.parquet")

# Persist the frame without writing the row index as a column.
df.to_parquet(path=silver_file, index=False)

# Show where the file was written as the cell output.
silver_file


WindowsPath('../data/silver/hourly_20251203_083755.parquet')