In [0]:
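# DESTRUCTIVE, intentionally left commented out: running the line below
# recursively deletes everything under dbfs:/bronze/weather/.
# dbutils.fs.rm("dbfs:/bronze/weather/", recurse=True)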

In [0]:
import datetime as dt
import requests
from pyspark.sql import functions as F
from pyspark.sql.types import *

# Root location of the Bronze weather data. The ingestion date is not part of
# the path; it is stored as a column of the data instead.
BRONZE_WEATHER_PATH = "dbfs:/bronze/weather/"

# Date of this run as an ISO-8601 string (YYYY-MM-DD), taken from the local
# clock of the machine executing the notebook.
run_date = dt.date.today()
INGESTION_DATE = run_date.isoformat()


In [0]:
# Forecast location: NWS forecast office code plus the X/Y cell of its grid.
# Change these three values to ingest a different gridpoint.
NWS_OFFICE = "SEW"
NWS_GRID_X = 123
NWS_GRID_Y = 67

# Hourly-forecast endpoint of api.weather.gov for the gridpoint above.
URL = (
    f"https://api.weather.gov/gridpoints/{NWS_OFFICE}/{NWS_GRID_X},{NWS_GRID_Y}"
    "/forecast/hourly"
)


In [0]:
# Identification string sent with every request (application name + contact).
USER_AGENT = "Elham Weather Data Ingest - elham.afruzi@gmail.com"

# HTTP headers attached to the forecast request.
headers = {"User-Agent": USER_AGENT}


In [0]:
# (connect, read) timeouts in seconds. requests applies no timeout by default,
# so without this a stalled connection would block the notebook indefinitely.
REQUEST_TIMEOUT = (5, 30)

# Fetch the forecast. The HTTP status is deliberately not checked here so the
# raw response can still be inspected when the call returns an error.
response = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)


In [0]:
# Basic diagnostics about the HTTP response, printed before it is parsed.
# The body is cut to its first 500 characters to keep the output short.
diagnostics = (
    ("Status Code:", response.status_code),
    ("Response Headers:", response.headers),
    ("Raw Response:", response.text[:500]),
)
for label, value in diagnostics:
    print(label, value)


In [0]:
# Fail fast on an HTTP error status (4xx/5xx): raise_for_status() raises
# requests.HTTPError, so an error payload is never parsed and passed on as if
# it were forecast data.
response.raise_for_status()

# Decode the JSON body into Python objects.
data = response.json()

In [0]:
# Pull the forecast periods out of the payload. `or` covers both a missing key
# and an explicit JSON null, so the summary below can always be printed.
props = data.get("properties") or {}
periods = props.get("periods") or []

print(f"Updated at: {props.get('updated', 'N/A')}")
print(f"Hours returned: {len(periods)}")

# Stop here when nothing came back: building a Spark DataFrame from an empty
# list without an explicit schema fails later with a far less obvious
# schema-inference error.
if not periods:
    raise ValueError("No forecast periods found in the API response; nothing to ingest.")


In [0]:
# Columns added on top of the raw period fields, applied in this order:
#   forecast_retrieved_at - timestamp at which this DataFrame is evaluated
#   forecast_time         - `startTime` parsed into a timestamp
#   ingestion_date        - constant run date, later used as the partition column
derived_columns = {
    "forecast_retrieved_at": F.current_timestamp(),
    "forecast_time": F.to_timestamp("startTime"),
    "ingestion_date": F.lit(INGESTION_DATE),
}

# Build the DataFrame from the list of period records.
# NOTE(review): the schema is inferred from the records themselves; confirm
# that every field comes through with the intended type, or pass an explicit schema.
df_weather = spark.createDataFrame(periods)
for column_name, column_expr in derived_columns.items():
    df_weather = df_weather.withColumn(column_name, column_expr)

# Preview a handful of rows, restricted to the most informative columns.
preview_columns = ["forecast_time", "temperature", "shortForecast", "ingestion_date"]
df_weather.select(*preview_columns).show(5, truncate=False)


In [0]:
# Append this run's rows to the Bronze location in Delta format, partitioned by
# the ingestion_date column. Existing data at the path is kept.
bronze_writer = (
    df_weather.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_date")
)
bronze_writer.save(BRONZE_WEATHER_PATH)

print("✓ Hourly forecast saved to Bronze (partitioned)")
