**_Purpose: Extract from API → add timestamp → store raw JSON/hourly data as Delta._**

In [0]:
import requests
from datetime import datetime

url = "https://api.open-meteo.com/v1/forecast?latitude=38.8951&longitude=-77.0364&hourly=temperature_2m,relative_humidity_2m,precipitation&timezone=America/New_York"

response = requests.get(url)
data = response.json()

# Add extraction timestamp
data['extracted_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

print(data.keys())  # Should show 'latitude', 'longitude', 'hourly', etc.


dict_keys(['latitude', 'longitude', 'generationtime_ms', 'utc_offset_seconds', 'timezone', 'timezone_abbreviation', 'elevation', 'hourly_units', 'hourly', 'extracted_at'])


In [0]:
import pandas as pd
hourly_data = data['hourly']
df = pd.DataFrame(hourly_data)
df['extracted_at'] = data['extracted_at']
df.head()

Unnamed: 0,time,temperature_2m,relative_humidity_2m,precipitation,extracted_at
0,2025-09-22T00:00,19.5,73,0.0,2025-09-22 19:55:31
1,2025-09-22T01:00,19.2,76,0.0,2025-09-22 19:55:31
2,2025-09-22T02:00,18.5,80,0.0,2025-09-22 19:55:31
3,2025-09-22T03:00,18.3,78,0.0,2025-09-22 19:55:31
4,2025-09-22T04:00,16.8,84,0.0,2025-09-22 19:55:31


**Step 3: Convert Pandas DataFrame to Spark DataFrame**

In [0]:
# Convert pandas DF to Spark DF
bronze_spark_df = spark.createDataFrame(df)

In [0]:
# Add a unique ID column to prevent duplicates
from pyspark.sql.functions import monotonically_increasing_id
bronze_spark_df = bronze_spark_df.withColumn("id", monotonically_increasing_id())


In [0]:
# Save Spark DataFrame as Delta table

# Save Spark DataFrame as Delta table with schema merge
bronze_spark_df.write.format("delta") \
    .mode("append") \
    .option("mergeSchema", "true") \
    .saveAsTable("weather_bronze")

# Quick check
spark.sql("SELECT * FROM weather_bronze LIMIT 5").show()


+----------------+--------------+--------------------+-------------+-------------------+----+
|            time|temperature_2m|relative_humidity_2m|precipitation|       extracted_at|  id|
+----------------+--------------+--------------------+-------------+-------------------+----+
|2025-09-22T00:00|          19.5|                  73|          0.0|2025-09-22 19:39:01|NULL|
|2025-09-22T01:00|          19.2|                  76|          0.0|2025-09-22 19:39:01|NULL|
|2025-09-22T02:00|          18.5|                  80|          0.0|2025-09-22 19:39:01|NULL|
|2025-09-22T03:00|          18.3|                  78|          0.0|2025-09-22 19:39:01|NULL|
|2025-09-22T04:00|          16.8|                  84|          0.0|2025-09-22 19:39:01|NULL|
+----------------+--------------+--------------------+-------------+-------------------+----+

