In [0]:
%pip install knmi-py

In [0]:
import knmi
from datetime import datetime, timedelta
from pyspark.sql.types import DoubleType

# Calculate date range: from LOOKBACK_WEEKS weeks ago (start of that day) until now
LOOKBACK_WEEKS = 40

end_date = datetime.now()
# Snap the start to midnight *before* building the API string, so the window
# that is requested is the same window that is logged below.
start_date = (end_date - timedelta(weeks=LOOKBACK_WEEKS)).replace(
    hour=0, minute=0, second=0, microsecond=0
)

# Format dates for API (YYYYMMDDHH)
# NOTE(review): KNMI documents hourly slots as 01-24 (slot 01 covers
# 00:00-01:00), so the first slot of the start day is requested explicitly
# instead of formatting midnight as "00" - confirm against the KNMI docs.
start_str = start_date.strftime("%Y%m%d") + "01"
end_str = end_date.strftime("%Y%m%d%H")

print(f"Fetching data from {start_date} to {end_date}")

# Request hourly observations for station 240 over the prepared window.
hourly_df = knmi.get_hour_data_dataframe(stations=[240], start=start_str, end=end_str)
print(f"Fetched {len(hourly_df)} rows from API")

# Move the index into an ordinary column
# (presumably the observation timestamp - verify against the knmi-py output).
hourly_df = hourly_df.reset_index()

# Record when the API was queried and stamp every row with that moment.
api_query_timestamp = datetime.now()
hourly_df = hourly_df.assign(api_query_timestamp=api_query_timestamp)
print(f"API query timestamp: {api_query_timestamp}")

# Hand the pandas frame over to Spark.
spark_df = spark.createDataFrame(hourly_df)

# Cast these columns to double so they match the existing table schema.
for column_name in ("T10N", "N", "WW", "Y"):
    spark_df = spark_df.withColumn(
        column_name, spark_df[column_name].cast(DoubleType())
    )

# Insert data into Delta table.
# Append mode adds the fetched rows on top of whatever the table already holds.
TARGET_TABLE = "data.bronze.weather_data_bronze"
spark_df.write.format("delta").mode("append").saveAsTable(TARGET_TABLE)

# The check mark was stored as mis-decoded UTF-8 ("âœ…"); emit the real character.
print(f"✅ Successfully inserted {len(hourly_df)} rows into {TARGET_TABLE}")