# In [0]:
from pyspark.sql.functions import col, unix_timestamp, hour, minute, current_timestamp
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
import datetime

# 1. LOAD: read the Silver table, keeping only rows whose 'event_hub_time'
# falls within the trailing hour (measured from the driver's clock).
one_hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)

recent_window = col("event_hub_time") >= one_hour_ago
df_silver = spark.read.table("weather_silver_cleaned").where(recent_window)

# Guard: stop the notebook early when fewer than 5 rows are available,
# since that is too little data to fit a model on.
row_count = df_silver.count()
if row_count < 5:
    dbutils.notebook.exit("Not enough data to train yet. Waiting for more API calls...")

# 2. FEATURES: derive hour, minute and epoch seconds from the event time.
event_time = col("event_hub_time")
df_ml = (
    df_silver
    .withColumn("h", hour(event_time))
    .withColumn("m", minute(event_time))
    .withColumn("ts", unix_timestamp(event_time))
)

# Pack the three numeric columns into a single "features" vector column and
# expose the measured temperature as the regression label.
assembler = VectorAssembler(inputCols=["ts", "h", "m"], outputCol="features")
assembled = assembler.transform(df_ml)
df_training = assembled.select("features", col("temp_celsius").alias("label"))

# 3. TRAIN: fit a linear regression on the assembled training rows.
lr = LinearRegression(featuresCol="features", labelCol="label")
model = lr.fit(df_training)

# 4. PREDICT: score the model at the instant exactly 1 hour from now.
next_hour_ts = datetime.datetime.now() + datetime.timedelta(hours=1)

# Build the future row from the timestamp itself and derive ts/h/m with the
# same Spark functions that produced the training features. Computing them in
# Python (datetime.hour / datetime.minute) uses the driver's local timezone,
# while Spark's hour()/minute() use spark.sql.session.timeZone; if the two
# differ, the model would be scored on features that do not match the ones it
# was trained on. unix_timestamp() also yields whole seconds, as in training.
future_event_time = col("event_hub_time")
future_data = (
    spark.createDataFrame([(next_hour_ts,)], ["event_hub_time"])
    .withColumn("ts", unix_timestamp(future_event_time))
    .withColumn("h", hour(future_event_time))
    .withColumn("m", minute(future_event_time))
)
future_features = assembler.transform(future_data)

# Run prediction; the frame holds exactly one row, so read the scalar off it.
prediction_df = model.transform(future_features)
predicted_val = prediction_df.select("prediction").first()[0]

# 5. SAVE: append the forecast to the history table so its accuracy can be
# checked later. Each row records the target time, the predicted temperature,
# and the moment the prediction was computed.
final_row = spark.createDataFrame(
    [(next_hour_ts, predicted_val, datetime.datetime.now())],
    ["predicted_for_time", "predicted_temp", "calculated_at"],
)

final_row.write.format("delta").mode("append").saveAsTable("weather_predictions_history")

# The degree sign was previously emitted as the mojibake sequence "Â°"
# (UTF-8 bytes decoded as Latin-1); print the intended character.
print(f"Prediction for {next_hour_ts} is {predicted_val:.2f}°C. Logged to history.")