In [0]:
from pyspark.sql.functions import col, unix_timestamp
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression

# 1. Load the historical data from the Silver table.
df_silver = spark.read.table("weather_silver_cleaned")

# 2. Convert the event timestamp to a number (Unix epoch seconds) so it can be
#    used as the single numeric regressor.
df_ml = df_silver.withColumn("timestamp_num", unix_timestamp(col("event_hub_time")))

# 3. Create a 'features' vector column (Spark ML estimators require one).
#    VectorAssembler's default handleInvalid="error" raises on a null input, and
#    unix_timestamp() yields null for a value it cannot convert, so rows missing
#    either the feature or the label are dropped before assembling. When no such
#    rows exist the resulting training frame is unchanged.
assembler = VectorAssembler(inputCols=["timestamp_num"], outputCol="features")
df_ml_complete = df_ml.dropna(subset=["timestamp_num", "temp_celsius"])
df_final = assembler.transform(df_ml_complete).select("features", col("temp_celsius").alias("label"))

In [0]:
# Configure the linear-regression estimator: it reads the assembled "features"
# vector and regresses it against the "label" column.
lr = LinearRegression(
    labelCol="label",
    featuresCol="features",
)

# Training step: fitting the estimator on the prepared frame produces the model.
model = lr.fit(df_final)

# Report the fitted line; index 0 is the coefficient of the first feature.
print(f"Model Trained. Intercept: {model.intercept}, Coefficient: {model.coefficients[0]}")

In [0]:
import datetime

# 1. Get the timestamp for exactly 24 hours from now.
#    NOTE(review): now() is naive and uses the driver's local timezone, so the
#    date printed below is expressed in that zone - confirm that it matches
#    London time if the distinction matters.
tomorrow_date = datetime.datetime.now() + datetime.timedelta(days=1)
tomorrow_unix = tomorrow_date.timestamp()  # float seconds since the Unix epoch

# 2. Create a one-row DataFrame for this future point and assemble it with the
#    same VectorAssembler that built the training features.
future_df = spark.createDataFrame([(tomorrow_unix,)], ["timestamp_num"])
future_vec = assembler.transform(future_df)

# 3. Predict! The input holds exactly one row, so first() returns its only
#    prediction without collecting a list.
prediction = model.transform(future_vec)
predicted_temp = prediction.select("prediction").first()[0]

# The degree sign is written as a Unicode escape so the message prints "°C"
# regardless of how the notebook file itself is encoded or decoded.
print(f"The predicted temperature for London tomorrow ({tomorrow_date.strftime('%Y-%m-%d')}) is: {predicted_temp:.2f}\u00b0C")