In [2]:
import pandas as pd
import numpy as np
import joblib
from datetime import date, timedelta
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, r2_score

# 1. Last inn data
df = pd.read_csv("../data/processed/historisk_vaer_alle_kommuner_geo.csv")
df["time"] = pd.to_datetime(df["time"])
df["kommune"] = df["kommune"].str.lower()
df = df.sort_values(by=["kommune", "time"])

# 2. Fyll inn manglende verdier
for col in ["mean(air_temperature P1D)", "sum(precipitation_amount P1D)", "mean(wind_speed P1D)"]:
    df[col] = df.groupby("kommune")[col].transform(lambda x: x.interpolate(limit_direction="both"))
df.fillna(method="ffill", inplace=True)
df.fillna(method="bfill", inplace=True)

# 3. Lag features
def create_lag_features(df, n_lags=7):
    for lag in range(1, n_lags + 1):
        df[f"temp_lag_{lag}"] = df["mean(air_temperature P1D)"].shift(lag)
        df[f"nedbor_lag_{lag}"] = df["sum(precipitation_amount P1D)"].shift(lag)
        df[f"vind_lag_{lag}"] = df["mean(wind_speed P1D)"].shift(lag)
    return df

# 4. Bygg treningssett
dfs = []
for kommune, group in df.groupby("kommune"):
    if len(group) >= 8:
        group = group.copy()
        group = create_lag_features(group)
        group["month"] = group["time"].dt.month
        group["dayofyear"] = group["time"].dt.dayofyear
        group["target_temp_t+1"] = group["mean(air_temperature P1D)"].shift(-1)
        group = group.dropna()
        dfs.append(group)

df_all_features = pd.concat(dfs)

# 5. Encode kommune
le = LabelEncoder()
df_all_features["kommune_encoded"] = le.fit_transform(df_all_features["kommune"])

# 6. Tren modellen
feature_cols = [col for col in df_all_features.columns if "lag" in col] + \
               ["month", "dayofyear", "latitude", "longitude", "kommune_encoded"]
X = df_all_features[feature_cols]
y = df_all_features["target_temp_t+1"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

model = RandomForestRegressor(n_estimators=300, max_depth=15, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)
joblib.dump(X_train.columns.tolist(), "model_features_order.pkl")
# 7. Lagre modell og encoder
joblib.dump(model, "random_forest_weather_model.pkl")
joblib.dump(le, "label_encoder.pkl")

# 8. Testprediksjon for OSLO
kommune_navn = "oslo"
df_oslo = df[df["kommune"] == kommune_navn].sort_values("time").tail(7)

input_row = {}
for i in range(7):
    input_row[f"temp_lag_{i+1}"] = df_oslo["mean(air_temperature P1D)"].values[-(i+1)]
    input_row[f"nedbor_lag_{i+1}"] = df_oslo["sum(precipitation_amount P1D)"].values[-(i+1)]
    input_row[f"vind_lag_{i+1}"] = df_oslo["mean(wind_speed P1D)"].values[-(i+1)]

last = df_oslo.iloc[-1]
input_row["month"] = last["time"].month
input_row["dayofyear"] = last["time"].dayofyear
input_row["latitude"] = last["latitude"]
input_row["longitude"] = last["longitude"]
input_row["kommune_encoded"] = le.transform([kommune_navn])[0]

input_df = pd.DataFrame([input_row])
predictions = []

for _ in range(7):
    pred = model.predict(input_df[feature_cols])[0]
    predictions.append(pred)
    for j in reversed(range(1, 7)):
        input_df[f"temp_lag_{j+1}"] = input_df[f"temp_lag_{j}"]
        input_df[f"nedbor_lag_{j+1}"] = input_df[f"nedbor_lag_{j}"]
        input_df[f"vind_lag_{j+1}"] = input_df[f"vind_lag_{j}"]
    input_df["temp_lag_1"] = pred

# 9. Skriv resultat
today = date.today()
print(f"📍 7-dagers temperatur for {kommune_navn.upper()}:")
for i, temp in enumerate(predictions, 1):
    dato = today + timedelta(days=i)
    print(f"  {dato.strftime('%A %d. %b')}: {round(temp, 2)} °C")


  df.fillna(method="ffill", inplace=True)
  df.fillna(method="bfill", inplace=True)


📍 7-dagers temperatur for OSLO:
  Sunday 08. Jun: 1.25 °C
  Monday 09. Jun: 1.82 °C
  Tuesday 10. Jun: 2.44 °C
  Wednesday 11. Jun: 2.6 °C
  Thursday 12. Jun: 3.0 °C
  Friday 13. Jun: 3.03 °C
  Saturday 14. Jun: 3.0 °C
