# Notebook 4 – Predictions and visualization

Purpose:
- Load the latest trained model.
- Fetch weather forecast for at least the next day and build future features.
- Generate price predictions for the forecast horizon.
- Optionally write predictions to a `electricity_prices_predictions` feature group.
- Produce plots (hourly price forecast, forecast vs actual) and save them under `plots/` for the dashboard.

Notes:
- Document where plots are stored and naming conventions.
- Mention any steps needed when the model version changes.
- Indicate how often this notebook is expected to run.


In [40]:
from pathlib import Path
import os
import sys
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import json
import warnings
import holidays
warnings.filterwarnings("ignore")

from dotenv import load_dotenv
import hopsworks

# 1. Find project root (one level up from notebooks/)
root_dir = Path("..").resolve()

# 2. Add project root to PYTHONPATH so we can import the src package
if str(root_dir) not in sys.path:
    sys.path.append(str(root_dir))

# 3. Load .env from project root
env_path = root_dir / ".env"
load_dotenv(env_path)

# 4. Load settings and utility functions (after adjusting PYTHONPATH)
from src.config import ElectricitySettings
from src import util

settings = ElectricitySettings()

# 5. Log in to Hopsworks and get feature store
project = hopsworks.login(engine="python")
fs = project.get_feature_store()


print("Successfully logged in to Hopsworks project:", settings.HOPSWORKS_PROJECT)


ElectricitySettings initialized
2025-12-22 21:00:38,474 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-12-22 21:00:38,475 INFO: Initializing external client
2025-12-22 21:00:38,476 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443






2025-12-22 21:00:39,318 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/127
Successfully logged in to Hopsworks project: ScalableProject


In [41]:
today = datetime.datetime.now() - datetime.timedelta(0)
yesterday = today - datetime.timedelta(days = 1)


In [42]:
secrets = hopsworks.get_secrets_api()
area = secrets.get_secret("ELECTRICITY_LOCATION_JSON").value
area = json.loads(area)
PRICE_AREA = area['price_area']
CITY = area['city']
LATITUDE = area['latitude']
LONGITUDE = area['longitude']

In [43]:
mr = project.get_model_registry()

# Load exact per-sensor model for this slug (no fallback)
model_name = f"electricity_prices_xgboost_model_lags_{PRICE_AREA.lower()}"
retrieved_model = mr.get_model(name=model_name)
if retrieved_model is None:
    raise RuntimeError(f"Model '{model_name}' not found in registry")

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()

Downloading: 0.000%|          | 0/5335463 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 1 files)... 

Downloading: 0.000%|          | 0/115919 elapsed<00:00 remaining<?

Downloading model artifact (0 dirs, 2 files)... 

Downloading: 0.000%|          | 0/115919 elapsed<00:00 remaining<?

Downloading model artifact (1 dirs, 3 files)... DONE

In [44]:
# Loading the XGBoost regressor model and label encoder from the saved model directory
# retrieved_xgboost_model = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
retrieved_xgboost_model = XGBRegressor()

retrieved_xgboost_model.load_model(saved_model_dir + "/model.json")

# Displaying the retrieved XGBoost regressor model
retrieved_xgboost_model

In [45]:
# --- Fetch historical prices (last 4 days) to build lag features ---
electricity_prices_fg = fs.get_feature_group('electricity_prices', version=1)
lookback_start = (pd.Timestamp.utcnow() - pd.Timedelta(days=4)).normalize()

hist_prices = electricity_prices_fg.filter(
    (electricity_prices_fg.price_area == PRICE_AREA.lower()) &
    (electricity_prices_fg.date >= lookback_start)
).read()

hist_prices['date'] = pd.to_datetime(hist_prices['date'], utc=True)
hist_prices = hist_prices.sort_values('unix_time')[['price_area','date','hour','unix_time','price_sek']]

# --- Fetch weather forecast for kommande dagar ---
forecast_days = 2  
forecast_df = util.get_hourly_weather_forecast(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    city=PRICE_AREA.lower(),
    forecast_days=forecast_days,
)
forecast_df['date'] = pd.to_datetime(forecast_df['timestamp'], utc=True)
forecast_df['unix_time'] = forecast_df['date'].astype('int64') // 10**6
forecast_df['price_area'] = PRICE_AREA.lower()
forecast_df['price_area'] = forecast_df['price_area'].astype('string')
if 'city' in forecast_df.columns:
    forecast_df = forecast_df.drop(columns=['city'])
forecast_df = forecast_df.drop(columns=['timestamp'])

# Behåll bara imorgon
forecast_day = (pd.Timestamp.utcnow().normalize() + pd.Timedelta(days=1)).date()
forecast_df = forecast_df[forecast_df['date'].dt.date == forecast_day].copy()

# Kalender/helg/season/holiday
forecast_df['weekday'] = forecast_df['date'].dt.weekday.astype('int8')
forecast_df['is_weekend'] = forecast_df['weekday'].isin([5, 6]).astype('int8')
forecast_df['month'] = forecast_df['date'].dt.month.astype('int8')
season_map = {12: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 3, 10: 3, 11: 3}
forecast_df['season'] = forecast_df['month'].map(season_map).astype('int8')
try:
    years = range(forecast_df['date'].dt.year.min(), forecast_df['date'].dt.year.max() + 1)
    se_holidays = holidays.Sweden(years=years)
    forecast_df['is_holiday'] = forecast_df['date'].dt.date.isin(se_holidays).astype('int8')
except Exception:
    forecast_df['is_holiday'] = 0

# --- Bygg lag features genom att kombinera historik + forecast-platshållare ---
forecast_prices = forecast_df[['price_area','date','hour','unix_time']].copy()
forecast_prices['price_sek'] = np.nan

lag_base = pd.concat([
    hist_prices[['price_area','date','hour','unix_time','price_sek']],
    forecast_prices
], ignore_index=True).sort_values('unix_time')

for lag in [24, 48, 72]:
    lag_base[f'price_lag_{lag}'] = lag_base.groupby('price_area')['price_sek'].shift(lag).astype('float32')

lag_base['price_roll3d'] = (
    lag_base.groupby('price_area')['price_sek']
            .rolling(72, min_periods=1)
            .mean()
            .reset_index(level=0, drop=True)
            .astype('float32')
)

# Plocka ut lag-features för forecast-rader
lags_forecast = lag_base[lag_base['price_sek'].isna()][['unix_time','price_lag_24','price_lag_48','price_lag_72','price_roll3d']]
forecast_df = forecast_df.merge(lags_forecast, on='unix_time', how='left')

# --- Förbered feature-matris för inferens ---
# Matcha träningsnamn: price-features är prefixade av Hopsworks (electricity_prices_*)
forecast_df = forecast_df.copy()
forecast_df['electricity_prices_unix_time'] = forecast_df['unix_time']
forecast_df['electricity_prices_weekday'] = forecast_df['weekday']
forecast_df['electricity_prices_is_weekend'] = forecast_df['is_weekend']
forecast_df['electricity_prices_month'] = forecast_df['month']
forecast_df['electricity_prices_season'] = forecast_df['season']
forecast_df['electricity_prices_is_holiday'] = forecast_df['is_holiday']
forecast_df['electricity_prices_price_lag_24'] = forecast_df['price_lag_24']
forecast_df['electricity_prices_price_lag_48'] = forecast_df['price_lag_48']
forecast_df['electricity_prices_price_lag_72'] = forecast_df['price_lag_72']
forecast_df['electricity_prices_price_roll3d'] = forecast_df['price_roll3d']

feature_cols = [
    "price_area",
    "unix_time",
    "date",
    "hour",
    "temperature_2m", "apparent_temperature",
    "precipitation", "rain", "snowfall",
    "cloud_cover",
    "wind_speed_10m", "wind_speed_100m",
    "wind_direction_10m", "wind_direction_100m",
    "wind_gusts_10m",
    "surface_pressure",
    # prefixed price cols expected by model
    "electricity_prices_unix_time",
    "electricity_prices_weekday",
    "electricity_prices_is_weekend",
    "electricity_prices_month",
    "electricity_prices_season",
    "electricity_prices_is_holiday",
    "electricity_prices_price_lag_24",
    "electricity_prices_price_lag_48",
    "electricity_prices_price_lag_72",
    "electricity_prices_price_roll3d",
]
forecast_df = forecast_df[feature_cols]

cat_cols = [c for c in forecast_df.columns if 'price_area' in c]
X_pred = forecast_df.drop(columns=['date'] + cat_cols)

# --- Prediktera ---
predictions = retrieved_xgboost_model.predict(X_pred)
forecast_df['predicted_price_sek'] = predictions.astype('float32')

# Visa resultat för imorgon
print(forecast_df[['date','hour','predicted_price_sek']].sort_values(['date','hour']).head(24))


ValueError: Reading data with Hive is not supported when using hopsworks client version >= 4.0

In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

# --- INSTÄLLNINGAR ---
sns.set_style("whitegrid")
plt.rcParams.update({'font.size': 12})

# Sökvägar
current_folder = os.getcwd()
project_root = os.path.dirname(current_folder)
if os.path.basename(current_folder) != "NotebooksElectricity":
    project_root = current_folder
img_path = os.path.join(project_root, "docs/PricesDashboard/assets/img")
os.makedirs(img_path, exist_ok=True)

# Hitta kolumnnamn för prediction i forecast_df
price_col = 'predicted_price_sek'
if 'prediction' in forecast_df.columns: price_col = 'prediction'

print(f"Genererar grafer till: {img_path}")


In [None]:
# ==========================================
# GRAF 1: LADDA-GUIDEN
# ==========================================
mean_price = forecast_df[price_col].mean()
forecast_df['color'] = forecast_df[price_col].apply(lambda x: '#22c55e' if x < mean_price else '#ef4444')

plt.figure(figsize=(14, 7))
plt.bar(forecast_df['date'], forecast_df[price_col], color=forecast_df['color'], alpha=0.9, width=0.04)
plt.axhline(y=mean_price, color='gray', linestyle='--', alpha=0.5, label=f'Snittpris ({mean_price:.2f} kr)')

plt.title('Ladda Smart Imorgon: Grönt = Billigt', fontsize=18, pad=20)
plt.xlabel('Klockslag', fontsize=14)
plt.ylabel('Pris (SEK/kWh)', fontsize=14)
plt.legend(loc='upper right', fontsize=12)
plt.xticks(rotation=45)
sns.despine()
plt.tight_layout()
plt.savefig(os.path.join(img_path, "electricity_price_signal.png"), dpi=150)
plt.close()


In [None]:
# ==========================================
# GRAF 2: FEATURE IMPORTANCE
# ==========================================
try:
    bst = retrieved_xgboost_model.get_booster()
    importance = bst.get_score(importance_type='weight')

    if not importance:
         importance = dict(zip(X_pred.columns, retrieved_xgboost_model.feature_importances_))

    imp_df = pd.DataFrame(list(importance.items()), columns=['Feature', 'Score'])
    imp_df['Feature'] = imp_df['Feature'].str.replace('electricity_prices_', '').str.replace('weather_', '')
    imp_df = imp_df.sort_values(by='Score', ascending=False).head(12)

    plt.figure(figsize=(14, 10))
    sns.barplot(x='Score', y='Feature', data=imp_df, palette='viridis')
    plt.title('Vad styr elpriset just nu?', fontsize=18, pad=20)
    plt.xlabel('Påverkan (Vikt)', fontsize=14)
    plt.ylabel('', fontsize=14)
    plt.tight_layout()
    plt.savefig(os.path.join(img_path, "feature_importance.png"), dpi=150)
    plt.close()
except Exception as e:
    print(f"⚠️ Feature importance error: {e}")


In [None]:
# ==========================================
# GRAF 3: TREND (Historisk prediktion + Framtid)
# ==========================================
print("Genererar trend-graf...")

# 1. Vi har redan 'hist_prices' (Dina faktiska priser)
#    Men vi behöver HISTORISKT VÄDER för att modellen ska kunna gissa bakåt i tiden.
weather_fg = fs.get_feature_group('weather_hourly', version=1)

# Hämta väder för samma period som hist_prices
# (Använd online=True för att det ska gå snabbt och funka utan Hive)
hist_start = hist_prices['date'].min()
hist_weather = weather_fg.filter(weather_fg.date >= hist_start).read(online=True)

# Städa hist_weather
hist_weather['date'] = pd.to_datetime(hist_weather['date'], utc=True)
hist_weather['unix_time'] = hist_weather['date'].astype('int64') // 10**6
hist_weather = hist_weather.sort_values('unix_time').drop_duplicates(subset=['unix_time'])

# 2. Koppla ihop historiska priser med historiskt väder
#    Detta skapar 'past_df' som modellen kan räkna på
past_df = hist_prices.merge(hist_weather, on='unix_time', how='inner', suffixes=('', '_y'))
past_df = past_df.loc[:, ~past_df.columns.str.endswith('_y')] # Rensa dubbletter

# Lägg till Feature Engineering på historian (Samma som du gjorde för forecast)
past_df['weekday'] = past_df['date'].dt.weekday.astype('int8')
past_df['is_weekend'] = past_df['weekday'].isin([5, 6]).astype('int8')
past_df['month'] = past_df['date'].dt.month.astype('int8')
past_df['season'] = past_df['month'].map(season_map).astype('int8')
past_df['is_holiday'] = 0 # Förenkling

# Fixa lags för historian (Vi har ju faktiska priser, så vi kan räkna ut lags exakt)
past_df['price_lag_24'] = past_df['price_sek'].shift(24)
past_df['price_lag_48'] = past_df['price_sek'].shift(48)
past_df['price_lag_72'] = past_df['price_sek'].shift(72)
# Rullande medelvärde
past_df['price_roll3d'] = past_df['price_sek'].rolling(72, min_periods=1).mean()

# 3. Slå ihop DÅTID (past_df) och FRAMTID (forecast_df)
#    forecast_df har du redan fixat features för i din kod ovan!
full_df = pd.concat([past_df, forecast_df], ignore_index=True).sort_values('unix_time')

# Fyll i eventuella hål (fill na) som uppstod vid skarven
full_df = full_df.ffill().bfill()

# 4. Mappa om kolumnnamn till det modellen vill ha (prefixet 'electricity_prices_')
full_df['electricity_prices_unix_time'] = full_df['unix_time']
full_df['electricity_prices_weekday'] = full_df['weekday']
full_df['electricity_prices_is_weekend'] = full_df['is_weekend']
full_df['electricity_prices_month'] = full_df['month']
full_df['electricity_prices_season'] = full_df['season']
full_df['electricity_prices_is_holiday'] = full_df['is_holiday']
full_df['electricity_prices_price_lag_24'] = full_df['price_lag_24']
full_df['electricity_prices_price_lag_48'] = full_df['price_lag_48']
full_df['electricity_prices_price_lag_72'] = full_df['price_lag_72']
full_df['electricity_prices_price_roll3d'] = full_df['price_roll3d']

# 5. PREDIKTERA PÅ ALLT (Hela linjen)
# Filtrera ut rätt features
X_all = full_df[feature_cols].copy() # feature_cols definierade du i din kod
# Ta bort datum/strängar
cat_cols = [c for c in X_all.columns if 'price_area' in c]
X_all = X_all.drop(columns=['date', 'hour'] + cat_cols, errors='ignore')

# Kör modellen!
full_df['predicted_price'] = retrieved_xgboost_model.predict(X_all)

# 6. RITA GRAFEN
plt.figure(figsize=(16, 8))

# Fixa datumformat för plotting
full_df['date'] = pd.to_datetime(full_df['date'])
hist_prices['date'] = pd.to_datetime(hist_prices['date'])

# A. Orange linje (Modellens gissning för BÅDE dåtid och framtid)
sns.lineplot(
    x='date',
    y='predicted_price',
    data=full_df,
    label='Modellens Prognos',
    color='#f97316',
    linewidth=2,
    linestyle='--'
)

# B. Svart linje (Faktiskt utfall - Historik)
sns.lineplot(
    x='date',
    y='price_sek',
    data=hist_prices,
    label='Faktiskt Pris',
    color='#1e293b',
    linewidth=3,
    alpha=0.8
)

# Markera "NU"
now_time = pd.Timestamp.utcnow()
plt.axvline(x=now_time, color='gray', linestyle=':', alpha=0.8)
plt.text(now_time, full_df['predicted_price'].max(), ' NU ', color='gray', ha='center', backgroundcolor='white')

plt.title('Pristrend: Modell vs Verklighet', fontsize=18)
plt.xlabel('Datum')
plt.ylabel('Pris (SEK)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()

# Spara
save_path = os.path.join(img_path, "price_trend.png")
plt.savefig(save_path, dpi=150)
print(f"✅ Graf sparad: {save_path}")
plt.show()