# Daily feature pipeline: electricity prices and hourly weather

In [None]:
from pathlib import Path
import sys
import datetime
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import holidays

from dotenv import load_dotenv
import hopsworks

# Step 1: the project root is the parent of the current working directory
# (the notebook is expected to run from notebooks/).
root_dir = Path("..").resolve()

# Step 2: make the `src` package importable by exposing the root on sys.path.
root_dir_str = str(root_dir)
if root_dir_str not in sys.path:
    sys.path.append(root_dir_str)

# Step 3: read environment variables from the project-level .env file.
env_path = root_dir.joinpath(".env")
load_dotenv(env_path)

# Step 4: project imports — these must stay below the sys.path adjustment.
from src.config import ElectricitySettings
from src import util

settings = ElectricitySettings()

# Step 5: authenticate against Hopsworks and grab the feature store handle.
project = hopsworks.login(engine="python")
fs = project.get_feature_store()

print("Successfully logged in to Hopsworks project:", settings.HOPSWORKS_PROJECT)


In [None]:
# Look up version 1 of the two feature groups this notebook writes to
# (the schema that carries the engineered features).
electricity_prices_fg = fs.get_feature_group(name="electricity_prices", version=1)
weather_hourly_fg = fs.get_feature_group(name="weather_hourly", version=1)


In [None]:
# Run configuration: bidding zone and the location used for weather lookups.
PRICE_AREA = "SE3"  # Stockholm / South-Central Sweden
CITY = "Stockholm"
# Hard-coded Stockholm coordinates.
# Disabled alternative: LATITUDE, LONGITUDE = util.get_city_coordinates(CITY)
LATITUDE, LONGITUDE = 59.3251, 18.0711

# Reference dates, taken from the local clock of the machine running the notebook.
today = datetime.date.today()
yesterday = today + datetime.timedelta(days=-1)

## Step 1 — Fetch and upsert yesterday's electricity prices
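Pull hourly prices for yesterday plus the three preceding days (needed to compute the lag features), align them to the backfill schema (including `unix_time` as the primary key), and write yesterday's rows to the `electricity_prices` feature group.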

In [None]:
# Daily price ingestion.
# Fetch a 4-day window (yesterday + previous 3 days) so that the 24/48/72-hour
# lags for yesterday's rows can be computed from real data.
window_start = yesterday - datetime.timedelta(days=3)
df_prices = util.fetch_electricity_prices(
    start_date=window_start,
    end_date=yesterday,
    price_area=PRICE_AREA,
    show_progress=False,
    request_pause=0,
)
df_prices = util.align_electricity_price_schema(df_prices)

# Align to backfill schema: UTC date, unix_time in ms, lowercase string price_area.
df_prices["date"] = pd.to_datetime(df_prices["timestamp"], utc=True)
# Integer view // 10**6 yields milliseconds; this assumes nanosecond-resolution
# datetimes — TODO confirm if the pandas version changes the default unit.
df_prices["unix_time"] = df_prices["date"].astype("int64") // 10**6
df_prices = df_prices.drop(columns=["timestamp"])
# Remove unused currency columns if present.
df_prices = df_prices.drop(columns=["price_eur", "exchange_rate"], errors="ignore")
df_prices["price_area"] = PRICE_AREA.lower()
df_prices["price_area"] = df_prices["price_area"].astype("string")

# Lag and rolling calculations below rely on chronological order per area.
df_prices = df_prices.sort_values(["price_area", "unix_time"])

# Calendar features (derived from the UTC `date` column).
df_prices["weekday"] = df_prices["date"].dt.weekday.astype("int8")
df_prices["is_weekend"] = df_prices["weekday"].isin([5, 6]).astype("int8")
df_prices["month"] = df_prices["date"].dt.month.astype("int8")
# Month -> season code: 0 = Dec-Feb, 1 = Mar-May, 2 = Jun-Aug, 3 = Sep-Nov.
season_map = {12: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 3, 10: 3, 11: 3}
df_prices["season"] = df_prices["month"].map(season_map).astype("int8")

# Holiday flag is best-effort: on failure fall back to 0, but say so and keep
# the same int8 dtype as the normal path. `holidays` is imported at the top of
# the notebook, so no local import is needed here.
try:
    years = range(df_prices["date"].dt.year.min(), df_prices["date"].dt.year.max() + 1)
    se_holidays = holidays.Sweden(years=years)
    df_prices["is_holiday"] = df_prices["date"].dt.date.isin(se_holidays).astype("int8")
except Exception as exc:
    print(f"Holiday lookup failed ({exc!r}); defaulting is_holiday to 0")
    df_prices["is_holiday"] = pd.Series(0, index=df_prices.index, dtype="int8")

# Lagged prices (24h, 48h, 72h back) per price area.
for lag in (24, 48, 72):
    df_prices[f"price_lag_{lag}"] = (
        df_prices.groupby("price_area")["price_sek"].shift(lag).astype("float32")
    )

# Rolling mean over the last 72 rows (current row included), per price area.
df_prices["price_roll3d"] = (
    df_prices.groupby("price_area")["price_sek"]
             .rolling(72, min_periods=1)
             .mean()
             .reset_index(level=0, drop=True)
             .astype("float32")
)

price_columns = [
    "unix_time",
    "date",
    "hour",
    "price_area",
    "price_sek",
    "weekday",
    "is_weekend",
    "month",
    "season",
    "is_holiday",
    "price_lag_24",
    "price_lag_48",
    "price_lag_72",
    "price_roll3d",
]

# Keep only yesterday's rows and only the feature-group columns, THEN drop
# incomplete rows — so a NaN in an unrelated helper column cannot discard a
# valid row.
# NOTE(review): `date` is UTC while `yesterday` comes from the local clock
# (datetime.date.today()); confirm this is the intended day boundary.
df_prices = df_prices.loc[df_prices["date"].dt.date == yesterday, price_columns]
df_prices = df_prices.dropna().reset_index(drop=True)
print(f"Fetched {len(df_prices)} rows for {yesterday}")

# Insert new data; skip the write when there is nothing to insert.
if df_prices.empty:
    print("No electricity price rows for yesterday; skipping insert.")
else:
    electricity_prices_fg.insert(df_prices, wait=True)

## Step 2 — Fetch and upsert yesterday's weather
Fetch yesterday's hourly actual weather, align to the backfill schema, and write to the `weather_hourly` feature group.

In [None]:
# Daily weather ingestion: fetch yesterday's actual hourly weather for the
# configured location and upload it to the weather feature group.
actual_weather_yesterday = util.get_yesterday_hourly_weather(
    latitude=LATITUDE,
    longitude=LONGITUDE,
    city=CITY,
)
actual_weather_yesterday["date"] = pd.to_datetime(actual_weather_yesterday["timestamp"], utc=True)
# Integer view // 10**6 yields milliseconds; this assumes nanosecond-resolution
# datetimes — TODO confirm if the pandas version changes the default unit.
actual_weather_yesterday["unix_time"] = actual_weather_yesterday["date"].astype("int64") // 10**6

# Align schema to backfill FG: label rows with the price area, drop city/timestamp.
actual_weather_yesterday["price_area"] = PRICE_AREA.lower()
actual_weather_yesterday["price_area"] = actual_weather_yesterday["price_area"].astype("string")
# `city` is optional in the fetched frame; `timestamp` must be present.
actual_weather_yesterday = actual_weather_yesterday.drop(columns=["city"], errors="ignore")
actual_weather_yesterday = actual_weather_yesterday.drop(columns=["timestamp"])

# Calendar features (derived from the UTC `date` column).
actual_weather_yesterday["weekday"] = actual_weather_yesterday["date"].dt.weekday.astype("int8")
actual_weather_yesterday["is_weekend"] = actual_weather_yesterday["weekday"].isin([5, 6]).astype("int8")
actual_weather_yesterday["month"] = actual_weather_yesterday["date"].dt.month.astype("int8")
# Month -> season code: 0 = Dec-Feb, 1 = Mar-May, 2 = Jun-Aug, 3 = Sep-Nov.
season_map = {12: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 1, 6: 2, 7: 2, 8: 2, 9: 3, 10: 3, 11: 3}
actual_weather_yesterday["season"] = actual_weather_yesterday["month"].map(season_map).astype("int8")

# Holiday flag is best-effort: on failure fall back to 0, but say so and keep
# the same int8 dtype as the normal path. `holidays` is imported at the top of
# the notebook, so no local import is needed here.
try:
    years = range(
        actual_weather_yesterday["date"].dt.year.min(),
        actual_weather_yesterday["date"].dt.year.max() + 1,
    )
    se_holidays = holidays.Sweden(years=years)
    actual_weather_yesterday["is_holiday"] = (
        actual_weather_yesterday["date"].dt.date.isin(se_holidays).astype("int8")
    )
except Exception as exc:
    print(f"Holiday lookup failed ({exc!r}); defaulting is_holiday to 0")
    actual_weather_yesterday["is_holiday"] = pd.Series(
        0, index=actual_weather_yesterday.index, dtype="int8"
    )

# Column order written to the feature group; a missing column raises KeyError.
weather_cols = [
    "unix_time",
    "date",
    "hour",
    "price_area",
    "temperature_2m",
    "apparent_temperature",
    "precipitation",
    "rain",
    "snowfall",
    "cloud_cover",
    "wind_speed_10m",
    "wind_speed_100m",
    "wind_direction_10m",
    "wind_direction_100m",
    "wind_gusts_10m",
    "surface_pressure",
    "weekday",
    "is_weekend",
    "month",
    "season",
    "is_holiday",
]
actual_weather_yesterday = actual_weather_yesterday[weather_cols]

# NOTE(review): storage="online" is passed here, whereas the price insert uses
# the default storage — confirm that writing only to the online store is intended.
if not actual_weather_yesterday.empty:
    weather_hourly_fg.insert(actual_weather_yesterday, storage="online", wait=True)
    print(f"Inserted actual weather for yesterday: {len(actual_weather_yesterday)} rows for {yesterday}")
else:
    print("No actual weather rows for yesterday.")
