In [1]:
import pandas as pd
import numpy as np
import requests
from io import BytesIO

In [2]:
import pandas as pd
import requests
from io import BytesIO


def build_historical_15min_endverbrauch(start=2009, end=2025, timeout=60):
    """Download the Swissgrid yearly workbooks and build one 15-minute load series.

    For every year in ``start``..``end`` (inclusive) the workbook
    ``EnergieUebersichtCH-<year>`` is requested, first as ``.xls`` and then as
    ``.xlsx``; the first variant that downloads and parses is used.  Only the
    first two columns of the sheet ``Zeitreihen0h15`` are kept (timestamp and
    the energy column), and the first two rows (names, units) are skipped.

    Parameters
    ----------
    start, end : int
        First and last year to download (both inclusive).
    timeout : float
        Seconds to wait for each HTTP request; a stalled download is reported
        and skipped instead of blocking the notebook forever.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime`` and ``load_mw`` on a regular 15-minute grid.
        Grid slots without source data hold NaN in ``load_mw``.

    Raises
    ------
    ValueError
        If not a single year could be downloaded and parsed.
    """
    all_years = []

    for year in range(start, end + 1):
        print("Year:", year)
        year_loaded = False

        for ext in ["xls", "xlsx"]:
            url = f"https://www.swissgrid.ch/content/dam/dataimport/energy-statistic/EnergieUebersichtCH-{year}.{ext}"

            try:
                r = requests.get(
                    url,
                    headers={"User-Agent": "Mozilla/5.0"},
                    timeout=timeout,
                )
            except requests.RequestException as e:
                # Best effort: report the failure and try the other extension.
                print("Download error:", year, ext, e)
                continue

            if r.status_code != 200:
                continue

            try:
                raw = pd.read_excel(
                    BytesIO(r.content),
                    sheet_name="Zeitreihen0h15",
                    header=None
                )

                # Row 0 = long names, row 1 = units, row 2+ = data.
                # Column 0 = Zeitstempel, column 1 = Summe endverbrauchte Energie.
                # .copy() so the column assignments below act on an own frame.
                df = raw.iloc[2:, [0, 1]].copy()
                df.columns = ["datetime", "energy_kwh"]

                df["datetime"] = pd.to_datetime(df["datetime"], errors="coerce")
                df["energy_kwh"] = pd.to_numeric(df["energy_kwh"], errors="coerce")

                df = df.dropna()

                # Energy per 15-minute interval divided by 0.25 h gives the
                # average power over that interval.
                # NOTE(review): if the source column really is in kWh, this
                # yields kW rather than MW (a further /1000 would be needed).
                # Left unchanged so the scale stays consistent with files that
                # were already produced -- confirm before changing.
                df["load_mw"] = df["energy_kwh"] / 0.25

                all_years.append(df[["datetime", "load_mw"]])
                year_loaded = True
                break

            except Exception as e:
                print("Read error:", year, ext, e)

        if not year_loaded:
            print("No data loaded for year:", year)

    if not all_years:
        raise ValueError(
            f"No Swissgrid data could be loaded for {start}-{end}"
        )

    df_full = (
        pd.concat(all_years)
          .drop_duplicates("datetime")
          .sort_values("datetime")
          .set_index("datetime")
          .asfreq("15min")
          .reset_index()
    )

    print("Total rows:", len(df_full))
    print("Missing intervals:", df_full["load_mw"].isna().sum())

    return df_full


In [3]:
# Build the full history with the default year range.
# NOTE(review): the returned frame is only displayed here, not assigned or
# saved; the recorded run below was stopped with a KeyboardInterrupt.
build_historical_15min_endverbrauch()

Year: 2009


KeyboardInterrupt: 

In [None]:
import pandas as pd
import requests
from io import BytesIO
from datetime import datetime


def _download_swissgrid_year(year, timeout=60):
    """Return the parsed 15-minute frame (datetime, load_mw) for one year, or None."""
    for ext in ["xlsx", "xls"]:
        url = f"https://www.swissgrid.ch/content/dam/dataimport/energy-statistic/EnergieUebersichtCH-{year}.{ext}"

        try:
            r = requests.get(
                url,
                headers={"User-Agent": "Mozilla/5.0"},
                timeout=timeout,
            )
        except requests.RequestException as e:
            # Best effort: report and fall through to the other extension.
            print("Download error:", ext, e)
            continue

        if r.status_code != 200:
            continue

        try:
            raw = pd.read_excel(
                BytesIO(r.content),
                sheet_name="Zeitreihen0h15",
                header=None
            )

            # Row 0 = long headers, row 1 = units, row 2+ = data.
            # Column 0 = Zeitstempel, column 1 = Summe endverbrauchte Energie.
            df = raw.iloc[2:, [0, 1]].copy()
            df.columns = ["datetime", "energy_kwh"]

            df["datetime"] = pd.to_datetime(df["datetime"], errors="coerce")
            df["energy_kwh"] = pd.to_numeric(df["energy_kwh"], errors="coerce")

            df = df.dropna()

            # Energy per 15-minute interval / 0.25 h = average power.
            # NOTE(review): if the source is in kWh this is kW, not MW; kept
            # as-is to stay on the same scale as the existing master file.
            df["load_mw"] = df["energy_kwh"] / 0.25
            return df[["datetime", "load_mw"]]

        except Exception as e:
            print("Read error:", ext, e)

    return None


def update_current_year(
    master_path="data/processed/swissgrid_load_15min_2009_2025.csv",
    timeout=60,
):
    """Append newly published Swissgrid rows to the master CSV.

    The master file is read, every year from the year of its latest timestamp
    up to the current calendar year is downloaded, and rows newer than that
    latest timestamp are appended.  Starting at the master's last year (rather
    than only the current year) keeps the end of the previous year from being
    skipped when the update runs after a year change.

    Parameters
    ----------
    master_path : str
        CSV with at least a ``datetime`` column; it is overwritten in place.
    timeout : float
        Seconds to wait for each HTTP request.

    Returns
    -------
    pandas.DataFrame
        The updated master on a regular 15-minute grid, or the unchanged
        master if nothing could be downloaded.
    """
    current_year = datetime.now().year
    print("Updating year:", current_year)

    master = pd.read_csv(master_path, parse_dates=["datetime"])
    latest_timestamp = master["datetime"].max()
    master_is_empty = pd.isna(latest_timestamp)

    if master_is_empty:
        first_year = current_year
    else:
        first_year = min(latest_timestamp.year, current_year)

    frames = []
    for year in range(first_year, current_year + 1):
        df_year = _download_swissgrid_year(year, timeout=timeout)
        if df_year is None:
            print("Could not load year:", year)
        else:
            frames.append(df_year)

    if not frames:
        print("Could not load current year.")
        return master

    df = pd.concat(frames).drop_duplicates("datetime")

    # With an empty master there is no cut-off: every downloaded row is new.
    if master_is_empty:
        df_new = df
    else:
        df_new = df[df["datetime"] > latest_timestamp]

    print("New rows:", len(df_new))

    master_updated = (
        pd.concat([master, df_new])
          .drop_duplicates("datetime")
          .sort_values("datetime")
          .set_index("datetime")
          .asfreq("15min")
          .reset_index()
    )

    master_updated.to_csv(master_path, index=False)
    print("Master file updated.")

    return master_updated


# Refresh the master CSV at the default path and keep the returned frame as
# `df`; later cells reuse this global name.
df = update_current_year()


Updating year: 2026
New rows: 0
Master file updated.


In [None]:
import pandas as pd
import requests
from datetime import datetime

# Weather sampling points, keyed by city name -> (latitude, longitude)
CITIES = dict(
    zurich=(47.3769, 8.5417),
    geneva=(46.2044, 6.1432),
    basel=(47.5596, 7.5886),
    bern=(46.9480, 7.4474),
    lausanne=(46.5197, 6.6323),
    lugano=(46.0037, 8.9511),
)

# Approximate population weight of each city (the six values add up to 1.0)
WEIGHTS = dict(
    zurich=0.30,
    geneva=0.18,
    basel=0.15,
    bern=0.15,
    lausanne=0.12,
    lugano=0.10,
)

# URL template for hourly T2M point data from NASA POWER; the placeholders
# {lon}, {lat}, {start} and {end} are filled in with str.format.
NASA_URL = "https://power.larc.nasa.gov/api/temporal/hourly/point?" + "&".join(
    [
        "parameters=T2M",
        "community=RE",
        "longitude={lon}",
        "latitude={lat}",
        "start={start}",
        "end={end}",
        "format=JSON",
    ]
)


import pandas as pd
import requests
from datetime import datetime, timedelta


def fetch_weather(start_year=2009, timeout=60):
    """Fetch hourly temperatures for every city in CITIES and derive weather features.

    One request per city and year is sent to the ``NASA_URL`` endpoint, from
    ``start_year`` through the current year; the current year is requested
    only up to yesterday.  The per-city series are inner-joined on
    ``datetime`` and rows with a missing value for any city are dropped.

    Parameters
    ----------
    start_year : int
        First year to request.
    timeout : float
        Seconds to wait for each HTTP request.

    Returns
    -------
    pandas.DataFrame
        ``datetime``, one temperature column per city, and
        ``temp_weighted`` (WEIGHTS-weighted mean),
        ``HDH`` (degrees below 18, floored at 0),
        ``CDH`` (degrees above 22, floored at 0),
        ``temp_72h`` (trailing 72-hour mean of ``temp_weighted``) and
        ``extreme_cold`` (1 where ``temp_weighted`` < -5).

    Raises
    ------
    ValueError
        If no data at all was fetched for a city.
    requests.HTTPError
        If the API answers a request with an error status.
    """
    today = datetime.now()
    end_year = today.year
    yesterday = (today - timedelta(days=1)).strftime("%Y%m%d")

    all_cities_data = []

    for city, (lat, lon) in CITIES.items():
        print("Fetching:", city)

        city_frames = []

        for year in range(start_year, end_year + 1):

            start = f"{year}0101"
            end = f"{year}1231"

            # If current year, stop at yesterday
            if year == end_year:
                end = yesterday
                if end < start:
                    # On 1 January "yesterday" belongs to the previous year,
                    # so there is nothing to request for the current year.
                    continue

            url = NASA_URL.format(
                lon=lon,
                lat=lat,
                start=start,
                end=end
            )

            r = requests.get(url, timeout=timeout)
            r.raise_for_status()

            json_data = r.json()

            if "properties" not in json_data:
                print("No data for:", year)
                continue

            data = json_data["properties"]["parameter"]["T2M"]

            df_year = pd.DataFrame({
                "datetime": pd.to_datetime(
                    list(data.keys()),
                    format="%Y%m%d%H"
                ),
                city: list(data.values())
            })

            # Keep the column numeric: -999 is treated as "missing" and
            # becomes NaN instead of pd.NA (which would force object dtype).
            df_year[city] = (
                pd.to_numeric(df_year[city], errors="coerce")
                  .replace(-999, float("nan"))
            )

            city_frames.append(df_year)

        if not city_frames:
            raise ValueError(f"No data fetched for {city}")

        all_cities_data.append(pd.concat(city_frames, ignore_index=True))

    # Merge cities: keep only timestamps present for every city
    df_weather = all_cities_data[0]
    for df_city in all_cities_data[1:]:
        df_weather = df_weather.merge(df_city, on="datetime", how="inner")

    # Drop hours where any city is missing, then make the order explicit
    df_weather = (
        df_weather
        .dropna()
        .sort_values("datetime")
        .reset_index(drop=True)
    )

    # Weighted temp
    df_weather["temp_weighted"] = sum(
        df_weather[c] * WEIGHTS[c] for c in CITIES
    )

    df_weather["HDH"] = (18 - df_weather["temp_weighted"]).clip(lower=0)
    df_weather["CDH"] = (df_weather["temp_weighted"] - 22).clip(lower=0)

    # Time-based window: identical to 72 rows on gap-free hourly data, but
    # still spans 72 hours when rows were dropped above.
    temp_by_time = df_weather.set_index("datetime")["temp_weighted"]
    df_weather["temp_72h"] = (
        temp_by_time
        .rolling("72h", min_periods=1)
        .mean()
        .to_numpy()
    )

    df_weather["extreme_cold"] = (
        df_weather["temp_weighted"] < -5
    ).astype(int)

    return df_weather.reset_index(drop=True)



In [None]:
def add_calendar_features(df):
    """Add hour, weekday, month and weekend-flag columns derived from ``datetime``.

    The columns are written onto the frame that was passed in, and that same
    frame is returned.  ``weekend`` is 1 where ``day_of_week`` is 5 or 6.
    """
    stamps = df["datetime"].dt

    for column, values in (
        ("hour", stamps.hour),
        ("day_of_week", stamps.dayofweek),
        ("month", stamps.month),
    ):
        df[column] = values

    df["weekend"] = df["day_of_week"].ge(5).astype(int)
    return df


In [None]:
def merge_load_weather(df_load, df_weather):
    """Left-join the weather columns onto the load frame using ``datetime``.

    Every row of ``df_load`` is kept; timestamps without a weather match get
    NaN in the weather columns.
    """
    return pd.merge(df_load, df_weather, how="left", on="datetime")


In [None]:
# Download the weather features for the default year range, then print the
# first rows and the (rows, columns) shape as a quick sanity check.
df_weather = fetch_weather()
print(df_weather.head())
print(df_weather.shape)


Fetching: zurich
Fetching: geneva
Fetching: basel
Fetching: bern
Fetching: lausanne
Fetching: lugano
             datetime  zurich  geneva  basel  bern  lausanne  lugano  \
0 2009-01-01 00:00:00   -1.60   -1.11  -3.80 -3.13     -3.89   -5.22   
1 2009-01-01 01:00:00   -1.60   -1.45  -4.15 -3.84     -4.68   -4.89   
2 2009-01-01 02:00:00   -1.96   -1.99  -4.52 -4.65     -5.43   -4.58   
3 2009-01-01 03:00:00   -2.43   -2.84  -4.89 -5.34     -6.25   -4.35   
4 2009-01-01 04:00:00   -2.89   -3.63  -5.24 -5.96     -7.23   -4.51   

   temp_weighted      HDH  CDH  temp_72h  extreme_cold  
0        -2.7081  20.7081  0.0 -2.708100             0  
1        -2.9901  20.9901  0.0 -2.849100             0  
2        -3.4313  21.4313  0.0 -3.043167             0  
3        -3.9597  21.9597  0.0 -3.272300             0  
4        -4.5190  22.5190  0.0 -3.521640             0  
(150000, 12)


In [None]:
# Show the last 220 rows of the weather frame.
# NOTE(review): the recorded output has 15-minute timestamps, i.e. this cell
# was last run after the resampling cell further down (out-of-order execution).
df_weather.tail(220)

Unnamed: 0,datetime,zurich,geneva,basel,bern,lausanne,lugano,temp_weighted,HDH,CDH,temp_72h,extreme_cold
600065,2026-02-11 16:15:00,6.46,1.79,6.94,3.95,1.56,5.01,4.5819,13.4181,0.0,2.832519,0
600066,2026-02-11 16:30:00,6.46,1.79,6.94,3.95,1.56,5.01,4.5819,13.4181,0.0,2.832519,0
600067,2026-02-11 16:45:00,6.46,1.79,6.94,3.95,1.56,5.01,4.5819,13.4181,0.0,2.832519,0
600068,2026-02-11 17:00:00,5.99,0.95,6.29,2.97,1.35,4.52,3.9710,14.0290,0.0,2.865644,0
600069,2026-02-11 17:15:00,5.99,0.95,6.29,2.97,1.35,4.52,3.9710,14.0290,0.0,2.865644,0
...,...,...,...,...,...,...,...,...,...,...,...,...
600280,2026-02-13 22:00:00,2.90,0.95,3.66,1.95,-0.02,1.74,2.0541,15.9459,0.0,3.250710,0
600281,2026-02-13 22:15:00,2.90,0.95,3.66,1.95,-0.02,1.74,2.0541,15.9459,0.0,3.250710,0
600282,2026-02-13 22:30:00,2.90,0.95,3.66,1.95,-0.02,1.74,2.0541,15.9459,0.0,3.250710,0
600283,2026-02-13 22:45:00,2.90,0.95,3.66,1.95,-0.02,1.74,2.0541,15.9459,0.0,3.250710,0


In [None]:
# Bring the weather frame onto the 15-minute grid of the load data and attach
# it to `df`.
# NOTE(review): this cell rebinds both `df_weather` and `df`, so it is not
# idempotent -- re-running it merges weather columns into a frame that already
# has them. Run it once, in order, on a fresh kernel.
df_weather["datetime"] = pd.to_datetime(df_weather["datetime"])
df_weather = (
    df_weather
        .set_index("datetime")
        # Upsample to 15 minutes; the new slots are forward-filled from the
        # previous available row. No fill limit is given, so a gap of any
        # length in the source is filled with the last value before it.
        .resample("15min")
        .ffill()
        .reset_index()
)
# Left join: every load row is kept (see merge_load_weather).
df = merge_load_weather(df, df_weather)


In [None]:
# Display the merged load + weather frame.
# NOTE(review): the recorded output already lists lag_* and rolling_* columns,
# which are only created by add_lags in a later cell -- the output is from an
# out-of-order run or from a frame loaded elsewhere.
df

Unnamed: 0.1,datetime,Unnamed: 0,net_outflow_kwh,load_mw,zurich,geneva,basel,bern,lausanne,lugano,...,extreme_cold,hour,day_of_week,month,weekend,lag_1,lag_96,lag_672,rolling_24h_mean,rolling_24h_std


In [None]:
def add_lags(df):
    """Return a copy of ``df`` sorted by ``datetime`` with lag and rolling load features.

    Added columns (all derived from ``load_mw``, counted in rows):
    ``lag_1``, ``lag_96``, ``lag_672`` -- the value 1, 96 and 672 rows earlier;
    ``rolling_24h_mean``, ``rolling_24h_std`` -- mean and standard deviation
    over the last 96 rows (NaN until 96 rows are available).
    With 15-minute rows, 96 rows are one day and 672 rows one week.
    """
    ordered = df.sort_values("datetime")
    load = ordered["load_mw"]
    last_96_rows = load.rolling(96)

    return ordered.assign(
        lag_1=load.shift(1),
        lag_96=load.shift(96),
        lag_672=load.shift(672),
        rolling_24h_mean=last_96_rows.mean(),
        rolling_24h_std=last_96_rows.std(),
    )

import numpy as np

def add_fourier_terms(df, period=96, order=3):
    """Add sine/cosine columns ``sin_k`` / ``cos_k`` for k = 1..order.

    The phase is based on the row position (0, 1, 2, ...), not on the
    timestamp, with ``period`` rows per full cycle.  The columns are written
    onto the frame that was passed in, and that same frame is returned.
    """
    position = np.arange(len(df))

    for harmonic in range(1, order + 1):
        phase = 2 * np.pi * harmonic * position / period
        for label, wave in (("sin", np.sin), ("cos", np.cos)):
            df[f"{label}_{harmonic}"] = wave(phase)

    return df


In [None]:
# Add calendar columns, then lag/rolling load columns, and display the result.
# add_lags sorts by datetime first, so `df` is in time order afterwards.
df = add_calendar_features(df)
df = add_lags(df)
df

Unnamed: 0.1,datetime,Unnamed: 0,net_outflow_kwh,load_mw,zurich,geneva,basel,bern,lausanne,lugano,...,extreme_cold,hour,day_of_week,month,weekend,lag_1,lag_96,lag_672,rolling_24h_mean,rolling_24h_std
