In [42]:
import pandas as pd
import numpy as np
import requests
from io import BytesIO

In [43]:
import pandas as pd
from pathlib import Path


def read_correct_sheet(file):
    """Load the time-series sheet of one workbook as a DataFrame.

    The first sheet whose name contains "zeitreihen" (case-insensitive,
    spaces ignored) is used; if no sheet matches, the first sheet of the
    workbook is read instead. Row 0 of the chosen sheet is the header.

    Parameters
    ----------
    file : str or path-like
        Path to an ``.xls`` / ``.xlsx`` workbook.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected sheet.
    """
    # The context manager releases the workbook handle even when parsing
    # fails, and parsing from the open handle avoids opening the file twice.
    with pd.ExcelFile(file) as xls:
        for name in xls.sheet_names:
            if "zeitreihen" in name.lower().replace(" ", ""):
                return xls.parse(sheet_name=name, header=0)

        # No sheet matched: fall back to the first sheet.
        return xls.parse(sheet_name=0, header=0)


def merge_swissgrid_excels(folder, output_file):
    """Stack every Excel workbook found in ``folder`` into a single CSV.

    Workbooks (``*.xls`` and ``*.xlsx``) are processed in sorted path order.
    The first workbook defines the column layout; every later workbook is
    reindexed to those columns, so columns it lacks become NaN and columns
    it adds are dropped. The stacked frame is written to ``output_file``
    without the index, creating parent directories as needed.

    Parameters
    ----------
    folder : str or path-like
        Directory holding the workbooks.
    output_file : str or path-like
        Destination CSV path.

    Raises
    ------
    ValueError
        If ``folder`` contains no usable Excel workbook.
    """
    folder = Path(folder)

    # Collect both legacy and modern workbooks. Excel owner/lock files
    # ("~$name.xlsx") and hidden metadata files (".name.xls") match the same
    # glob patterns but are not readable workbooks, so they are skipped.
    files = sorted(
        path
        for pattern in ("*.xls", "*.xlsx")
        for path in folder.glob(pattern)
        if not path.name.startswith(("~$", "."))
    )

    if not files:
        raise ValueError("No Excel files found")

    print("Found", len(files), "files")

    # The first file defines the column structure for the merged result.
    base = read_correct_sheet(files[0])
    cols = base.columns

    frames = [base]
    for f in files[1:]:
        print("Reading", f.name)
        # Align to the reference layout before stacking.
        frames.append(read_correct_sheet(f).reindex(columns=cols))

    merged = pd.concat(frames, ignore_index=True)

    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    merged.to_csv(output_file, index=False)

    print("Saved →", output_file)


In [44]:
# Merge all raw workbooks into one processed CSV.
merge_swissgrid_excels(
    "../data/raw",                                 # folder with the Excel files
    "../data/processed/swissgrid_all_years.csv",   # merged output
)


Found 18 files
Reading EnergieUebersichtCH-2010.xls
Reading EnergieUebersichtCH-2011.xls
Reading EnergieUebersichtCH-2012.xls
Reading EnergieUebersichtCH-2013.xls
Reading EnergieUebersichtCH-2014.xls
Reading EnergieUebersichtCH-2015.xls
Reading EnergieUebersichtCH-2016.xls
Reading EnergieUebersichtCH-2017.xls
Reading EnergieUebersichtCH-2018.xls
Reading EnergieUebersichtCH-2019.xls
Reading EnergieUebersichtCH-2020.xlsx
Reading EnergieUebersichtCH-2021.xlsx
Reading EnergieUebersichtCH-2022.xlsx
Reading EnergieUebersichtCH-2023.xlsx
Reading EnergieUebersichtCH-2024.xlsx
Reading EnergieUebersichtCH-2025.xlsx
Reading EnergieUebersichtCH-2026.xlsx
Saved → ../data/processed/swissgrid_all_years.csv


In [36]:
import pandas as pd
import requests
from datetime import datetime

# (latitude, longitude) of each city whose temperature series is fetched.
# Insertion order matters: it fixes the column order of the merged frame.
CITIES = dict(
    zurich=(47.3769, 8.5417),
    geneva=(46.2044, 6.1432),
    basel=(47.5596, 7.5886),
    bern=(46.9480, 7.4474),
    lausanne=(46.5197, 6.6323),
    lugano=(46.0037, 8.9511),
)

# Approximate population weight per city (the six values sum to 1.0).
WEIGHTS = dict(
    zurich=0.30,
    geneva=0.18,
    basel=0.15,
    bern=0.15,
    lausanne=0.12,
    lugano=0.10,
)

# Request template for the NASA POWER hourly point endpoint; the placeholders
# {lon}, {lat}, {start} and {end} are filled in with str.format().
# NOTE(review): no time-standard parameter is sent, so timestamps follow the
# API default — confirm it matches the UTC datetimes of the Swissgrid series.
NASA_URL = "https://power.larc.nasa.gov/api/temporal/hourly/point?" + "&".join(
    [
        "parameters=T2M",
        "community=RE",
        "longitude={lon}",
        "latitude={lat}",
        "start={start}",
        "end={end}",
        "format=JSON",
    ]
)


import pandas as pd
import requests
from datetime import datetime, timedelta


def fetch_weather(start_year=2009, timeout=60):
    """Download hourly T2M series for every city in ``CITIES`` and derive features.

    One request is sent per city and calendar year, from ``start_year``
    through the year that contains yesterday; the last year is truncated at
    yesterday. The per-city series are inner-joined on their timestamp, rows
    with any missing value are dropped, and weighted-temperature features
    are appended.

    Parameters
    ----------
    start_year : int, default 2009
        First calendar year to request.
    timeout : float or tuple, default 60
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection raises instead of blocking indefinitely.

    Returns
    -------
    pandas.DataFrame
        Sorted by ``datetime`` with a fresh integer index. Columns:
        ``datetime``, one column per city, ``temp_weighted``, ``HDH``,
        ``CDH``, ``temp_72h`` and ``extreme_cold``.

    Raises
    ------
    ValueError
        If no yearly frame could be built for a city.
    requests.HTTPError
        If the API answers with an error status (via ``raise_for_status``).
    """
    # Take "yesterday" once so both the final year and its end date come from
    # the same instant. Deriving the final year from yesterday also avoids an
    # inverted range (start = Jan 1 of this year, end = Dec 31 of last year)
    # when the function runs on 1 January.
    last_day = datetime.now() - timedelta(days=1)
    end_year = last_day.year

    all_cities_data = []

    for city, (lat, lon) in CITIES.items():
        print("Fetching:", city)

        city_frames = []

        for year in range(start_year, end_year + 1):
            start = f"{year}0101"
            # The last requested year stops at yesterday.
            end = last_day.strftime("%Y%m%d") if year == end_year else f"{year}1231"

            url = NASA_URL.format(lon=lon, lat=lat, start=start, end=end)

            r = requests.get(url, timeout=timeout)
            r.raise_for_status()

            json_data = r.json()

            if "properties" not in json_data:
                print("No data for:", year)
                continue

            # Mapping of "YYYYMMDDHH" keys to temperature values.
            data = json_data["properties"]["parameter"]["T2M"]

            temps = pd.to_numeric(
                pd.Series(list(data.values()), dtype="object"),
                errors="coerce",
            )
            # -999 is treated as "missing"; storing NaN keeps the column
            # numeric instead of turning it into an object column.
            temps = temps.mask(temps == -999)

            city_frames.append(
                pd.DataFrame({
                    "datetime": pd.to_datetime(
                        list(data.keys()),
                        format="%Y%m%d%H"
                    ),
                    city: temps.to_numpy(),
                })
            )

        if not city_frames:
            raise ValueError(f"No data fetched for {city}")

        all_cities_data.append(pd.concat(city_frames, ignore_index=True))

    # Keep only the timestamps present for every city.
    df_weather = all_cities_data[0]
    for df_city in all_cities_data[1:]:
        df_weather = df_weather.merge(df_city, on="datetime", how="inner")

    # Drop hours where any city is missing; sort so that the time-based
    # rolling window below sees a monotonic timestamp axis.
    df_weather = (
        df_weather
        .dropna()
        .sort_values("datetime")
        .reset_index(drop=True)
    )

    # Weighted average temperature across cities.
    df_weather["temp_weighted"] = sum(
        df_weather[c] * WEIGHTS[c] for c in CITIES
    )

    # Degree-hour style features: shortfall below 18 and excess above 22.
    df_weather["HDH"] = (18 - df_weather["temp_weighted"]).clip(lower=0)
    df_weather["CDH"] = (df_weather["temp_weighted"] - 22).clip(lower=0)

    # Trailing 72-hour mean. The window is measured in time rather than rows,
    # so hours removed by dropna() do not stretch it beyond 72 hours. An
    # offset window defaults to min_periods=1, like the row-based original.
    df_weather["temp_72h"] = (
        df_weather.set_index("datetime")["temp_weighted"]
        .rolling("72h")
        .mean()
        .to_numpy()
    )

    # 1 when the weighted temperature is below -5, else 0.
    df_weather["extreme_cold"] = (
        df_weather["temp_weighted"] < -5
    ).astype(int)

    return df_weather



In [101]:
# Fetch the full weather history, starting with 2009.
df_weather = fetch_weather(start_year=2009)

Fetching: zurich
Fetching: geneva
Fetching: basel
Fetching: bern
Fetching: lausanne
Fetching: lugano


In [None]:
# Display the fetched weather frame (this cell has no recorded execution count).
df_weather

In [100]:
# Give the unnamed first column its real name, then display the frame.
# NOTE(review): `df` is not created in any cell visible here, and this cell's
# execution count (100) precedes the weather fetch (101) — confirm the
# notebook still works under Restart Kernel -> Run All.
df = df.rename(columns={"Unnamed: 0": "datetime"})
df

Unnamed: 0,datetime,Summe endverbrauchte Energie Regelblock Schweiz\nTotal energy consumed by end users in the Swiss controlblock,Summe produzierte Energie Regelblock Schweiz\nTotal energy production Swiss controlblock,Summe verbrauchte Energie Regelblock Schweiz\nTotal energy consumption Swiss controlblock,Netto Ausspeisung aus dem Übertragungsnetz Schweiz\nNet outflow of the Swiss transmission grid,Vertikale Einspeisung ins Übertragungsnetz Schweiz\nGrid feed-in Swiss transmission grid,Positive Sekundär-Regelenergie\nPositive secundary control energy,Negative Sekundär-Regelenergie\nNegative secundary control energy,Positive Tertiär-Regelenergie\nPositive tertiary control energy,Negative Tertiär-Regelenergie\nNegative tertiary control energy,...,geneva,basel,bern,lausanne,lugano,temp_weighted,HDH,CDH,temp_72h,extreme_cold
0,2009-01-01 00:00:00+00:00,1.759240e+06,1.323260e+06,1.916273e+06,532489.976333,1.045734e+06,0.00000,-52666.666667,0.0,0.0,...,,,,,,,,,,
1,2009-01-01 01:00:00+00:00,1.701811e+06,1.316808e+06,1.892073e+06,513379.872250,1.041072e+06,0.00000,-36750.000000,0.0,0.0,...,,,,,,,,,,
2,2009-01-01 02:00:00+00:00,1.646580e+06,1.279014e+06,1.827304e+06,490059.652500,1.005223e+06,0.00000,-30500.000000,0.0,-14250.0,...,,,,,,,,,,
3,2009-01-01 03:00:00+00:00,1.574991e+06,1.272334e+06,1.797303e+06,464318.523250,1.001454e+06,0.00000,-36500.000000,0.0,-28500.0,...,,,,,,,,,,
4,2009-01-01 04:00:00+00:00,1.486570e+06,1.247834e+06,1.714821e+06,410165.941000,9.782375e+05,0.00000,-38250.000000,0.0,-38062.5,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157051,2026-12-01 19:00:00+00:00,2.174850e+06,2.647383e+06,2.321059e+06,0.000000,1.762487e+06,4190.11975,-1626.970000,6250.0,-3812.5,...,,,,,,,,,,
157052,2026-12-01 20:00:00+00:00,2.053916e+06,2.169903e+06,2.196710e+06,5716.092552,1.727589e+06,2575.38175,-3914.273250,61812.5,0.0,...,,,,,,,,,,
157053,2026-12-01 21:00:00+00:00,1.927986e+06,1.874026e+06,2.065670e+06,153374.265722,1.618502e+06,4588.97475,-1129.853750,33000.0,0.0,...,,,,,,,,,,
157054,2026-12-01 22:00:00+00:00,1.858143e+06,1.708134e+06,1.982530e+06,240551.532457,1.528235e+06,1104.57175,-4813.361500,8375.0,-2500.0,...,,,,,,,,,,
