# 01 – Data Preparation  
Denne notebook indlæser og renser elprisdata, henter historiske vejrdata fra Open-Meteo API og gemmer det flettede datasæt.


In [None]:
# ## Indlæsning af data
from pathlib import Path

import pandas as pd
import requests

# Load the raw day-ahead price export (semicolon-separated file).
df = pd.read_csv("../data/raw/DayAheadPrices.csv", delimiter=";")
df["HourDK"] = pd.to_datetime(df["HourDK"])
# The price column is read as text with a decimal comma; swap the comma for a
# dot so the values can be cast to float.
df["SpotPriceDKK"] = (
    df["SpotPriceDKK"].str.replace(",", ".", regex=False).astype(float)
)

# Sort chronologically. A stable sort keeps rows that share a timestamp in
# their original file order, so the keep="first" de-duplication below always
# retains the same row (the default quicksort gives no such guarantee).
df = df.sort_values("HourDK", kind="mergesort").reset_index(drop=True)

# Drop unused columns.
df = df.drop(columns=["HourUTC", "PriceArea", "SpotPriceEUR"])

# Remove duplicated timestamps, keeping the first row of each.
# NOTE(review): duplicates presumably stem from the repeated hour when summer
# time ends (or from several price areas in the export) - confirm against the
# raw file, since PriceArea is dropped before de-duplicating.
df = df.drop_duplicates(subset=["HourDK"], keep="first")

# Find dates with missing hours (e.g. the summer-time switch). The expected
# range spans whole days, from 00:00 of the first day to 23:00 of the last,
# so a partially covered first or last day is detected as incomplete too.
first_hour = df["HourDK"].min().normalize()
last_hour = df["HourDK"].max().normalize() + pd.Timedelta(hours=23)
all_hours = pd.date_range(first_hour, last_hour, freq="h")
missing_hours = set(all_hours) - set(df["HourDK"])
missing_dates = sorted({h.normalize() for h in missing_hours})

# Remove every date that lacks at least one hour.
df = df[~df["HourDK"].dt.normalize().isin(missing_dates)].copy()

# Sanity checks: every remaining day covers all 24 hours and no timestamp
# occurs twice.
hours_per_day = (
    df["HourDK"].dt.floor("h").groupby(df["HourDK"].dt.normalize()).nunique()
)
assert hours_per_day.eq(24).all(), "incomplete days remain after cleaning"
assert df["HourDK"].is_unique, "duplicate timestamps remain after cleaning"


Gemte: ../data/processed/merged.csv


In [None]:


# ## Hent historisk vejrdata fra Open-Meteo


def fetch_weather_data(start_date, end_date, lat=56.2, lon=10.5, timeout=30):
    """Fetch hourly historical weather from the Open-Meteo archive API.

    Parameters
    ----------
    start_date, end_date : str, datetime.date or datetime-like
        First and last day to request. Both are normalised to ``YYYY-MM-DD``
        before being sent to the API.
    lat, lon : float
        Latitude and longitude of the location to query.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled
        connection raises instead of blocking the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per entry in the API's hourly ``time`` list, with the columns
        ``TimeDK`` (parsed timestamps; the request asks for the
        ``Europe/Copenhagen`` timezone), ``Temperature``, ``WindSpeed`` and
        ``SolarRadiation``.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status code.
    requests.Timeout
        If the server does not answer within ``timeout``.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": lon,
        # Accept strings, dates and timestamps alike; the API expects ISO dates.
        "start_date": pd.Timestamp(start_date).strftime("%Y-%m-%d"),
        "end_date": pd.Timestamp(end_date).strftime("%Y-%m-%d"),
        "hourly": ["temperature_2m", "windspeed_10m", "shortwave_radiation"],
        "timezone": "Europe/Copenhagen"
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    hourly = response.json()["hourly"]
    return pd.DataFrame({
        "TimeDK": pd.to_datetime(hourly["time"]),
        "Temperature": hourly["temperature_2m"],
        "WindSpeed": hourly["windspeed_10m"],
        "SolarRadiation": hourly["shortwave_radiation"]
    })



In [None]:


# Fetch weather for the same date span as the cleaned price data.
df_weather = fetch_weather_data(df["HourDK"].min().date(), df["HourDK"].max().date())


# ## Merge electricity prices and weather data
# Inner join on the shared timestamp: only hours present in both frames are kept.
df_merged = pd.merge(
    df.rename(columns={"HourDK": "TimeDK"}),
    df_weather,
    on="TimeDK",
    how="inner"
)

# Create the output folder first so the export also works on a fresh checkout
# where the processed-data directory does not exist yet.
output_path = "../data/processed/merged.csv"
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
df_merged.to_csv(output_path, index=False)
print("Gemte:", output_path)

