In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from datetime import datetime
from tqdm import tqdm

In [2]:
def get_energy_data(n_skip: int = 52, timeout: float = 30.0):
    """Download the hourly 'Netzlast_Gesamt' series (SMARD filter 410, region DE).

    The SMARD index endpoint lists the start timestamps of all available data
    chunks; every chunk after the first ``n_skip`` entries is downloaded and
    the rows are combined into a single frame.

    Parameters
    ----------
    n_skip : int, default 52
        Number of leading index entries to ignore. The default reproduces the
        previous hard-coded ``[1*52:]`` slice (the chunks appear to be weekly,
        so this is roughly the first year -- TODO confirm).
    timeout : float, default 30.0
        Timeout in seconds applied to every HTTP request.

    Returns
    -------
    pandas.DataFrame
        One float column ``Netzlast_Gesamt`` indexed by ``date_time``. The
        index is timezone-naive Europe/Berlin wall-clock time, independent of
        the timezone of the machine running the code. Rows without a value
        are dropped.

    Raises
    ------
    requests.HTTPError
        If SMARD answers any request with an error status.
    """
    base_url = "https://www.smard.de/app/chart_data/410/DE/"
    col_names = ['date_time', 'Netzlast_Gesamt']

    # get all available time stamps
    response = requests.get(base_url + "index_hour.json", timeout=timeout)
    response.raise_for_status()
    timestamps = list(response.json()["timestamps"])[n_skip:]

    # Collect one frame per chunk and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all previous rows on
    # every iteration.
    frames = []
    for stamp in tqdm(timestamps):
        dataurl = base_url + "410_DE_hour_" + str(stamp) + ".json"
        response = requests.get(dataurl, timeout=timeout)
        response.raise_for_status()
        frames.append(pd.DataFrame(response.json()["series"], columns=col_names))

    if frames:
        energydata = pd.concat(frames, ignore_index=True)
    else:
        # Nothing to download: keep the documented column layout.
        energydata = pd.DataFrame(columns=col_names)

    energydata = energydata.dropna()

    # SMARD delivers epoch milliseconds. Convert via UTC to Europe/Berlin and
    # strip the tz info afterwards: datetime.fromtimestamp() would silently use
    # the timezone of the host, which is only correct on a Berlin-configured
    # machine. The naive wall-clock result keeps later tz_localize() calls
    # working unchanged.
    berlin_wall_time = (
        pd.to_datetime(energydata["date_time"].astype("int64"), unit="ms", utc=True)
        .dt.tz_convert("Europe/Berlin")
        .dt.tz_localize(None)
    )
    energydata = energydata.assign(date_time=berlin_wall_time).astype(
        {"Netzlast_Gesamt": "float64"}
    )

    # set date_time as index
    return energydata.set_index("date_time")

In [3]:
# Fetch the full hourly series from SMARD. This issues one HTTP request per
# chunk, so it takes a while (the recorded run below shows 481 requests in
# about 1m41s).
df = get_energy_data()

100%|██████████| 481/481 [01:41<00:00,  4.76it/s]


In [4]:
# Normalise the raw download exactly once. The guard on the raw column name
# makes this cell safe to re-run: on a second run `df` is already renamed,
# tz-aware and rescaled, so repeating the steps would raise in tz_localize().
if 'Netzlast_Gesamt' in df.columns:
    # Rename the value column and the index, then attach the Europe/Berlin
    # timezone to the naive wall-clock index. ambiguous='infer' resolves the
    # repeated hour at the autumn DST switch from the row order;
    # nonexistent='raise' fails loudly on timestamps inside the spring gap.
    #
    # The former UTC round trip was dropped: its result was overwritten before
    # use, and converting an index to the timezone it already has changes
    # nothing. The index is already a DatetimeIndex, so no extra
    # pd.to_datetime() call is needed either.
    #
    # `df1` (tz-aware, values not yet rescaled) is kept under its old name in
    # case later cells still reference it.
    df1 = (
        df.rename(columns={'Netzlast_Gesamt': 'target'})
        .rename_axis('Datetime')
        .tz_localize('Europe/Berlin', ambiguous='infer', nonexistent='raise')
    )

    # Scale the values by 1/1000 (presumably MWh -> GWh; confirm against the
    # SMARD unit definition).
    df = df1.assign(target=df1['target'] / 1000)

df.to_csv("energy_data.csv", index = True)
df


Unnamed: 0_level_0,target
Datetime,Unnamed: 1_level_1
2015-12-28 00:00:00+01:00,39.89700
2015-12-28 01:00:00+01:00,38.08825
2015-12-28 02:00:00+01:00,37.61350
2015-12-28 03:00:00+01:00,38.02725
2015-12-28 04:00:00+01:00,39.45200
...,...
2025-03-13 15:00:00+01:00,63.67975
2025-03-13 16:00:00+01:00,62.89950
2025-03-13 17:00:00+01:00,64.20900
2025-03-13 18:00:00+01:00,65.77825


In [5]:
# Start- und Enddatum im DataFrame-Index bestimmen
# First and last timestamp present in the DataFrame index
start_date = df.index.min()
end_date = df.index.max()

# Every hour between the first and last timestamp. The step is given as an
# explicit Timedelta because the 'H' alias is deprecated in recent pandas
# releases and emits a FutureWarning.
all_hours = pd.date_range(start=start_date, end=end_date, freq=pd.Timedelta(hours=1))

# Hours of the complete grid that do not occur in the index
missing_hours = all_hours.difference(df.index)

# Number of missing hours
missing_count = len(missing_hours)

print(f"Es fehlen {missing_count} Stunden im Index.")
print("Liste der fehlenden Stunden:")
print(missing_hours)

# Rows that exist in the index but carry no value
nan_count = df['target'].isna().sum()
print(f"Anzahl der NaN-Werte in der target-Spalte: {nan_count}")

Es fehlen 2 Stunden im Index.
Liste der fehlenden Stunden:
DatetimeIndex(['2025-03-11 04:00:00+01:00', '2025-03-11 05:00:00+01:00'], dtype='datetime64[ns, Europe/Berlin]', freq=None)
Anzahl der NaN-Werte in der target-Spalte: 0
