## DWD Wetterstation: Recent Data

Wetterstation Kiel-Holtenau (DWD-Stations-ID: 02564)

In [21]:
import requests
import zipfile
import io
import pandas as pd

# Every .txt member of the downloaded archive, parsed into a DataFrame and keyed by
# its file name inside the ZIP.
dataframes = {}

# URL of the data source
url = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/subdaily/air_temperature/recent/terminwerte_TU_02564_akt.zip"

# Upper bound (in seconds) for the HTTP request. Without a timeout, requests.get can
# block indefinitely on a stalled connection and freeze the whole notebook run.
REQUEST_TIMEOUT_SECONDS = 30

# Download the ZIP archive into memory and parse each text file it contains
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
if response.status_code == 200:
    with zipfile.ZipFile(io.BytesIO(response.content)) as z:
        # Iterate over files in the ZIP archive
        for file_name in z.namelist():
            if file_name.endswith(".txt"):
                with z.open(file_name) as file:
                    # All text files are read with the same settings (';'-separated,
                    # ISO-8859-1), regardless of what kind of table they hold.
                    df = pd.read_csv(file, sep=';', encoding='ISO-8859-1')
                    dataframes[file_name] = df  # Use file name as key
                    print(f"Data from {file_name}:")
                    print(df.head())  # Display first few rows of the DataFrame
    # NOTE(review): once the loop ends, `df` stays bound to whichever file was parsed
    # last; prefer looking frames up in `dataframes` over relying on that leftover name.
else:
    # Non-200 response: report the status code and leave `dataframes` empty
    print(f"Fehler beim Herunterladen der Daten: {response.status_code}")


Data from Metadaten_Stationsname_Betreibername_02564.txt:
   Stations_ID   Stationsname  Von_Datum  Bis_Datum
0         2564  Kiel-Holtenau   19270201        NaN
1  Stations_ID  Betreibername  Von_Datum  Bis_Datum
2         2564   Wetterdienst   19270201   19510116
3         2564    GeophysBdBw   19510117   20021231
4         2564     GeoInfoDBw   20030101   20121231
Data from Metadaten_Parameter_tu_termin_02564.txt:
               Stations_ID               Von_Datum   Bis_Datum   Stationsname  \
0                     2564                19860601  20010331.0  Kiel-Holtenau   
1                     2564                20010401  20241027.0  Kiel-Holtenau   
2                     2564                19860601  20010331.0  Kiel-Holtenau   
3                     2564                20010401  20241027.0  Kiel-Holtenau   
4  Legende: FT  = Folgetag   GZ = Gesetzliche Zeit         NaN            NaN   

  Parameter              Parameterbeschreibung Einheit  \
0    RF_TER  Terminwerte der relat

In [22]:
def preprocess_data(df, extra_hours=6):
    """Turn a raw sub-daily measurement table into a padded hourly time series.

    The input frame is never modified, so calling this repeatedly on the same
    frame (e.g. re-running the notebook cell) yields the same result every time.

    Processing steps:
      1. Drop the columns ``eor``, ``QN_4`` and ``STATIONS_ID``.
      2. Parse ``MESS_DATUM`` (format ``%Y%m%d%H``) into datetimes.
      3. Upsample to an hourly grid and fill the new rows by linear interpolation.
      4. Rename ``MESS_DATUM``/``TT_TER``/``RF_TER`` to
         ``Timestamp``/``Temperature``/``Humidity``.
      5. Append ``extra_hours`` rows that repeat the last row's values with the
         timestamp advanced by 1, 2, ... hours.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with at least the columns ``MESS_DATUM``, ``TT_TER``, ``RF_TER``,
        ``eor``, ``QN_4`` and ``STATIONS_ID``.
    extra_hours : int, default 6
        Number of hourly rows to append after the last observation.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``Timestamp``, ``Temperature`` and ``Humidity``
        and a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If ``extra_hours`` is negative.
    """
    if extra_hours < 0:
        raise ValueError("extra_hours must be non-negative")

    # NOTE(review): values are interpolated exactly as read; if the source encodes
    # missing readings with a sentinel number, mask it before calling — confirm.
    hourly = (
        # drop() returns a new frame, so the caller's data stays untouched. Dropping
        # first also keeps the non-numeric 'eor' column out of the interpolation.
        df.drop(columns=['eor', 'QN_4', 'STATIONS_ID'])
        .assign(
            MESS_DATUM=lambda frame: pd.to_datetime(
                frame['MESS_DATUM'], format='%Y%m%d%H'
            )
        )
        .set_index('MESS_DATUM')
        # Lower-case 'h' is the non-deprecated spelling of the hourly alias.
        .resample('h')
        .interpolate(method='linear')
        .reset_index()
        .rename(columns={'TT_TER': 'Temperature',
                         'RF_TER': 'Humidity',
                         'MESS_DATUM': 'Timestamp'})
    )

    # Nothing to extend: no padding requested, or no rows to copy from.
    if extra_hours == 0 or hourly.empty:
        return hourly

    # Repeat the last row `extra_hours` times, then shift each copy's timestamp
    # forward by 1..extra_hours hours.
    hour_offsets = pd.to_timedelta(list(range(1, extra_hours + 1)), unit='h')
    padding = hourly.iloc[[-1] * extra_hours].reset_index(drop=True)
    padding['Timestamp'] = padding['Timestamp'] + hour_offsets

    return pd.concat([hourly, padding], ignore_index=True)

# Select the measurement table explicitly instead of relying on whichever frame the
# download loop happened to leave in `df`. It is identified by its MESS_DATUM column,
# which preprocess_data requires (assumed to be absent from the metadata tables — confirm).
measurements_raw = next(
    frame for frame in dataframes.values() if 'MESS_DATUM' in frame.columns
)

# Keep the result under a name so later cells can use it; the bare last expression
# still renders the frame as the cell output.
weather_hourly = preprocess_data(measurements_raw)
weather_hourly

  df = df.resample('H').interpolate(method='linear')
  df = df.resample('H').interpolate(method='linear')


Unnamed: 0,Timestamp,Temperature,Humidity
0,2023-04-27 06:00:00,3.2,88.000000
1,2023-04-27 07:00:00,4.2,83.166667
2,2023-04-27 08:00:00,5.2,78.333333
3,2023-04-27 09:00:00,6.2,73.500000
4,2023-04-27 10:00:00,7.2,68.666667
...,...,...,...
13190,2024-10-27 20:00:00,9.4,87.000000
13191,2024-10-27 21:00:00,9.4,87.000000
13192,2024-10-27 22:00:00,9.4,87.000000
13193,2024-10-27 23:00:00,9.4,87.000000
