In [31]:
from wetterdienst.provider.dwd.observation import DwdObservationRequest
from wetterdienst import Settings
from datetime import datetime

# Zeitraum definieren
start_date = datetime(2000, 1, 1)
end_date = datetime(2024, 12, 31)

settings = Settings(  # default
  ts_shape="long",  # tidy data
  ts_humanize=True,  # humanized parameters
  ts_convert_units=True  # convert values to SI units
)

# Anfrage konfigurieren
request = DwdObservationRequest(
    parameters=[("daily", "climate_summary")],
    start_date=datetime(2000, 1, 1),
    end_date=datetime(2024, 12, 31),
    settings=settings
).all()

# Beste verfügbare Station auswählen (z. B. Rang 1)
stations = request.df
stations.head()

# Daten abrufen
values = request.values.all().df
values.head()

# to get a pandas DataFrame and e.g. create some matplotlib plots    
df = values.to_pandas()

# Optional: Daten anzeigen
print(df.head())

  station_id resolution          dataset          parameter  \
0      00003      daily  climate_summary  cloud_cover_total   
1      00003      daily  climate_summary  cloud_cover_total   
2      00003      daily  climate_summary  cloud_cover_total   
3      00003      daily  climate_summary  cloud_cover_total   
4      00003      daily  climate_summary  cloud_cover_total   

                       date   value  quality  
0 2000-01-01 00:00:00+00:00  0.8750     10.0  
1 2000-01-02 00:00:00+00:00  0.9125     10.0  
2 2000-01-03 00:00:00+00:00  0.8375     10.0  
3 2000-01-04 00:00:00+00:00  0.9625     10.0  
4 2000-01-05 00:00:00+00:00  0.4125     10.0  


In [33]:
df_filtered = df.loc[df["parameter"].isin(['cloud_cover_total', 'humidity', 'precipitation_form', 
                                  'precipitation_height', 'snow_depth', 'sunshine_duration', 
                                  'temperature_air_mean_2m','wind_gust_max', 'wind_speed']), :]

In [34]:
df_stations = stations.to_pandas()

In [36]:
df_stations = df_stations.set_index(df_stations["station_id"])

In [37]:
df_filtered["state"] = df_filtered["station_id"].map(df_stations["state"])
df_filtered["name"] = df_filtered["station_id"].map(df_stations["name"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered["state"] = df_filtered["station_id"].map(df_stations["state"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered["name"] = df_filtered["station_id"].map(df_stations["name"])


In [None]:
# Pro Bundesland: Station mit längster Zeitreihe
best_stations = (
    df_filtered.sort_values(by=["state", "date"])
    .drop_duplicates(subset=["state"], keep="first")
    .reset_index(drop=True)
)

# Übersicht anzeigen
print(best_stations[["state", "station_id", "name", "date"]])

                     state station_id                         name  \
0        Baden-Württemberg      00011  Donaueschingen (Landeplatz)   
1                   Bayern      00073        Aldersbach-Kramersepp   
2                   Berlin      00399        Berlin-Alexanderplatz   
3              Brandenburg      00164                   Angermünde   
4                   Bremen      00691                       Bremen   
5                  Hamburg      01975          Hamburg-Fuhlsbüttel   
6                   Hessen      00090                      Alsfeld   
7   Mecklenburg-Vorpommern      00129                 Altentreptow   
8            Niedersachsen      00044                 Großenkneten   
9      Nordrhein-Westfalen      00003                       Aachen   
10         Rheinland-Pfalz      00150                        Alzey   
11                Saarland      00460                        Berus   
12                 Sachsen      00222                          Aue   
13          Sachsen-

In [48]:
df_filtered = df_filtered.loc[df_filtered["station_id"].isin(['00011',
 '00073',
 '00399',
 '00164',
 '00691',
 '01975',
 '00090',
 '00129',
 '00044',
 '00003',
 '00150',
 '00460',
 '00222',
 '00445',
 '00052',
 '00198']), :]

In [49]:
df_filtered.to_csv("Wetterdaten.csv", index=False)