In [134]:
import pandas as pd

In [137]:
df_power_consumption = pd.read_parquet('data/power_consumption_export_20250502.parquet')

In [136]:
df_power_consumption

Unnamed: 0,timestamp,device_name,power_watts
0,2024-12-15 00:00:31.740807+00:00,bedroom,3
1,2024-12-15 00:01:05.582668+00:00,bedroom,2
2,2024-12-15 00:01:39.548355+00:00,bedroom,2
3,2024-12-15 00:02:11.827688+00:00,bedroom,2
4,2024-12-15 00:02:45.433060+00:00,bedroom,3
...,...,...,...
3216591,2025-05-02 09:37:40.781062+00:00,washing_machine,1
3216592,2025-05-02 09:38:12.778407+00:00,washing_machine,1
3216593,2025-05-02 09:38:44.538445+00:00,washing_machine,1
3216594,2025-05-02 09:39:16.699257+00:00,washing_machine,1


In [139]:
# Convert power consumption timestamp to naive datetime for proper comparison
# Add 1 hour to convert from UTC to local time (UTC+1)
df_power_consumption['timestamp_naive'] = df_power_consumption['timestamp'].dt.tz_localize(None) + pd.Timedelta(hours=1)

# Define a date range for relevant data - using the date range from climate data
# Assuming you want data from 2024-12-15 to 2025-04-27 based on cell 11
start_date = pd.Timestamp('2024-12-15')
end_date = pd.Timestamp('2025-04-27')

# Filter power consumption data to match this date range
df_power_consumption_relevant = df_power_consumption[
    (df_power_consumption['timestamp_naive'] >= start_date) & 
    (df_power_consumption['timestamp_naive'] <= end_date)
]

# Check the number of rows in the filtered dataframe
print(f"Original power consumption data: {len(df_power_consumption)} rows")
print(f"Filtered power consumption data: {len(df_power_consumption_relevant)} rows")


Original power consumption data: 3216596 rows
Filtered power consumption data: 3085205 rows


In [140]:
df_power_consumption_relevant.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3085205 entries, 0 to 3086116
Data columns (total 4 columns):
 #   Column           Dtype              
---  ------           -----              
 0   timestamp        datetime64[ns, UTC]
 1   device_name      object             
 2   power_watts      int64              
 3   timestamp_naive  datetime64[ns]     
dtypes: datetime64[ns, UTC](1), datetime64[ns](1), int64(1), object(1)
memory usage: 117.7+ MB


In [141]:
df_power_consumption_relevant.tail()

Unnamed: 0,timestamp,device_name,power_watts,timestamp_naive
3086112,2025-04-26 22:57:23.349752+00:00,washing_machine,0,2025-04-26 23:57:23.349752
3086113,2025-04-26 22:57:55.354435+00:00,washing_machine,0,2025-04-26 23:57:55.354435
3086114,2025-04-26 22:58:26.880001+00:00,washing_machine,0,2025-04-26 23:58:26.880001
3086115,2025-04-26 22:58:58.500841+00:00,washing_machine,0,2025-04-26 23:58:58.500841
3086116,2025-04-26 22:59:30.197073+00:00,washing_machine,0,2025-04-26 23:59:30.197073


In [142]:
df_power_consumption_relevant[['timestamp_naive', 'device_name', 'power_watts']].set_index('timestamp_naive').groupby('device_name')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x38435fd40>

In [143]:
# resample the power consumption data to hourly data grouped by device_name
# Method 1: Using pd.Grouper (recommended approach)
df_power_consumption_hourly = (df_power_consumption_relevant[['timestamp_naive', 'device_name', 'power_watts']]
               .set_index('timestamp_naive')
               .groupby(['device_name', pd.Grouper(freq='h')])
               .sum()
               .reset_index())

In [144]:
df_power_consumption_hourly

Unnamed: 0,device_name,timestamp_naive,power_watts
0,bedroom,2024-12-15 01:00:00,275
1,bedroom,2024-12-15 02:00:00,444
2,bedroom,2024-12-15 03:00:00,470
3,bedroom,2024-12-15 04:00:00,312
4,bedroom,2024-12-15 05:00:00,268
...,...,...,...
28713,washing_machine,2025-04-26 19:00:00,0
28714,washing_machine,2025-04-26 20:00:00,0
28715,washing_machine,2025-04-26 21:00:00,0
28716,washing_machine,2025-04-26 22:00:00,0


In [145]:
# create a new column for each device_name in df_power_consumption_hourly
df_power_consumption_hourly_pivot = df_power_consumption_hourly.pivot(index='timestamp_naive', columns='device_name', values='power_watts')

In [146]:
df_power_consumption_hourly_pivot

device_name,bedroom,cooler,kitchen,living_room_window,office,solar,television,washing_dryer,washing_machine
timestamp_naive,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-12-15 01:00:00,275.0,4457.0,529.0,1166.0,3312.0,0.0,322.0,0.0,0.0
2024-12-15 02:00:00,444.0,4322.0,540.0,1188.0,3358.0,0.0,328.0,2.0,0.0
2024-12-15 03:00:00,470.0,4036.0,535.0,1177.0,3324.0,0.0,331.0,1.0,0.0
2024-12-15 04:00:00,312.0,4413.0,530.0,1166.0,3292.0,0.0,331.0,0.0,0.0
2024-12-15 05:00:00,268.0,3626.0,535.0,1177.0,3322.0,0.0,327.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
2025-04-26 19:00:00,193.0,4426.0,444.0,1110.0,557.0,271.0,419.0,0.0,0.0
2025-04-26 20:00:00,194.0,4435.0,444.0,1110.0,551.0,0.0,333.0,0.0,0.0
2025-04-26 21:00:00,208.0,4064.0,448.0,1120.0,557.0,0.0,336.0,0.0,0.0
2025-04-26 22:00:00,195.0,4072.0,444.0,1110.0,553.0,0.0,343.0,1.0,0.0


In [147]:
from plotly.subplots import make_subplots

import plotly.express as px
import plotly.graph_objects as go

# Create a line plot for each device showing power consumption over time
fig = px.line(
    df_power_consumption_hourly,
    x="timestamp_naive",
    y="power_watts",
    color="device_name",
    title="Power Consumption by Device Over Time"
)

# Improve layout
fig.update_layout(
    xaxis_title="Date & Time",
    yaxis_title="Power (Watts)",
    height=600,
    width=1000,
    legend_title="Device"
)

# Add a rangeslider for easier navigation through time
fig.update_xaxes(rangeslider_visible=True)

# Show the plot
fig.show()

# Create a summary plot showing total daily consumption by device
daily_consumption = (df_power_consumption_hourly
    .set_index('timestamp_naive')
    .groupby([pd.Grouper(freq='D'), 'device_name'])
    .sum()
    .reset_index())

fig2 = px.bar(
    daily_consumption, 
    x="timestamp_naive", 
    y="power_watts", 
    color="device_name",
    title="Daily Power Consumption by Device",
    barmode="stack"
)

fig2.update_layout(
    xaxis_title="Date",
    yaxis_title="Power (Watt-hours)",
    height=500,
    width=1000
)

fig2.show()

In [None]:
from wetterdienst.provider.dwd.observation import DwdObservationRequest
from wetterdienst import Settings

# if no settings are provided, default settings are used which are
# Settings(ts_shape="long", ts_humanize=True, ts_si_units=True)
settings = Settings(ts_skip_empty=True)
parameters = [
    ("hourly", "air_temperature"),
    ("hourly", "precipitation"),
    ("hourly", "wind"),
    ("hourly", "cloudiness"),
    ("hourly", "solar"),
]
df_list = []
for parameter in parameters:
    request = DwdObservationRequest(
        parameters=parameter,
        start_date="2024-12-15",
        end_date="2025-04-27",
        settings=settings,
    )
    attendorn = (51.1279, 7.9022)
    stations = request.filter_by_rank(latlon=attendorn, rank=5)

    # Query data all together
    df = stations.values.all().df.drop_nulls()
    df_list.append(df)

# df_all = pd.concat(df_list)

In [120]:
import pandas as pd
pd.DataFrame(df_list[4])

Unnamed: 0,0,1,2,3,4,5,6
0,01303,hourly,solar,radiation_global,2024-12-15 00:00:00,0.0,1.0
1,01303,hourly,solar,radiation_global,2024-12-15 01:00:00,0.0,1.0
2,01303,hourly,solar,radiation_global,2024-12-15 02:00:00,0.0,1.0
3,01303,hourly,solar,radiation_global,2024-12-15 03:00:00,0.0,1.0
4,01303,hourly,solar,radiation_global,2024-12-15 04:00:00,0.0,1.0
...,...,...,...,...,...,...,...
14608,01639,hourly,solar,sunshine_duration,2025-03-31 19:00:00,0.0,1.0
14609,01639,hourly,solar,sunshine_duration,2025-03-31 20:00:00,0.0,1.0
14610,01639,hourly,solar,sunshine_duration,2025-03-31 21:00:00,0.0,1.0
14611,01639,hourly,solar,sunshine_duration,2025-03-31 22:00:00,0.0,1.0


In [121]:
df_list_df = []
for i in range(len(df_list)):
    i_df = pd.DataFrame(df_list[i])
    df_list_df.append(i_df)
df_climate = pd.concat(df_list_df)
    

In [122]:
df_climate.columns =stations.values.all().df.drop_nulls().columns

In [123]:
df_climate['parameter'].unique()

array(['humidity', 'temperature_air_mean_2m', 'precipitation_height',
       'precipitation_index', 'wind_direction', 'wind_speed',
       'cloud_cover_total', 'radiation_global', 'radiation_sky_long_wave',
       'radiation_sky_short_wave_diffuse', 'sun_zenith_angle',
       'sunshine_duration'], dtype=object)

In [124]:
df_climate

Unnamed: 0,station_id,resolution,dataset,parameter,date,value,quality
0,02947,hourly,temperature_air,humidity,2024-12-15 00:00:00,0.93,3.0
1,02947,hourly,temperature_air,humidity,2024-12-15 01:00:00,0.93,3.0
2,02947,hourly,temperature_air,humidity,2024-12-15 02:00:00,0.92,3.0
3,02947,hourly,temperature_air,humidity,2024-12-15 03:00:00,0.87,3.0
4,02947,hourly,temperature_air,humidity,2024-12-15 04:00:00,0.86,3.0
...,...,...,...,...,...,...,...
14608,01639,hourly,solar,sunshine_duration,2025-03-31 19:00:00,0.0,1.0
14609,01639,hourly,solar,sunshine_duration,2025-03-31 20:00:00,0.0,1.0
14610,01639,hourly,solar,sunshine_duration,2025-03-31 21:00:00,0.0,1.0
14611,01639,hourly,solar,sunshine_duration,2025-03-31 22:00:00,0.0,1.0


In [125]:
# Extract datetime from the date column (assuming it's in column 4 based on earlier cells)
df_climate_processed = df_climate.copy()

# Create the pivot table
df_climate_pivot = df_climate_processed.pivot_table(
    index='date',
    columns='parameter',
    values='value',
    aggfunc='mean'  # Use mean in case there are multiple values for the same parameter and date
)

# Reset index to make date a column again
df_climate_pivot = df_climate_pivot.reset_index()

df_climate_pivot

parameter,date,cloud_cover_total,humidity,precipitation_height,precipitation_index,radiation_global,radiation_sky_long_wave,radiation_sky_short_wave_diffuse,sun_zenith_angle,sunshine_duration,temperature_air_mean_2m,wind_direction,wind_speed
0,2024-12-15 00:00:00,0.875,0.968,0.24,1.0,0.0,118.5,0.0,151.67,0.0,2.36,256.666667,4.266667
1,2024-12-15 01:00:00,1.0,0.96,0.14,0.8,0.0,113.0,0.0,147.265,0.0,2.8,260.0,4.8
2,2024-12-15 02:00:00,1.0,0.936,0.08,0.6,0.0,110.0,0.0,139.98,0.0,2.94,260.0,5.3
3,2024-12-15 03:00:00,0.875,0.924,0.04,0.2,0.0,115.0,0.0,131.23,0.0,2.68,260.0,5.6
4,2024-12-15 04:00:00,0.875,0.924,0.0,0.2,0.0,115.0,0.0,121.9,0.0,2.52,260.0,5.633333
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3188,2025-04-26 20:00:00,0.0,0.616,0.0,0.0,,,,,,11.1,83.333333,3.466667
3189,2025-04-26 21:00:00,0.0,0.66,0.0,0.0,,,,,,9.88,90.0,3.5
3190,2025-04-26 22:00:00,0.0,0.696,0.0,0.0,,,,,,8.9,90.0,3.366667
3191,2025-04-26 23:00:00,0.0,0.704,0.0,0.0,,,,,,8.28,93.333333,2.9


In [126]:
# drop rows where all values are missing
df_climate_cleaned = df_climate_pivot.dropna(how='any')
# drop any column which contains only the same value
df_climate_cleaned = df_climate_cleaned.loc[:, df_climate_cleaned.nunique() != 1]
df_climate_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2439 entries, 0 to 2567
Data columns (total 13 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   date                              2439 non-null   datetime64[ns]
 1   cloud_cover_total                 2439 non-null   object        
 2   humidity                          2439 non-null   object        
 3   precipitation_height              2439 non-null   object        
 4   precipitation_index               2439 non-null   object        
 5   radiation_global                  2439 non-null   object        
 6   radiation_sky_long_wave           2439 non-null   object        
 7   radiation_sky_short_wave_diffuse  2439 non-null   object        
 8   sun_zenith_angle                  2439 non-null   object        
 9   sunshine_duration                 2439 non-null   object        
 10  temperature_air_mean_2m           2439 non-null   obj

In [127]:
import plotly.express as px

# Create a plot with all columns except date
fig = px.line(df_climate_cleaned, x='date', y=df_climate_cleaned.columns.drop('date'))

# Improve layout
fig.update_layout(
    title="Climate Data Over Time",
    xaxis_title="Date",
    yaxis_title="Value",
    legend_title="Parameter",
    height=600,
    width=1000
)

# Show the plot
fig.show()

# To examine the distribution of values across different parameters
# Create a histogram for each parameter
fig2 = px.box(
    df_climate_cleaned.melt(id_vars=['date'], var_name='Parameter', value_name='Value'),
    x='Parameter', 
    y='Value',
    title="Distribution of Climate Parameters"
)
fig2.show()

In [133]:
df_merged = df_climate_cleaned.merge(
    df_power_consumption_hourly_pivot,
    left_on='date',
    right_on='timestamp_naive',
    how='inner'
)
# Display the merged dataframe  
df_merged

Unnamed: 0,date,cloud_cover_total,humidity,precipitation_height,precipitation_index,radiation_global,radiation_sky_long_wave,radiation_sky_short_wave_diffuse,sun_zenith_angle,sunshine_duration,...,wind_speed,bedroom,cooler,kitchen,living_room_window,office,solar,television,washing_dryer,washing_machine
0,2024-12-15 23:00:00,0.875,0.968,0.44,1.0,0.0,127.0,0.0,151.715,0.0,...,7.033333,42.0,124.0,75.0,165.0,465.0,0.0,45.0,0.0,0.0
1,2024-12-16 00:00:00,0.875,0.968,0.42,1.0,0.0,127.5,0.0,151.72,0.0,...,7.4,291.0,4377.0,525.0,1155.0,3258.0,0.0,379.0,0.0,0.0
2,2024-12-16 01:00:00,0.875,0.966,0.22,1.0,0.0,127.5,0.0,147.305,0.0,...,7.833333,428.0,3771.0,535.0,1179.0,3324.0,0.0,327.0,0.0,0.0
3,2024-12-16 02:00:00,1.0,0.958,0.24,1.0,0.0,128.0,0.0,140.01,0.0,...,8.333333,333.0,4470.0,540.0,1188.0,3360.0,0.0,329.0,0.0,0.0
4,2024-12-16 03:00:00,0.875,0.958,0.2,1.0,0.0,128.0,0.0,131.26,0.0,...,8.766667,212.0,4070.0,530.0,1166.0,3299.0,0.0,327.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2366,2025-03-31 17:00:00,0.5,0.558,0.0,0.0,15.0,113.0,14.0,81.81,120.0,...,4.133333,210.0,6571.0,421.0,971.0,3962.0,3839.0,364.0,1.0,0.0
2367,2025-03-31 18:00:00,0.25,0.588,0.0,0.0,2.0,108.0,2.0,91.29,0.0,...,2.766667,574.0,4624.0,420.0,977.0,3969.0,1116.0,316.0,3.0,0.0
2368,2025-03-31 19:00:00,0.0,0.664,0.0,0.0,0.0,102.0,0.0,100.48,0.0,...,2.2,212.0,5994.0,425.0,983.0,3996.0,0.0,327.0,0.0,0.0
2369,2025-03-31 22:00:00,0.0,0.804,0.0,0.0,0.0,98.0,0.0,121.57,0.0,...,1.333333,527.0,3888.0,421.0,1733.0,3983.0,0.0,315.0,0.0,0.0


In [227]:
# Copyright (C) 2018-2025, earthobservations developers.
# Distributed under the MIT License. See LICENSE for more info.
"""Example for DWD MOSMIX acquisition.

This program will request latest MOSMIX-L data for stations 01001 and 01008 and parameters DD and ww.
Other MOSMIX variants are also listed and can be enabled on demand.
"""

import polars as pl

from wetterdienst import Settings
from wetterdienst.provider.dwd.mosmix import (
    DwdForecastDate,
    DwdMosmixRequest,
)
from wetterdienst.util.cli import setup_logging


def mosmix_example() -> None:
    """Retrieve Mosmix mosmix data by DWD."""
    # A. MOSMIX-L -- Specific stations - each station with own file
    settings = Settings(ts_shape="wide", ts_humanize=True)

    request = DwdMosmixRequest(
        parameters=[("hourly", "large", "DD"), ("hourly", "large", "ww")],
        issue=DwdForecastDate.LATEST,  # automatically set if left empty
        settings=settings,
    )

    stations = request.filter_by_station_id(
        station_id=["01001", "01008"],
    )

    response = next(stations.values.query())

    # meta data enriched with information from metadata_for_forecasts()
    output_section("Metadata", response.stations.df)
    output_section("Forecasts", response.df)

    # B. MOSMIX-L -- All stations - specified stations are extracted.
    Settings.tidy = True
    Settings.humanize = True

    request = DwdMosmixRequest(
        parameters=[("hourly", "large", "DD"), ("hourly", "large", "ww")],
        issue=DwdForecastDate.LATEST,  # automatically set if left empty
        station_group="all_stations",
    )

    stations = request.filter_by_station_id(
        station_id=["01001", "01008"],
    )

    response = next(stations.values.query())

    # meta data enriched with information from metadata_for_forecasts()
    output_section("Metadata", response.stations.df)
    output_section("Forecasts", response.df)

    # C. MOSMIX-S -- All stations - specified stations are extracted.

    request = DwdMosmixRequest(
        parameters=[("hourly", "small", "DD"), ("hourly", "small", "ww")],
        issue=DwdForecastDate.LATEST,  # automatically set if left empty
    )

    stations = request.filter_by_station_id(
        station_id=["01001", "01008"],
    )

    response = next(stations.values.query())

    output_section("Metadata", response.stations.df)
    output_section("Forecasts", response.df)


def output_section(title: str, data: pl.DataFrame) -> None:  # pragma: no cover
    """Output section."""
    print("-" * len(title))
    print(title)
    print("-" * len(title))
    print(data)
    print()


def main() -> None:
    """Run example."""
    setup_logging()
    mosmix_example()


if __name__ == "__main__":
    main()

2025-03-25 21:31:05,667 [wetterdienst.settings                   ] INFO   : Wetterdienst cache is enabled [CACHE_DIR:/Users/woerenkaemper/Library/Caches/wetterdienst]
2025-03-25 21:31:05,668 [wetterdienst.settings                   ] INFO   : Wetterdienst cache is enabled [CACHE_DIR:/Users/woerenkaemper/Library/Caches/wetterdienst]
2025-03-25 21:31:05,668 [wetterdienst.core.timeseries.request    ] INFO   : option 'ts_drop_nulls' is only available with option 'ts_shape=long' and is thus ignored in this request.
2025-03-25 21:31:05,669 [wetterdienst.core.timeseries.request    ] INFO   : Processing request <wetterdienst.provider.dwd.mosmix.api.DwdMosmixRequest object at 0x16887fa10>
2025-03-25 21:31:05,669 [wetterdienst.util.network               ] INFO   : Downloading file https://www.dwd.de/DE/leistungen/met_verfahren_mosmix/mosmix_stationskatalog.cfg?view=nasPublication
2025-03-25 21:31:05,896 [wetterdienst.util.network               ] INFO   : Downloaded file https://www.dwd.de/DE/lei

--------
Metadata
--------
shape: (2, 11)
┌────────────┬─────────┬────────────┬─────────┬───┬───────────┬────────┬───────────┬───────┐
│ resolution ┆ dataset ┆ station_id ┆ icao_id ┆ … ┆ longitude ┆ height ┆ name      ┆ state │
│ ---        ┆ ---     ┆ ---        ┆ ---     ┆   ┆ ---       ┆ ---    ┆ ---       ┆ ---   │
│ str        ┆ str     ┆ str        ┆ str     ┆   ┆ f64       ┆ f64    ┆ str       ┆ str   │
╞════════════╪═════════╪════════════╪═════════╪═══╪═══════════╪════════╪═══════════╪═══════╡
│ hourly     ┆ large   ┆ 01001      ┆ ENJA    ┆ … ┆ -8.67     ┆ 10.0   ┆ JAN MAYEN ┆ null  │
│ hourly     ┆ large   ┆ 01008      ┆ ENSB    ┆ … ┆ 15.47     ┆ 29.0   ┆ SVALBARD  ┆ null  │
└────────────┴─────────┴────────────┴─────────┴───┴───────────┴────────┴───────────┴───────┘

---------
Forecasts
---------
shape: (247, 7)
┌────────────┬─────────┬───────────────┬──────────────┬──────────────┬──────────────┬──────────────┐
│ station_id ┆ dataset ┆ date          ┆ wind_directi ┆ qn_wind_di

2025-03-25 21:31:09,147 [wetterdienst.provider.dwd.mosmix.access ] INFO   : https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_L/all_stations/kml/MOSMIX_L_LATEST.kmz:   0%|          | 0.00/83.4M [00:00<?, ?iB/s]
2025-03-25 21:31:09,212 [wetterdienst.provider.dwd.mosmix.access ] INFO   : https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_L/all_stations/kml/MOSMIX_L_LATEST.kmz: 100%|##########| 83.4M/83.4M [00:00<00:00, 1.35GiB/s]
2025-03-25 21:31:10,679 [wetterdienst.provider.dwd.mosmix.access ] INFO   : Parsing KML data
2025-03-25 21:31:11,150 [wetterdienst.settings                   ] INFO   : Wetterdienst cache is enabled [CACHE_DIR:/Users/woerenkaemper/Library/Caches/wetterdienst]
2025-03-25 21:31:11,150 [wetterdienst.settings                   ] INFO   : Wetterdienst cache is enabled [CACHE_DIR:/Users/woerenkaemper/Library/Caches/wetterdienst]
2025-03-25 21:31:11,150 [wetterdienst.core.timeseries.request    ] INFO   : Processing request <wetterdienst.provider.dwd.mo

--------
Metadata
--------
shape: (2, 11)
┌────────────┬─────────┬────────────┬─────────┬───┬───────────┬────────┬───────────┬───────┐
│ resolution ┆ dataset ┆ station_id ┆ icao_id ┆ … ┆ longitude ┆ height ┆ name      ┆ state │
│ ---        ┆ ---     ┆ ---        ┆ ---     ┆   ┆ ---       ┆ ---    ┆ ---       ┆ ---   │
│ str        ┆ str     ┆ str        ┆ str     ┆   ┆ f64       ┆ f64    ┆ str       ┆ str   │
╞════════════╪═════════╪════════════╪═════════╪═══╪═══════════╪════════╪═══════════╪═══════╡
│ hourly     ┆ large   ┆ 01001      ┆ ENJA    ┆ … ┆ -8.67     ┆ 10.0   ┆ JAN MAYEN ┆ null  │
│ hourly     ┆ large   ┆ 01008      ┆ ENSB    ┆ … ┆ 15.47     ┆ 29.0   ┆ SVALBARD  ┆ null  │
└────────────┴─────────┴────────────┴─────────┴───┴───────────┴────────┴───────────┴───────┘

---------
Forecasts
---------
shape: (493, 7)
┌────────────┬────────────┬─────────┬─────────────────────┬──────────────────────┬───────┬─────────┐
│ station_id ┆ resolution ┆ dataset ┆ parameter           ┆ date  

2025-03-25 21:31:12,517 [wetterdienst.provider.dwd.mosmix.access ] INFO   : https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_S/all_stations/kml/MOSMIX_S_LATEST_240.kmz:   0%|          | 0.00/37.4M [00:00<?, ?iB/s]
2025-03-25 21:31:12,548 [wetterdienst.provider.dwd.mosmix.access ] INFO   : https://opendata.dwd.de/weather/local_forecasts/mos/MOSMIX_S/all_stations/kml/MOSMIX_S_LATEST_240.kmz: 100%|##########| 37.4M/37.4M [00:00<00:00, 1.28GiB/s]
2025-03-25 21:31:12,914 [wetterdienst.provider.dwd.mosmix.access ] INFO   : Parsing KML data


--------
Metadata
--------
shape: (2, 11)
┌────────────┬─────────┬────────────┬─────────┬───┬───────────┬────────┬───────────┬───────┐
│ resolution ┆ dataset ┆ station_id ┆ icao_id ┆ … ┆ longitude ┆ height ┆ name      ┆ state │
│ ---        ┆ ---     ┆ ---        ┆ ---     ┆   ┆ ---       ┆ ---    ┆ ---       ┆ ---   │
│ str        ┆ str     ┆ str        ┆ str     ┆   ┆ f64       ┆ f64    ┆ str       ┆ str   │
╞════════════╪═════════╪════════════╪═════════╪═══╪═══════════╪════════╪═══════════╪═══════╡
│ hourly     ┆ small   ┆ 01001      ┆ ENJA    ┆ … ┆ -8.67     ┆ 10.0   ┆ JAN MAYEN ┆ null  │
│ hourly     ┆ small   ┆ 01008      ┆ ENSB    ┆ … ┆ 15.47     ┆ 29.0   ┆ SVALBARD  ┆ null  │
└────────────┴─────────┴────────────┴─────────┴───┴───────────┴────────┴───────────┴───────┘

---------
Forecasts
---------
shape: (480, 7)
┌────────────┬────────────┬─────────┬─────────────────────┬──────────────────────┬───────┬─────────┐
│ station_id ┆ resolution ┆ dataset ┆ parameter           ┆ date  

In [81]:
features = ['radiation_global', 'radiation_sky_long_wave', 'radiation_sky_short_wave_diffuse', 
                'sunshine_duration', 'sun_zenith_angle', 'temperature_air_mean_2m', 
                'cloud_cover_total', 'humidity']

In [118]:
import polars as pl
from datetime import datetime, timedelta
from wetterdienst import Settings
from wetterdienst.provider.dwd.mosmix import DwdForecastDate, DwdMosmixRequest
import plotly.express as px

def next_day_mosmix(tomorrow: datetime, features: list[str]):
    # 1. Compute start/end datetimes for midnight→midnight UTC
    start = datetime(tomorrow.year, tomorrow.month, tomorrow.day)
    end   = start + timedelta(days=1)

    # 2. Settings: wide table with humanized, non-SI names
    settings = Settings(ts_shape="wide", ts_humanize=True)

    # 3. Request only the 10-day forecast from the latest run, 
    #    but *limit* it to tomorrow via start_date/end_date
    # Use a list of parameters instead of None
    parameters = []
    for feature in features:
        parameters.append(("hourly", "large", feature))
    
    request = DwdMosmixRequest(
        parameters=parameters,           # Specify parameters to fetch
        issue=DwdForecastDate.LATEST,    # latest model run
        start_date=start,                # ← here
        end_date=end,                    # ← and here
        settings=settings,
    )

    # 4. Restrict to your stations
    attendorn = (51.1279, 7.9022)
    stations = request.filter_by_rank(latlon=attendorn, rank=5)

    # 5. Pull the first (and only) result
    response = next(stations.values.query())
    
    # 6. Convert to pandas DataFrame for easier handling with plotly
    df = response.df.to_pandas()
    
    # 7. Print
    print("-" * 20)
    print(f"Forecast for {tomorrow}")
    print("-" * 20)
    print(df.head())
    
    return df

# define your features
features = [
    "radiation_global",
    "radiation_sky_long_wave",
    "radiation_sky_short_wave_diffuse", 
    "sunshine_duration",
    "sun_zenith_angle", 
    "temperature_air_mean_2m",
    "cloud_cover_total", 
    "humidity",
]

# Tomorrow in UTC
tomorrow = datetime.now().date() + timedelta(days=1)

# Get forecast data
df_forecast = next_day_mosmix(tomorrow, features)

# Check what columns are actually available in the result
print("\nAvailable columns:", df_forecast.columns.tolist())

# Get only the columns that exist in the dataframe
available_features = [col for col in features if col in df_forecast.columns]

# Create plot with only available columns
if 'date' in df_forecast.columns and len(available_features) > 0:
    fig = px.line(df_forecast, x='date', y=available_features, 
                 title=f"Weather forecast for {tomorrow}")
    fig.show()
else:
    print("Required columns not found in the dataframe. Available columns:", df_forecast.columns)

--------------------
Forecast for 2025-04-29
--------------------
  station_id dataset                      date  temperature_air_mean_2m  \
0       H669   large 2025-04-29 00:00:00+00:00                      7.0   
1       H669   large 2025-04-29 01:00:00+00:00                      5.9   
2       H669   large 2025-04-29 02:00:00+00:00                      5.1   
3       H669   large 2025-04-29 03:00:00+00:00                      4.8   
4       H669   large 2025-04-29 04:00:00+00:00                      5.0   

   qn_temperature_air_mean_2m  cloud_cover_total  qn_cloud_cover_total  \
0                         NaN               0.11                   NaN   
1                         NaN               0.10                   NaN   
2                         NaN               0.11                   NaN   
3                         NaN               0.13                   NaN   
4                         NaN               0.17                   NaN   

   radiation_global  qn_radiation_glob