In [2]:
import plotly.express as px
from plotly.subplots import make_subplots
from weather_data import WeatherData, ModelBasedOptions, HourlyData
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns


In [3]:
# Location of interest (decimal degrees) and the date window passed to both
# weather queries below.
latitude, longitude = 50.732817, 16.648050
start_date, end_date = datetime(2022, 1, 1), datetime(2022, 1, 31)

# Hourly variables requested from the model-based source.
options = ModelBasedOptions(
    hourly=[
        HourlyData.Temperature_2m,
        HourlyData.RelativeHumidity_2m,
        HourlyData.WindDirection_10m,
        HourlyData.WindSpeed_10m,
        HourlyData.Precipitation_rain_showers_snow,
    ]
)

# Model-based data: returns (meta data, daily frame, hourly frame).
meta_data_model, daily_model, hourly_model = WeatherData.getModelBasedData(
    latitude,
    longitude,
    start_date,
    end_date,
    options,
)

# Station data for the same place and window; only hourly values are requested.
# NOTE(review): station "12150" is explicitly skipped — the reason is not
# visible in this notebook; confirm and document it.
meta_data_station, daily_station, hourly_station = WeatherData.getStationData(
    latitude,
    longitude,
    start_date,
    end_date,
    require_daily=False,
    require_hourly=True,
    skip_stations=["12150"],
)


In [4]:
# Show the queried coordinate as a single marker on an OpenStreetMap tile map.
fig = px.scatter_mapbox(
    pd.DataFrame({"lat": [latitude], "lng": [longitude]}),
    lat="lat",
    lon="lng",
    zoom=5,
    height=600,
)
# One layout call: OSM tiles and no outer margins around the map.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()


In [7]:
# Load the pollution measurements and parse the timestamp column.
df = pd.read_csv("../../data/pollution/2022_16_101.csv")
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %H:%M:%S")

# January: keep the rows whose timestamp lies in January 2022.
# .copy() yields an independent frame, so the rows added below never touch df.
januar = df[
    (df["date"] >= pd.Timestamp(year=2022, month=1, day=1))
    & (df["date"] <= pd.Timestamp(year=2022, month=1, day=31, hour=23, minute=59))
].copy()

# Daily mean per calendar day, indexed by date.
# Grouping by the timestamp (instead of row position // 24) keeps every day
# aligned even though two hourly readings are missing on 2022-01-09; with
# positional grouping all later days would be shifted by two hours.
result_januar = januar.resample("D", on="date").agg({"value": "mean"}).round(5)

# Add the two missing hourly readings of 2022-01-09 so the series has one row
# per hour. The fractional labels sort between the existing labels 194 and 195.
# Timestamps (not strings) are inserted so the "date" column keeps one type.
# NOTE(review): 0.00 is a placeholder, not a measurement — it enters the
# correlations computed from this frame; consider NaN or interpolation.
januar.loc[194.5] = [pd.Timestamp("2022-01-09 03:00:00"), 0.00]
januar.loc[194.7] = [pd.Timestamp("2022-01-09 04:00:00"), 0.00]

# Restore chronological order and a clean 0..n-1 index.
januar = januar.sort_index().reset_index(drop=True)


In [8]:
# Pollution values from the January frame prepared above.
pm10 = januar["value"]

# Pull the individual weather series out of the hourly model frame.
temp, humidity, winddirection, windspeed, precipitation = (
    hourly_model[column]
    for column in (
        "temperature_2m",
        "relativehumidity_2m",
        "winddirection_10m",
        "windspeed_10m",
        "precipitation",
    )
)

# Sanity check: all series must have the same length before they are combined.
print(*(series.size for series in (pm10, temp, humidity, winddirection, windspeed, precipitation)))


744 744 744 744 744 744


In [9]:
# Combine all series into one frame. to_numpy() drops each series' own index,
# so the columns are joined purely by row position.
correlation_df = pd.DataFrame(
    {
        label: series.to_numpy()
        for label, series in (
            ("temp", temp),
            ("humidity", humidity),
            ("winddirection", winddirection),
            ("windspeed", windspeed),
            ("precipitation", precipitation),
            ("pm10", pm10),
        )
    }
)


In [10]:
# sns.heatmap(correlation_df.corr(), vmin=-1, vmax=1, annot=True, cmap="rocket_r")


### Korrelation über einen Monat (01.2022)


In [11]:
# Heatmap of the pairwise correlations over all rows, rounded to two decimals.
px.imshow(
    correlation_df.corr().round(2),
    color_continuous_scale="RdBu",
    text_auto=True,
)


### Korrelation über einen Tag (01.01.2022)


In [12]:
# Rows labelled 0..23 (.loc slices include the end label): the first 24 rows.
correlation_day_df = correlation_df.loc[0:23]

# Heatmap of the pairwise correlations within that slice.
px.imshow(
    correlation_day_df.corr().round(2),
    color_continuous_scale="RdBu",
    text_auto=True,
)


### Korrelation über 7 Tage (01.01.–07.01.2022)


In [18]:
# Rows labelled 0..167 (.loc slices include the end label): 168 hourly rows,
# i.e. the first 7 days of the month.
correlation_month_df = correlation_df.loc[0:167]

# Heatmap of the pairwise correlations within that slice.
px.imshow(
    correlation_month_df.corr().round(2),
    color_continuous_scale="RdBu",
    text_auto=True,
)


### Korrelation in einem Zeitraum ohne Regen


In [17]:
# Rows labelled 225..301 (end label included).
# NOTE(review): the bounds are hard-coded; presumably picked by inspecting the
# precipitation series for a dry stretch — confirm and document the source.
correlation_rain_df = correlation_df.loc[225:301]

# Heatmap of the pairwise correlations within that slice.
px.imshow(
    correlation_rain_df.corr().round(2),
    color_continuous_scale="RdBu",
    text_auto=True,
)


In [16]:
# Encode time of day and time of year as sine/cosine pairs.
#
# The timestamps are taken from the January frame (januar), whose rows match
# correlation_df one-to-one. Taking them from the full-year frame df would
# mis-label the rows: df lacks the two hourly readings that were added to
# januar, so index alignment would shift every feature after that gap by two
# hours.
date_time = pd.to_datetime(januar["date"], format="%Y-%m-%d %H:%M:%S")
# Seconds since the Unix epoch for every row.
timestamp_s = date_time.map(pd.Timestamp.timestamp)

# Period lengths in seconds.
day = 24 * 60 * 60
year = (365.2425) * day

# Assign positionally (to_numpy) — the same way correlation_df was assembled —
# so a length mismatch raises instead of silently aligning on index labels.
correlation_df["Day sin"] = np.sin(timestamp_s * (2 * np.pi / day)).to_numpy()
correlation_df["Day cos"] = np.cos(timestamp_s * (2 * np.pi / day)).to_numpy()
correlation_df["Year sin"] = np.sin(timestamp_s * (2 * np.pi / year)).to_numpy()
correlation_df["Year cos"] = np.cos(timestamp_s * (2 * np.pi / year)).to_numpy()
