In [19]:
from copy import deepcopy

from cleaned_data import aggregated as main_aggregated
from linear_regression import linear_regression

# Work on a private deep copy: add_lags() below writes new columns into
# `aggregated`, and the object imported from cleaned_data should stay untouched.
aggregated = deepcopy(main_aggregated)


def add_lags(on_column: str, max_offset: int = 3, dataset=None) -> list:
    """Add lagged copies of ``on_column`` to a dataset, in place.

    For every ``offset`` from 1 to ``max_offset`` (inclusive) a column named
    ``f"{on_column}_J-{offset}"`` is written, holding
    ``dataset[on_column].shift(offset)``. Calling the function again with the
    same arguments overwrites those columns with the same values.

    Args:
        on_column: Name of the existing column to lag.
        max_offset: Largest offset to generate. Defaults to 3, the depth that
            was previously hard-coded. Values below 1 create no columns.
        dataset: Object to read from and write to (assumed to be a pandas
            DataFrame). When omitted, the notebook-level ``aggregated`` frame
            is used, as before.

    Returns:
        The names of the lag columns, ordered by increasing offset.

    Raises:
        KeyError: Raised by the dataset lookup if ``on_column`` is missing
            (pandas behaviour).
    """
    # Resolve the global only when no dataset is supplied, so the helper can be
    # used on any frame without depending on notebook state.
    target = aggregated if dataset is None else dataset

    new_columns = [f"{on_column}_J-{offset}" for offset in range(1, max_offset + 1)]
    for offset, col in enumerate(new_columns, start=1):
        # With pandas, the first `offset` rows have no earlier value and are
        # filled with NaN by shift().
        target[col] = target[on_column].shift(offset)
    return new_columns


# Create the lagged copies of the max-temperature column; add_lags hands back
# the names of the columns it wrote into `aggregated`.
temp_lags_cols = add_lags("temperature_2m_max")

# Regress on the base weather columns followed by the temperature lags.
predictions, error = linear_regression(
    dataset=aggregated,
    x_columns=[
        "temperature_2m_max",
        "relative_humidity_2m",
        "precipitation",
        "wind_speed_10m",
        "surface_pressure",
    ]
    + temp_lags_cols,
)

# Show the reported error, then the first ten recorded values of
# next_day_temperature_max (presumably the regression target) next to the
# first ten predictions.
print(error)
print(aggregated["next_day_temperature_max"].iloc[:10])
print(predictions[:10])

0.6490714675460271
0    32.80
1    33.05
2    33.15
3    31.45
4    30.25
5    30.90
6    30.90
7    30.85
8    29.80
9    30.95
Name: next_day_temperature_max, dtype: float64
[32.80564087 32.34499357 31.66687769 31.30550918 30.8664792  30.79371783
 30.45754799 30.74250438 30.62332407 30.85705409]


In [20]:
# Create the lagged copies of the precipitation column as well.
precipitation_lags_cols = add_lags("precipitation")

# Regress on the base weather columns plus the temperature and precipitation lags.
predictions, error = linear_regression(
    dataset=aggregated,
    x_columns=[
        "temperature_2m_max",
        "relative_humidity_2m",
        "precipitation",
        "wind_speed_10m",
        "surface_pressure",
    ]
    + temp_lags_cols
    + precipitation_lags_cols,
)

# Show the reported error, then the first ten recorded values of
# next_day_temperature_max (presumably the regression target) next to the
# first ten predictions.
print(error)
print(aggregated["next_day_temperature_max"].iloc[:10])
print(predictions[:10])

0.6470033975126238
0    32.80
1    33.05
2    33.15
3    31.45
4    30.25
5    30.90
6    30.90
7    30.85
8    29.80
9    30.95
Name: next_day_temperature_max, dtype: float64
[32.83868722 32.37084143 31.62186035 31.26051657 30.85690393 30.77361112
 30.43624734 30.70602995 30.62930289 30.84307413]


In [21]:
# Create the lagged copies of the wind-speed column as well.
wind_lags_cols = add_lags("wind_speed_10m")

# Regress on the base weather columns plus the temperature, precipitation and
# wind-speed lags.
predictions, error = linear_regression(
    dataset=aggregated,
    x_columns=[
        "temperature_2m_max",
        "relative_humidity_2m",
        "precipitation",
        "wind_speed_10m",
        "surface_pressure",
    ]
    + temp_lags_cols
    + precipitation_lags_cols
    + wind_lags_cols,
)

# Show the reported error, then the first ten recorded values of
# next_day_temperature_max (presumably the regression target) next to the
# first ten predictions.
print(error)
print(aggregated["next_day_temperature_max"].iloc[:10])
print(predictions[:10])

0.6412201960508458
0    32.80
1    33.05
2    33.15
3    31.45
4    30.25
5    30.90
6    30.90
7    30.85
8    29.80
9    30.95
Name: next_day_temperature_max, dtype: float64
[32.75256725 32.3568276  31.48062027 31.19291455 30.82667595 30.71389855
 30.38037239 30.65514965 30.5656246  30.91685754]


In [22]:
# Create the lagged copies of the surface-pressure column as well.
pressure_lags_cols = add_lags("surface_pressure")

# Regress on the base weather columns plus every lag group built so far.
predictions, error = linear_regression(
    dataset=aggregated,
    x_columns=[
        "temperature_2m_max",
        "relative_humidity_2m",
        "precipitation",
        "wind_speed_10m",
        "surface_pressure",
    ]
    + temp_lags_cols
    + precipitation_lags_cols
    + wind_lags_cols
    + pressure_lags_cols,
)

# Show the reported error, then the first ten recorded values of
# next_day_temperature_max (presumably the regression target) next to the
# first ten predictions.
print(error)
print(aggregated["next_day_temperature_max"].iloc[:10])
print(predictions[:10])

0.6392799270523569
0    32.80
1    33.05
2    33.15
3    31.45
4    30.25
5    30.90
6    30.90
7    30.85
8    29.80
9    30.95
Name: next_day_temperature_max, dtype: float64
[32.9289605  32.41048723 31.38779951 31.09686394 30.85452269 30.71420771
 30.38544208 30.57299028 30.55233828 30.83276307]
