In [None]:
import os

import pandas as pd
import numpy as np

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from sklearn.metrics import mean_absolute_error, mean_squared_error

import torch
import torch.nn as nn

from datetime import date, timedelta, datetime

In [None]:
import warnings
# Silences every warning for the rest of the session, including pandas
# chained-assignment warnings raised by later cells.
# NOTE(review): consider narrowing this to specific categories once the
# underlying warnings are fixed.
warnings.filterwarnings("ignore")

In [None]:
# All data/model paths below are relative ("resources/..."), so the working
# directory must be the folder that contains "resources".
# Only step up one level when "resources" is not already visible: an
# unconditional chdir("..") climbs one more directory every time this cell is
# re-run and breaks every later relative path.
if not os.path.isdir("resources"):
    os.chdir("..")
os.getcwd()

# Load data

In [None]:
# Load the raw dataset and keep only the date columns plus the OSLO_* features.
origin_df = pd.read_csv("./resources/weather_prediction_dataset.csv")
base_columns = ["DATE", "MONTH"]
oslo_columns = [x for x in origin_df.columns if x.startswith("OSLO")]
columns = base_columns + oslo_columns
# The last row is dropped, as in the original analysis.
# NOTE(review): the reason for dropping it is not visible here - confirm.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the frame returned by read_csv.
origin_df = origin_df[columns][:-1].copy()

# DATE is stored as a YYYYMMDD number: the first four digits are the year and
# the last two the day (the month comes from the existing MONTH column).
date_digits = origin_df["DATE"].astype(str)
origin_df["YEAR"] = date_digits.str[:4].astype("int64")
origin_df["DAY"] = date_digits.str[-2:].astype("int64")
# Replace the numeric DATE with real datetime.date objects, built in one pass
# over the three columns instead of three .iloc lookups per row.
origin_df["DATE"] = [
    date(year=y, month=m, day=d)
    for y, m, d in zip(origin_df["YEAR"], origin_df["MONTH"], origin_df["DAY"])
]

origin_df.head()

In [None]:
# Second dataset, read from a separate CSV; later cells take the model inputs
# (DATE + feature columns) from this frame.
# NOTE(review): presumably the features here have the monthly mean removed,
# since a later cell adds monthly means back to the results - confirm.
stationary_df = pd.read_csv("resources/weather_prediction_stationary_dataset.csv")

# Prepare LSTM model

In [None]:
# Dimensions of the LSTM forecaster. The model built from these values is
# filled from a saved state dict, so they have to agree with that checkpoint.
hidden_dim, num_layers = 20, 4
# One input per feature (4 in this case) and one output per feature: the
# network predicts the next step for all features.
input_dim = output_dim = 4

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear read-out of the final time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_dim: number of values produced per input sequence.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, feature).
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return the linear projection of the LSTM output at the last time step."""
        sequence_out, _state = self.lstm(x)
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [None]:
# Rebuild the architecture, then fill it with the saved weights.
lstm = LSTMModel(input_dim, hidden_dim, num_layers, output_dim)
# map_location="cpu" lets the checkpoint load on a machine without a GPU even
# if it was saved from one; the inputs fed to the model later are CPU tensors.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
lstm.load_state_dict(torch.load("resources/lstm", map_location="cpu"))
# Switch to inference mode; the notebook only predicts with this model.
lstm.eval()

# Prepare dataset

In [None]:
# Columns fed to and predicted by the model. The order matters: element k of a
# model prediction is written back to feature_cols[k].
feature_cols = [
    "OSLO_temp_mean",
    "OSLO_cloud_cover",
    "OSLO_humidity",
    "OSLO_pressure",
]

In [None]:
# Per-month mean of every feature column (one row per MONTH). Despite the
# name, this covers all feature_cols, not only temperature.
monthly_temp = origin_df.groupby("MONTH")[feature_cols].mean().reset_index()
monthly_temp

In [None]:
# Model input frame: DATE + feature columns from 2008-12-25 onwards, i.e. the
# 7 days before 2009-01-01 plus everything after, re-indexed from 0.
# Built in one chain with assign() so DATE is converted on a new frame rather
# than assigned into a column-subset slice of stationary_df.
feature_df = (
    stationary_df[["DATE"] + feature_cols]
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"]))
    .loc[lambda frame: frame["DATE"] >= datetime(year=2008, month=12, day=25)]
    .reset_index(drop=True)
)
feature_df.head(10)

In [None]:
# Parse DATE in place, then display the rows dated 2009-01-01 or later.
stationary_df["DATE"] = pd.to_datetime(stationary_df["DATE"])
stationary_df.loc[stationary_df["DATE"].ge(datetime(year=2009, month=1, day=1))]

# Implement LSTM predictions

In [None]:
def create_x(data, seq_length):
    """Wrap the leading window of ``data`` as a one-element batch.

    Takes the first ``min(seq_length, len(data))`` items of ``data`` and
    returns them inside a NumPy array with an extra leading axis of length 1.
    """
    window_size = min(seq_length, len(data))
    window = data[0:window_size]
    return np.array([window])


def show_predictions(targets, preds, mae, mse, lag):
    """Plot real values against predictions as two line traces and show the figure.

    Args:
        targets: real values, one per day.
        preds: predicted values, plotted on the same x positions as ``targets``.
        mae: error value printed in the title (rounded to 2 decimals).
        mse: error value printed in the title (rounded to 2 decimals).
        lag: number of lag days printed in the title.
    """
    # Both traces share the same x axis: 0 .. len(targets) - 1.
    day_axis = list(range(len(targets)))

    fig = go.Figure()
    for trace_name, series in (("Real", targets), ("7-days predictions", preds)):
        fig.add_trace(
            go.Scattergl(name=trace_name, x=day_axis, y=series, mode="lines")
        )

    legend_style = dict(
        orientation="h",
        yanchor="bottom",
        y=0.91,
        xanchor="left",
        x=0
    )
    title_text = f"<b>7 days predictions in 2009</b><br>Lag days = {lag}<br>mae={round(mae, 2)}, mse={round(mse, 2)}"
    fig.update_layout(
        title=title_text,
        height=500,
        legend=legend_style,
        xaxis_title="Day",
        yaxis_title="Temp [C]"
    )

    fig.show()

In [None]:
# Rolling 7-day forecast.
# The input window is the 7 days [starting_date, finish_date); the target is
# the row at index y_id (row 7 of feature_df, the day right after the first
# window). After every prediction the window and the target move one day on.
starting_date = datetime(year=2008, month=12, day=25)
finish_date = datetime(year=2009, month=1, day=1)
y_id = 7
predictions, targets, months = [], [], []
run = True

while run:
    # Start every 7-day block from the real observations, so predictions are
    # fed back into the input window for at most 7 consecutive days.
    feature_df_preds = feature_df.copy()

    for _step in range(7):
        in_window = (
            (feature_df_preds["DATE"] >= starting_date)
            & (feature_df_preds["DATE"] < finish_date)
        )
        x = create_x(feature_df_preds.loc[in_window, feature_cols], 7)

        y_true = feature_df.loc[y_id, feature_cols].to_list()
        month = feature_df.loc[y_id, "DATE"].month

        # Inference only: call the module (not .forward) and skip autograd.
        with torch.no_grad():
            preds = lstm(torch.tensor(x, dtype=torch.float32)).tolist()[0]

        # Write the prediction back so the next window in this block sees it.
        for col, value in zip(feature_cols, preds):
            feature_df_preds.loc[y_id, col] = value

        predictions.append(preds)
        # Store the true values for this day (the original appended the
        # `targets` list to itself here).
        targets.append(y_true)
        months.append(month)

        starting_date += timedelta(days=1)
        finish_date += timedelta(days=1)
        y_id += 1

        if y_id >= len(feature_df) - 1:
            run = False
            # Stop right away: continuing the inner loop would index past the
            # last row whenever the number of days is not a multiple of 7.
            break

assert len(predictions) == len(targets)
assert len(predictions) == len(months)
# targets = [monthly_temp[months[i]] + x for i, x in enumerate(targets)]
# preds = [monthly_temp[months[i]] + x for i, x in enumerate(preds)]

# mae = mean_absolute_error(targets, preds)
# mse = mean_squared_error(targets, preds)

# mae_data[lag] = mae
# mse_data[lag] = mse

# show_predictions(targets, preds, mae, mse, lag)

In [None]:
# predictions df
# Rows from 2009-01-01 onwards, minus the last one, which the forecast loop
# does not predict. .copy() makes this an independent frame, so the column
# assignments below do not write into a slice of feature_df.
df_results = feature_df[feature_df["DATE"] >= datetime(year=2009, month=1, day=1)][:-1].copy()

# One "<feature>_pred" column per feature; element k of each prediction belongs
# to feature_cols[k].
for position, col in enumerate(feature_cols):
    df_results[col + "_pred"] = [row[position] for row in predictions]


# add monthly values
# Add the per-month mean of each feature to both the real and the predicted
# column. months[i] is the calendar month of result row i; the means are
# looked up once per column through a MONTH index instead of filtering
# monthly_temp for every row.
monthly_means = monthly_temp.set_index("MONTH")
row_months = months[:len(df_results)]
for col in feature_cols:
    offsets = monthly_means.loc[row_months, col].to_numpy()
    df_results[col] = df_results[col].to_numpy() + offsets
    df_results[col + "_pred"] = df_results[col + "_pred"].to_numpy() + offsets

df_results

In [None]:
# One figure per feature: real vs. predicted series, with MAE / MSE in the title.
x = list(range(1, len(df_results) + 1))  # day numbers, starting at 1

for col in feature_cols:
    actual = df_results[col]
    forecast = df_results[col + "_pred"]

    mae = mean_absolute_error(actual.to_numpy(), forecast.to_numpy())
    mse = mean_squared_error(actual.to_numpy(), forecast.to_numpy())

    fig = go.Figure()
    for trace_name, series in (("Real", actual), ("7-days prediction", forecast)):
        fig.add_trace(
            go.Scatter(name=trace_name, x=x, y=series, mode="lines")
        )

    legend_style = dict(
        orientation="h",
        yanchor="bottom",
        y=1,
        xanchor="left",
        x=0
    )
    fig.update_layout(
        title=f"<b>{col.upper()}: 7 days predictions in 2009</b><br>mae={round(mae, 2)}, mse={round(mse, 2)}",
        height=500,
        legend=legend_style,
        xaxis_title="Day"
    )
    fig.show()