In [29]:
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from preprocessing import preprocess_df
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta
from meteostat import Daily, Hourly, Point
import torch

In [18]:
def merge_data(df: pd.DataFrame):
    start, end = (
        df["Timestamp"][df.index[0]],
        df.loc[:, "Timestamp"][df[df["Room"] == "Raum 004"].index[-1]],
    )
    print(start, end)
    start, end = datetime.strptime(start, "%Y-%m-%d %H:%M:%S"), datetime.strptime(
        end, "%Y-%m-%d %H:%M:%S"
    )
    phoenix = Point(51.98589, 9.25246, 111)
    # Get daily data for 2018
    data = Hourly(phoenix, start, end + timedelta(weeks=4))
    tdf = data.fetch()
    tdf = tdf[["dwpt", "rhum", "prcp", "wspd", "tsun", "coco"]]

    tdf = tdf.resample("15T", kind="timestamp").interpolate()

    print(tdf.shape, df.shape)

    # merge
    df.set_index("Timestamp", inplace=True)
    df.index = pd.to_datetime(df.index, format="%Y-%m-%d %H:%M:%S")

    res = df.join(tdf, how="left").drop(columns=["WindVelocity", "RelativeHumidity"])

    return res

In [68]:
# load models
models = []
for i in range(6):
    tft = TemporalFusionTransformer.load_from_checkpoint(f"final_model/room{i+1}.ckpt")
    models.append(tft)

In [24]:
# load test datasets
df_april = pd.read_csv("data/test/test_april2022.csv", sep=",")
df_feb = pd.read_csv("data/test/test_feb2022.csv", sep=",")
df_jan = pd.read_csv("data/test/test_jan2022.csv", sep=",")
df_march = pd.read_csv("data/test/test_march2022.csv", sep=",")

# merge dataset with weather data
df_april = merge_data(df_april)
df_feb = merge_data(df_feb)
df_jan = merge_data(df_jan)
df_march = merge_data(df_march)
# discretize, impute etc.
df_april = preprocess_df(df_april)
df_feb = preprocess_df(df_feb)
df_jan = preprocess_df(df_jan)
df_march = preprocess_df(df_march)

test_data = [df_april, df_feb, df_jan, df_march]

2022-04-01 00:00:00 2022-04-23 23:45:00
(4505, 6) (13248, 20)
2022-02-01 00:00:00 2022-02-21 23:45:00
(4701, 6) (12096, 20)
2022-01-01 00:00:00 2022-01-24 23:45:00
(4989, 6) (13824, 20)
2022-03-01 00:00:00 2022-03-24 23:45:00
(4989, 6) (13824, 20)


In [69]:
test_preds = []
for test in test_data:
    g = test.groupby("Room")
    split = [g.get_group(x) for x in g.groups]
    rmses_groups = []
    predictions_groups = []
    for i, room in enumerate(split):
        room = room.reset_index()
        room["time_idx"] = room.index
        max_prediction_length = 4*24*7
        max_encoder_length = 3*4*24*7

        test = TimeSeriesDataSet(
            room,
            time_idx="time_idx",
            target="RoomTemperature",
            group_ids=["Room"],
            min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
            max_encoder_length=max_encoder_length,
            min_prediction_length=4*24,
            max_prediction_length=max_prediction_length,
            static_categoricals=[],
            time_varying_known_categoricals=[],
            time_varying_unknown_reals=["RoomTemperature"],
            add_relative_time_idx=True,
            add_target_scales=True,
            add_encoder_length=True,
        )
        testset = TimeSeriesDataSet.from_dataset(test, room, predict=True, stop_randomization=True)
        batch_size = 64  # set this between 32 to 128
        test_dataloader = testset.to_dataloader(train=False, batch_size=batch_size, num_workers=3)
        actuals = torch.cat([y[0] for x, y in iter(test_dataloader)])
        predictions = models[i].predict(test_dataloader)
        print(actuals.shape, predictions.shape)
        rmse = mean_squared_error(actuals, predictions, squared=False)
        rmses_groups.append(rmse)
        tdf = pd.DataFrame(predictions.squeeze().numpy(), columns=["Prediction"], index=room["index"][-actuals.shape[1]:])
        tdf.reset_index(inplace=True)
        tdf = tdf.rename(columns={'index': 'Timestamp'})
        tdf["Room"] = f"Room 00{i+4}"
        tdf = tdf[['Timestamp', 'Room', 'Prediction']]
        predictions_groups.append(tdf)
    print(np.mean(rmses_groups))
    test_preds.append(pd.concat(predictions_groups, axis=0))


    
    

    

torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
0.6255435
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
0.7948215
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
0.93979174
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
torch.Size([1, 672]) torch.Size([1, 672])
tor

In [72]:
for df, name in zip(test_preds, ["april", "feb", "jan", "march"]):
    df.reset_index(drop=True, inplace=True)
    df.to_csv(f"results/2/pred_{name}2022.csv")

In [66]:
for k, v in scores.items():
    print(f"{k}: 1: {np.mean(v[0])}, 2: {np.mean(v[1])}")

0: 1: 0.960051417350769, 2: 0.9707706570625305
1: 1: 0.8535737991333008, 2: 0.6850458383560181
2: 1: 0.9124508500099182, 2: 0.6031814813613892
3: 1: 0.6960859298706055, 2: 0.7537965774536133
4: 1: 0.7254562973976135, 2: 0.6786210536956787
5: 1: 1.02659273147583, 2: 1.0201828479766846
