In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_profiling
from visualize import visualize_rooms
from preprocessing import preprocess_df, merge_data
from sklearn.ensemble import RandomForestRegressor
from model import train_model
from autots import AutoTS

In [None]:
# Run the benchmark shipped with AutoTS (three repetitions, automatic choice
# of worker count) and show the collected results as the cell output.
from autots.evaluator.benchmark import Benchmark

bench = Benchmark()
bench.run(times=3, n_jobs="auto")
bench.results

In [None]:
# Read the raw, semicolon-separated training data and hand it straight to
# merge_data, which merges it with the weather data.
df = merge_data(pd.read_csv("data/train/data.csv", sep=";"))

In [None]:
# Preprocess (discretize, impute etc.) a copy of the merged frame, so `df`
# itself is not modified, then report how many missing values remain in each
# column as a sanity check.
dfp = preprocess_df(df.copy())
missing_per_column = dfp.isna().sum()
print(missing_per_column)


In [None]:
# Export the preprocessed frame to CSV. With pandas' defaults the index is
# written as the first column; if that index has no name, its header is left
# empty and read_csv later reports it as "Unnamed: 0".
dfp.to_csv("./data/train/preprocessed_train.csv")

In [None]:
# Re-import the preprocessed data from disk.
# NOTE: this rebinds `df`, which until now held the merged but not yet
# preprocessed frame.
df = pd.read_csv("./data/train/preprocessed_train.csv")

In [None]:
# The index column written by to_csv comes back from read_csv under the
# auto-generated name "Unnamed: 0"; give it a meaningful name.
# NOTE(review): presumably this column holds the observation timestamps -
# confirm against preprocess_df.
df = df.rename(columns={'Unnamed: 0': 'timestamp'})
# Left disabled: if enabled, 'timestamp' would become the index instead of
# staying a regular column.
#df.set_index('timestamp', inplace=True)

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet, DecoderMLP
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss, RMSE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [None]:
# Partition the data by room: `split` holds one DataFrame per distinct "Room"
# value, in the iteration order of g.groups.
g = df.groupby("Room")
split = list(map(g.get_group, g.groups))

# Continue with the first room only. After resetting to a fresh 0..n-1 index,
# the row position doubles as the integer time index.
# NOTE(review): this assumes the rows of a room are already in chronological
# order - confirm, otherwise sort by time first.
dfone = split[0].reset_index(drop=True)
dfone = dfone.assign(time_idx=dfone.index)

In [None]:
# List the columns available in the single-room frame.
print(dfone.columns)

In [None]:
# Inspect the "RoomTemperature" series of the selected room.
print(dfone["RoomTemperature"])

In [117]:
# Forecast horizon and look-back window, both counted in rows of `dfone`.
# NOTE(review): 4*24*30*4 presumably reads as 4 samples/hour * 24 h * 30 days
# * 4 months (i.e. 15-minute data) - confirm the sampling rate.
max_prediction_length = 4 * 24 * 30 * 4
max_encoder_length = 4 * max_prediction_length

# Hold out the last `max_prediction_length` steps: only rows up to the cutoff
# are used to build the training dataset.
training_cutoff = dfone["time_idx"].max() - max_prediction_length
train_rows = dfone.loc[dfone["time_idx"] <= training_cutoff]

training = TimeSeriesDataSet(
    train_rows,
    time_idx="time_idx",
    target="RoomTemperature",
    group_ids=["Room"],
    # keep the encoder long (as it is in the validation set)
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],
    #static_reals=["Room"],
    time_varying_known_categoricals=[],
    # Covariate configuration below is currently disabled:
    # time_varying_known_reals=["time_idx",  'AirqualityPerc', 'HeaterPerc',
    #    'CoolerPerc', 'TempSupplyAir', 'RelativeHumiditySupplyAir',
    #    'HeatingPower', 'CoolingPower', 'AirTemperature', 'WindDirection',
    #    'BrightnessNorth', 'BrightnessEast', 'BrightnessSouth',
    #    'BrightnessWest', 'dwpt', 'rhum',
    #    'prcp', 'wspd', 'tsun'],
    # time_varying_unknown_categoricals=[],
    # time_varying_unknown_reals=[
    #     "RoomTemperature"
    # ],
    add_relative_time_idx=True,
    #add_target_scales=True,
    add_encoder_length=True,
)

# Validation set: same configuration as `training`, but built from the full
# frame in prediction mode and with randomization switched off.
validation = TimeSeriesDataSet.from_dataset(
    training,
    dfone,
    predict=True,
    stop_randomization=True,
)

batch_size = 128  # set this between 32 to 128
train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
# Validation uses batches ten times larger than training.
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=batch_size * 10,
    num_workers=0,
)

In [118]:
# Concatenate the targets of all validation batches into a single tensor.
actuals = torch.cat([target for _inputs, (target, _weight) in val_dataloader])
# Predictions of the Baseline model on the same validation data.
baseline_predictions = Baseline().predict(val_dataloader)
# Mean absolute error of the baseline (shown as the cell output); this is the
# reference value a trained model should improve on.
torch.mean(torch.abs(actuals - baseline_predictions)).item()



1.4035757780075073

In [119]:
# Seed the random number generators first so the run is reproducible.
pl.seed_everything(42)

# CPU-only trainer with gradient clipping. The clipping value is itself a
# hyperparameter and guards against diverging gradients.
trainer = pl.Trainer(
    gradient_clip_val=0.1,
    gpus=0,
)

# NOTE: despite the variable name `tft`, the network built here is a
# DecoderMLP, not a TemporalFusionTransformer.
tft = DecoderMLP.from_dataset(
    training,
    loss=QuantileLoss(),
    output_size=7,  # 7 quantiles by default
    # network capacity: hidden_size is the most important hyperparameter
    # apart from the learning rate
    hidden_size=16,
    n_hidden_layers=5,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.03,
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 1.7k


In [None]:
# Learning-rate search over the range [min_lr, max_lr] using the training and
# validation dataloaders.
res = trainer.tuner.lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    min_lr=1e-6,
    max_lr=10.0,
)

# Report the suggested learning rate and plot the search result with the
# suggestion marked.
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(suggest=True, show=True)
fig.show()

In [111]:
# Callbacks and logger for the training run.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-4,
    patience=10,
    verbose=False,
)
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # log results to TensorBoard

trainer = pl.Trainer(
    # NOTE: fast_dev_run is currently ENABLED, so fit() only pushes a single
    # batch through each loop as a smoke test of network and dataset.
    # Comment this line out for a real training run.
    fast_dev_run=True,
    max_epochs=30,
    gpus=0,
    weights_summary="top",
    gradient_clip_val=0.1,
    #limit_train_batches=30,  # comment in for training, running validation every 30 batches
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Running in fast_dev_run mode: will run a full train, val, test and prediction loop using 1 batch(es).
`Trainer(limit_train_batches=1)` was configured so 1 batch per epoch will be used.
`Trainer(limit_val_batches=1)` was configured so 1 batch will be used.
`Trainer(limit_test_batches=1)` was configured so 1 batch will be used.
`Trainer(limit_predict_batches=1)` was configured so 1 batch will be used.
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


In [112]:
# Fit the network with the trainer configured in the preceding cell, using
# the training dataloader for optimisation and the validation dataloader for
# the validation loop.
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)


  | Name             | Type                 | Params
----------------------------------------------------------
0 | loss             | QuantileLoss         | 0     
1 | logging_metrics  | ModuleList           | 0     
2 | input_embeddings | MultiEmbedding       | 0     
3 | mlp              | FullyConnectedModule | 2.1 K 
----------------------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]



Validation: 0it [00:00, ?it/s]



In [None]:
# Build the long-format frame handed to AutoTS: one row per observation, with
# the date in "timestamp", the series id in "Room" and the value in
# "RoomTemperature".
df_ts = df.copy()
if "timestamp" not in df_ts.columns:
    # The timestamps live in the index (e.g. after set_index('timestamp')):
    # expose them as a regular column named "timestamp".
    df_ts = df_ts.rename_axis("timestamp").reset_index()
# When "timestamp" already is a column, the index must NOT be reset and
# renamed: that would add a second, purely positional "timestamp" column and
# leave the frame with duplicate column names, making date_col ambiguous.
print(df_ts.head(2))

# Evaluate different simple time series models
model = AutoTS(
    forecast_length=3,
    frequency='infer',
    ensemble='simple',
    max_generations=10,
    num_validations=2,
    n_jobs='auto'
)

model.fit(df_ts, date_col='timestamp', value_col='RoomTemperature', id_col="Room")