Required packages

In [None]:
from darts import TimeSeries
from darts.metrics import mape, mse, mae
from darts.models import NaiveMean, NaiveSeasonal, NaiveDrift, NaiveMovingAverage, RandomForest, LinearRegressionModel, AutoARIMA, Theta, StatsForecastAutoETS, Prophet, NBEATSModel, NLinearModel

import pandas as pd
import matplotlib.pyplot as plt

Load dataset as a series and prepare it by splitting into train and test

In [None]:
# Primary dataset: build a darts TimeSeries indexed by the "timestamp" column
# and hold out everything after the 75% split point as the test set.
df = pd.read_csv("drive/MyDrive/dataset/Water/task_df_aggr_persone_media.csv")
series = TimeSeries.from_dataframe(df, "timestamp")
train, test = series.split_after(0.75)

# Component names drive the per-variable evaluation loops in later cells, so
# define them for univariate and multivariate series alike (the name used to
# exist only in the multivariate branch, causing a NameError otherwise).
header = train.columns

train.plot()
test.plot()


# Second dataset, loaded and split the same way; the `_w` suffix keeps it
# apart from the series above. `df` is deliberately rebound, as before.
df = pd.read_csv("final_dataset.csv")
series_w = TimeSeries.from_dataframe(df, "timestamp")
train_w, test_w = series_w.split_after(0.75)

## Naive Models - Baseline

In [None]:
# Baseline forecasters. 24 is used both as the seasonal period (K) and as the
# moving-average window length.
model0, model1, model2, model3 = (
    NaiveMean(),
    NaiveSeasonal(K=24),
    NaiveDrift(),
    NaiveMovingAverage(input_chunk_length=24),
)

# Display name -> model instance; insertion order is the iteration order.
models = dict(
    zip(
        ("Mean", "Seasonal", "Drift", "MA"),
        (model0, model1, model2, model3),
    )
)

In [None]:
# One panel per baseline in a 2x2 grid: fit on the training split, forecast a
# horizon as long as the test split, and report rounded MAE / MSE in the title.
fig = plt.figure(figsize=(16, 10))
for i, (name, model) in enumerate(models.items()):
  print(name)
  model.fit(series=train)
  predictions = model.predict(n=len(test))
  mae_score = round(mae(test, predictions), 2)
  mse_score = round(mse(test, predictions), 2)

  fig.add_subplot(2, 2, i + 1)
  train.plot(label="train")
  test.plot(label="test")
  predictions.plot(label=name)
  # The baselines are fitted on `train`, which the other evaluation cells
  # label "(std)"; "with sample selection" is reserved for `train_w` runs.
  plt.title(f"Method: {name}, MAE: {mae_score}, MSE: {mse_score} (std)")
  plt.legend()
fig.tight_layout()
plt.show()

## Classical Models

In [None]:
# Candidate classical / ML forecasters. The lag-based models use 24 lags.
model0 = Theta()  # built, but currently not registered in `models` below
model1 = LinearRegressionModel(lags=24)
model2 = StatsForecastAutoETS()
model3 = AutoARIMA(start_p=2, max_p=12, start_q=1)
model4 = RandomForest(lags=24, n_estimators=300)
model5 = Prophet()

# Display name -> model instance, registered one by one.
models = {}
# models["Theta"] = model0  # disabled
models["Linear"] = model1
models["Exponential"] = model2
models["ARIMA"] = model3
models["RandomForest"] = model4
models["Prophet"] = model5

In [None]:
# Per-variable evaluation of the classical models.
# mae_list / mse_list collect [score, method name, variable name] rows.
mae_list = []
mse_list = []

# Read the component names straight from `train` so this cell works for
# univariate and multivariate series alike.
columns = list(train.columns)
n_rows, n_cols = len(models), len(columns)

fig = plt.figure(figsize=(16, 10))
for i, (name, model) in enumerate(models.items()):
  print(name)
  for index, column in enumerate(columns):
    tmp_train = train.univariate_component(column)
    tmp_test = test.univariate_component(column)
    model.fit(series=tmp_train)
    predictions = model.predict(n=len(tmp_test))
    mae_score = round(mae(tmp_test, predictions), 5)
    mae_list.append([mae_score, name, column])
    mse_score = round(mse(tmp_test, predictions), 5)
    mse_list.append([mse_score, name, column])

    # One row per method, one column per variable. The previous position
    # `i + index + 1` was shared by several (method, variable) pairs, so
    # panels were drawn on top of each other.
    fig.add_subplot(n_rows, n_cols, i * n_cols + index + 1)
    tmp_train.plot(label="train")
    tmp_test.plot(label="test")
    predictions.plot(label=name)
    plt.title(f"Method: {name}, Variable: {column} (std)")
fig.tight_layout()
plt.savefig('w_o.png')
plt.show()

# fig = plt.figure(figsize=(16, 10))
# for i, (name, model) in enumerate(models.items()):
#   print(name)
#   for index, column in enumerate(header):
#     if name == "RandomForest":
#       train = train_w.univariate_component(column).pd_dataframe()
#       train = train.dropna()
#       train = TimeSeries.from_dataframe()
#     else:
#       train = train_w.univariate_component(column)
#       train = train.longest_contiguous_slice(max_gap_size=0)
#       print(len(train))
#     model.fit(series=train)
#     predictions = model.predict(n = len(test))
#     mae_score = round(mae(test.univariate_component(column), predictions), 5)
#     mae_list.append([mae_score, name])
#     mse_score = round(mse(test.univariate_component(column), predictions), 5)
#     mse_list.append([mse_score, name])


#     fig.add_subplot(6, 4, i+index+1)
#     train_w.plot(label="train"); test.plot(label="test"); predictions.plot(label=name)
#     plt.title(f"Method: {name}, Variable: {column} with selection");
# fig.tight_layout()
# plt.savefig('w.png')
# plt.show()

# Dump the collected score rows: MAE first, then MSE.
for collected_scores in (mae_list, mse_list):
  print(collected_scores)

## Deep Learning models

In [None]:
# Settings shared by both networks: 24-step input window, 4-step output chunk,
# 50 training epochs.
_shared_settings = dict(
    input_chunk_length=24,
    output_chunk_length=4,
    n_epochs=50,
)

# N-BEATS with 3 stacks of 1 block each, 2 layers per block of width 32.
model6 = NBEATSModel(
    num_stacks=3,
    num_blocks=1,
    num_layers=2,
    layer_widths=32,
    batch_size=12,
    **_shared_settings,
)

# NLinear with only the shared settings.
model7 = NLinearModel(**_shared_settings)

# Display name -> model instance.
models = {"NBEATS": model6, "NLinear": model7}

In [None]:
def _evaluate_per_variable(models, train_series, test_series, title_suffix, out_file, mae_scores, mse_scores):
  """Fit each model on every variable separately, score it and plot the results.

  Draws a grid with one row per method and one column per variable, saves it
  to `out_file` and shows it.

  Args:
    models: mapping of display name -> darts forecasting model.
    train_series: series whose univariate components are used for fitting.
    test_series: series whose univariate components are used for scoring;
      its component names define the variables that are evaluated.
    title_suffix: text appended to every panel title.
    out_file: path the finished figure is saved to.
    mae_scores: list extended in place with [MAE, method name] rows.
    mse_scores: list extended in place with [MSE, method name] rows.
  """
  columns = list(test_series.columns)
  n_rows, n_cols = len(models), len(columns)

  fig = plt.figure(figsize=(16, 10))
  for i, (name, model) in enumerate(models.items()):
    print(name)
    for index, column in enumerate(columns):
      tmp_train = train_series.univariate_component(column)
      tmp_test = test_series.univariate_component(column)

      # Train a fresh copy for every variable: re-fitting the same torch
      # model instance would continue from the weights learned on the
      # previous variable (and the previous pass) instead of starting over.
      # The instances stored in `models` are therefore left untrained.
      fitted_model = model.untrained_model()
      fitted_model.fit(series=tmp_train)
      predictions = fitted_model.predict(n=len(tmp_test))

      mae_score = round(mae(tmp_test, predictions), 5)
      mae_scores.append([mae_score, name])
      mse_score = round(mse(tmp_test, predictions), 5)
      mse_scores.append([mse_score, name])

      # Row = method, column = variable; gives every pair its own panel.
      fig.add_subplot(n_rows, n_cols, i * n_cols + index + 1)
      tmp_train.plot(label="train")
      tmp_test.plot(label="test")
      predictions.plot(label=name)
      plt.title(f"Method: {name}, Variable: {column} {title_suffix}")
      plt.legend()
  fig.tight_layout()
  plt.savefig(out_file)
  plt.show()


# Scores of both passes are accumulated in the same two lists, first the
# standard pass and then the sample-selection pass.
mae_list = []
mse_list = []

# Pass 1: standard training split.
_evaluate_per_variable(models, train, test, "(std)", 'nn_wo.png', mae_list, mse_list)

# Pass 2: fit on the sample-selection training split, score against `test`.
# NOTE(review): assumes the forecast after `train_w` overlaps the time range of
# `test` - confirm, or score against `test_w` instead.
_evaluate_per_variable(models, train_w, test, "with sample selection", 'nn_w.png', mae_list, mse_list)