In [1]:
import sys
import pandas as pd
import matplotlib.pyplot as plt

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator

sys.path.append("..")
from timefusion.utils import metrics

In [3]:
# Import data
train_data = pd.read_csv("../datasets/electricity/train.csv", index_col="date")
val_data = pd.read_csv("../datasets/electricity/val.csv", index_col="date")
test_data = pd.read_csv("../datasets/electricity/test.csv", index_col="date")

# Normalize data standard deviation
stds = train_data.std()
train_data /= stds
val_data /= stds
test_data /= stds

# Some parameters
prediction_length = 24

In [19]:
# Flatten pandas dataframe and add column names in new column
new_data = train_data.stack().reset_index()
new_data.columns = ["date", "series", "value"]
new_dataset = PandasDataset(new_data, target="value",item_id="series",timestamp="date",freq="H")

TypeError: PandasDataset.__init__() got an unexpected keyword argument 'item_id'

In [23]:
k = PandasDataset(train_data, target=train_data.columns,freq="H")

In [24]:
# Split the data for training and testing
training_data, test_gen = split(k, offset=-36)
test_data = test_gen.generate_instances(prediction_length=12, windows=3)

# Train the model and make predictions
model = DeepAREstimator(
    prediction_length=12, freq="M", trainer_kwargs={"max_epochs": 5}
).train(training_data)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: /Users/edvard/Documents/Imperial College/Year 4/Final Year Project/Final-Year-Project/deepar/lightning_logs


RuntimeError: Multi-layer LSTM support in MPS available only on MacOS 13 onwards

In [None]:
forecasts = list(model.predict(test_data.input))

In [None]:
train_ds = PandasDataset.from_long_dataframe(train_data, target='sales', item_id='family', 
                                       timestamp='date', freq='D')

In [18]:
new_data

Unnamed: 0,date,series,value
0,2012-01-01 01:00:00,0,0.597613
1,2012-01-01 01:00:00,1,3.474225
2,2012-01-01 01:00:00,2,5.483391
3,2012-01-01 01:00:00,3,3.516469
4,2012-01-01 01:00:00,4,4.178658
...,...,...,...
6522588,2014-05-01 23:00:00,364,1.327435
6522589,2014-05-01 23:00:00,365,1.251796
6522590,2014-05-01 23:00:00,366,5.533703
6522591,2014-05-01 23:00:00,367,2.302554


In [13]:
train_data.pivot(columns=train_data.columns)

ValueError: operands could not be broadcast together with shapes (0,) (319,) 

In [None]:
train_dataset = PandasDataset(
    train_data,
    target=

In [32]:
indices = list(range(val_data.shape[0] - prediction_length, val_data.shape[0] - prediction_length - 14*prediction_length, - prediction_length))
realisations = torch.tensor(np.array([val_data.values[idx:idx+prediction_length] for idx in indices])).permute((0,2,1))

# Find best lag order using validation data
for i in range(9):
    maxlags = 2**i
    model = VAR(train_data, freq="H")
    results = model.fit(maxlags=maxlags)

    # Test on validation data
    samples = []
    for idx in indices:
        predictions = results.forecast(val_data.values[idx-maxlags:idx], prediction_length)
        samples.append(torch.tensor(predictions).T)
    samples = torch.stack(samples)

    # Compute metrics
    mse = mean_squared_error(realisations.flatten(), samples.flatten())
    mae = mean_absolute_error(realisations.flatten(), samples.flatten())
    mdae = median_absolute_error(realisations.flatten(), samples.flatten())
    crps_sum = np.mean([metrics.crps_sum(samples[i].unsqueeze(0), realisations[i]) for i in range(realisations.shape[0])])
    variogram_score = np.mean([metrics.variogram_score(samples[i].unsqueeze(0), realisations[i], weights="local", window_size=2) for i in range(realisations.shape[0])])
    print(maxlags, mse, mae, mdae, crps_sum, variogram_score)

1 0.42809645346403336 0.5037136108854312 0.4162448194311368 78.30510365282612 0.12931775661936545
2 0.33741997265193774 0.4326887075433596 0.34147022973514995 48.611834718694695 0.11728394631300333
4 0.3348306441558083 0.4316285149086197 0.33842160591304704 52.447653252223084 0.11276774423632414
8 0.29316377267101074 0.3903048875443268 0.29005621017771865 28.269334931856907 0.10805193984487983
16 0.30094982988254454 0.391862092974616 0.2855206266153585 39.21508338970704 0.10772115789238683
32 0.43851576342307963 0.48588394598339224 0.36437633821310755 30.968111362522855 0.147259184528114
64 320.69901703605007 13.30320995427674 10.206449440930175 1044.2206077181154 11.464323262676938


In [5]:
# Test performance
indices = list(range(test_data.shape[0] - prediction_length, test_data.shape[0] - prediction_length - 14*prediction_length, - prediction_length))
realisations = torch.tensor(np.array([test_data.values[idx:idx+prediction_length] for idx in indices])).permute((0,2,1))

maxlags = 16
model = VAR(train_data, freq="H")
results = model.fit(maxlags=maxlags)

samples = []
for idx in indices:
    predictions = results.forecast(test_data.values[idx-maxlags:idx], prediction_length)
    samples.append(torch.tensor(predictions).T)
samples = torch.stack(samples)

# Compute metrics
mse = mean_squared_error(realisations.flatten(), samples.flatten())
mae = mean_absolute_error(realisations.flatten(), samples.flatten())
mdae = median_absolute_error(realisations.flatten(), samples.flatten())
crps_sum = np.mean([metrics.crps_sum(samples[i].unsqueeze(0), realisations[i]) for i in range(realisations.shape[0])])
variogram_score = np.mean([metrics.variogram_score(samples[i].unsqueeze(0), realisations[i], weights="local", window_size=2) for i in range(realisations.shape[0])])
print(maxlags, mse, mae, mdae, crps_sum, variogram_score)


16 0.2370868538876482 0.3698434929681436 0.2941041310678142 39.73870525246162 0.09652434138596963


# Exchange

In [13]:
# Import data
train_data = pd.read_csv("../datasets/exchange/train.csv")
val_data = pd.read_csv("../datasets/exchange/val.csv")
test_data = pd.read_csv("../datasets/exchange/test.csv")

# Normalize the signal power of each column
stds = train_data.std()
train_data /= stds
val_data /= stds
test_data /= stds

# Some parameters
prediction_length = 30

In [14]:
indices = list(range(val_data.shape[0] - prediction_length, val_data.shape[0] - prediction_length - 14*prediction_length, - prediction_length))
realisations = torch.tensor(np.array([val_data.values[idx:idx+prediction_length] for idx in indices])).permute((0,2,1))

# Find best lag order using validation data
for i in range(9):
    maxlags = 2**i
    model = VAR(train_data)
    results = model.fit(maxlags=maxlags)

    # Test on validation data
    samples = []
    for idx in indices:
        predictions = results.forecast(val_data.values[idx-maxlags:idx], prediction_length)
        samples.append(torch.tensor(predictions).T)
    samples = torch.stack(samples)

    # Compute metrics
    mse = mean_squared_error(realisations.flatten(), samples.flatten())
    mae = mean_absolute_error(realisations.flatten(), samples.flatten())
    mdae = median_absolute_error(realisations.flatten(), samples.flatten())
    crps_sum = np.mean([metrics.crps_sum(samples[i].unsqueeze(0), realisations[i]) for i in range(realisations.shape[0])])
    variogram_score = np.mean([metrics.variogram_score(samples[i].unsqueeze(0), realisations[i], weights="local", window_size=2) for i in range(realisations.shape[0])])
    print(maxlags, mse, mae, mdae, crps_sum, variogram_score)

1 0.016887865244289584 0.08285313547978906 0.05705872776203247 0.3875437108550224 0.007100809866249035
2 0.01600563017641964 0.08044273913772125 0.055898168412147786 0.39755590904899546 0.006611613659898859
4 0.01590528870426354 0.08038796570215245 0.05603026165879221 0.40289661524336184 0.006757353077013506
8 0.016255463089877683 0.08148263063959078 0.055387505138935644 0.40420081397144525 0.00682316228610307
16 0.016890449324062723 0.08352513982132848 0.05839952420968331 0.3928941018252983 0.007186250230818358
32 0.017348761068782945 0.08499863718211402 0.059467347076881616 0.4203400007524149 0.00730509482813132
64 0.018445937495137472 0.08971356049586499 0.06483874747782226 0.43286799068109894 0.008342773692809
128 0.02308240927641897 0.10339533717536958 0.07781495882099909 0.44621025599620834 0.010199360586690215
256 0.0349718814716449 0.12775029740485672 0.09355699615764612 0.5278636048292684 0.012212015622223734


In [15]:
# Test performance
indices = list(range(test_data.shape[0] - prediction_length, test_data.shape[0] - prediction_length - 14*prediction_length, - prediction_length))
realisations = torch.tensor(np.array([test_data.values[idx:idx+prediction_length] for idx in indices])).permute((0,2,1))

maxlags = 2
model = VAR(train_data)
results = model.fit(maxlags=maxlags)

samples = []
for idx in indices:
    predictions = results.forecast(test_data.values[idx-maxlags:idx], prediction_length)
    samples.append(torch.tensor(predictions).T)
samples = torch.stack(samples)

# Compute metrics
mse = mean_squared_error(realisations.flatten(), samples.flatten())
mae = mean_absolute_error(realisations.flatten(), samples.flatten())
mdae = median_absolute_error(realisations.flatten(), samples.flatten())
crps_sum = np.mean([metrics.crps_sum(samples[i].unsqueeze(0), realisations[i]) for i in range(realisations.shape[0])])
variogram_score = np.mean([metrics.variogram_score(samples[i].unsqueeze(0), realisations[i], weights="local", window_size=2) for i in range(realisations.shape[0])])
print(maxlags, mse, mae, mdae, crps_sum, variogram_score)


2 0.02264539828816738 0.10316918016684003 0.07164596930846923 0.5658821770696283 0.009198252098755463


# Solar

In [16]:
train_data = pd.read_csv("../datasets/solar/train.csv", index_col="LocalTime")
val_data = pd.read_csv("../datasets/solar/val.csv", index_col="LocalTime")
test_data = pd.read_csv("../datasets/solar/test.csv", index_col="LocalTime")

# Normalize the signal power of each column
stds = train_data.std()
train_data /= stds
val_data /= stds
test_data /= stds

# Some parameters
prediction_length = 24

In [17]:
indices = list(range(val_data.shape[0] - prediction_length, val_data.shape[0] - prediction_length - 14*prediction_length, - prediction_length))
realisations = torch.tensor(np.array([val_data.values[idx:idx+prediction_length] for idx in indices])).permute((0,2,1))

# Find best lag order using validation data
for i in range(9):
    maxlags = 2**i
    model = VAR(train_data, freq="H")
    results = model.fit(maxlags=maxlags)

    # Test on validation data
    samples = []
    for idx in indices:
        predictions = results.forecast(val_data.values[idx-maxlags:idx], prediction_length)
        samples.append(torch.tensor(predictions).T)
    samples = torch.stack(samples)

    # Compute metrics
    mse = mean_squared_error(realisations.flatten(), samples.flatten())
    mae = mean_absolute_error(realisations.flatten(), samples.flatten())
    mdae = median_absolute_error(realisations.flatten(), samples.flatten())
    crps_sum = np.mean([metrics.crps_sum(samples[i].unsqueeze(0), realisations[i]) for i in range(realisations.shape[0])])
    variogram_score = np.mean([metrics.variogram_score(samples[i].unsqueeze(0), realisations[i], weights="local", window_size=2) for i in range(realisations.shape[0])])
    print(maxlags, mse, mae, mdae, crps_sum, variogram_score)

1 0.747472462482723 0.6998342341962218 0.6485854361559291 93.5627800741968 0.1292754332383914
2 0.6849015885095434 0.6649527929845294 0.6148021790995875 88.38290055292362 0.11043431932795945
4 0.616923598002049 0.612875709890228 0.5030629824558094 80.80328673138362 0.11340872921981952
8 0.4294411478493363 0.5079374355569413 0.39868764346455754 66.16652101183354 0.12165627854076758
16 0.18786273542706916 0.3051231158531769 0.22002305778954429 38.20234638453345 0.0935745133534021
32 0.23727260781902831 0.3298026001645596 0.2075377920066261 35.95539817484711 0.16208593626150886
64 0.8935950611997517 0.7095407398978686 0.5377350766005918 72.36272555254483 0.4589726460094808
128 0.19365192504008913 0.3027622832364533 0.20533938167854202 31.76083669868841 0.1474379142634344
256 0.1456419727746064 0.24729169478599491 0.14717721127324013 24.785998286676136 0.12111754803348981


In [18]:
# Test performance
indices = list(range(test_data.shape[0] - prediction_length, test_data.shape[0] - prediction_length - 14*prediction_length, - prediction_length))
realisations = torch.tensor(np.array([test_data.values[idx:idx+prediction_length] for idx in indices])).permute((0,2,1))

maxlags = 16
model = VAR(train_data, freq="H")
results = model.fit(maxlags=maxlags)

samples = []
for idx in indices:
    predictions = results.forecast(test_data.values[idx-maxlags:idx], prediction_length)
    samples.append(torch.tensor(predictions).T)
samples = torch.stack(samples)

# Compute metrics
mse = mean_squared_error(realisations.flatten(), samples.flatten())
mae = mean_absolute_error(realisations.flatten(), samples.flatten())
mdae = median_absolute_error(realisations.flatten(), samples.flatten())
crps_sum = np.mean([metrics.crps_sum(samples[i].unsqueeze(0), realisations[i]) for i in range(realisations.shape[0])])
variogram_score = np.mean([metrics.variogram_score(samples[i].unsqueeze(0), realisations[i], weights="local", window_size=2) for i in range(realisations.shape[0])])
print(maxlags, mse, mae, mdae, crps_sum, variogram_score)


16 0.2921837702543927 0.4064584723453067 0.28972340584731704 51.25028968520726 0.10033451264590383
