In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import numpy as np
import torch as t
import plotly.graph_objects as go
import seaborn as sns
from sklearn.model_selection import TimeSeriesSplit
import model_definitions as md
import itertools

In [4]:
# load in clean dataset
clean_data = pd.read_csv(r"/home/tobi/Desktop/Capstone/EnergyDemandForecasting/src/Saved/Datasets/clean_training.csv", index_col=0)
clean_data.index = pd.to_datetime(clean_data.index)
clean_data.loc[:,"HourlyPrecipitation"] = clean_data["HourlyPrecipitation"].replace({np.nan:"None"})
display(clean_data)

Unnamed: 0,Energy Demand (MWH),HourlyDryBulbTemperature,HourlyDewPointTemperature,HourlyWetBulbTemperature,HourlyStationPressure,HourlyPrecipitation,HourlyWindSpeed,Energy Price (cents/KWH),Civilian Noninstitutional Population,Labor Force Participation,CPI-U
2018-06-19 05:00:00,7221.0,78.0,72.0,74.0,29.62,,6.000000,19.28,7105823.0,60.1,274.170
2018-06-19 06:00:00,6911.0,79.0,70.0,73.0,29.63,,6.000000,19.28,7105823.0,60.1,274.170
2018-06-19 07:00:00,6691.0,81.0,69.0,73.0,29.63,,3.565778,19.28,7105823.0,60.1,274.170
2018-06-19 08:00:00,6582.0,82.0,67.0,72.0,29.63,,3.814241,19.28,7105823.0,60.1,274.170
2018-06-19 09:00:00,6600.0,81.0,69.0,73.0,29.65,,7.000000,19.28,7105823.0,60.1,274.170
...,...,...,...,...,...,...,...,...,...,...,...
2023-09-01 14:00:00,5757.0,75.0,47.0,59.0,30.12,,3.000000,23.33,6744964.0,61.2,325.613
2023-09-01 15:00:00,5895.0,73.0,49.0,60.0,30.11,,6.000000,23.33,6744964.0,61.2,325.613
2023-09-01 16:00:00,6011.0,72.0,49.0,59.0,30.10,,0.000000,23.33,6744964.0,61.2,325.613
2023-09-01 17:00:00,6096.0,70.0,53.0,60.0,30.11,,0.000000,23.33,6744964.0,61.2,325.613


In [4]:
input_data = clean_data.copy()
# input_data = residual_data.copy()

In [5]:
# encode hour of the day, day of the week, and day of the year into a new dataframe
input_time = pd.DataFrame(data={"Hour of Day":input_data.index.hour, "Day of Week":input_data.index.dayofweek, "Day of Year":input_data.index.dayofyear}, index=input_data.index)
display(input_data)
display(input_time)

Unnamed: 0,Energy Demand (MWH),HourlyDryBulbTemperature,HourlyDewPointTemperature,HourlyWetBulbTemperature,HourlyStationPressure,HourlyPrecipitation,HourlyWindSpeed,Energy Price (cents/KWH),Civilian Noninstitutional Population,Labor Force Participation,CPI-U
2018-06-19 05:00:00,7221.0,78.0,72.0,74.0,29.62,,6.000000,19.28,7105823.0,60.1,274.170
2018-06-19 06:00:00,6911.0,79.0,70.0,73.0,29.63,,6.000000,19.28,7105823.0,60.1,274.170
2018-06-19 07:00:00,6691.0,81.0,69.0,73.0,29.63,,3.565778,19.28,7105823.0,60.1,274.170
2018-06-19 08:00:00,6582.0,82.0,67.0,72.0,29.63,,3.814241,19.28,7105823.0,60.1,274.170
2018-06-19 09:00:00,6600.0,81.0,69.0,73.0,29.65,,7.000000,19.28,7105823.0,60.1,274.170
...,...,...,...,...,...,...,...,...,...,...,...
2023-09-01 14:00:00,5757.0,75.0,47.0,59.0,30.12,,3.000000,23.33,6744964.0,61.2,325.613
2023-09-01 15:00:00,5895.0,73.0,49.0,60.0,30.11,,6.000000,23.33,6744964.0,61.2,325.613
2023-09-01 16:00:00,6011.0,72.0,49.0,59.0,30.10,,0.000000,23.33,6744964.0,61.2,325.613
2023-09-01 17:00:00,6096.0,70.0,53.0,60.0,30.11,,0.000000,23.33,6744964.0,61.2,325.613


Unnamed: 0,Hour of Day,Day of Week,Day of Year
2018-06-19 05:00:00,5,1,170
2018-06-19 06:00:00,6,1,170
2018-06-19 07:00:00,7,1,170
2018-06-19 08:00:00,8,1,170
2018-06-19 09:00:00,9,1,170
...,...,...,...
2023-09-01 14:00:00,14,4,244
2023-09-01 15:00:00,15,4,244
2023-09-01 16:00:00,16,4,244
2023-09-01 17:00:00,17,4,244


In [6]:
if t.cuda.is_available():
    device = t.device("cuda")
    print("PyTorch is using:", t.cuda.get_device_name(0))
else:
    print("PyTorch is using CPU")

PyTorch is using: Radeon RX 7900 XTX


Encode Categorical Variables

In [17]:
input_data = pd.get_dummies(input_data, drop_first=True).astype("float32")
# output_data = pd.get_dummies(output_data, drop_first=True).astype("float32")

Normalize features into [0,1]

In [7]:
input_min_vals = np.min(input_data, axis=0)
input_max_vals = np.max(input_data, axis=0)
input_time_min_vals = np.min(input_time, axis=0)
input_time_max_vals = np.max(input_time, axis=0)

# Normalize each column to be in the range [0, 1]
input_data = (input_data - input_min_vals) / (input_max_vals - input_min_vals)
input_time = (input_time - input_time_min_vals) / (input_time_max_vals - input_time_min_vals)
# output_data = (output_data - output_min_vals) / (output_max_vals - output_min_vals)

Format using DataLoader

In [9]:
# formatted dataset: (N x B x S x K)
B = 100   # Batch size
S = 24*7*2   # Sequence length
K = input_data.shape[1]  # Number of features
steps_ahead = 24

# Reshape data to have dimensions (N x B x S x K)
x = input_data.values
x_time = input_time.values
# y = output_data.values

# Calculate the number of sequences of length S that can be produced
num_sequences = x.shape[0] - (S + steps_ahead)

# Initialize an empty list to store the groups
x_inputs = []
x_time_inputs = []
y_outputs = []

# Iterate over the array to create groups
for i in range(num_sequences):
    input = x[i:i+S]
    time_input = x_time[i:i+S]
    # output = y[i+S]
    output = x[i+S+steps_ahead-1,0]
    x_inputs.append(input)
    y_outputs.append(output)
    x_time_inputs.append(time_input)

x_inputs = t.Tensor(np.array(x_inputs))
x_time_inputs = t.Tensor(np.array(x_time_inputs))
y_outputs = t.Tensor(np.array(y_outputs))
display(y_outputs.shape)
display(x_inputs.shape)

validation_size = int(np.floor(x_inputs.shape[0]*0.1))
# define train_loader from 90% of training data
train_dataset = t.utils.data.TensorDataset(x_inputs[:-validation_size], x_time_inputs[:-validation_size], y_outputs[:-validation_size])
train_loader = t.utils.data.DataLoader(train_dataset, batch_size=B, shuffle=False)

# define validation_loader from 10% of training data
validation_dataset = t.utils.data.TensorDataset(x_inputs[-validation_size:], x_time_inputs[-validation_size:], y_outputs[-validation_size:])
validation_loader = t.utils.data.DataLoader(validation_dataset, batch_size=B, shuffle=False)
# initial_validation_time = input_time.index[-validation_size]

torch.Size([45254])

torch.Size([45254, 336, 13])

In [10]:
for x_input, time_input, output in train_loader:
    print(x_input.shape)
    print(time_input.shape)
    print(output.shape)
    break

# Test formatting
y_index = np.random.randint(y_outputs.shape[0])

x_index = y_index + steps_ahead

display(train_loader.dataset[x_index][0][-1].numpy() * (input_max_vals-input_min_vals) + input_min_vals)
display(train_loader.dataset[y_index][2].numpy() * (input_max_vals.iloc[0]-input_min_vals.iloc[0]) + input_min_vals.iloc[0])

torch.Size([100, 336, 13])
torch.Size([100, 336, 3])
torch.Size([100])


Energy Demand (MWH)                     5.420000e+03
HourlyDryBulbTemperature                6.200000e+01
HourlyDewPointTemperature               6.100000e+01
HourlyWetBulbTemperature                6.100000e+01
HourlyStationPressure                   2.980000e+01
HourlyWindSpeed                         0.000000e+00
Energy Price (cents/KWH)                2.186000e+01
Civilian Noninstitutional Population    6.800103e+06
Labor Force Participation               6.000000e+01
CPI-U                                   3.092430e+02
HourlyPrecipitation_Light Rain          0.000000e+00
HourlyPrecipitation_Medium Rain         0.000000e+00
HourlyPrecipitation_None                1.000000e+00
dtype: float32

5420.0

In [11]:
import numpy as np

class LSTM(t.nn.Module):
    """
    Constructor for the LSTM class. Currently, the general architecture of this class cannot be adjusted from outside of it. 
        The only adjustable attributes are the network's input size, hidden state size, number of layers, output size, and dropout.
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout:float = 0):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = t.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = t.nn.Dropout(dropout)
        self.fc = t.nn.Sequential(
            t.nn.Dropout(dropout),
            t.nn.Linear(hidden_size, hidden_size),
            t.nn.Dropout(dropout),
            t.nn.Linear(hidden_size, output_size),
            t.nn.Softplus() 
        ) 
        self.c0 = None
        self.h0 = None

    """
    This method runs the network. It evaluates the network as a function to a batch of input samples with size 
        (sequence length x number of features). This means that all tensor arguments for this method should have 3 
        dimensions (batch size, sequence length, number of features)
    """
    def forward(self, x_observed, x_time, bayesian_predict:bool=True):
        if bayesian_predict: self.train() 
        else: self.eval()
        x = t.cat([x_observed, x_time], dim=-1)
        device = next(self.parameters()).device
        h0 = t.zeros(self.num_layers, x.size(0), self.hidden_size, device=device)
        c0 = t.zeros(self.num_layers, x.size(0), self.hidden_size, device=device)

        # Forward propagate LSTM
        out, (h0, c0) = self.lstm(x, (h0, c0))

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out
    
    """
    
    """
    def predict_variance(self, x_observed, x_time, n:int):
        x_observed = x_observed.repeat(n)
        x_time = x_time.repeat(n)
        predictions = self.forward(x_observed, x_time, bayesian_predict=True)
        return t.std(predictions).item()

        


# Example usage:
display(train_loader.dataset[0][0].shape)
input_size = train_loader.dataset[0][0].shape[-1] + train_loader.dataset[0][1].shape[-1]
hidden_size = 32  # Number of LSTM units (hidden size)
num_layers = 1  # Number of LSTM layers
output_size = 1 #train_loader.dataset[0][2].shape[-1]
seq_length = S  # Length of input sequence (assuming hourly data)

# Forward pass
model = LSTM(input_size, hidden_size, num_layers, output_size)
output = model(train_loader.dataset[0][0][None,:,:], train_loader.dataset[0][1][None,:,:])
print("Output shape:", output.shape)

torch.Size([336, 13])

Output shape: torch.Size([1, 1])


Train

In [12]:
# set device
device = "cuda"
# device = "cpu"
# Initialize the LSTM model
model = LSTM(input_size, hidden_size, num_layers, output_size, dropout=0.1).to(device=device)

# pull out last 10% of data to use for early stopping validation

# define optimizer
# criterion = t.nn.MSELoss(reduction="none")
criterion = t.nn.MSELoss()
optimizer = t.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0)
# weights = t.Tensor([1]+[0.1]*(train_loader.dataset[0][2].shape[-1]-1)).to(device=device)

best_val_loss = np.inf
patience = 5
best_model_state = None
counter = 0
loss_scalar = 1000
for epoch in range(100):
    # model.train()
    # for b in range(x.shape[0]):
    losses = []
    for b, (inputs, time_inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device=device)
        time_inputs = time_inputs.to(device=device)
        targets = targets.to(device=device)
        # print(inputs.shape)
        # print(time_inputs.shape)
        # print(targets.shape)
        # break
        # outputs = model(x_device[b])
        outputs = model(inputs, time_inputs)[:,0]
        # print(outputs.shape)
        # break
        optimizer.zero_grad()
        loss = criterion(targets, outputs) * loss_scalar
        
        # loss = loss * weights[None,:]
        # loss = t.mean(loss)
        # print(loss.item())
        losses.append(loss.item())
        # loss = criterion(y_gpu[b], outputs)
        loss.backward()
        # Print gradients of model parameters
        # if (epoch % 5 == 0) & (b == 0):
        #     for name, param in model.named_parameters():
        #         if param.grad is not None:
        #             print(f"Gradient of {name}:")
        #             print(param.grad)
        #             print(param)
        optimizer.step()
    losses = np.mean(losses)
    print("Epoch {}, Training Loss: {}".format(epoch+1, losses.item()))

    # here implement early stopping using validation data
    with t.no_grad():
        # model.eval()
        val_loss = []
        for b, (inputs, time_inputs, targets) in enumerate(validation_loader):
            inputs = inputs.to(device=device)
            time_inputs = time_inputs.to(device=device)
            # print(inputs.shape)
            # print(time_inputs.shape)
            targets = targets.to(device=device)
            outputs = model(inputs, time_inputs, bayesian_predict=False)[:,0]
            loss = criterion(targets, outputs) * loss_scalar
            # loss = loss * weights[None,:]
            # loss = t.mean(loss)
            val_loss.append(loss.item())
        val_loss = np.mean(val_loss)
        print("Epoch {}, Validation Loss: {}".format(epoch+1, val_loss.item()))

    # Check for improvement in validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        # Save the best model state
        best_model_state = model.state_dict()
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping!")
            break

# Restore the best performing model
if best_model_state is not None:
    model.load_state_dict(best_model_state)


Epoch 1, Training Loss: 33.28500431542303
Epoch 1, Validation Loss: 25.241794233736783
Epoch 2, Training Loss: 27.374032475200355
Epoch 2, Validation Loss: 19.791476757630058
Epoch 3, Training Loss: 17.961743388690202
Epoch 3, Validation Loss: 10.177632072697515
Epoch 4, Training Loss: 12.368791257341703
Epoch 4, Validation Loss: 7.382863879203796
Epoch 5, Training Loss: 10.452091115362505
Epoch 5, Validation Loss: 6.488255588904671
Epoch 6, Training Loss: 9.266100761352801
Epoch 6, Validation Loss: 6.066049257050389
Epoch 7, Training Loss: 8.38611062513847
Epoch 7, Validation Loss: 5.760863348193791
Epoch 8, Training Loss: 7.653036782116282
Epoch 8, Validation Loss: 5.507841120595518
Epoch 9, Training Loss: 7.090192756524273
Epoch 9, Validation Loss: 5.166304870792057
Epoch 10, Training Loss: 6.510203619505845
Epoch 10, Validation Loss: 4.8898177250571875
Epoch 11, Training Loss: 6.108459731673493
Epoch 11, Validation Loss: 4.640707597784374
Epoch 12, Training Loss: 5.969558026130293


Evaluate on training data. Initially, we want the model to be able to overfit the training data.

In [13]:
model = model.to(device=device)
predictions = []
with t.no_grad():
    losses = []
    for b, (inputs, time_inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device=device)
        time_inputs = time_inputs.to(device=device)
        targets = targets.to(device=device)
        outputs = model(inputs, time_inputs, bayesian_predict=False)[:,0]
        predictions.append(outputs.cpu().numpy())
        loss = criterion(targets, outputs) * 1000
        # loss = loss * weights[None,:]
        # loss = t.mean(loss)
        losses.append(loss.item())
    losses = np.mean(losses)
    predictions = np.concatenate(predictions, axis=0)
    print("Training Loss: {}".format(losses.item()))

Training Loss: 3.0881037998433207


In [14]:
# plot training data. Trying to troubleshoot validation.
display(predictions.shape)
dependent_variable = "Energy Demand (MWH)"
prediction_data = (predictions * (input_max_vals[dependent_variable]-input_min_vals[dependent_variable]) + input_min_vals[dependent_variable]).flatten()
# prediction_data = (predictions[:,0] * (output_max_vals[dependent_variable]-output_min_vals[dependent_variable]) + output_min_vals[dependent_variable]).flatten()
display(prediction_data)

train_data = t.cat([batch[2] for batch in train_loader])
display(train_data.shape)
train_data = (train_data.cpu().numpy() * (input_max_vals[dependent_variable]-input_min_vals[dependent_variable]) + input_min_vals[dependent_variable]).flatten()
display(train_data)

trace1 = go.Scatter(x = np.array(range(0, train_data.shape[0])), y=train_data, mode="lines")
trace2 = go.Scatter(x = np.array(range(0, prediction_data.shape[0])), y=prediction_data, mode="lines")

fig = go.Figure([trace1, trace2])
fig.show()

(40729,)

array([8215.323 , 8048.9497, 7904.056 , ..., 5502.26  , 5303.7188,
       5014.2246], dtype=float32)

torch.Size([40729])

array([7960., 7525., 7188., ..., 5870., 5730., 5485.], dtype=float32)

Evaluate on validation data.

In [15]:
model = model.to(device=device)
predictions = []
with t.no_grad():
    losses = []
    for b, (inputs, time_inputs, targets) in enumerate(validation_loader):
        inputs = inputs.to(device=device)
        time_inputs = time_inputs.to(device=device)
        targets = targets.to(device=device)
        outputs = model(inputs, time_inputs, bayesian_predict=False)[:,0]
        predictions.append(outputs.cpu().numpy())
        loss = criterion(targets, outputs) * 1000
        # loss = loss * weights[None,:]
        loss = t.mean(loss)
        losses.append(loss.item())
    losses = np.mean(losses)
    predictions = np.concatenate(predictions, axis=0)
    print("Validation Loss: {}".format(losses.item()))

Validation Loss: 3.4276903252238813


In [16]:
# tensor_dataset = torch.cat([batch for batch in data_loader])
display(predictions.shape)
dependent_variable = "Energy Demand (MWH)"
prediction_data = (predictions * (input_max_vals[dependent_variable]-input_min_vals[dependent_variable]) + input_min_vals[dependent_variable]).flatten()
display(prediction_data)

val_data = t.cat([batch[2] for batch in validation_loader])
val_data = (val_data.cpu().numpy() * (input_max_vals[dependent_variable]-input_min_vals[dependent_variable]) + input_min_vals[dependent_variable]).flatten()
display(val_data)

trace1 = go.Scatter(x = np.array(range(0, val_data.shape[0])), y=val_data, mode="lines")
trace2 = go.Scatter(x = np.array(range(0, prediction_data.shape[0])), y=prediction_data, mode="lines")

fig = go.Figure([trace1, trace2])
fig.show()

(4525,)

array([4714.649 , 4461.3145, 4268.914 , ..., 5586.9556, 5659.0435,
       5704.0273], dtype=float32)

array([5244., 5030., 4867., ..., 5895., 6011., 6096.], dtype=float32)

In [18]:
prediction_sets = []
for n in range(100):
    predictions = []
    with t.no_grad():
        losses = []
        for b, (inputs, time_inputs, targets) in enumerate(validation_loader):
            inputs = inputs.to(device=device)
            time_inputs = time_inputs.to(device=device)
            targets = targets.to(device=device)
            outputs = model(inputs, time_inputs, bayesian_predict=True)[:,0]
            predictions.append(outputs.cpu().numpy())
        predictions = np.concatenate(predictions, axis=0)
    prediction_sets.append(predictions)

In [19]:
test = np.array(prediction_sets)
mean = np.mean(test, axis=0)
std = np.std(test, axis=0)
display(test.shape)
display(mean.shape)

(100, 4525)

(4525,)

In [20]:
# Combine error forecasts

# 1.96 is the z-value for the 2.5th percentile of the standard normal distribution
lower_cuts = mean - 1.96 * std  
upper_cuts = mean + 1.96 * std

x = np.array(range(len(mean)))
# Create figure showing forecasts and actual values
long_term_fig = go.Figure()
long_term_fig.add_trace(go.Scatter(x=x, y=mean, mode='lines', 
    name='Point Forecasts', line=dict(color='rgba(255, 0, 0)')))
# long_term_fig.add_trace(go.Scatter(x=evaluation_data.index, y=actual_values, mode='lines', 
#     name='Ground Truth', line=dict(color='rgba(0, 0, 255)')))
long_term_fig.add_trace(go.Scatter(x=x, y=upper_cuts, mode='lines', 
    name='Upper 95 CI', line=dict(color='rgba(0, 255, 0, 0.2)')))
long_term_fig.add_trace(go.Scatter(x=x, y=lower_cuts, mode='lines', 
    name='Lower 95 CI', line=dict(color='rgba(200, 125, 200, 0.2)')))

# Customize the layout
long_term_fig.update_layout(
    title="Long-term Evaluation of Forecasting Pipeline with Holdout Data",
    xaxis_title='Time',
    yaxis_title='Energy Demand (MWH)',
    template='plotly_dark' 
)

Hyperparameter Tune LSTM

In [None]:
# define hyperparameter sets
hyperparameter_sets = dict(lr=[0.001, 0.0005, 0.0001], batch_size=[100],
    hidden_size=[16,32,64], num_layers=[1,2], dropout=[0.01,0.1,0.2,0.3])

# Generate all possible combinations of values
keys = hyperparameter_sets.keys()
values = hyperparameter_sets.values()
combinations = itertools.product(*values)

# Create a list of dictionaries with all combinations
hyperparameter_sets = [dict(zip(keys, vals)) for vals in combinations]

# run grid-search hyperparameter tuning using 5-fold rolling cross-validation
hyperparameter_results = model.tune_hyperparameters(clean_training_data=clean_data, 
    dependent_variable="Energy Demand (MWH)", hyperparameter_sets=hyperparameter_sets, num_cv_folds=5)

In [6]:
best_lstm_hyperparameters = {"batch_size":100, "lr":0.0005, "dropout":0.1, "num_layers":1, "hidden_size":32, "sequence_length":24*7*3}

Cross Validation. Want to use rolling cross validation.

In [78]:
f = md.Forecaster()
# model, validation_inputs, validation_time_inputs, validation_targets = f.cross_validate_lstm(5, clean_training_data=clean_data, dependent_variable="Energy Demand (MWH)", device="cuda", sequence_length=24*7*3, forecasting_steps_ahead=24, lstm_hyperparameters=best_lstm_hyperparameters, verbose_validation=True, verbose_training=False)
# model, relative_confidence, point_forecasts, test_data = f.cross_validate_lstm(5, clean_training_data=clean_data, dependent_variable="Energy Demand (MWH)", device="cuda", forecasting_steps_ahead=24, lstm_hyperparameters=best_lstm_hyperparameters, verbose_validation=True, verbose_training=False)
avg_mse, avg_wmse = f.cross_validate_lstm(5, clean_training_data=clean_data, dependent_variable="Energy Demand (MWH)", device="cuda", forecasting_steps_ahead=24, lstm_hyperparameters=best_lstm_hyperparameters, verbose_validation=True, verbose_training=False)
# test_data = f.cross_validate_lstm(5, clean_training_data=clean_data, dependent_variable="Energy Demand (MWH)", device="cuda", forecasting_steps_ahead=24, lstm_hyperparameters=best_lstm_hyperparameters, verbose_validation=True, verbose_training=False)


Training Model for CV Fold 1 out of 5.
Validation Fold 1 out of 5. MSE: 2355191.6865989566, WMSE: 2355217.804640438.
Training Model for CV Fold 2 out of 5.
Validation Fold 2 out of 5. MSE: 1726503.0857673357, WMSE: 1726279.6307988367.
Training Model for CV Fold 3 out of 5.
Validation Fold 3 out of 5. MSE: 651318.4364938369, WMSE: 651241.8465901187.
Training Model for CV Fold 4 out of 5.
Validation Fold 4 out of 5. MSE: 441654.4263317488, WMSE: 441619.6803457872.
Training Model for CV Fold 5 out of 5.
Validation Fold 5 out of 5. MSE: 500273.8247376779, WMSE: 500245.6533802902.


In [79]:
print(avg_mse)
print(avg_wmse)

1134988.2919859113
1134920.9231510942


Test functions

In [22]:
lstm_device = "cuda"
f = md.Forecaster(short_term_horizon=24)
lstm = f.format_fit_lstm(clean_data, proportion_validation=0.1, lstm_device=lstm_device, **best_lstm_hyperparameters)

Average Parameter Absolute Value: 0.08891
Average Gradient Absolute Value: 1.30949
Epoch 1, Training Loss: 37.807886040269445
Epoch 1, Validation Loss: 27.798140650210172
Average Parameter Absolute Value: 0.09127
Average Gradient Absolute Value: 1.28848
Epoch 2, Training Loss: 31.04689477934626
Epoch 2, Validation Loss: 23.72108421118363
Average Parameter Absolute Value: 0.09245
Average Gradient Absolute Value: 1.16348
Epoch 3, Training Loss: 23.896932852679285
Epoch 3, Validation Loss: 15.496276331984479
Average Parameter Absolute Value: 0.09413
Average Gradient Absolute Value: 1.15609
Epoch 4, Training Loss: 14.850872492555327
Epoch 4, Validation Loss: 8.897569239139557
Average Parameter Absolute Value: 0.09586
Average Gradient Absolute Value: 0.73228
Epoch 5, Training Loss: 11.132658765821034
Epoch 5, Validation Loss: 7.4515281060467595
Average Parameter Absolute Value: 0.09755
Average Gradient Absolute Value: 0.55394
Epoch 6, Training Loss: 9.648099548710977
Epoch 6, Validation Los

In [None]:
lstm_device = "cuda"
f = md.Forecaster()
train_loader, val_loader, input_scaling, input_time_scaling = f.format_lstm_data(clean_data, forecasting_steps_ahead=24, proportion_validation=0.1, **best_lstm_hyperparameters)
lstm = md.LSTM(input_size=train_loader.dataset[0][0].shape[-1]+train_loader.dataset[0][1].shape[-1], output_size=1, **best_lstm_hyperparameters).to(device=lstm_device)
f.fit_lstm(lstm, train_loader=train_loader, val_loader=val_loader, input_scaling=input_scaling, input_time_scaling=input_time_scaling, device=lstm_device, verbose=True)

In [89]:
f = md.Forecaster()
train_loader, val_loader, input_scaling, input_time_scaling = f.format_lstm_data(clean_data, sequence_length=24*7*3, batch_size=100, forecasting_steps_ahead=24, proportion_validation=0.1)
lstm = md.LSTM(input_size=train_loader.dataset[0][0].shape[-1]+train_loader.dataset[0][1].shape[-1], hidden_size=32, num_layers=1, output_size=1, training_sequence_length=24*7*3, dropout=0.01).to(device="cuda")
f.fit_lstm(lstm, train_loader=train_loader, val_loader=val_loader, input_scaling=input_scaling, input_time_scaling=input_time_scaling, device="cuda", verbose=True, patience=1)

Average Parameter Absolute Value: 0.08868
Average Gradient Absolute Value: 1.68306
Epoch 1, Training Loss: 36.51101412326832
Epoch 1, Validation Loss: 26.986047236815743
Average Parameter Absolute Value: 0.09129
Average Gradient Absolute Value: 1.27498
Epoch 2, Training Loss: 27.335000308276395
Epoch 2, Validation Loss: 22.081485385480136
Average Parameter Absolute Value: 0.09288
Average Gradient Absolute Value: 1.38076
Epoch 3, Training Loss: 17.466644768057197
Epoch 3, Validation Loss: 12.369698122791622
Average Parameter Absolute Value: 0.09486
Average Gradient Absolute Value: 0.71545
Epoch 4, Training Loss: 12.419565059281336
Epoch 4, Validation Loss: 10.560663611992545
Average Parameter Absolute Value: 0.09619
Average Gradient Absolute Value: 0.14031
Epoch 5, Training Loss: 10.579024640797394
Epoch 5, Validation Loss: 10.264653251222942
Average Parameter Absolute Value: 0.09754
Average Gradient Absolute Value: 0.02231
Epoch 6, Training Loss: 9.629763339247022
Epoch 6, Validation L

In [24]:
# HERE
test = f.short_term_predict(clean_data, clean_data.shape[0]-(best_lstm_hyperparameters["sequence_length"]+f.short_term_horizon), best_lstm_hyperparameters["sequence_length"])

ValueError: ('Lengths must match to compare', (12,), (11,))

In [None]:
# TODO
# add capability to add L1 term when training LSTM (that way the results of the permutation analysis are more reliable)
# write code to conduct permutation analysis
# write code to conduct hyperparameter tuning for lstm
# write code to evaluate Forecaster class on short term forecasts (using EIA)

# All the unit tests.

Define validation function

Rolling Cross Validation