In [16]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload 
%autoreload 2

In [28]:
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import r2_score, mean_absolute_error
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import random
from pathlib import Path
import os
from datetime import datetime
from sequence_models_utility import get_num_examples_in_data, load_and_preprocess_sequence_data, \
                                    train_model_with_params_batched, mse_loss_l1_coeff_reg, mse_loss_l2_coeff_reg

## Parameters

In [23]:
# Both the dataset and the model output folder are resolved against the
# directory the kernel was started from.
data_file, work_dir = (
    os.path.join(os.getcwd(), leaf)
    for leaf in ("CAISO_Data_2019_2021_NN.csv", "lstm_models")
)

# Use the GPU whenever PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Display which torch device was selected in the parameters cell.
device

device(type='cuda')

Take a look at how the data is split among the train/val/test sets, depending on the train/val/test block sizes and the sequence length.

In [39]:
# Settings forwarded to load_and_preprocess_sequence_data. The block sizes are
# expressed in days and seq_len is the length of each example sequence
# (presumably in consecutive rows of the data file - confirm in the helper).
feature_cols = ['Load', 'VRE', 'Hour', 'Day_of_Year']
days_in_train_block = 6
days_in_val_block = 2
days_in_test_block = 2
seq_len = 4
data_sets = load_and_preprocess_sequence_data(data_file, feature_cols, days_in_train_block,
                                              days_in_val_block, days_in_test_block, seq_len, device)

print("After splitting data to blocks of example sequences...")
# Measure every split once, then report its share of all example sequences.
set_sizes = {set_name: len(data["X"]) for set_name, data in data_sets.items()}
total_examples = sum(set_sizes.values())
# Fail with a readable message instead of a ZeroDivisionError in the loop below.
assert total_examples > 0, "load_and_preprocess_sequence_data returned no example sequences"
for set_name, n_examples in set_sizes.items():
    print(f"Number of example sequences in {set_name} set: {n_examples} ({100*n_examples/total_examples:.1f}%)")

After splitting data to blocks of example sequences...
Number of example sequences in train set: 15158 (60.5%)
Number of example sequences in val set: 4944 (19.7%)
Number of example sequences in test set: 4935 (19.7%)


In [11]:
# Shape of the training targets; the captured output suggests
# (num_sequences, seq_len) - TODO confirm against the loader.
data_sets["train"]["y"].shape

torch.Size([15158, 4])

In [12]:
# Shape of the training inputs; presumably (num_sequences, seq_len, num_features),
# matching the batch_first LSTM defined in this notebook - TODO confirm.
data_sets["train"]["X"].shape

torch.Size([15158, 4, 4])

## Define model

In [19]:
class LSTM(nn.Module):
    """Stack of single-layer LSTMs, each followed by batch norm and dropout, with a linear head.

    Every stage is a separate ``nn.LSTM`` (``batch_first=True``) so that
    ``BatchNorm1d`` and ``Dropout`` can be applied between stages. The final
    ``nn.Linear`` maps each retained time step from ``hidden_size`` to
    ``output_size``.

    Args:
        input_size: Number of features per time step of the input sequence.
        hidden_size: Hidden dimension of every LSTM stage.
        output_size: Number of values produced per retained time step.
        final_point_only: If True only the last time step is passed to the
            linear head; otherwise every time step is.
        num_layers: Number of LSTM -> BatchNorm -> Dropout stages (>= 1).
        dropout: Dropout probability applied after every stage.
        debug: If True, ``forward`` prints intermediate tensors.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1. Previously such a
            value built one LSTM layer but no batch-norm/dropout layers and
            then failed inside ``forward`` because no stage was ever run.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 final_point_only, num_layers=1, dropout=0, debug=False):
        super().__init__()
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")
        self.final_point_only = final_point_only
        self.debug = debug
        self.num_layers = num_layers
        # The first stage consumes the raw features; later stages consume the
        # previous stage's hidden states.
        stage_input_sizes = [input_size] + [hidden_size] * (num_layers - 1)
        self.lstm_layers = nn.ModuleList(
            [nn.LSTM(input_size=in_size, hidden_size=hidden_size, batch_first=True)
             for in_size in stage_input_sizes]
        )
        self.drop_layers = nn.ModuleList([nn.Dropout(dropout) for _ in range(num_layers)])
        self.bn_layers = nn.ModuleList([nn.BatchNorm1d(hidden_size) for _ in range(num_layers)])
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input_seq):
        """Run the stacked stages and the linear head.

        Args:
            input_seq: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, 1, output_size) when ``final_point_only``
            is set, otherwise (batch, seq_len, output_size).
        """
        stage_input = input_seq
        for lstm, batch_norm, drop in zip(self.lstm_layers, self.bn_layers, self.drop_layers):
            lstm_out, (hn, cn) = lstm(stage_input)
            # BatchNorm1d expects (batch, channels, length), so move the hidden
            # dimension to the channel axis and back again afterwards.
            lstm_out = batch_norm(lstm_out.permute(0, 2, 1)).permute(0, 2, 1)
            lstm_out = drop(lstm_out)
            stage_input = lstm_out
        if self.final_point_only:
            # Keep the sequence axis (length 1) so the output stays 3-D.
            lstm_out = torch.unsqueeze(lstm_out[:, -1, :], 1)
        fc_out = self.fc(lstm_out)
        if self.debug:
            # hn / cn are the states of the last stage only.
            print(f"lstm_out: {lstm_out}")
            print(f"hn: {hn}")
            print(f"cn: {cn}")
            print(f"self.fc: {self.fc}")
            print(f"fc_out: {fc_out}")
        return fc_out


## Model Training / Hyperparam Tuning

In [40]:
# reproducibility: seed every RNG this notebook imports before the model
# weights are initialised, so a Restart & Run All repeats the same experiment
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# data settings
feature_cols = ['Load', 'VRE', 'Hour', 'Day_of_Year']
days_in_train_block = 6
days_in_val_block = 2
days_in_test_block = 2
seq_len = 4

# model settings
input_size = len(feature_cols)
hidden_size = 8
output_size = 3  # 2 for no intercept term, 3 to include an intercept term
final_point_only = True
num_layers = 2
dropout = .5
model = LSTM(input_size, hidden_size, output_size, final_point_only, num_layers, dropout)
model.to(device)

# optimizer settings
learning_rate = .003
weight_decay = .01

# loss function settings
loss_function = mse_loss_l1_coeff_reg
MEF_reg_weight, MDF_reg_weight = 1e7, 1e7

# training settings
# The directory label is derived from seq_len so it cannot drift from the data
# settings above (it was hard-coded as "seq4").
model_dir_prefix = f"predict_{'final' if final_point_only else 'all'}_in_seq{seq_len}_BNs"
batch_size = 2048
epochs = 500  # alternative value left by the author: 1500
min_save_r2 = .7  # alternative value left by the author: .86
max_save_mae = 200000  # alternative value left by the author: 150000
print_freq = 100  # alternative value left by the author: 50


# Load and preprocess data
data_sets = load_and_preprocess_sequence_data(data_file, feature_cols, days_in_train_block,
                                              days_in_val_block, days_in_test_block, seq_len, device)

model_dir = os.path.join(work_dir, model_dir_prefix)

# The two settings dicts are handed to the training helper alongside the
# objects themselves (presumably for logging / checkpoint metadata - confirm).
model_settings = {"input_size": input_size,
                  "hidden_size": hidden_size,
                  "output_size": output_size,
                  "final_point_only": final_point_only,
                  "num_layers": num_layers,
                  "dropout": dropout}
data_settings = {"feature_cols": feature_cols,
                 "days_in_train_block": days_in_train_block,
                 "days_in_val_block": days_in_val_block,
                 "days_in_test_block": days_in_test_block,
                 "seq_len": seq_len}

# train; the return value is kept as save_model_path (presumably the path of
# the saved checkpoint - confirm in sequence_models_utility)
save_model_path = train_model_with_params_batched(data_sets, data_settings,  # data
                            model, model_settings,  # model settings
                            learning_rate, weight_decay,  # optimizer settings
                            loss_function, MEF_reg_weight, MDF_reg_weight,  # loss function settings
                            model_dir, batch_size, epochs, print_freq, min_save_r2, max_save_mae)  # train settings

Model Settings:
	input_size=4
	hidden_size=8
	output_size=3
	final_point_only=True
	num_layers=2
	dropout=0.5
Model Architecture:
LSTM(
  (lstm_layers): ModuleList(
    (0): LSTM(4, 8, batch_first=True)
    (1): LSTM(8, 8, batch_first=True)
  )
  (drop_layers): ModuleList(
    (0): Dropout(p=0.5, inplace=False)
    (1): Dropout(p=0.5, inplace=False)
  )
  (bn_layers): ModuleList(
    (0): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fc): Linear(in_features=8, out_features=3, bias=True)
)
Data Settings:
	feature_cols=['Load', 'VRE', 'Hour', 'Day_of_Year']
	days_in_train_block=6
	days_in_val_block=2
	days_in_test_block=2
	seq_len=4
Optimizer Settings:
	learning_rate=0.003
	weight_decay=0.01
Loss Function Settings:
	loss_function=<function mse_loss_l1_coeff_reg at 0x0000016C9332BF70>
	MEF_reg_weight=10000000.0
	MDF_reg_weight=10000000.0
Train Process Settings:
	

  0%|          | 0/500 [00:00<?, ?it/s]

[Epoch 0]
	Train Set: Loss=3.494e+12, R Squared=0.0007, Invalid MEFs=0, Invalid MDFs=13744
	Val Set: Loss=1.362e+12, R Squared=0.0007, Invalid MEFs=0, Invalid MDFs=4553


KeyboardInterrupt: 

## Inference on all data

In [102]:
## below we can load specific models instead of best one found in most recent experiment
## below we can load specific models instead of best one found in most recent experiment
# NOTE(review): this checkpoint is named "predict_all_in_seq8", while the most
# recent data-loading cell in this notebook uses seq_len = 4. Rebuild data_sets
# with the sequence length the checkpoint was trained on before running this cell.
model_path = "drive/MyDrive/NN_MEFs/lstm_models/predict_all_in_seq8/2022_12_08-12:36:39_AM/epoch=998,r2=0.8741,Invalids=0.pth"

train_set, val_set = data_sets["train"], data_sets["val"]
train_X, train_bottleneck_X, train_y = train_set["X"], train_set["bottleneck_X"], train_set["y"]
val_X, val_bottleneck_X, val_y = val_set["X"], val_set["bottleneck_X"], val_set["y"]

# The architecture must match the checkpoint being loaded.
input_size = train_X.shape[-1]
output_size = 3  # 3 means we have an intercept
hidden_size = 32
final_point_only = False
num_layers = 1
dropout = 0

model = LSTM(input_size, hidden_size, output_size, final_point_only, num_layers, dropout)
model.to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

# Pure inference: skip autograd bookkeeping for the full-split forward passes.
with torch.no_grad():
    train_pred_coeff = model(train_X.float())
    val_pred_coeff = model(val_X.float())


In [103]:
# Combine the predicted coefficients with the bottleneck inputs to get predicted targets.
# NOTE(review): get_y_pred is not imported in the import cell of this notebook -
# presumably it lives in sequence_models_utility; confirm and add it there.
train_y_pred = get_y_pred(train_pred_coeff, train_bottleneck_X)

In [112]:
# Sanity check: the prediction shape should match train_y.
train_y_pred.shape

torch.Size([14443, 8])

In [113]:
# Overall R^2 on the training split, computed over all sequence positions at once.
r2_score(
    train_y.detach().cpu().numpy(),
    train_y_pred.detach().cpu().numpy(),
)

0.8834384493448952

In [116]:
# R^2 for each position in the sequence: transpose so each row holds one
# position across all example sequences, then score row by row.
[
    r2_score(actual, predicted)
    for actual, predicted in zip(
        train_y.detach().cpu().numpy().T,
        train_y_pred.detach().cpu().numpy().T,
    )
]

[0.8529343380942997,
 0.8696363903200108,
 0.8795923203147114,
 0.8862990346813329,
 0.8909820117950451,
 0.8939545501303765,
 0.8963344742449033,
 0.897774475458195]

In [117]:
# Same evaluation as for the training split, now on the validation split.
val_y_pred = get_y_pred(val_pred_coeff, val_bottleneck_X)
print(f"overall val r2: {r2_score(val_y.detach().cpu().numpy(), val_y_pred.detach().cpu().numpy())}")
print("val r2 by sequence position:")
# Transposing puts one sequence position per row so each can be scored separately.
print([
    r2_score(actual, predicted)
    for actual, predicted in zip(
        val_y.detach().cpu().numpy().T,
        val_y_pred.detach().cpu().numpy().T,
    )
])

overall val r2: 0.8741031307517685
val r2 by sequence position:
[0.8568319386070357, 0.8682907729195138, 0.8745469021399369, 0.8777426423604695, 0.8794796853834416, 0.8781586552040923, 0.8787055918233209, 0.8790688571549825]


In [None]:
# NOTE(review): this cell relies on names that are not defined or imported
# anywhere in the visible notebook (train_x / val_x / test_x, n_out, get_model,
# best_model_path, CAISO_train / CAISO_val / CAISO_test, get_r_squared,
# get_mean_abs_err, get_count_invalid_preds). It also rebinds `model`, replacing
# the LSTM instance created earlier.
hidden_dims = [512, 256]
bias_term = True
dropout_p = 0.5
n_input = train_x.shape[1]


model = get_model(n_input, hidden_dims, n_out, dropout_p)
model.to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(best_model_path, map_location=device))
model.eval()

# Pure inference: no autograd graph is needed for the predictions.
with torch.no_grad():
    train_pred_coeff = model(train_x.float()).cpu()
    val_pred_coeff = model(val_x.float()).cpu()
    test_pred_coeff = model(test_x.float()).cpu()

# Pair every split's predictions with its reference frame once and reuse the
# pairing for both metrics (dicts keep insertion order: Train, Val, Test).
preds_and_frames = {"Train": (train_pred_coeff, CAISO_train),
                    "Val": (val_pred_coeff, CAISO_val),
                    "Test": (test_pred_coeff, CAISO_test)}

print("R Squared:")
for split_name, (pred_coeff, frame) in preds_and_frames.items():
    print(f"\t{split_name}: {get_r_squared(pred_coeff, frame, bias_term):.4f}")
print("Mean Absolute Error:")
for split_name, (pred_coeff, frame) in preds_and_frames.items():
    print(f"\t{split_name}: {get_mean_abs_err(pred_coeff, frame, bias_term):.2f}")
print("Count Invalid Values Predicted:")
invalid_train_MEFs, invalid_train_MDFs = get_count_invalid_preds(train_pred_coeff)
invalid_val_MEFs, invalid_val_MDFs = get_count_invalid_preds(val_pred_coeff)
invalid_test_MEFs, invalid_test_MDFs = get_count_invalid_preds(test_pred_coeff)
print(f"\tTrain: Invalid MEFs={invalid_train_MEFs}, Invalid MDFs={invalid_train_MDFs}")
print(f"\tVal: Invalid MEFs={invalid_val_MEFs}, Invalid MDFs={invalid_val_MDFs}")
print(f"\tTest: Invalid MEFs={invalid_test_MEFs}, Invalid MDFs={invalid_test_MDFs}")


R Squared:
	Train: 0.8921
	Val: 0.8805
	Test: 0.8755
Mean Absolute Error:
	Train: 131028.76
	Val: 140016.71
	Test: 139890.26
Count Invalid Values Predicted:
	Train: Invalid MEFs=0, Invalid MDFs=0
	Val: Invalid MEFs=0, Invalid MDFs=0
	Test: Invalid MEFs=0, Invalid MDFs=0


### Merge the predicted MEFs and MDFs from all sets back into the original DataFrame, in timestamp order, for viewing

In [None]:
# Pair every prediction row with the index label of its source frame, then
# sort the pairs by that label so the three splits interleave back into order.
all_preds_w_timestamps = sorted(
    [
        *zip(CAISO_val.index, val_pred_coeff.detach().numpy()),
        *zip(CAISO_train.index, train_pred_coeff.detach().numpy()),
        *zip(CAISO_test.index, test_pred_coeff.detach().numpy()),
    ],
    key=lambda pair: pair[0],
)
# Drop the labels again and stack the prediction rows into one array.
all_preds_ordered = np.array([coeffs for _stamp, coeffs in all_preds_w_timestamps])

In [None]:
# Column 0 holds the MEF and column 1 the MDF of every prediction row.
all_MEFs_ordered = all_preds_ordered[:, 0]
all_MDFs_ordered = all_preds_ordered[:, 1]
# A third column exists only when the model was built with an intercept output.
# Indexing it unconditionally raised IndexError for two-output models, so fall
# back to None; the intercept is only consumed when bias_term is set.
all_intercepts_ordered = all_preds_ordered[:, 2] if all_preds_ordered.shape[1] > 2 else None

In [None]:
# Store the ordered coefficients as new columns of the full frame.
CAISO_Data.loc[:, "MEF"] = all_MEFs_ordered
CAISO_Data.loc[:, "MDF"] = all_MDFs_ordered
if bias_term:
    CAISO_Data.loc[:, "Intercept"] = all_intercepts_ordered

# Predicted emissions change = MEF * delta_Load + MDF * delta_VRE (+ Intercept).
d_emissions = (
    CAISO_Data["MEF"] * CAISO_Data["delta_Load"]
    + CAISO_Data["MDF"] * CAISO_Data["delta_VRE"]
)
if bias_term:
    d_emissions = d_emissions + CAISO_Data["Intercept"]
CAISO_Data.loc[:, "Predicted_delta_Total_CO2_Emissions"] = d_emissions

# Signed error, and absolute error relative to the actual change. Despite its
# name, "%_Error" is a fraction (it is not multiplied by 100).
CAISO_Data.loc[:, "Error"] = (
    CAISO_Data["Predicted_delta_Total_CO2_Emissions"] - CAISO_Data["delta_Total_CO2_Emissions"]
)
CAISO_Data.loc[:, "%_Error"] = (
    CAISO_Data["Error"].abs() / CAISO_Data["delta_Total_CO2_Emissions"].abs()
)

# Summary metrics over the whole data set (all splits combined).
print("Whole Data Set:")
print(f"\tMean Emissions Change = {CAISO_Data['delta_Total_CO2_Emissions'].abs().mean():.2f}")
print(f"\tR Squared = {r2_score(CAISO_Data['delta_Total_CO2_Emissions'], CAISO_Data['Predicted_delta_Total_CO2_Emissions']):.4f}")
print(f"\tMean Absolute Error = {mean_absolute_error(CAISO_Data['delta_Total_CO2_Emissions'], CAISO_Data['Predicted_delta_Total_CO2_Emissions']):.2f}")

Whole Data Set:
	Mean Emissions Change = 413165.73
	R Squared = 0.8864
	Mean Absolute Error = 134598.78


In [None]:
# Preview the first rows, including the newly added prediction and error columns.
CAISO_Data.head()

Unnamed: 0,Load,Net Load,Total_CO2_Emissions,Total_SO2_Emissions,Total_NOX_Emissions,VRE,delta_Load,delta_Net_Load,delta_Total_CO2_Emissions,delta_Total_SO2_Emissions,...,Day_of_Week=3,Day_of_Week=4,Day_of_Week=5,Day_of_Week=6,MEF,MDF,Intercept,Predicted_delta_Total_CO2_Emissions,Error,%_Error
2019-01-01 00:00:00,22822.964472,20502.358502,5103942.0,425.327933,1632.821698,2320.593616,-1285.054865,-1255.110267,-337029.794143,-24.14218,...,False,False,False,False,370.729431,-350.03064,6396.021973,-459526.000872,-122496.206729,0.363458
2019-01-01 01:00:00,21879.620618,19606.836908,4867578.0,404.315852,1557.650531,2272.780097,-944.689268,-896.922625,-243021.8337,-21.594332,...,False,False,False,False,345.051025,-323.291504,6071.847168,-304454.710093,-61432.876394,0.252787
2019-01-01 02:00:00,21257.45402,19056.267637,4723101.0,383.695714,1496.197481,2201.182455,-614.64102,-545.206677,-144846.797503,-20.952957,...,False,False,False,False,323.416351,-301.258514,5678.250977,-172189.482236,-27342.684733,0.18877
2019-01-01 03:00:00,20974.800758,18871.418601,4693112.0,380.561848,1466.329836,2103.388502,-281.391674,-191.565227,-24776.569759,-2.164379,...,False,False,False,False,305.443024,-282.996887,5341.07666,-55190.639304,-30414.069545,1.227534
2019-01-01 04:00:00,20327.083333,18012.666667,5032423.0,711.911968,2391.65787,2314.666667,30.416667,74.416667,49254.136541,69.703951,...,False,False,False,False,303.949066,-272.05777,5023.662109,26216.649928,-23037.486613,0.467727


In [None]:
# Write the frame with the predicted coefficients into the model's directory.
# NOTE(review): best_model_dir is not defined in the visible notebook - confirm where it is set.
CAISO_Data.to_csv(f"{best_model_dir}/CAISO_Data_2019_2021_NN_Ts.with_coeff_preds.csv")

In [None]:
# Number of rows whose predicted MEF is zero or negative.
int((CAISO_Data.loc[:, "MEF"] <= 0).sum())

0

In [None]:
# Number of rows whose predicted MEF exceeds 600.
int((CAISO_Data.loc[:, "MEF"] > 600).sum())

53

In [None]:
# Largest predicted MEF across the whole data set.
CAISO_Data.loc[:,"MEF"].max()

719.43835