In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
from utils_data_reader import get_datasets_pytorch, prep_data_range, move_alt_axis_for_numpy
from utils_plot import plot_rho_24, get_rho_at_date
from ipynb.fs.full.model import Encoder, Decoder, Autoencoder # type: ignore
from utils_optim import CustomLoss, EarlyStopping, numpy_custom_loss


# Notes

This file is written to save .npy data for the outpus of NN. The .npy will be used in the next script. hw1_c_dicussion.ipynb.

In [6]:
# Pytorch's Dataset objects. By default, reading in log10
train, val, test, original_trainset= get_datasets_pytorch(train_ds=[2011, 2012, 2013, 2014, 2015, 2016], test_ds=[2017], train_ratio=0.8, shuffle=True)


class StandardScaler():
    def __init__(self, data) -> None:
        self.mean = np.mean(data)
        self.std = np.std(data)
    
    def transform(self, data):
        data = (data - self.mean) / self.std
        self.min = np.min(data)
        self.max = np.max(data)
        data = (data - self.min) / (self.max - self.min)
        return data
    
    def inverse_transform(self, data):
        data = data * (self.max - self.min) + self.min
        data = data * self.std + self.mean
        return data

ss = StandardScaler(original_trainset)

# # # Now, all NN's input is scaled to [0, 1]
# train.data = ss.transform(train.data)
# val.data = ss.transform(val.data)
test.data = ss.transform(test.data)


Preparing training data...
Loading data for year 2011
Loading data for year 2012
Loading data for year 2013
Loading data for year 2014
Loading data for year 2015
Loading data for year 2016
Concatenating data...
Preparing test data...
Loading data for year 2017
Concatenating data...
Train Set Shape: (41972, 36, 20, 24)
Val Set Shape:  (10492, 36, 20, 24)
Test Set Shape: (8736, 36, 20, 24)


In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Create instances of the encoder and decoder for this cell in the nb.
encoder = Encoder()
decoder = Decoder()
autoencoder = Autoencoder(encoder, decoder)

# double precision
double_precision = False

if double_precision:
    autoencoder = autoencoder.double()

# Define the loss function and optimizer
# criterion = nn.MSELoss()
criterion = CustomLoss(alpha=0.9995122197766073, epsillon=1e-6)

# instantiate the model
encoder = Encoder()
decoder = Decoder()
autoencoder = Autoencoder(encoder, decoder)
# test now

testLoader = DataLoader(test, batch_size=64)
# load weight from the last
autoencoder.load_state_dict(torch.load(f'./weights/autoencoder_best3.pth'))
nn.DataParallel(autoencoder).to(device)


autoencoder.eval()
test_loss = 0.0
with torch.no_grad():
    for data in testLoader:
        inputs = data.to(device) if double_precision else data.float().to(device)
        outputs = autoencoder(inputs)
        loss = criterion(outputs, inputs)
        test_loss += loss.item() * inputs.size(0)

    test_loss = test_loss / len(testLoader.dataset)

print(f"Test Loss: {test_loss:.4f}")
        
# get all outputs as numpy
outputs = []
with torch.no_grad():
    for data in testLoader:
        inputs = data.to(device) if double_precision else data.float().to(device)
        outputs.append(autoencoder(inputs).cpu().numpy())

nn_prediction = np.concatenate(outputs, axis=0)   

Test Loss: 0.0011


In [8]:
# show loss in NN's ditribution
mse, mape, total = numpy_custom_loss(nn_prediction, test.data)
np.save('pr_2017_nn.npy', nn_prediction)
np.save('gt_2017_nn.npy', test.data)
print("Loss in NN's distribution")
print(f"MSE: {mse:.4f}, MAPE: {mape:.4f}, Total: {total:.4f}")

# inverse transform to the original distrubiton and scale
pr_2017 = ss.inverse_transform(nn_prediction)
mse, mape, total = numpy_custom_loss(nn_prediction, ss.inverse_transform(pr_2017))
print("Loss in original distribution")
print(f"MSE: {mse:.4f}, MAPE: {mape:.4f}, Total: {total:.4f}")

# for original dimensions
pr_2017_orignal_dim = move_alt_axis_for_numpy(nn_prediction)
print(pr_2017_orignal_dim.shape)

# # save the npy
# np.save('pr_2017.npy', pr_2017_orignal_dim)
# print(f"saved pr_2017.npy")


Loss in NN's distribution
MSE: 0.0011, MAPE: 0.0902, Total: 0.0100
Loss in original distribution
MSE: 15864.6699, MAPE: 641.6058, Total: 14342.3635
(8736, 24, 20, 36)
