# Utils

In [28]:
import os, random, warnings
import numpy as np
import pandas as pd
import datetime
import torch
# import cv2
import re
import matplotlib.pyplot as plt
import sciencebasepy
import urllib.request
from sklearn import preprocessing
from urllib.request import urlopen
from zipfile import ZipFile
from convlstm import ConvLSTM, ConvLSTMCell

def download_data(data_dir):
    if os.listdir(data_dir) == []: ## data not downloaded yet
        print('Data folder is empty! Download the files now!')
        # set the url
        zipurl = 'https://github.com/leap-stc/LEAPCourse-Climate-Pred-Challenges/raw/main/Project-StarterCodes/Project2-PhysicsML/data/numpy_files.zip'
        # download the file from the URL
        zipresp = urlopen(zipurl)
        # create a new file on the hard drive
        tempzip = open(data_dir + 'numpy_files.zip', "wb")
        # write the contents of the downloaded file into the new file
        tempzip.write(zipresp.read())
        # close the newly-created file
        tempzip.close()
        # re-open the newly-created file with ZipFile()
        zf = ZipFile(data_dir + 'numpy_files.zip')
        # extract its contents into <extraction_path>
        # note that extractall will automatically create the path
        zf.extractall(path = data_dir)
        # close the ZipFile instance
        zf.close()
        print('Files all downloaded!')
        
def read_data(data_dir, simulate = True):
    # load data
    x_full = np.load(data_dir + '/processed_features.npy') #standardized inputs
    x_raw_full = np.load(data_dir + '/features.npy') #raw inputs
    if simulate:
        diag_full = np.load(data_dir + '/diag.npy') 
        label_full = np.load(data_dir + '/labels.npy') #simulated lake temperatures

        # process data
        mask_full = np.ones(label_full.shape) # no missing values to mask for simulated data
        phy_full = np.concatenate((x_raw_full[:,:,:(-2)], diag_full), axis=2) 
        ## phy: 4-air temp, 5-rel hum, 6-wind speed, 9-ice flag
    else:
        diag_full = np.load(data_dir + 'diag.npy')
        label_full = np.load(data_dir + 'Obs_temp.npy') # real observation data
        mask_full = np.load(data_dir + 'Obs_mask.npy') # flags of missing values
        phy_full = np.concatenate((x_raw_full[:,:,:-2], diag_full), axis = 2) #physics variables
    full_dict = {
        'x':x_full,
        'x_raw': x_raw_full,
        'diag': diag_full,
        'label':label_full,
        'mask':mask_full,
        'phy':phy_full
    }
    return full_dict



def data_split(full_dict):
    N = full_dict['x'].shape[1]
    idx_tr, idx_va, idx_te = (int(N/3), int(N/3*2), N)
    
    train_dict = {}
    valid_dict = {}
    test_dict = {}
    train_full_dict = {}
    for key, val in full_dict.items():
        train_dict[key] = val[:, :idx_tr]
        valid_dict[key] = val[:, idx_tr:idx_va]
        train_full_dict[key] = val[:, :idx_va]
        
        test_dict[key] = val[:, idx_va:]
    return train_dict, valid_dict, train_full_dict, test_dict

def generate_samples(dict_, window_size = 352, strides = 352//2,):
    sample_dict = {}
    for key, val in dict_.items():
        sample_dict[key] = []
    
    size = dict_['x'].shape[1]
    for key, val in dict_.items():
        loc = 0
        while loc + window_size < size:
            
            tmp_array = val[:, loc: loc + window_size]
            tmp_array = np.expand_dims(tmp_array, axis = 0)
            sample_dict[key].append(tmp_array)
            
            loc += strides
        sample_dict[key] = np.vstack(sample_dict[key]).astype(np.float32)
    return sample_dict
        
    

# Dataset

In [15]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Dataset


from typing import List


class LakeDataset(Dataset):
    def __init__(self, dic):
        self.dic = dic
        
    def __getitem__(self, index):
        tmp_dic = {}
        for key, val in self.dic.items():
            tmp_dic[key] = val[index, :]
        
        return tmp_dic
    
    def __len__(self):
        return self.dic['x'].shape[0]
    
    
def mask_helper(label, mask, mask_rate = 0.99):
    if mask_rate != 0:
        depth, days = label.shape
        label = label.reshape(-1, 1)
        mask = mask.reshape(-1, 1)
        idx = np.random.choice(np.arange(label.shape[0]), 
                               replace=False, 
                               size=int(label.shape[0] * mask_rate))
        label[idx, ] = 0
        mask[idx, ] = 0
        label = label.reshape(depth, days)
        mask = label.reshape(depth, days)
    else:
        pass
    return label, mask

def get_dataloader(path, window_size, strides, batch_size, mask_rate = 0.99, simulate = True):
    full_dict = read_data(path, simulate = simulate)
    train_dict, valid_dict, train_full_dict, test_dict = data_split(full_dict)
    
    train_dict['label'], train_dict['mask'] = mask_helper(train_dict['label'], train_dict['mask'], mask_rate)
    valid_dict['label'], valid_dict['mask'] = mask_helper(valid_dict['label'], valid_dict['mask'], mask_rate)
    train_full_dict['label'], train_full_dict['mask'] = mask_helper(train_full_dict['label'], 
                                                                    train_full_dict['mask'], 
                                                                    mask_rate)
    
    train_samples = generate_samples(train_dict, window_size = window_size, strides = strides)
    valid_samples = generate_samples(valid_dict, window_size = window_size, strides = strides)
    test_samples = generate_samples(test_dict, window_size = window_size, strides = strides)
    


    train_dataset = LakeDataset(train_samples)
    valid_dataset = LakeDataset(valid_samples)
    test_dataset = LakeDataset(test_samples)
    tr_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle=True)
    va_loader = DataLoader(dataset = valid_dataset, batch_size = batch_size)

    te_loader = DataLoader(dataset = test_dataset, batch_size = batch_size)


    full_samples = generate_samples(train_full_dict, window_size = window_size, strides = strides)
    full_dataset = LakeDataset(full_samples)
    full_loader = DataLoader(dataset = train_dataset, batch_size = batch_size)
    
    return tr_loader, va_loader, full_loader, te_loader


# Loss

In [16]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn


def density_calculation(temp):
    # converts temperature to density
    # parameter:
        # @temp: single value or array of temperatures to be transformed
    densities = 1000 * (1 - ((temp + 288.9414) * (temp - 3.9863)**2) / (508929.2 * (temp + 68.12963)))
    return densities

def lake_energy_calculation(temps, densities, depth_areas):
    # calculate the total energy of the lake for every timestep
    # sum over all layers the (depth cross-sectional area)*temp*density*layer_height)
    # then multiply by the specific heat of water 
    dz = 0.5 # thickness for each layer, hardcoded for now
    cw = 4186 # specific heat of water
    depth_areas = torch.reshape(depth_areas, (-1, 1))
    energy = torch.sum(depth_areas * temps * densities * dz * cw, axis=1)
    return energy

def calculate_lake_energy_deltas(energies, combine_days, surface_area):
    # given a time series of energies, compute and return the differences
    # between each time step, or time step interval (parameter @combine_days)
    # as specified by parameter @combine_days
    time = 86400 #seconds per day
    energy_deltas = (energies[:,1:] - energies[:,:-1]) / (time * surface_area)
    return energy_deltas

def calculate_vapour_pressure_saturated(temp):
    # returns in miilibars
    # Converted pow function to exp function workaround pytorch not having autograd implemented for pow
    exponent = (9.28603523 - (2332.37885 / (temp + 273.15))) * np.log(10)
    return torch.exp(exponent)

def calculate_vapour_pressure_air(rel_hum, temp):
    rh_scaling_factor = 1
    return rh_scaling_factor * (rel_hum / 100) * calculate_vapour_pressure_saturated(temp)


def calculate_wind_speed_10m(ws, ref_height = 2.):
    # from GLM code glm_surface.c
    c_z0 = torch.tensor(0.001) #default roughness
    return ws * (torch.log(10.0 / c_z0) / torch.log(ref_height / c_z0))


def calculate_air_density(air_temp, rh):
    # returns air density in kg / m^3
    # equation from page 13 GLM/GLEON paper(et al Hipsey)
    # Ratio of the molecular (or molar) weight of water to dry air
    mwrw2a = 18.016 / 28.966
    c_gas = 1.0e3 * 8.31436 / 28.966

    # atmospheric pressure
    p = 1013. #mb

    # water vapor pressure
    vapPressure = calculate_vapour_pressure_air(rh, air_temp)

    # water vapor mixing ratio (from GLM code glm_surface.c)
    r = mwrw2a * vapPressure / (p - vapPressure)
    return (1.0 / c_gas * (1 + r)/(1 + r / mwrw2a) * p / (air_temp + 273.15)) * 100

def calculate_heat_flux_sensible(surf_temp, air_temp, rel_hum, wind_speed):
    # equation 22 in GLM/GLEON paper(et al Hipsey)
    # GLM code ->  Q_sensibleheat = -CH * (rho_air * 1005.) * WindSp * (Lake[surfLayer].Temp - MetData.AirTemp);
    # calculate air density 
    rho_a = calculate_air_density(air_temp, rel_hum)

    # specific heat capacity of air in J/(kg*C)
    c_a = 1005.

    # bulk aerodynamic coefficient for sensible heat transfer
    c_H = 0.0013

    # wind speed at 10m
    U_10 = calculate_wind_speed_10m(wind_speed)
    return -rho_a * c_a * c_H * U_10 * (surf_temp - air_temp)

def calculate_heat_flux_latent(surf_temp, air_temp, rel_hum, wind_speed):
    # equation 23 in GLM/GLEON paper(et al Hipsey)
    # GLM code-> Q_latentheat = -CE * rho_air * Latent_Heat_Evap * (0.622/p_atm) * WindSp * (SatVap_surface - MetData.SatVapDef)
    # where,         SatVap_surface = saturated_vapour(Lake[surfLayer].Temp);
    #                rho_air = atm_density(p_atm*100.0,MetData.SatVapDef,MetData.AirTemp);
    # air density in kg/m^3
    rho_a = calculate_air_density(air_temp, rel_hum)

    # bulk aerodynamic coefficient for latent heat transfer
    c_E = 0.0013

    # latent heat of vaporization (J/kg)
    lambda_v = 2.453e6

    # wind speed at 10m height
    # U_10 = wind_speed
    U_10 = calculate_wind_speed_10m(wind_speed)
    # 
    # ratio of molecular weight of water to that of dry air
    omega = 0.622

    # air pressure in mb
    p = 1013.

    e_s = calculate_vapour_pressure_saturated(surf_temp)
    e_a = calculate_vapour_pressure_air(rel_hum, air_temp)
    return -rho_a * c_E * lambda_v * U_10 * (omega / p) * (e_s - e_a)

def calculate_energy_fluxes(phys, surf_temps, combine_days):    
    e_s = 0.985 # emissivity of water, given by Jordan
    alpha_sw = 0.07 # shortwave albedo, given by Jordan Read
    alpha_lw = 0.03 # longwave, albeda, given by Jordan Read
    sigma = 5.67e-8 # Stefan-Baltzmann constant
    R_sw_arr = phys[:-1,2] + (phys[1:,2] - phys[:-1,2]) / 2
    R_lw_arr = phys[:-1,3] + (phys[1:,3] - phys[:-1,3]) / 2
    R_lw_out_arr = e_s * sigma * (torch.pow(surf_temps[:] + 273.15, 4))
    R_lw_out_arr = R_lw_out_arr[:-1] + (R_lw_out_arr[1:] - R_lw_out_arr[:-1]) / 2

    air_temp = phys[:-1,4] 
    air_temp2 = phys[1:,4]
    rel_hum = phys[:-1,5]
    rel_hum2 = phys[1:,5]
    ws = phys[:-1, 6]
    ws2 = phys[1:,6]
    t_s = surf_temps[:-1]
    t_s2 = surf_temps[1:]
    E = calculate_heat_flux_latent(t_s, air_temp, rel_hum, ws)
    H = calculate_heat_flux_sensible(t_s, air_temp, rel_hum, ws)
    E2 = calculate_heat_flux_latent(t_s2, air_temp2, rel_hum2, ws2)
    H2 = calculate_heat_flux_sensible(t_s2, air_temp2, rel_hum2, ws2)
    E = (E + E2) / 2
    H = (H + H2) / 2
    fluxes = (R_sw_arr[:-1] * (1-alpha_sw) + R_lw_arr[:-1] * (1-alpha_lw) - R_lw_out_arr[:-1] + E[:-1] + H[:-1])
    return fluxes


def energy_fluxes_calculation(phys, surf_temps):
    e_s = 0.985 # emissivity of water, given by Jordan
    alpha_sw = 0.07 # shortwave albedo, given by Jordan Read
    alpha_lw = 0.03 # longwave, albeda, given by Jordan Read
    sigma = 5.67e-8 # Stefan-Baltzmann constant
    
    R_sw_arr = phys[:, :-1, 2] + (phys[:, 1:, 2] - phys[:, :-1, 2]) / 2
    R_lw_arr = phys[:, :-1, 3] + (phys[:, 1:, 3] - phys[:, :-1, 3]) / 2
    R_lw_out_arr = e_s * sigma * (torch.pow(surf_temps[:] + 273.15, 4))
    R_lw_out_arr = R_lw_out_arr[:,:-1] + (R_lw_out_arr[:, 1:] - R_lw_out_arr[:, :-1]) / 2
    
    air_temp = phys[:, :-1, 4] 
    air_temp2 = phys[:, 1:, 4]
    rel_hum = phys[:, :-1,5]
    rel_hum2 = phys[:, 1:,5]
    ws = phys[:, :-1, 6]
    ws2 = phys[:, 1:,6]
    t_s = surf_temps[:, :-1]
    t_s2 = surf_temps[:, 1:]
    
    E = calculate_heat_flux_latent(t_s, air_temp, rel_hum, ws)
    H = calculate_heat_flux_sensible(t_s, air_temp, rel_hum, ws)
    E2 = calculate_heat_flux_latent(t_s2, air_temp2, rel_hum2, ws2)
    H2 = calculate_heat_flux_sensible(t_s2, air_temp2, rel_hum2, ws2)
    E = (E + E2) / 2
    H = (H + H2) / 2
    fluxes = (R_sw_arr[:,:-1] * (1-alpha_sw) + \
              R_lw_arr[:,:-1] * (1-alpha_lw) - \
              R_lw_out_arr[:,:-1] + E[:,:-1] + H[:, :-1])
    return fluxes


def EC_loss(preds, phys, depth_areas, n_depths, ec_threshold):
    
    densities = density_calculation(preds)
    lake_energies = lake_energy_calculation(preds, densities, depth_areas=depth_areas)
    lake_energy_deltas = calculate_lake_energy_deltas(lake_energies, None, depth_areas[0])

    lake_energy_deltas = lake_energy_deltas[:,1:]

    surf_phys = phys[:, 0, :, :]
    surf_pred = preds[:, 0, :]

    lake_energy_fluxes = energy_fluxes_calculation(surf_phys, surf_pred)
    diff_vec = torch.abs(lake_energy_deltas - lake_energy_fluxes) 

    tmp_mask = 1 - phys[:, 0, 1:-1, 9] 
    tmp_loss = torch.mean(diff_vec * tmp_mask, axis = 1)


    ec_threshold = 20
    diff_per_set = torch.clamp(tmp_loss - ec_threshold, min=0, max=999999)
    diff_loss = torch.mean(diff_per_set)
    return diff_loss



class LakeLoss:
    def __init__(self,
                 elam = 0.005, ## loss weight
                 n_depths = None, 
                 depth_areas = None, 
                 ec_threshold = None, 
                 depth_loss = False, 
                 ec_loss = False):
        self.n_depths = n_depths
        self.depth_loss = depth_loss
        self.ec_loss =ec_loss
        self.depth_areas = depth_areas
        self.ec_threshold = ec_threshold
        self.elam = elam
        
        if self.ec_loss:
            if depth_areas is None:
                self.depth_areas = torch.Tensor([
                39865825,38308175,38308175,35178625,35178625,33403850,31530150,31530150,30154150,30154150,29022000,
                29022000,28063625,28063625,27501875,26744500,26744500,26084050,26084050,25310550,24685650,24685650,
                23789125,23789125,22829450,22829450,21563875,21563875,20081675,18989925,18989925,17240525,17240525,
                15659325,14100275,14100275,12271400,12271400,9962525,9962525,7777250,7777250,5956775,4039800,4039800,
                2560125,2560125,820925,820925,216125])
                self.n_depths = 50
                self.ec_threshold = 24
    
    def __call__(self, pred, label, mask, phy):
        
        rmse_loss_val = self.weighted_rmse_loss(pred, label, mask)
        if self.ec_loss:
            ec_loss_val = EC_loss(pred, phy, self.depth_areas, self.n_depths, self.ec_threshold)
            total_loss = rmse_loss_val + self.elam * ec_loss_val
        else:
            total_loss = rmse_loss_val
            
        return total_loss
        
    
    def weighted_rmse_loss(self, input, target, weight):
        # defined weighted rmse loss
        # used in model training
        # weight means mask
        return torch.sqrt(torch.sum(weight * (input - target) ** 2) / torch.sum(weight))


# Trainer

In [17]:
import inspect
import logging
import time

from tqdm import tqdm

import torch.nn as nn
import torch.optim as optim
import torch
import numpy as np
import pandas as pd

def get_module_forward_input_names(module: nn.Module):
    params = inspect.signature(module.forward).parameters
    param_names = [k for k, v in params.items() if not str(v).startswith("*")]
    return param_names

logging.basicConfig(level = 'INFO', # DEBUG
        format = "%(asctime)s %(levelname)s:%(lineno)d] %(message)s",
        datefmt = "%Y-%m-%d %H:%M:%S")

class Trainer:
    def __init__(self, 
                 epochs = 500,
                 learning_rate = 1e-2,
                 device = 'cpu',
                 early_stopping_patience = 3):
        
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.device = device
        self.early_stopping_patience = early_stopping_patience
    
    def __call__(self, 
                 model,
                 loss_func,
                 train_loader, 
                 valid_loader):
        is_validation_available = valid_loader is not None
        model.to(self.device)
        self.input_names = get_module_forward_input_names(model)
        optimizer = optim.Adam(model.parameters(), lr=self.learning_rate)
        
        patience = 0
        
        epoch_info = {
                "epoch_no": -1,
                "loss": np.Inf,}
        
        for epoch_no in range(self.epochs):

            epoch_loss = self.loop(epoch_no, model, loss_func, train_loader, optimizer, is_train = True)
            if is_validation_available:
                epoch_loss = self.loop(
                    epoch_no, model, loss_func, valid_loader, optimizer, is_train=False
                    )
            if epoch_loss < epoch_info['loss']:
                epoch_info['loss'] = epoch_loss
                epoch_info['epoch_no'] = epoch_no
                patience = 0
            else:
                patience += 1
            if patience >= self.early_stopping_patience:
                logging.info("Early Stopping.")
                break
        
        self.epoch_info = epoch_info
        return model, epoch_info
        
        
    
    def loop(self, epoch_no, model, loss_func, batch_iter, optimizer, is_train):
        epoch_loss = 0
        tic = time.time()
        with tqdm(batch_iter, disable = not is_train) as it:
            for batch_no, data_entry in enumerate(it, start = 1):
                optimizer.zero_grad()
                inputs = [data_entry[k].to(self.device) for k in self.input_names]
                if is_train:
                    output = model(*inputs)
                else:
                    with torch.no_grad():
                        output = model(*inputs)

                loss = loss_func(output, data_entry['label'], data_entry['mask'], data_entry['phy'])
                if is_train:
                    loss.backward()
                    optimizer.step()
                epoch_loss += loss.item()
                
            lv = epoch_loss/batch_no
            it.set_postfix(
                    ordered_dict={
                        "epoch": f"{epoch_no + 1}/{self.epochs}",
                        ("" if is_train else "validation_")
                        + "avg_epoch_loss": lv,},refresh=False,)
            
        toc = time.time()  
        
        if is_train:
            logging.info("Epoch[%d] Elapsed time %.3f seconds",
                epoch_no,
                (toc - tic),)
        logging.info("Epoch[%d] Evaluation metric '%s'=%.4f",
                epoch_no,
                ("" if is_train else "validation_") + "epoch_loss",
                lv, )
        return lv
    

# Model

In [18]:
import torch.nn as nn
import torch


class LSTM_base(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim, n_layers):
        super(LSTM_base, self).__init__()
        self.output_size = output_dim
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(input_size = input_dim, 
                            hidden_size = hidden_dim, 
                            num_layers = n_layers, 
                            batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        batch, depth, seq_len, feat_dim = x.shape
        
        x = x.reshape(batch * depth, seq_len, feat_dim)
        lstm_out, hidden = self.lstm(x)
        out = self.fc(lstm_out)
        out = out.reshape(batch, depth, seq_len, 1).squeeze()
        return out

# Inference

In [19]:
import time
import inspect
from  tqdm import tqdm

def get_module_forward_input_names(module: nn.Module):
    params = inspect.signature(module.forward).parameters
    param_names = [k for k, v in params.items() if not str(v).startswith("*")]
    return param_names


def model_inference(model, test_loader, device = 'cpu'):
    epoch_loss = 0
    tic = time.time()
    
    input_names = get_module_forward_input_names(model)
    with tqdm(test_loader, disable = True) as it:
        for batch_no, data_entry in enumerate(it, start = 1):
            inputs = [data_entry[k].to(device) for k in input_names]
            with torch.no_grad():
                output = model(*inputs)
            target = data_entry['label']
            weight = data_entry['mask']
            loss = torch.sqrt(torch.sum(weight * (output - target) ** 2) / torch.sum(weight))
            epoch_loss += loss.item()

        lv = epoch_loss/len(te_loader)

    toc = time.time()
    return lv, toc - tic

# Pipeline

## Pre-Trained

In [41]:
import os, random, warnings
import numpy as np
import pandas as pd
import sys
import warnings
warnings.filterwarnings("ignore")

data_dir = '../Data/'
output_dir = '../Output/'

strides = 352//2
window_size = 352

tr_loader, va_loader, full_loader, te_loader = get_dataloader(path = data_dir, 
                                                              window_size = window_size,
                                                              strides = strides,
                                                              batch_size = 2,
                                                              mask_rate = 0,
                                                              simulate = True,
                                                              )



input_size = 9
state_size = 7

trainer = Trainer(device = 'cpu')

model = LSTM_base(input_dim = input_size, output_dim = 1, hidden_dim = state_size, n_layers = 1)

loss_func = LakeLoss(ec_loss = False)
model, epoch_info = trainer(model, loss_func, tr_loader, va_loader)


# trainer.epochs = epoch_info['epoch_no']
# trainer.early_stopping_patience = 100
# model, epoch_info_retrain = trainer(model, loss_func, full_loader, full_loader)

100%|██████████| 12/12 [00:01<00:00,  8.69it/s]
2022-03-01 17:37:30 INFO:101] Epoch[0] Elapsed time 1.391 seconds
2022-03-01 17:37:30 INFO:104] Epoch[0] Evaluation metric 'epoch_loss'=10.7066
2022-03-01 17:37:30 INFO:104] Epoch[0] Evaluation metric 'validation_epoch_loss'=10.3925
100%|██████████| 12/12 [00:01<00:00, 11.66it/s]
2022-03-01 17:37:31 INFO:101] Epoch[1] Elapsed time 1.031 seconds
2022-03-01 17:37:31 INFO:104] Epoch[1] Evaluation metric 'epoch_loss'=10.0185
2022-03-01 17:37:31 INFO:104] Epoch[1] Evaluation metric 'validation_epoch_loss'=9.3409
100%|██████████| 12/12 [00:01<00:00, 11.89it/s]
2022-03-01 17:37:32 INFO:101] Epoch[2] Elapsed time 1.011 seconds
2022-03-01 17:37:32 INFO:104] Epoch[2] Evaluation metric 'epoch_loss'=8.7889
2022-03-01 17:37:33 INFO:104] Epoch[2] Evaluation metric 'validation_epoch_loss'=8.1502
100%|██████████| 12/12 [00:01<00:00, 11.87it/s]
2022-03-01 17:37:34 INFO:101] Epoch[3] Elapsed time 1.012 seconds
2022-03-01 17:37:34 INFO:104] Epoch[3] Evaluat

100%|██████████| 12/12 [00:01<00:00, 12.00it/s]
2022-03-01 17:38:08 INFO:101] Epoch[29] Elapsed time 1.002 seconds
2022-03-01 17:38:08 INFO:104] Epoch[29] Evaluation metric 'epoch_loss'=1.4280
2022-03-01 17:38:09 INFO:104] Epoch[29] Evaluation metric 'validation_epoch_loss'=1.6073
100%|██████████| 12/12 [00:00<00:00, 12.05it/s]
2022-03-01 17:38:10 INFO:101] Epoch[30] Elapsed time 0.997 seconds
2022-03-01 17:38:10 INFO:104] Epoch[30] Evaluation metric 'epoch_loss'=1.3682
2022-03-01 17:38:10 INFO:104] Epoch[30] Evaluation metric 'validation_epoch_loss'=1.5415
100%|██████████| 12/12 [00:00<00:00, 12.08it/s]
2022-03-01 17:38:11 INFO:101] Epoch[31] Elapsed time 0.994 seconds
2022-03-01 17:38:11 INFO:104] Epoch[31] Evaluation metric 'epoch_loss'=1.3171
2022-03-01 17:38:11 INFO:104] Epoch[31] Evaluation metric 'validation_epoch_loss'=1.4994
100%|██████████| 12/12 [00:00<00:00, 12.14it/s]
2022-03-01 17:38:12 INFO:101] Epoch[32] Elapsed time 0.990 seconds
2022-03-01 17:38:12 INFO:104] Epoch[32]

In [22]:
state_size

7

## Test on rely data without training on real data

In [8]:
tr_loader, va_loader, full_loader, te_loader = get_dataloader(path = data_dir, 
                                                              window_size = window_size,
                                                              strides = strides,
                                                              batch_size = 2,
                                                              mask_rate = 0,
                                                              simulate = False,
                                                              )



test_loss, test_time = model_inference(model, te_loader)
print('Test loss is:', test_loss)

# 1.6136637772481466

Test loss is: 2.0938355227311454


# Test with fine-tuneon real data

In [9]:
tr_loader, va_loader, full_loader, te_loader = get_dataloader(path = data_dir, 
                                                              window_size = window_size,
                                                              strides = strides,
                                                              batch_size = 2,
                                                              mask_rate = 0,
                                                              simulate = False,
                                                              )


trainer.epochs = epoch_info['epoch_no']
trainer.early_stopping_patience = 100
model, epoch_info_retrain = trainer(model, loss_func, full_loader, full_loader)

test_loss, test_time = model_inference(model, te_loader)
print('Test loss is:', test_loss)

# 1.6136637772481466

100%|██████████| 12/12 [00:01<00:00, 11.68it/s]
2022-02-28 14:17:09 INFO:101] Epoch[0] Elapsed time 1.029 seconds
2022-02-28 14:17:09 INFO:104] Epoch[0] Evaluation metric 'epoch_loss'=2.0869
2022-02-28 14:17:09 INFO:104] Epoch[0] Evaluation metric 'validation_epoch_loss'=1.7556
100%|██████████| 12/12 [00:01<00:00, 11.26it/s]
2022-02-28 14:17:10 INFO:101] Epoch[1] Elapsed time 1.068 seconds
2022-02-28 14:17:10 INFO:104] Epoch[1] Evaluation metric 'epoch_loss'=1.6813
2022-02-28 14:17:11 INFO:104] Epoch[1] Evaluation metric 'validation_epoch_loss'=1.5679
100%|██████████| 12/12 [00:01<00:00, 11.49it/s]
2022-02-28 14:17:12 INFO:101] Epoch[2] Elapsed time 1.046 seconds
2022-02-28 14:17:12 INFO:104] Epoch[2] Evaluation metric 'epoch_loss'=1.5755
2022-02-28 14:17:12 INFO:104] Epoch[2] Evaluation metric 'validation_epoch_loss'=1.5094
100%|██████████| 12/12 [00:01<00:00, 11.74it/s]
2022-02-28 14:17:13 INFO:101] Epoch[3] Elapsed time 1.023 seconds
2022-02-28 14:17:13 INFO:104] Epoch[3] Evaluation

Test loss is: 1.7111382285753887


In [33]:
model = ConvLSTM(input_dim = input_size, hidden_dim = 1, kernel_size = (3,3), num_layers = 2)

In [37]:
tmp_batch = next(iter(tr_loader))

input_tensor = tmp_batch['x']


In [40]:
tmp_batch['x'].size()

torch.Size([2, 50, 352, 9])

In [43]:
x_full = np.load(data_dir + '/processed_features.npy') #standardized inputs
x_raw_full = np.load(data_dir + '/features.npy') #raw inputs

In [44]:
x_full[0,-2,:]

array([ 1.70143216, -1.69774938, -1.50129899,  0.07368764, -0.4396963 ,
        1.06835533,  0.34256487, -0.38742411, -0.17148342])

In [45]:
x_full[40,-2,:]

array([ 1.70143216,  1.07408634, -1.50129899,  0.07368764, -0.4396963 ,
        1.06835533,  0.34256487, -0.38742411, -0.17148342])