## Import modules

In [1]:
import pickle
import json

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorchtools
import glob as gl
import random
import os
import time
import itertools

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.datasets import make_regression
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import log_loss
from numpy import hstack
from numpy import vstack

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import warnings
warnings.filterwarnings(action='ignore')

## Data loading

In [2]:
def find_directory(foldername, filename = None, back_num = 0):
    cur = os.getcwd()
    for i in range(back_num):
        cur = os.path.abspath(os.path.join(cur, os.pardir))
    for folder in foldername:
        cur = os.path.join(cur, folder)
    if not os.path.exists(cur):
        os.makedirs(cur)
        print(f'{cur} created')
    if filename != None:
        cur = os.path.join(cur, filename)
    return cur

os.getcwd()
find_directory(back_num = 1, foldername = ['Dataset'], filename = 'bat_dict.pkl')

'C:\\Users\\User\\Desktop\\LIB\\CNN work\\Variable time length\\All dataset'

'C:\\Users\\User\\Desktop\\LIB\\CNN work\\Variable time length\\Dataset\\bat_dict.pkl'

In [3]:
bat_dict_add_1_2 = find_directory(back_num = 2, foldername = ['Dataset'], filename = 'bat_1_2_dict_VITQcQd.pkl')
with open(bat_dict_add_1_2, 'rb') as tf:
    bat_dict_1_2 = pickle.load(tf)

bat_sel_dict = bat_dict_1_2
max_time = 120

In [4]:
# batch1: 0~40(41)
# batch2: 41~83(43)
# batch3: 64~123(40)

b1 = 0
b2 = 0
b3 = 0

for cell in bat_sel_dict.keys():
    if cell.startswith('b1'):
        b1 += 1
    elif cell.startswith('b2'):
        b2 += 1
    else:
        b3 += 1

print(b1, b2, b3)

# Tensor dimension: (cell, cycle, time, variable)
regression_cycles = [30, 50, 80]

# variable = [Qc, I, V, T, Qd]
num_variables = len(bat_sel_dict['b1c0']['1']['1.0'].keys())

for reg in regression_cycles:
    globals()[f'x_tensor_{reg}'] = torch.zeros(b1+b2+b3, reg, max_time, num_variables)
    cell_index = 1
    for cell in bat_sel_dict.keys():
        for cycle in range(1, reg+1):
            for time in bat_sel_dict[cell][str(cycle)].keys():
    #             print(bat_sel_dict[cell][str(cycle)][str(time)])
                value_list = [i for i in bat_sel_dict[cell][str(cycle)][str(time)].values()]
    #             print(value_list)
                globals()[f'x_tensor_{reg}'][cell_index-1, cycle-1, int(float(time)*max_time/60), :] = torch.FloatTensor(value_list)

        cell_index += 1
    
    globals()[f'x_tensor_{reg}'].size()
    
num_variables = globals()[f'x_tensor_{reg}'].shape[3]
time_length = globals()[f'x_tensor_{reg}'].shape[2]

41 43 40


torch.Size([124, 30, 120, 5])

torch.Size([124, 50, 120, 5])

torch.Size([124, 80, 120, 5])

In [5]:
y_df_add = find_directory(back_num = 2, foldername = ['Dataset', 'y_df.csv'])
y_df = pd.read_csv(y_df_add)

# 2. Regression problem
# KO, KP, CL
y_cl_numpy = y_df['Cycle life'].to_numpy()
y_kp_numpy = y_df['kneepoints'].to_numpy()
y_ko_numpy = y_df['kneeonsets'].to_numpy()
# y_ko_numpy.shape

y_regression_numpy = y_ko_numpy

y_ko_tensor = torch.FloatTensor(y_ko_numpy)
y_ko_tensor.size()

torch.Size([124])

In [6]:
def setRandomSeed(random_seed=0):
    os.environ['PYTHONHASHSEED'] = str(random_seed)
    torch.manual_seed(random_seed) # torch 
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
    torch.backends.cudnn.deterministic = True # cudnn
    torch.backends.cudnn.benchmark = False # cudnn
    np.random.seed(random_seed) # numpy
    random.seed(random_seed) # random

### Dataloading as 2D CNN's method

In [7]:
# y: no scaling
class dataPrep_RNN_CNN(Dataset):  
    def __init__(self, x_tensor, y_tensor, batch_size, scaler, batches, test_split: float):
        setRandomSeed(random_seed = 100)
        self.xdata = torch.permute(x_tensor, (0, 3, 2, 1))
        self.ydata = y_tensor
        
        self.batch_size = batch_size
                
        self.xscaler = scaler()
        batches = [0] + batches
        self.batches = batches
        self.test_split = test_split
        
        # Tensor dimension: (cell, variable, time, cycle)
        # If problem emerges, check this part first.
        # Issue: 각 batch마다 따로 scaling을 해야하나?
        for i in range(len(self.xdata)):
            for j in range(self.xdata.shape[1]):
                temp = np.expand_dims(self.xscaler.fit_transform(self.xdata[i,j,:,:]), axis=0)
                if j ==0:
                    temp2 = temp
                else:
                    temp2 = np.vstack((temp2,temp))
            temp2 = np.expand_dims(temp2, axis=0)
            if i==0:
                self.xdata_scaled = temp2
            else:
                self.xdata_scaled = np.vstack((self.xdata_scaled, temp2))

        self.ydata_scaled = self.ydata
        
#         self.all = list(zip(self.xdata_scaled, self.ydata_scaled))
        
        prev = 0
        for i in range(1, len(self.batches)):
            globals()[f'self.b{i}_xdata_scaled'] = self.xdata_scaled[prev:prev + self.batches[i]]
            globals()[f'self.b{i}_ydata_scaled'] = self.ydata_scaled[prev:prev + self.batches[i]]
            
            prev +=self.batches[i]
            
            globals()[f'self.b{i}_train_size'], globals()[f'self.b{i}_val_size'], globals()[f'self.b{i}_test_size'] = self.get_splits_per_batch(self.batches[i])
            
            globals()[f'self.b{i}_x_train_scaled'], globals()[f'self.b{i}_x_test_scaled'], globals()[f'self.b{i}_y_train_scaled'], globals()[f'self.b{i}_y_test_scaled'] = train_test_split(globals()[f'self.b{i}_xdata_scaled'], 
                                                                                                            globals()[f'self.b{i}_ydata_scaled'], 
                                                                                                            test_size = globals()[f'self.b{i}_test_size'], 
                                                                                                            shuffle = True, 
                                                                                                            random_state=100)
        
            globals()[f'self.b{i}_x_train_scaled'], globals()[f'self.b{i}_x_val_scaled'], globals()[f'self.b{i}_y_train_scaled'], globals()[f'self.b{i}_y_val_scaled'] = train_test_split(globals()[f'self.b{i}_x_train_scaled'], 
                                                                                                            globals()[f'self.b{i}_y_train_scaled'], 
                                                                                                            test_size = globals()[f'self.b{i}_val_size'], 
                                                                                                            shuffle = True, 
                                                                                                            random_state=100)
            
            print(f'Batch {i}, total {self.batches[i]} cells')
            print('x_train, x_val, x_test:', globals()[f'self.b{i}_x_train_scaled'].shape, globals()[f'self.b{i}_x_val_scaled'].shape, globals()[f'self.b{i}_x_test_scaled'].shape)
            print('y_train, y_val, y_test:', globals()[f'self.b{i}_y_train_scaled'].shape, globals()[f'self.b{i}_y_val_scaled'].shape, globals()[f'self.b{i}_y_test_scaled'].shape)
        
            globals()[f'self.b{i}_train'] = list(zip(globals()[f'self.b{i}_x_train_scaled'], globals()[f'self.b{i}_y_train_scaled']))
            globals()[f'self.b{i}_val'] = list(zip(globals()[f'self.b{i}_x_val_scaled'], globals()[f'self.b{i}_y_val_scaled']))
            globals()[f'self.b{i}_test'] = list(zip(globals()[f'self.b{i}_x_test_scaled'], globals()[f'self.b{i}_y_test_scaled']))

            globals()[f'self.b{i}_train_dataloader'] = DataLoader(globals()[f'self.b{i}_train'], batch_size = self.batch_size, shuffle=True)
            globals()[f'self.b{i}_val_dataloader'] = DataLoader(globals()[f'self.b{i}_val'], batch_size = self.batch_size, shuffle=True)
            globals()[f'self.b{i}_test_dataloader'] = DataLoader(globals()[f'self.b{i}_test'], batch_size = self.batch_size, shuffle=True)
            
    # split the datasets into training and testing
    def get_splits_per_batch(self, batch_len):
        # test size
        test_size = int(self.test_split*batch_len)
        # val size
        val_size = int(0.2*(batch_len - test_size))
        # train_size
        train_size = batch_len - val_size - test_size
        return train_size, val_size, test_size
    
    # Returns dataloaders for training and validation datasets
    def train_data(self):
        self.train_dataloader = [globals()[f'self.b1_train_dataloader']]
        self.val_dataloader = [globals()[f'self.b1_val_dataloader']]
        if len(self.batches)>2:
            for i in range(2, len(self.batches)):
                self.train_dataloader.append(globals()[f'self.b{i}_train_dataloader'])
                self.val_dataloader.append(globals()[f'self.b{i}_val_dataloader'])
        return self.train_dataloader, self.val_dataloader
    
    # Returns dataloader for test datasets
    def test_data(self):
        self.test_dataloader = [globals()[f'self.b1_test_dataloader']]
        if len(self.batches)>2:
            for i in range(2, len(self.batches)):
                self.test_dataloader.append(globals()[f'self.b{i}_test_dataloader'])
        return self.test_dataloader
    
    def scaler(self):
        return self.xscaler
    
    # finding length of x 
    def __len__(self):
        return len(self.X)
    
    # indexing rows for calling
    def __getitem__(self, idx):
        return [self.X[idx], self.y[idx]]


In [8]:
# reg_dataset = dataPrep1D(x_tensor, y_ko_tensor, batch_size = 16, scaler = MinMaxScaler, purpose = 'regression', test_split = 0.2)
# reg_train_dataloader, reg_val_dataloader = reg_dataset.train_data()
# reg_test_dataloader = reg_dataset.test_data()
# reg_xscaler = reg_dataset.scaler('regression')

regression_cycles = [30, 50, 80]
# regression_cycles = [100]
batches = [b1, b2, b3]

# x와 y가 1대1 대응이어야 dataloader를 만들 수 있어서 2D array 그대로 놓고 학습시 tensor transform 예정
for reg in regression_cycles:
    print(reg, 'cycles')
    globals()[f'reg_dataset_{reg}'] = dataPrep_RNN_CNN(globals()[f'x_tensor_{reg}'], y_ko_tensor, batch_size = 4, scaler = MinMaxScaler, batches = batches, test_split = 0.2)
    globals()[f'reg_train_dataloader_{reg}'], globals()[f'reg_val_dataloader_{reg}'] = globals()[f'reg_dataset_{reg}'].train_data()
    globals()[f'reg_test_dataloader_{reg}'] = globals()[f'reg_dataset_{reg}'].test_data()
    globals()[f'reg_xscaler_{reg}'] = globals()[f'reg_dataset_{reg}'].scaler()
    for batch in range(len(batches)):
        globals()[f'b{batch+1}_train_dataloader_{reg}'], globals()[f'b{batch+1}_val_dataloader_{reg}'], globals()[f'b{batch+1}_test_dataloader_{reg}'] = globals()[f'reg_train_dataloader_{reg}'][batch], globals()[f'reg_val_dataloader_{reg}'][batch], globals()[f'reg_test_dataloader_{reg}'][batch]

30 cycles
Batch 1, total 41 cells
x_train, x_val, x_test: (27, 5, 120, 30) (6, 5, 120, 30) (8, 5, 120, 30)
y_train, y_val, y_test: torch.Size([27]) torch.Size([6]) torch.Size([8])
Batch 2, total 43 cells
x_train, x_val, x_test: (28, 5, 120, 30) (7, 5, 120, 30) (8, 5, 120, 30)
y_train, y_val, y_test: torch.Size([28]) torch.Size([7]) torch.Size([8])
Batch 3, total 40 cells
x_train, x_val, x_test: (26, 5, 120, 30) (6, 5, 120, 30) (8, 5, 120, 30)
y_train, y_val, y_test: torch.Size([26]) torch.Size([6]) torch.Size([8])
50 cycles
Batch 1, total 41 cells
x_train, x_val, x_test: (27, 5, 120, 50) (6, 5, 120, 50) (8, 5, 120, 50)
y_train, y_val, y_test: torch.Size([27]) torch.Size([6]) torch.Size([8])
Batch 2, total 43 cells
x_train, x_val, x_test: (28, 5, 120, 50) (7, 5, 120, 50) (8, 5, 120, 50)
y_train, y_val, y_test: torch.Size([28]) torch.Size([7]) torch.Size([8])
Batch 3, total 40 cells
x_train, x_val, x_test: (26, 5, 120, 50) (6, 5, 120, 50) (8, 5, 120, 50)
y_train, y_val, y_test: torch.Siz

## Define Model Class 

In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cuda device


In [10]:
class RNNcell(nn.Module):
    def __init__(self, model, input_size, hidden_size, dropout = 0.1, bidirectional = False, num_layer=1):
        super(RNNcell, self).__init__()
        
        if torch.cuda.is_available():
            self.device = 'cuda'
        else:
            self.device = 'cpu'
            
        self.model = model
        self.hidden_size = hidden_size
        self.num_layer = num_layer
        self.bidirectional = bidirectional
        
        self.D = 1 + self.bidirectional
        self.batch_size = 4
        
        #Batch size * cycles, timestep, input_size
        if self.model.endswith('RNN'):
            self.rnn = nn.RNN(input_size, hidden_size, num_layer, batch_first = True, dropout = dropout, bidirectional = bidirectional).to(self.device)
        elif self.model.endswith('LSTM'):
            self.rnn = nn.LSTM(input_size, hidden_size, num_layer, batch_first = True, dropout = dropout, bidirectional = bidirectional).to(self.device)
        elif self.model.endswith('GRU'):
            self.rnn = nn.GRU(input_size, hidden_size, num_layer, batch_first = True, dropout = dropout, bidirectional = bidirectional).to(self.device)
            
        for name, param in self.rnn.named_parameters():
            if name.startswith('weight'):
                nn.init.xavier_uniform(param)
            else:
                nn.init.normal(param)
        
    def forward(self, x):
        # x made from (cell * n_cy, timestep(seq_len), num_vars)
        self.batch_size = x.size(0)
        
        D = 1 + self.bidirectional
        #out: Containing output features h_t from the last layer of LSTM for each t(if bidirectional ==True: concat of forward and backward hidden states)
        #h_n: Containing final hidden state in the sequence
        h0 = torch.zeros(D*self.num_layer, self.batch_size, self.hidden_size).to(self.device)
        if self.model.endswith('LSTM'):
            c0 = torch.zeros(D*self.num_layer, self.batch_size, self.hidden_size).to(self.device)
            # c_n: Containing final cell state in the sequence
            out, (hn, cn) = self.rnn(x, (h0, c0))
        else:
            out, hn = self.rnn(x, h0.detach())
        return out, hn

In [11]:
class SelfAttention(nn.Module):
    def __init__(self, hidden_size, num_heads):
        super(SelfAttention, self).__init__()
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.head_size = hidden_size // num_heads
        
        self.query = nn.Linear(hidden_size, hidden_size)
        self.key = nn.Linear(hidden_size, hidden_size)
        self.value = nn.Linear(hidden_size, hidden_size)
        self.softmax = nn.Softmax(dim=-1)
        
    def forward(self, inputs):
        # inputs shape: [batch_size, seq_len, hidden_size]
        batch_size, seq_len, hidden_size = inputs.size()
        
        # Compute query, key, and value matrices
        query = self.query(inputs).view(batch_size, seq_len, self.num_heads, self.head_size).transpose(1, 2)
        key = self.key(inputs).view(batch_size, seq_len, self.num_heads, self.head_size).transpose(1, 2)
        value = self.value(inputs).view(batch_size, seq_len, self.num_heads, self.head_size).transpose(1, 2)
        
        # Compute scaled dot-product attention scores
        # attention_scores: [batch_size, num_heads, seq_len, seq_len]
        attention_scores = torch.matmul(query, key.transpose(-2, -1)) / torch.sqrt(torch.tensor(self.head_size, dtype=torch.float32))
        attention_probs = self.softmax(attention_scores)
        
        # Apply attention to the value matrix and concatenate heads
        # context: [batch_size, num_heads, seq_len, head_size]=>[batch_size, seq_len, num_heads, head_size] 
        # => [batch_size, seq_len, num_heads * head_size]
        context = torch.matmul(attention_probs, value).transpose(1, 2).contiguous().view(batch_size, seq_len, -1)
#         context = nn.Linear(hidden_size, 1)(context)
        return attention_probs, context

In [12]:
class RNN_TA_CA_1DCNN(nn.Module):
    def __init__(self, input_size, num_time, num_cycles, rnn1, bi1, hid1, nhead2, fil2, pool2, npool2, fsize2, psize2, mids, dr1 = 0.1, dr2 = 0.1, di2 = 1, st2 = 1, pad2 = 0):
        super(RNN_TA_CA_1DCNN, self).__init__()
        setRandomSeed()
        
        self.D1 = 1 + bi1
        
        self.rnn1 = RNNcell(rnn1, input_size, hid1, dr1, bi1)
        
        if torch.cuda.is_available():
            self.device = 'cuda'
        else:
            self.device = 'cpu'
        
        # Overall parameters
        self.num_time = num_time
        self.num_cycles = num_cycles
        self.mids = mids
        
        # Pooling/Nonpooling parameters
        # Pooling/Nonpooling layers
        self.pool2 = pool2
        self.npool2 = npool2
        # Pooling size(Pooling layer)
        self.psize2 = psize2
        
        # Params for self-attention(ca)
        self.nh2 = nhead2
        
        # CNN hyperparameters
        self.fil2 = fil2
        self.fsize2 = fsize2
        self.di2 = di2
        self.st2 = st2
        # Padding for CNN
        self.pad2 = pad2
                
        # Critical values for the 1st CNN layer
        # Batch for different cells
        
        # Conv1d takes input dimension: [Batch_size, c_in(no. of in_channels )= hidden_size, l_in = num_cycles]
        self.in2 = self.D1*hid1
        self.out2 = self.fil2
        self.Lout = self.num_cycles
        
        # Conv_block1~P
        # Pooling layers
        for i in range(1, self.pool2+1):
#             print(f"i: {i}, out_channels: {self.out_channels}")
            globals()[f'self.conv_block{i}'] = nn.Sequential(
            nn.Conv1d(self.in2, self.out2, self.fsize2, self.st2, self.pad2),
            nn.BatchNorm1d(self.out2),
            nn.ReLU(),
            nn.MaxPool1d(self.psize2)
            ).to(self.device)

            self.in2 = self.out2
            self.out2 = self.out2*2
            self.Lout = int(int((self.Lout+2*self.pad2-self.di2*(self.fsize2-1)-1)/self.st2+1)/self.psize2)

        for j in range(self.pool2+1, self.pool2+self.npool2+1):
            globals()[f'self.conv_block{j}'] = nn.Sequential(
            nn.Conv1d(self.in2, self.out2, self.fsize2, self.st2, self.pad2),
            nn.BatchNorm1d(self.out2),
            nn.ReLU(),
            ).to(self.device)
            
            self.in2 = self.out2
            self.out2 = self.out2*2
            self.Lout = int((self.Lout+2*self.pad2-self.di2*(self.fsize2-1)-1)/self.st2+1)
        
        self.lin1 = nn.Linear(self.D1*hid1, 1)
        self.sm1 = nn.Softmax(dim = 2)
        self.relu = nn.ReLU()
        
        self.ca = SelfAttention(self.D1*hid1, self.nh2)
        
        self.fc = nn.Sequential(nn.Linear(self.in2*self.Lout, self.mids[0], bias=True), nn.ReLU(),
                       nn.Linear(self.mids[0], self.mids[1], bias = True), nn.ReLU(),
                       nn.Linear(self.mids[1], self.mids[2], bias = True))
        
        self.fc.apply(self.init_weights)
        
    def init_weights(self, m):
        self.m = m
        if type(self.m) == nn.Linear:
            nn.init.xavier_uniform_(self.m.weight)
        
    def forward(self, x):
        # Original: Batch size, num_vars, timestep, cycles=>Batch size * cycles, timestep, input_size
#         print("Batch size, input_size, timestep, cycles: ", x.size())
        x = torch.reshape(x, (x.size(0)*x.size(3), x.size(2), x.size(1))).to(device)
#         print("Batch size * cycles, timestep, input_size: ", x.size())
        out1, hn1 = self.rnn1(x)
#         print('out1: (batch_size = num_cell*num_cycle, timestep, D1*hid1)', out1.size())
#         print(hn1.size())
        
        # hn의 batch size가 n_cy*k로 설정
        cell_batch_size = int(out1.size(0)/self.num_cycles)

        # Reshape for input
        # out1: (batch_size = num_cell*num_cycle, timestep, D1*hid1)
        # outs : (batch_size(num_cell), num_cycle, timestep, D1*hid1)
        outs1 = torch.reshape(out1, (cell_batch_size, -1, out1.size(1), out1.size(2)))
        # ta: (batch_size, num_cycle, timestep, 1(D1*hid1->1))
        ta = self.sm1(self.lin1(outs1))
        # ta_outs: (batch_size, num_cycle, timestep, D1*hid1)=>(batch_size, D1*hid1, num_cycle, timestep)
        ta_outs = torch.permute(ta*outs1, (0, 3, 1, 2))
        # ct_vec: (batch_size, D1*hid1, num_cycle)
        ct_vec = torch.sum(ta_outs, -1)
        # ct_vec shape: [batch_size, num_cycle, D1*hid1(hidden_size)]
        # hidden_size = num_heads * head_size
        # ct_vec transform back: [batch_size, num_heads, num_cycle, head_size]=>[batch_size, num_cycle, num_heads, head_size] 
        # => [batch_size, num_cycle, num_heads * head_size(hidden_size)]
        # ca: matmul(query[batch_size, num_heads, num_cycle, head_size], key^T[batch_size, num_heads, head_size, num_cycle])
        # ca shape: [batch_size, num_heads, num_cycle, num_cycle]
        ca, ct_vec = self.ca(ct_vec.transpose(2, 1))
#         print(f"After self attention: ca.size: {ca.size()}, ct_vec.size: {ct_vec.size()}")
        # ct_vec shape: [batch_size, D1*hid1, num_cycle]
        ct_vec = ct_vec.transpose(2, 1)
        
        
        # input for 1d cnn: (batch_size, D1*hid1, num_cycle)
        for i in range(1, self.pool2+self.npool2+1):
            ct_vec = globals()[f'self.conv_block{i}'](ct_vec)
        # ct_vec from final convolutional layer: (batch_size, self.in2*self.Lout)
        ct_vec = ct_vec.view(ct_vec.size(0), -1)
        # final_out: (batch_size, 1)
        final_out = self.fc(ct_vec)
        
        return final_out.squeeze(), ta.squeeze(), ca

In [13]:
# Function for saving and loading of training history
def save_data(D3_array, filename):
    with open(filename,"wb") as dat_:
        pickle.dump(D3_array,dat_)
        
def load_data(filename):
    with open(filename,"rb") as ld:
        x_temp = pickle.load(ld)
    return x_temp

# Early stopping class
class EarlyStopping:
    """주어진 patience 이후로 validation loss가 개선되지 않으면 학습을 조기 중지"""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): validation loss가 개선된 후 기다리는 기간
                            Default: 7
            verbose (bool): True일 경우 각 validation loss의 개선 사항 메세지 출력
                            Default: False
            delta (float): 개선되었다고 인정되는 monitered quantity의 최소 변화
                            Default: 0
            path (str): checkpoint저장 경로
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.best_epoch = 1
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, epoch, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.best_model = self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
#             print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.best_epoch = epoch
            self.best_model = self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''validation loss가 감소하면 모델을 저장한다.'''
        if self.verbose:
            print(f'At epoch {self.best_epoch}: Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss
        return model

In [14]:
class train:
    def __init__(self, model: nn.Module, train_dataloader, val_dataloader, test_dataloader, epoch: int, learning_rate=0.01, patience = 5, verbose = False):
        super().__init__()
        #   Reprocudtion
        setRandomSeed(100)
        random_seed = 100
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(random_seed)
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
        os.environ['PYTHONHASHSEED'] = str(random_seed)
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)

        if torch.cuda.is_available():
            self.device = 'cuda'
        else:
            self.device = 'cpu'
        
        self.model = model.to(self.device)
        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.test_dataloader = test_dataloader
        self.epoch = epoch
        self.learning_rate = learning_rate
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr = self.learning_rate)
        self.early_stopping = EarlyStopping(patience = patience, verbose = verbose)

        self.criterion = nn.MSELoss()
        self.name = ['train_rmse', 'val_rmse', 'test_rmse']
        
    def EvalModel(self):
        EvalPredictions, EvalActuals = list(), list()
        TestPredictions, TestActuals = list(), list()
        self.model.eval()
        with torch.no_grad():
            for n, (inputs, targets) in enumerate(self.val_dataloader):
                inputs = inputs.float().to(self.device)
                targets = targets.float().to(self.device)

                yhat, ta, ca = self.model(inputs)

                if self.device !='cpu':
                    yhat = yhat.to('cpu')
                    targets = targets.to('cpu')    

                yhat = yhat.detach().numpy()
                actual = targets.numpy()
                if n==0:
                    EvalPredictions = yhat
                    EvalActuals = actual
                else:
                    EvalPredictions = np.concatenate((EvalPredictions, yhat), axis = 0)
                    EvalActuals = np.concatenate((EvalActuals, actual), axis = 0)

#             EvalPredictions, EvalActuals = vstack(EvalPredictions), vstack(EvalActuals)
            val_rmse = mean_squared_error(EvalActuals, EvalPredictions, squared = False)
            
            for n, (inputs, targets) in enumerate(self.test_dataloader):
                inputs = inputs.float().to(self.device)
                targets = targets.float().to(self.device)

                yhat, ta, ca = self.model(inputs)

                if self.device !='cpu':
                    yhat = yhat.to('cpu')
                    targets = targets.to('cpu')    

                yhat = yhat.detach().numpy()
                actual = targets.numpy()
                    
                if n==0:
                    TestPredictions = yhat
                    TestActuals = actual
                else:
                    TestPredictions = np.concatenate((TestPredictions, yhat), axis = 0)
                    TestActuals = np.concatenate((TestActuals, actual), axis = 0)

#             TestPredictions, TestActuals = vstack(TestPredictions), vstack(TestActuals)
            test_rmse = mean_squared_error(TestActuals, TestPredictions, squared = False)
            
            return val_rmse, test_rmse

    def TrainModel(self):
        loss_history = []
        self.model.train()
        for i in range(self.epoch):
            for n, (inputs, targets) in enumerate(self.train_dataloader):
                self.model.train()
                inputs = inputs.float().to(self.device)
                targets = targets.float().to(self.device)

                self.optimizer.zero_grad()

                yhat, ta, ca = self.model(inputs)
                    
                loss = self.criterion(yhat, targets)
                loss.backward()
                self.optimizer.step()
                
                if self.device !='cpu':
                    yhat = yhat.to('cpu')
                    targets = targets.to('cpu')
                    
                yhat = yhat.detach().numpy()
                targets = targets.numpy()
                
                if n==0:
                    predictions = yhat
                    actuals = targets
                else:
                    predictions = np.concatenate((predictions, yhat), axis = 0)
                    actuals = np.concatenate((actuals, targets), axis = 0)

            # evaluation of entire train data
#             predictions, actuals = np.append(predictions), np.append(actuals)
#             print("predictions: ", predictions)
#             print("actuals: ", actuals)
            
            train_rmse = mean_squared_error(actuals, predictions, squared = False)

            # evaluation on validation data
            val_rmse, test_rmse = self.EvalModel()
#             print("Batch ", i, train_rmse, val_rmse, test_rmse)
            early_factor = val_rmse
            
            # training loss history
            loss_history.append([train_rmse, val_rmse, test_rmse])

            self.early_stopping(i+1, early_factor, self.model)
            
            if self.early_stopping.early_stop:
                print("Early stopping at best epoch: ", self.early_stopping.best_epoch)
                self.best_loss = loss_history[self.early_stopping.best_epoch-1]
                loss_history = loss_history[:self.early_stopping.best_epoch]
                
                return self.model, self.best_loss, pd.DataFrame(loss_history, columns = self.name), ta, ca
            
        print(f"Ends at final epoch {self.epoch}")
        print(f"Best epoch: {self.early_stopping.best_epoch}")
        self.best_loss = loss_history[self.early_stopping.best_epoch-1]
        loss_history = loss_history[:self.early_stopping.best_epoch]
        
        return self.model, self.best_loss, pd.DataFrame(loss_history, columns = self.name), ta, ca
        
    def predict(self, data: torch.Tensor):
        return self.model(data)
    
    def train_val_test_plot_rmse(self, num_cycles, modelname, n_ep, patience, rnn1, hid1, fil2, pool2, npool2, fsize2, psize2, lr):
        dataloaders = [self.train_dataloader, self.val_dataloader, self.test_dataloader]

        with torch.no_grad():
            model.eval()

            model_rmse = list()
            dset_index = 0
            for dataloader in dataloaders:
                if dset_index ==0:
                    dset = 'train'
                elif dset_index == 1:
                    dset = 'val'
                else:
                    dset = 'test'

                figadd = find_directory(back_num = 0, foldername = [f'{num_cycles} cycles', f'Depth Test_col_{n_ep}_{patience}', modelname, f'{rnn1}_1D CNN', 'train_history'], 
                                             filename = f'{rnn1}_hidden_{hid1}_n_fil_{fil2}_pool_{pool2}_npool_{npool2}_fsize_{fsize2}_psize_{psize2}_lr_1_{int(1/lr)}.jpg')

                predictions, actuals = list(), list()
                for i, (test_input, test_target) in enumerate(dataloader):
                    test_input = test_input.float().to(self.device)
                    test_target = test_target.float().to(self.device)
                    test_yhat, test_ta = model(test_input)

                    if self.device != 'cpu':
                        test_yhat = test_yhat.to('cpu')
                        test_target = test_target.to('cpu')

                    test_yhat = test_yhat.detach().tolist()
                    predictions += test_yhat
                    test_target = test_target.tolist()
                    actuals += test_target

                pred_acc, targ_acc = np.array(predictions), np.array(actuals)

                rmse = mean_squared_error(actuals, predictions, squared = False)
                model_rmse.append(rmse)

                plt.figure(figsize=[10,4])
                plt.rcParams["font.size"] = "12"
                plt.plot(pred_acc, label='Prediction')
                plt.plot(range(len(targ_acc)), targ_acc, label = 'Actual')
                plt.xticks(range(0, len(targ_acc) + 1, int(len(targ_acc)/5)))
                plt.title(f"{rnn1}_hid_{hid1}_nfil_{fil2}_pool_{pool2}_npool_{npool2}_fsize_{fsize2}_psize_{psize2}: RMSE={rmse}")
                plt.xlabel('cell')
                plt.ylabel('Lifespan(cycle)')
                plt.legend()
                plt.savefig(figadd)
                plt.close()

                dset_index += 1

        return model_rmse

## Implementation

In [15]:
def history_state_dict_add(num_cycles, modelname, n_ep, patience, rnn1, hid1, nh2, fil2, pool2, npool2, fsize2, psize2, lr, batch_num):
    history_add = find_directory(back_num = 0, foldername = [f'{num_cycles} cycles', f'Depth Test_col_{n_ep}_{patience}', modelname, f'{rnn1}_1D CNN', f'batch_{batch_num}', 'train_history'], 
                                             filename = f'{rnn1}_hidden_{hid1}_nh_{nh2}_n_fil_{fil2}_pool_{pool2}_npool_{npool2}_fsize_{fsize2}_psize_{psize2}_lr_1_{int(1/lr)}.pkl')
    state_dict_add = find_directory(back_num = 0, foldername = [f'{num_cycles} cycles', f'Depth Test_col_{n_ep}_{patience}', modelname, f'{rnn1}_1D CNN', f'batch_{batch_num}', 'model'], 
                                             filename = f'{rnn1}_hidden_{hid1}_nh_{nh2}_n_fil_{fil2}_pool_{pool2}_npool_{npool2}_fsize_{fsize2}_psize_{psize2}_lr_1_{int(1/lr)}_state_dict.pth')
    ta_add = find_directory(back_num = 0, foldername = [f'{num_cycles} cycles', f'Depth Test_col_{n_ep}_{patience}', modelname, f'{rnn1}_1D CNN', f'batch_{batch_num}', 'ta'],
                                             filename = f'{rnn1}_hidden_{hid1}_nh_{nh2}_n_fil_{fil2}_pool_{pool2}_npool_{npool2}_fsize_{fsize2}_psize_{psize2}_lr_1_{int(1/lr)}_ta.wb')
    ca_add = find_directory(back_num = 0, foldername = [f'{num_cycles} cycles', f'Depth Test_col_{n_ep}_{patience}', modelname, f'{rnn1}_1D CNN', f'batch_{batch_num}', 'ca'],
                                             filename = f'{rnn1}_hidden_{hid1}_nh_{nh2}_n_fil_{fil2}_pool_{pool2}_npool_{npool2}_fsize_{fsize2}_psize_{psize2}_lr_1_{int(1/lr)}_ta.wb')
    return history_add, state_dict_add, ta_add, ca_add

## Each batch

In [None]:
# Hyperparameters
num_vars = globals()[f'x_tensor_{reg}'].size()[3]

modelname = 'RNN_TA_CA_1DCNN'
rnns = ['LSTM', 'GRU', 'RNN']
hids = [3, 5, 7]
num_time = max_time
num_cycless = [30, 50, 80]
ep_pats = [[3000, 500]]
lrs = [1e-4, 1e-3, 1e-2]

num_fils = [3, 5, 7]
pools = [1, 2]
npools = [1, 2]
fsize2 = 3
psize2 = 2
mids = [8, 4, 1]

nh2s = [2, 3, 4, 5]

batch_nums = [1]

import time

for batch_num in batch_nums:
    for [n_ep, patience] in ep_pats:
        for num_cycles in num_cycless:
            print(f"Batch: {batch_num + 1}, num_cycle = {num_cycles}, epoch = {n_ep}, patience = {patience}")
            trdl = globals()[f'b{batch_num+1}_train_dataloader_{num_cycles}']
            vdl = globals()[f'b{batch_num+1}_val_dataloader_{num_cycles}']
            tedl = globals()[f'b{batch_num+1}_test_dataloader_{num_cycles}']
            for rnn1, hid1 in itertools.product(rnns, hids):
                bi1 = False
                if rnn1.startswith('Bi'):
                    bi1 = True
                for pool2, npool2 in itertools.product(pools, npools):
                    print(f'Pooling: {pool2} layers, Nonpooling: {npool2} layers, split by {60/max_time} min')
                    for fil2, lr in itertools.product(num_fils, lrs):
                        for nh2 in nh2s:
                            print(f"rnn = {rnn1}, bi = {bi1}, hid per head = {hid1}, num_heads = {nh2}, num_fil = {fil2}, lr = {lr}")

                            all_hid1 = hid1 * nh2

                            start = time.time()

                            # Construct CNN
                            model = globals()[modelname](num_vars, num_time, num_cycles, rnn1, bi1, all_hid1, nh2, fil2, pool2, npool2, fsize2, psize2, mids).to(device)

                            # Train
                            model_train = train(model, trdl, vdl, tedl, n_ep, lr, patience, verbose = False)
                            best_model, best_loss, history, ta, ca = model_train.TrainModel()

                            print('best_loss = '+ str(best_loss))

                            history_add, state_dict_add, ta_add, ca_add = history_state_dict_add(num_cycles, modelname, n_ep, patience,
                                                                                        rnn1, hid1, nh2, fil2, pool2, npool2, fsize2, psize2, lr, batch_num+1)

                            # saving best_epoch, loss history, ta score
                            save_data(history, history_add)
                            save_data(best_model.state_dict(), state_dict_add)
                            save_data(ta, ta_add)
                            save_data(ca, ca_add)

                            print("time: ", time.time()-start)
                            print("\n\n")

Batch: 2, num_cycle = 30, epoch = 3000, patience = 500
Pooling: 1 layers, Nonpooling: 1 layers, split by 0.5 min
rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.0001
Ends at final epoch 3000
Best epoch: 2976
best_loss = [33.250347, 21.251219, 73.60257]
time:  120.46832275390625



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1495
best_loss = [28.095877, 14.428834, 85.06497]
time:  80.05868721008301



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  928
best_loss = [36.613544, 17.491817, 133.4673]
time:  58.07500696182251



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.78732, 293.05704, 314.22647]
time:  125.02721786499023



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch

Early stopping at best epoch:  89
best_loss = [39.888763, 12.996736, 150.3829]
time:  24.360878705978394



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  407
best_loss = [35.38578, 11.897153, 105.35777]
time:  37.60534334182739



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  113
best_loss = [47.120583, 13.219053, 103.37961]
time:  24.70645260810852



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  147
best_loss = [60.876064, 12.45855, 62.355156]
time:  27.44455599784851



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  368
best_loss = [27.228388, 12.801936, 129.34767]
time:  38.6567223072052



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  79
best_loss = [22.749489, 

Early stopping at best epoch:  1414
best_loss = [54.75804, 13.2489805, 96.9507]
time:  83.49617743492126



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  938
best_loss = [29.38669, 20.182493, 95.838036]
time:  61.67370104789734



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1895
best_loss = [36.661335, 11.379465, 136.23]
time:  104.43706798553467



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  532
best_loss = [46.50645, 6.8059626, 86.9898]
time:  44.0484676361084



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  151
best_loss = [43.605545, 14.152275, 108.55205]
time:  28.248472452163696



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  368
best_loss = [23.147

Early stopping at best epoch:  173
best_loss = [29.287472, 13.143114, 144.25116]
time:  30.20868754386902



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  86
best_loss = [63.324696, 7.6868377, 84.1173]
time:  25.850192546844482



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  141
best_loss = [49.66373, 10.233472, 97.11534]
time:  28.37588119506836



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  67
best_loss = [66.07692, 10.113034, 96.42492]
time:  25.266980409622192



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1205
best_loss = [58.29167, 17.592102, 94.329704]
time:  79.0470654964447



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1078
best_loss = [83.430084

Early stopping at best epoch:  786
best_loss = [31.133013, 15.413713, 147.06491]
time:  52.39836835861206



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  189
best_loss = [53.818718, 11.784113, 86.522705]
time:  27.50518274307251



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  375
best_loss = [46.640438, 10.293264, 108.81723]
time:  35.22942066192627



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  189
best_loss = [42.67016, 10.070509, 87.797356]
time:  28.061315298080444



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  516
best_loss = [51.460167, 12.029634, 86.05437]
time:  40.897485971450806



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  24
best_loss = [55.6

Early stopping at best epoch:  197
best_loss = [23.132591, 12.1238985, 90.84669]
time:  29.706825971603394



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  369
best_loss = [21.083225, 11.3704, 70.6359]
time:  37.41410183906555



Pooling: 2 layers, Nonpooling: 1 layers, split by 0.5 min
rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1346
best_loss = [50.540325, 15.16734, 94.65022]
time:  78.22383332252502



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1202
best_loss = [40.135784, 8.306749, 121.54541]
time:  74.19723176956177



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1536
best_loss = [33.53288, 9.516222, 137.7106]
time:  87.76989030838013



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.

Early stopping at best epoch:  125
best_loss = [32.53965, 10.291215, 96.07287]
time:  27.174994468688965



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  242
best_loss = [59.282997, 11.104974, 88.53053]
time:  32.37597918510437



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  132
best_loss = [55.348934, 11.676276, 87.53027]
time:  27.56169366836548



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  131
best_loss = [54.90959, 8.690185, 134.63239]
time:  27.284446239471436



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  120
best_loss = [42.723186, 12.653051, 103.64422]
time:  27.11059832572937



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  125
best_loss = [31.286942

Early stopping at best epoch:  1021
best_loss = [47.189495, 22.323, 108.6195]
time:  61.18641781806946



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1066
best_loss = [33.340755, 11.912889, 148.40723]
time:  63.22582006454468



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.6807, 292.9484, 314.1221]
time:  124.27333211898804



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  719
best_loss = [39.88514, 15.794939, 93.83938]
time:  50.60563111305237



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  141
best_loss = [37.17739, 13.951705, 123.50683]
time:  25.815876960754395



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  407
best_loss =

Early stopping at best epoch:  141
best_loss = [29.781033, 10.706386, 153.33679]
time:  27.645228385925293



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  144
best_loss = [45.4246, 11.037467, 107.40285]
time:  26.836050033569336



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  402
best_loss = [34.362823, 11.620178, 109.09455]
time:  38.186416149139404



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  48
best_loss = [45.196823, 11.968176, 83.703156]
time:  23.203757286071777



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  143
best_loss = [40.53321, 11.804123, 85.12872]
time:  27.629521369934082



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1346
best_loss = [53.4

Early stopping at best epoch:  928
best_loss = [46.839157, 13.976768, 116.937065]
time:  61.3969566822052



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  891
best_loss = [27.47905, 19.073961, 112.00597]
time:  61.31925821304321



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  189
best_loss = [50.743015, 9.497233, 100.289116]
time:  29.42779803276062



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  424
best_loss = [22.711882, 10.691069, 124.74328]
time:  39.48838233947754



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  143
best_loss = [39.463257, 14.038333, 127.19855]
time:  27.637767791748047



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  141
best_loss = [5

Early stopping at best epoch:  60
best_loss = [52.282276, 14.243682, 100.56992]
time:  25.38365364074707



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  133
best_loss = [39.73789, 12.030395, 116.61112]
time:  28.749021291732788



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  42
best_loss = [44.87402, 9.844516, 103.46021]
time:  24.944985151290894



Pooling: 1 layers, Nonpooling: 1 layers, split by 0.5 min
rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  928
best_loss = [52.376877, 13.752308, 142.02866]
time:  53.20354723930359



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.51743, 292.782, 314.03436]
time:  114.4052324295044



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr =

Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.31766, 274.21207, 296.16837]
time:  123.22544360160828



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  270
best_loss = [16.997105, 16.21916, 114.971756]
time:  29.991151332855225



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  1083
best_loss = [12.878084, 16.971087, 155.47394]
time:  63.0728075504303



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  1308
best_loss = [10.33318, 16.202168, 120.56306]
time:  72.46794176101685



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.224075, 100.48538, 139.75116]
time:  122.04536294937134



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  20
best_

Early stopping at best epoch:  42
best_loss = [50.575184, 11.730626, 92.105156]
time:  22.117798805236816



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1998
best_loss = [53.4225, 13.5320015, 83.1728]
time:  100.70015239715576



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1478
best_loss = [38.521633, 15.200537, 80.812836]
time:  79.4128999710083



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1536
best_loss = [35.587242, 10.579992, 111.08801]
time:  82.63312578201294



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1210
best_loss = [36.62476, 17.741621, 122.45184]
time:  70.20071911811829



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  358
best_loss = [55.

Early stopping at best epoch:  85
best_loss = [54.980377, 12.043597, 113.26788]
time:  25.29327154159546



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  34
best_loss = [59.09562, 12.061275, 112.67119]
time:  22.781967878341675



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  30
best_loss = [45.570564, 12.161168, 82.69802]
time:  22.93889880180359



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  112
best_loss = [34.842743, 15.881999, 86.93652]
time:  26.635443925857544



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  251
best_loss = [16.855371, 9.220845, 103.70851]
time:  32.97000527381897



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.0959

Early stopping at best epoch:  548
best_loss = [33.991478, 12.531324, 112.13838]
time:  40.760236740112305



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  178
best_loss = [45.6136, 11.921942, 100.79576]
time:  25.96053647994995



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  234
best_loss = [27.648394, 16.90382, 109.24013]
time:  28.311761379241943



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  60
best_loss = [52.667873, 14.37145, 100.29462]
time:  21.635002613067627



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  143
best_loss = [23.627522, 14.449531, 128.4084]
time:  24.768872499465942



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  11
best_loss = [57.83929, 1

Early stopping at best epoch:  11
best_loss = [55.30977, 13.069884, 98.65978]
time:  21.571352005004883



Pooling: 2 layers, Nonpooling: 1 layers, split by 0.5 min
rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1850
best_loss = [41.252224, 17.064926, 116.200775]
time:  97.64797496795654



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1254
best_loss = [19.1345, 11.187129, 141.86237]
time:  74.32752728462219



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1504
best_loss = [29.650614, 22.06928, 105.52541]
time:  83.5682168006897



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1805
best_loss = [30.45673, 19.83019, 101.23296]
time:  97.44911909103394



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.

Early stopping at best epoch:  141
best_loss = [34.072353, 17.03323, 115.19527]
time:  27.226837873458862



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  1327
best_loss = [13.214514, 12.884148, 147.56548]
time:  78.15761733055115



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  60
best_loss = [53.885292, 16.136787, 117.41764]
time:  23.57867932319641



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  141
best_loss = [30.528637, 14.429399, 103.07704]
time:  27.322540521621704



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  178
best_loss = [18.436577, 18.404589, 96.59306]
time:  28.964354991912842



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  605
best_loss = [22.914478,

Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.87622, 293.14755, 314.31348]
time:  124.02869915962219



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.23093, 293.5089, 314.66064]
time:  126.42297267913818



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  407
best_loss = [15.385337, 13.247798, 127.64828]
time:  35.37956929206848



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  143
best_loss = [25.413584, 11.623784, 130.79637]
time:  25.3588604927063



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epoch: 2601
best_loss = [24.443542, 15.663587, 83.79991]
time:  123.12161374092102



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epo

Early stopping at best epoch:  42
best_loss = [42.540245, 14.816977, 75.48359]
time:  22.255109310150146



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  424
best_loss = [15.524792, 14.569611, 61.059906]
time:  37.78893494606018



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.52556, 100.83624, 140.02643]
time:  129.8888237476349



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1633
best_loss = [45.917217, 10.063094, 87.390114]
time:  88.6267466545105



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1616
best_loss = [55.861607, 17.93778, 86.719795]
time:  88.70292162895203



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  2122
best_loss = 

Early stopping at best epoch:  60
best_loss = [55.19683, 18.005775, 128.52846]
time:  23.02741003036499



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.712, 274.61484, 296.5533]
time:  129.63268995285034



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  452
best_loss = [13.882304, 15.324803, 145.8015]
time:  39.89672040939331



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  11
best_loss = [55.085842, 13.344646, 93.21451]
time:  20.954328060150146



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  139
best_loss = [26.876242, 11.785356, 87.74491]
time:  26.470223426818848



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [11

Early stopping at best epoch:  2030
best_loss = [34.732742, 16.262602, 155.92352]
time:  93.59455156326294



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1375
best_loss = [31.490892, 13.735051, 95.503265]
time:  71.79204964637756



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  2120
best_loss = [33.705624, 9.887573, 103.16684]
time:  96.5133593082428



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1536
best_loss = [22.77257, 16.53315, 123.114494]
time:  75.04931282997131



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  879
best_loss = [52.941147, 10.136474, 134.11024]
time:  50.99398589134216



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  141
best_loss = [39.

Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.508, 274.40646, 296.3542]
time:  125.78943729400635



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  215
best_loss = [27.620026, 10.27574, 86.06405]
time:  28.934576749801636



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  143
best_loss = [26.902456, 11.942418, 84.03805]
time:  25.55793809890747



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  22
best_loss = [54.664356, 12.130253, 79.54077]
time:  20.944993019104004



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  1271
best_loss = [17.229256, 11.411775, 151.18288]
time:  72.92179274559021



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1332
best_loss = [52.945

Early stopping at best epoch:  1241
best_loss = [48.779976, 15.821945, 102.792625]
time:  71.24195432662964



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1514
best_loss = [37.332985, 19.238836, 124.21532]
time:  82.4526948928833



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  1524
best_loss = [18.924854, 17.297474, 113.84367]
time:  82.09699821472168



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  375
best_loss = [41.831024, 9.501655, 110.7922]
time:  35.49233341217041



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  208
best_loss = [33.726383, 12.188295, 81.60489]
time:  28.800779819488525



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  213
best_loss = [31.65

Early stopping at best epoch:  54
best_loss = [40.439083, 14.268433, 81.949104]
time:  23.665757417678833



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  63
best_loss = [57.40389, 11.3657, 100.33915]
time:  24.265096426010132



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  2396
best_loss = [56.831314, 13.633577, 110.32943]
time:  131.3793065547943



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1056
best_loss = [51.43286, 18.711567, 84.235054]
time:  69.09979677200317



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1113
best_loss = [39.836067, 13.496591, 122.403404]
time:  71.58417534828186



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1054
best_loss = [40.

Early stopping at best epoch:  141
best_loss = [40.333496, 15.640208, 127.709564]
time:  23.457834720611572



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  2164
best_loss = [14.157854, 13.12899, 107.317505]
time:  98.3297667503357



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  189
best_loss = [35.18318, 12.753634, 83.59404]
time:  25.702220678329468



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  42
best_loss = [54.927696, 10.588733, 77.15459]
time:  19.911625146865845



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  11
best_loss = [55.21146, 11.704216, 76.68333]
time:  18.85947561264038



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  178
best_loss = [30.469116, 14

Early stopping at best epoch:  2404
best_loss = [52.608986, 6.739108, 65.73103]
time:  117.25235486030579



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1456
best_loss = [41.709637, 12.221053, 104.05107]
time:  80.45551061630249



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  2132
best_loss = [16.337313, 8.880835, 93.4034]
time:  108.12894463539124



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.0001
Ends at final epoch 3000
Best epoch: 2678
best_loss = [19.977045, 8.355498, 102.88964]
time:  125.68508744239807



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  480
best_loss = [16.067509, 13.321605, 188.5393]
time:  39.6814501285553



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  220
best_loss = 

Early stopping at best epoch:  141
best_loss = [47.64026, 10.082298, 94.983086]
time:  26.867309093475342



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  122
best_loss = [25.740793, 14.009252, 111.679955]
time:  26.247398376464844



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  42
best_loss = [48.44859, 8.332065, 117.00779]
time:  22.820600032806396



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  156
best_loss = [50.68372, 11.918537, 101.216156]
time:  27.649785041809082



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1718
best_loss = [77.451546, 15.714901, 88.88047]
time:  95.91449069976807



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1364
best_loss = [35.669

Early stopping at best epoch:  141
best_loss = [36.563053, 9.90543, 113.26324]
time:  24.209962129592896



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  373
best_loss = [20.16798, 8.978147, 107.29134]
time:  32.97734189033508



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  197
best_loss = [37.093307, 10.559685, 87.5478]
time:  26.380743503570557



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  141
best_loss = [29.380562, 11.065571, 131.11655]
time:  24.908804655075073



rnn = RNN, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  178
best_loss = [14.62515, 13.548749, 75.03941]
time:  25.482633352279663



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  131
best_loss = [33.980076, 16

Early stopping at best epoch:  1246
best_loss = [38.61279, 13.455952, 94.22023]
time:  70.69485974311829



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  764
best_loss = [43.52708, 24.707077, 123.95493]
time:  51.14958953857422



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1150
best_loss = [36.09096, 18.558996, 108.880554]
time:  66.59529280662537



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  548
best_loss = [34.024624, 16.272041, 102.45884]
time:  43.47642683982849



rnn = RNN, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  318
best_loss = [32.653057, 6.2122617, 132.60765]
time:  32.5795202255249



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  120
best_loss = [46.706

Early stopping at best epoch:  32
best_loss = [51.116123, 12.568385, 103.88779]
time:  20.346134185791016



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  189
best_loss = [19.361948, 10.060338, 133.9531]
time:  26.61685872077942



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  464
best_loss = [27.724642, 9.459362, 93.99102]
time:  36.72435021400452



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  42
best_loss = [44.87148, 14.958919, 82.29376]
time:  21.042449235916138



Pooling: 2 layers, Nonpooling: 2 layers, split by 0.5 min
rnn = RNN, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1364
best_loss = [47.29916, 17.690025, 92.66494]
time:  72.84034323692322



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.0001
Earl

Early stopping at best epoch:  760
best_loss = [43.67165, 105.283485, 125.471016]
time:  52.252262592315674



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  376
best_loss = [38.20159, 9.88959, 95.01507]
time:  35.57586932182312



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  150
best_loss = [50.46831, 13.979034, 110.64772]
time:  26.464714765548706



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  93
best_loss = [41.75882, 22.333082, 93.01924]
time:  24.30686664581299



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  1254
best_loss = [16.276117, 20.414644, 89.63068]
time:  72.02928614616394



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  27
best_loss = [50.899582

Early stopping at best epoch:  1386
best_loss = [7.569095, 7.879769, 89.72697]
time:  79.7715950012207



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1737
best_loss = [55.034794, 10.349279, 86.6162]
time:  94.76298022270203



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1576
best_loss = [55.635475, 41.31752, 56.32174]
time:  88.26190280914307



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  875
best_loss = [43.514412, 11.291793, 107.70615]
time:  59.09828495979309



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  416
best_loss = [50.740303, 29.317524, 103.47864]
time:  39.37926650047302



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  453
best_loss = [38.

Early stopping at best epoch:  93
best_loss = [47.81679, 13.124716, 74.282005]
time:  25.728895664215088



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  93
best_loss = [37.18428, 21.492523, 59.804317]
time:  25.426487684249878



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  250
best_loss = [33.221485, 15.946345, 58.738495]
time:  32.19143486022949



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  11
best_loss = [61.77453, 24.671099, 74.41185]
time:  22.032766342163086



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  27
best_loss = [53.77044, 10.721176, 58.122704]
time:  23.012542724609375



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1272
best_loss = [55.523075

Early stopping at best epoch:  404
best_loss = [50.432716, 22.298777, 97.9663]
time:  42.32395052909851



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.3236, 293.60333, 314.75137]
time:  143.57323050498962



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  95
best_loss = [61.722305, 27.824993, 69.387634]
time:  27.762983322143555



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  215
best_loss = [46.746742, 12.804648, 83.79567]
time:  33.15763330459595



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  38
best_loss = [62.740288, 28.672688, 76.39896]
time:  25.002991914749146



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best

Early stopping at best epoch:  27
best_loss = [47.09938, 10.343178, 66.40452]
time:  15.952495574951172



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  27
best_loss = [53.622868, 14.101971, 73.94684]
time:  15.947832584381104



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  48
best_loss = [45.236977, 15.352632, 89.31774]
time:  17.854759693145752



Pooling: 1 layers, Nonpooling: 2 layers, split by 0.5 min
rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1582
best_loss = [46.297924, 9.965479, 104.11586]
time:  85.55792546272278



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1577
best_loss = [44.037647, 13.779048, 70.397064]
time:  84.99826788902283



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0

Early stopping at best epoch:  1447
best_loss = [28.85278, 35.39615, 127.528595]
time:  80.5927791595459



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  299
best_loss = [39.664177, 14.914116, 73.06536]
time:  33.221792697906494



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.83984, 274.74542, 296.67813]
time:  129.53335237503052



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  549
best_loss = [21.186893, 13.147599, 98.083046]
time:  44.33867955207825



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  11
best_loss = [65.30643, 15.140761, 71.50409]
time:  21.367438077926636



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  20
best_loss = [

Early stopping at best epoch:  1521
best_loss = [48.18978, 12.3456545, 77.562386]
time:  91.07974457740784



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.73306, 293.00168, 314.17334]
time:  141.720618724823



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.65475, 292.92194, 314.0967]
time:  141.18601250648499



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1235
best_loss = [43.78572, 10.897391, 77.6273]
time:  79.52380919456482



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  694
best_loss = [34.733963, 12.152986, 76.261734]
time:  53.290061473846436



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epoc

Early stopping at best epoch:  965
best_loss = [11.345829, 7.853653, 88.06716]
time:  62.179481744766235



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  57
best_loss = [45.648895, 22.910025, 74.82305]
time:  22.5716609954834



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  23
best_loss = [53.796234, 21.894651, 84.77613]
time:  21.50146245956421



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  42
best_loss = [47.424847, 14.81511, 56.147785]
time:  22.32264733314514



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  93
best_loss = [41.046577, 14.96549, 78.24716]
time:  25.21867036819458



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  416
best_loss = [44.717777, 18.06

Early stopping at best epoch:  781
best_loss = [42.7632, 11.970629, 62.72128]
time:  54.62347674369812



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  404
best_loss = [45.641846, 29.987564, 98.3927]
time:  40.68281364440918



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  72
best_loss = [62.038624, 13.206418, 63.712414]
time:  23.89845585823059



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  62
best_loss = [49.45024, 18.06821, 113.374695]
time:  23.615044593811035



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  64
best_loss = [56.698853, 13.394502, 77.866905]
time:  23.99359369277954



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  49
best_loss = [46.584, 23

Early stopping at best epoch:  464
best_loss = [11.428119, 13.830231, 81.129776]
time:  41.11013317108154



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  367
best_loss = [14.958093, 15.709445, 103.56339]
time:  38.72930908203125



Pooling: 2 layers, Nonpooling: 2 layers, split by 0.5 min
rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1817
best_loss = [59.302162, 11.700918, 69.694405]
time:  99.9957959651947



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1171
best_loss = [34.547832, 6.455473, 106.60746]
time:  73.32536149024963



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  760
best_loss = [36.59304, 31.741951, 119.00692]
time:  55.08875751495361



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr

Early stopping at best epoch:  125
best_loss = [39.98245, 14.239021, 105.310135]
time:  24.147119522094727



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.56512, 274.4648, 296.40994]
time:  119.3877227306366



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  89
best_loss = [39.644794, 13.678607, 94.50055]
time:  22.96345329284668



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  527
best_loss = [17.992685, 12.372481, 54.288395]
time:  39.50341010093689



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.18042, 100.434555, 139.71129]
time:  122.1115345954895



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
bes

Early stopping at best epoch:  639
best_loss = [54.513428, 14.806229, 96.627174]
time:  46.1785249710083



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  888
best_loss = [40.87334, 19.822134, 58.288094]
time:  57.409366846084595



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  700
best_loss = [50.59502, 23.368748, 81.779366]
time:  50.14700484275818



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  355
best_loss = [51.793858, 16.253584, 78.84806]
time:  34.855947971343994



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  57
best_loss = [58.438038, 14.992229, 96.27289]
time:  23.1391704082489



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  55
best_loss = [46.14178, 1

Early stopping at best epoch:  188
best_loss = [13.792516, 17.731562, 65.6756]
time:  29.299741744995117



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  20
best_loss = [52.765167, 19.046677, 64.85741]
time:  22.282562494277954



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  20
best_loss = [50.950447, 10.630571, 64.042816]
time:  22.565142154693604



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  2217
best_loss = [45.6297, 14.515865, 65.250786]
time:  117.81588172912598



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  875
best_loss = [41.586555, 14.494602, 88.144684]
time:  59.80895161628723



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  517
best_loss = [49.88080

Early stopping at best epoch:  54
best_loss = [48.75894, 16.135893, 84.755424]
time:  26.236963510513306



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  105
best_loss = [46.48677, 13.3809805, 80.1176]
time:  28.17783212661743



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  139
best_loss = [52.91073, 9.57094, 91.87124]
time:  29.74007225036621



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.41448, 100.70702, 139.925]
time:  146.27560448646545



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  105
best_loss = [31.038525, 16.43735, 81.21101]
time:  28.19258975982666



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  220
best_loss = [19.120588, 19

Early stopping at best epoch:  613
best_loss = [44.979507, 10.830133, 71.853745]
time:  47.351481914520264



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  797
best_loss = [41.73033, 31.87181, 117.289764]
time:  54.60899376869202



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1435
best_loss = [29.464716, 15.533416, 73.379166]
time:  81.71670579910278



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1045
best_loss = [37.519993, 64.094345, 97.06688]
time:  65.67948842048645



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  154
best_loss = [46.152008, 25.979137, 69.63578]
time:  27.566200256347656



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  105
best_loss = [47.

Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.71454, 101.05597, 140.199]
time:  130.75151371955872



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  27
best_loss = [44.54272, 17.124138, 63.44313]
time:  22.497905492782593



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  245
best_loss = [20.266712, 15.458163, 68.61156]
time:  32.30162286758423



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  42
best_loss = [40.63962, 11.033506, 89.77092]
time:  23.211522102355957



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.42575, 293.70743, 314.85135]
time:  133.02308535575867



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  609
best_loss = 

Early stopping at best epoch:  544
best_loss = [14.968322, 11.781099, 72.35301]
time:  47.75444459915161



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  625
best_loss = [14.319868, 23.089243, 84.24749]
time:  51.82598876953125



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  606
best_loss = [13.390785, 16.203753, 85.54642]
time:  50.774155616760254



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  93
best_loss = [43.80995, 25.351856, 93.446365]
time:  27.505724668502808



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  618
best_loss = [61.66713, 26.744545, 92.3951]
time:  51.45910286903381



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  205
best_loss = [15.668952, 13.3

Early stopping at best epoch:  609
best_loss = [43.776287, 34.138325, 70.26299]
time:  46.9726459980011



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  517
best_loss = [50.799503, 22.501, 65.16731]
time:  42.5585458278656



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1690
best_loss = [32.757626, 33.969402, 98.30898]
time:  91.23477411270142



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  404
best_loss = [48.213264, 79.32457, 106.626884]
time:  38.28720474243164



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  55
best_loss = [44.837406, 54.99874, 71.45798]
time:  22.922951221466064



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  105
best_loss = [43.436016, 37

Early stopping at best epoch:  10
best_loss = [56.55814, 15.941717, 116.94967]
time:  21.905890703201294



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  26
best_loss = [49.352512, 13.752564, 77.616264]
time:  22.6357159614563



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  27
best_loss = [46.673214, 17.139627, 70.27671]
time:  22.61771821975708



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  58
best_loss = [37.73445, 11.657405, 57.84037]
time:  24.19082736968994



Pooling: 2 layers, Nonpooling: 1 layers, split by 0.5 min
rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  718
best_loss = [47.6264, 28.59333, 98.89649]
time:  52.243370056152344



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.0001
Early sto

Early stopping at best epoch:  941
best_loss = [39.218487, 50.233696, 60.249672]
time:  68.07813000679016



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.49448, 274.39264, 296.34097]
time:  140.98250484466553



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  129
best_loss = [55.61587, 14.303492, 73.54248]
time:  28.881328105926514



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  93
best_loss = [46.83915, 27.556715, 91.517426]
time:  27.556451559066772



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  96
best_loss = [47.659527, 12.871379, 84.71636]
time:  27.769372701644897



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss

Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.19443, 100.450874, 139.72409]
time:  128.6402096748352



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.97818, 293.25146, 314.41327]
time:  129.87139248847961



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  661
best_loss = [42.604736, 25.12226, 104.23889]
time:  49.44992136955261



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.17404, 293.45102, 314.605]
time:  128.09822297096252



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  684
best_loss = [36.29701, 20.950426, 69.716576]
time:  49.67816185951233



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best e

Early stopping at best epoch:  81
best_loss = [44.865063, 12.496412, 82.515854]
time:  25.737168788909912



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  30
best_loss = [55.30687, 20.957186, 80.44031]
time:  23.57691478729248



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  26
best_loss = [41.679802, 19.648916, 80.814224]
time:  23.065863847732544



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  62
best_loss = [37.934414, 17.592459, 92.174835]
time:  24.548604011535645



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  93
best_loss = [47.13429, 17.598543, 79.37117]
time:  26.13922142982483



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  846
best_loss = [55.87708, 12.428

Early stopping at best epoch:  1384
best_loss = [34.086674, 14.023486, 94.71575]
time:  87.29836392402649



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  61
best_loss = [56.068962, 22.064188, 88.91339]
time:  25.18319082260132



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  233
best_loss = [46.636196, 11.493831, 101.72155]
time:  32.91881060600281



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  93
best_loss = [44.154606, 19.806095, 83.6913]
time:  26.381049394607544



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  376
best_loss = [40.053394, 15.204732, 81.145836]
time:  39.46883463859558



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  15
best_loss = [63.521847, 16

Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.31986, 100.59689, 139.8386]
time:  146.3172526359558



Pooling: 1 layers, Nonpooling: 1 layers, split by 0.5 min
rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1133
best_loss = [46.079502, 15.730426, 110.53433]
time:  65.78997159004211



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.55722, 292.82257, 314.00128]
time:  123.92870330810547



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  831
best_loss = [39.85904, 55.015663, 69.41766]
time:  56.04864144325256



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1284
best_loss = [42.05238, 57.632374, 150.83473]
time:  75.6328980922699



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_f

Ends at final epoch 3000
Best epoch: 3000
best_loss = [275.0818, 274.99255, 296.91434]
time:  133.56397914886475



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.52872, 274.42764, 296.37442]
time:  133.4503879547119



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  542
best_loss = [8.424295, 14.124002, 82.796974]
time:  44.66548442840576



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  569
best_loss = [18.175852, 13.608533, 124.66275]
time:  46.1354615688324



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.81637, 101.17434, 140.29202]
time:  134.05551290512085



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 30

Early stopping at best epoch:  1202
best_loss = [21.826235, 22.89203, 108.10039]
time:  70.83542323112488



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  763
best_loss = [38.648224, 46.224247, 91.793785]
time:  52.08829975128174



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  238
best_loss = [43.16507, 16.280886, 80.369286]
time:  29.962658166885376



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.5074, 274.40582, 296.35355]
time:  126.59118628501892



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  318
best_loss = [20.802921, 15.768035, 102.79389]
time:  33.607540130615234



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  852
best_loss =

Early stopping at best epoch:  36
best_loss = [56.58584, 26.469551, 87.35284]
time:  23.01796007156372



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  126
best_loss = [20.169666, 17.187504, 72.057434]
time:  27.086217164993286



rnn = RNN, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1423
best_loss = [43.031307, 10.973685, 84.118324]
time:  84.13770127296448



rnn = RNN, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [292.72122, 292.98965, 314.16177]
time:  135.380188703537



rnn = RNN, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1114
best_loss = [39.772854, 20.082966, 106.603264]
time:  71.31237077713013



rnn = RNN, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  508
best_loss 

Early stopping at best epoch:  89
best_loss = [39.875504, 8.599237, 81.20961]
time:  23.224059581756592



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  105
best_loss = [40.215244, 17.405172, 77.51195]
time:  24.056167364120483



rnn = RNN, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  25
best_loss = [45.33776, 22.981283, 68.98101]
time:  20.383815050125122



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.63989, 100.96919, 140.13083]
time:  121.17078518867493



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  27
best_loss = [46.13826, 16.569714, 82.44363]
time:  20.5558500289917



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  324
best_loss = [31.19907, 1

Early stopping at best epoch:  1254
best_loss = [30.4909, 25.028008, 126.36867]
time:  72.68823456764221



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  863
best_loss = [37.61978, 31.414867, 114.36008]
time:  56.25613355636597



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1213
best_loss = [56.223373, 58.134533, 108.499954]
time:  72.12933444976807



rnn = RNN, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  145
best_loss = [38.160694, 64.63715, 96.23726]
time:  25.942782402038574



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  195
best_loss = [26.601788, 22.749992, 67.43206]
time:  28.130857706069946



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  96
best_loss = [57.8859

Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.68896, 101.02623, 140.17563]
time:  128.52914452552795



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.38385, 100.671364, 139.89703]
time:  130.44801950454712



rnn = RNN, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  27
best_loss = [51.1582, 15.076492, 78.63321]
time:  22.192400455474854



rnn = RNN, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  585
best_loss = [54.97241, 33.279736, 99.79123]
time:  46.781288146972656



rnn = RNN, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1157
best_loss = [36.21681, 24.233461, 98.76803]
time:  72.52186346054077



rnn = RNN, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  958
best_lo

Early stopping at best epoch:  183
best_loss = [60.372066, 17.1761, 103.80564]
time:  30.500941038131714



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  57
best_loss = [57.423817, 14.643039, 87.48781]
time:  24.899980306625366



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.90997, 274.81708, 296.7466]
time:  140.2753586769104



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  65
best_loss = [59.60824, 16.333368, 67.904755]
time:  25.92386245727539



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  32
best_loss = [57.41113, 20.478369, 94.7915]
time:  23.73740816116333



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  100
best_loss = [52.336

Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.21835, 100.47873, 139.74593]
time:  149.26401114463806



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  667
best_loss = [65.82693, 28.276583, 104.42404]
time:  55.79135775566101



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1290
best_loss = [52.40009, 8.755471, 107.25444]
time:  85.85878372192383



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1104
best_loss = [50.90359, 22.642557, 97.165344]
time:  76.78819799423218



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1294
best_loss = [54.0295, 14.286826, 108.19836]
time:  86.8978819847107



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  489
best_lo

Early stopping at best epoch:  152
best_loss = [54.56765, 25.435358, 82.222]
time:  31.711724281311035



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  332
best_loss = [40.02974, 17.923822, 80.41835]
time:  40.546101570129395



rnn = LSTM, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  31
best_loss = [65.761665, 24.913267, 106.48475]
time:  25.407062292099



rnn = LSTM, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  81
best_loss = [42.644917, 16.18608, 83.86525]
time:  27.820586442947388



rnn = LSTM, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  15
best_loss = [60.196003, 25.474539, 87.1657]
time:  24.97551727294922



rnn = LSTM, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  299
best_loss = [14.264878, 11.1365

Early stopping at best epoch:  1122
best_loss = [40.913067, 37.00241, 95.74101]
time:  74.58616471290588



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  988
best_loss = [42.02079, 29.486887, 127.132034]
time:  69.07550382614136



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  660
best_loss = [31.470356, 32.553307, 125.48122]
time:  54.22370195388794



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  197
best_loss = [38.47005, 15.074839, 65.473785]
time:  31.728734016418457



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  158
best_loss = [47.57863, 23.111214, 98.772736]
time:  30.942101001739502



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  150
best_loss = [5

Early stopping at best epoch:  174
best_loss = [23.75569, 17.069801, 86.73157]
time:  31.74164915084839



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.6037, 100.92711, 140.09778]
time:  147.00125646591187



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  13
best_loss = [65.96543, 24.406187, 118.77038]
time:  24.626868724822998



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.01
Early stopping at best epoch:  33
best_loss = [64.41966, 19.345716, 105.98117]
time:  25.833707571029663



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  1028
best_loss = [57.41182, 7.136947, 92.713005]
time:  73.14995431900024



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_lo

Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.05154, 293.32617, 314.48508]
time:  152.74167823791504



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  248
best_loss = [55.16282, 19.310226, 100.756165]
time:  35.68694043159485



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  80
best_loss = [69.465996, 16.4389, 92.05608]
time:  27.931804895401



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  299
best_loss = [48.09002, 9.595387, 101.705315]
time:  38.77063012123108



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.77118, 274.6753, 296.6111]
time:  152.15382313728333



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  126
best_loss

Early stopping at best epoch:  27
best_loss = [54.23565, 18.408264, 108.98414]
time:  27.828387022018433



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  30
best_loss = [58.224564, 19.730371, 86.28273]
time:  28.144904136657715



rnn = LSTM, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  392
best_loss = [65.8558, 11.801685, 96.83591]
time:  46.853453159332275



rnn = LSTM, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  848
best_loss = [56.433125, 21.129288, 89.9713]
time:  71.56805443763733



rnn = LSTM, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1556
best_loss = [55.342133, 8.44477, 119.492325]
time:  110.33427834510803



rnn = LSTM, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  964
best_loss = [60.5

Early stopping at best epoch:  150
best_loss = [45.729763, 9.757188, 100.10649]
time:  31.680644989013672



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.001
Early stopping at best epoch:  126
best_loss = [61.808517, 12.640455, 113.98139]
time:  30.89972710609436



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  57
best_loss = [53.528576, 13.106876, 77.723564]
time:  29.098191261291504



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  29
best_loss = [57.750008, 22.588644, 100.986145]
time:  25.449690341949463



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  56
best_loss = [61.05535, 20.770885, 88.91007]
time:  27.289149522781372



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  65
best_loss = [42.4850

Early stopping at best epoch:  1701
best_loss = [48.434406, 15.655383, 64.83713]
time:  109.08192253112793



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  700
best_loss = [45.86133, 39.04889, 87.08389]
time:  60.08933234214783



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1254
best_loss = [48.04636, 33.49613, 75.090836]
time:  88.64075183868408



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  609
best_loss = [47.047516, 74.03433, 124.4812]
time:  59.822104692459106



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  71
best_loss = [63.412987, 36.788692, 100.45914]
time:  28.76458477973938



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  105
best_loss = [42.4

Ends at final epoch 3000
Best epoch: 3000
best_loss = [274.73022, 274.63348, 296.57114]
time:  171.4182105064392



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  1630
best_loss = [8.521519, 16.442492, 71.149376]
time:  108.2051055431366



rnn = LSTM, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.01
Early stopping at best epoch:  24
best_loss = [57.43017, 23.118204, 100.75099]
time:  27.102068185806274



rnn = LSTM, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  74
best_loss = [49.98832, 20.786469, 108.166565]
time:  29.96052885055542



rnn = LSTM, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.54333, 100.85692, 140.04266]
time:  168.2388300895691



rnn = LSTM, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  393
best_l

Early stopping at best epoch:  1726
best_loss = [39.29493, 13.570867, 60.70964]
time:  102.92508721351624



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  475
best_loss = [54.197662, 28.570816, 91.63857]
time:  46.19509482383728



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.001
Early stopping at best epoch:  233
best_loss = [43.54165, 16.843554, 93.62481]
time:  33.40632700920105



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  287
best_loss = [38.028286, 8.382203, 88.580246]
time:  36.00721073150635



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  113
best_loss = [48.046494, 16.31389, 67.03647]
time:  27.70767116546631



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  183
best_loss = [44.166317, 1

Early stopping at best epoch:  226
best_loss = [9.692085, 15.072561, 75.83216]
time:  34.4623703956604



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  29
best_loss = [59.697227, 17.669752, 101.33838]
time:  25.575355052947998



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  406
best_loss = [66.03517, 22.41023, 87.08243]
time:  43.72590160369873



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  485
best_loss = [44.87847, 31.105682, 90.86383]
time:  47.204880714416504



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  362
best_loss = [58.23044, 29.420412, 102.68541]
time:  41.61514711380005



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  399
best_loss = [70.383224, 3

Early stopping at best epoch:  151
best_loss = [56.463947, 16.790712, 80.24446]
time:  31.590620756149292



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.001
Early stopping at best epoch:  196
best_loss = [46.41442, 14.707155, 71.148895]
time:  34.17289209365845



rnn = GRU, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.01
Early stopping at best epoch:  413
best_loss = [11.078083, 16.40452, 91.25811]
time:  44.38684630393982



rnn = GRU, bi = False, hid per head = 3, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  151
best_loss = [14.869225, 12.944821, 75.26805]
time:  31.857274293899536



rnn = GRU, bi = False, hid per head = 3, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  62
best_loss = [42.3178, 12.915796, 96.97827]
time:  27.547627210617065



rnn = GRU, bi = False, hid per head = 3, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  376
best_loss = [15.624891, 9.2156

Early stopping at best epoch:  862
best_loss = [37.98988, 67.949875, 115.675865]
time:  62.64687895774841



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  1311
best_loss = [41.530506, 13.405958, 99.49603]
time:  84.02053236961365



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  609
best_loss = [52.602566, 96.34175, 153.66693]
time:  51.973615884780884



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  250
best_loss = [43.687542, 10.357019, 100.47844]
time:  33.94703507423401



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  61
best_loss = [51.302013, 30.404951, 130.06174]
time:  25.471275091171265



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  86
best_loss = [51.510

Early stopping at best epoch:  51
best_loss = [33.44099, 19.85897, 96.130615]
time:  26.28285837173462



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 3, lr = 0.01
Early stopping at best epoch:  6
best_loss = [74.284836, 22.253283, 98.871826]
time:  24.257031679153442



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 3, lr = 0.01
Ends at final epoch 3000
Best epoch: 3000
best_loss = [110.27144, 100.540535, 139.7944]
time:  148.68604850769043



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.39578, 293.67688, 314.82202]
time:  143.60340642929077



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  2345
best_loss = [50.751324, 15.980465, 94.871]
time:  135.2284369468689



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  599
best_loss

Early stopping at best epoch:  85
best_loss = [43.529194, 21.137836, 85.67182]
time:  28.23276686668396



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.001
Early stopping at best epoch:  184
best_loss = [45.652546, 11.840852, 87.5955]
time:  33.254369020462036



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 5, lr = 0.001
Early stopping at best epoch:  93
best_loss = [46.993607, 17.497143, 94.53612]
time:  29.0167818069458



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 5, lr = 0.001
Early stopping at best epoch:  67
best_loss = [50.39508, 19.54572, 59.726803]
time:  28.1389639377594



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  15
best_loss = [51.01261, 19.251045, 74.28246]
time:  24.889950275421143



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  113
best_loss = [16.834463, 18.422983,

Early stopping at best epoch:  1156
best_loss = [60.550095, 24.097006, 102.58316]
time:  84.70649027824402



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  1487
best_loss = [46.374172, 23.318638, 104.66483]
time:  102.21699547767639



rnn = GRU, bi = False, hid per head = 5, num_heads = 4, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  357
best_loss = [74.09387, 21.188864, 92.2999]
time:  44.19501090049744



rnn = GRU, bi = False, hid per head = 5, num_heads = 5, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  687
best_loss = [42.74365, 23.605864, 94.48608]
time:  61.97632169723511



rnn = GRU, bi = False, hid per head = 5, num_heads = 2, num_fil = 7, lr = 0.001
Early stopping at best epoch:  175
best_loss = [49.68463, 16.543575, 89.85571]
time:  34.1888964176178



rnn = GRU, bi = False, hid per head = 5, num_heads = 3, num_fil = 7, lr = 0.001
Early stopping at best epoch:  549
best_loss = [38.913334

Early stopping at best epoch:  30
best_loss = [49.158855, 16.891804, 69.00359]
time:  25.6345374584198



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 7, lr = 0.01
Early stopping at best epoch:  143
best_loss = [11.504722, 19.375967, 113.73265]
time:  31.464325189590454



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 7, lr = 0.01
Early stopping at best epoch:  21
best_loss = [54.800312, 15.729363, 83.13119]
time:  25.60503888130188



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 7, lr = 0.01
Early stopping at best epoch:  13
best_loss = [57.687286, 24.471563, 103.046394]
time:  27.20557737350464



Pooling: 1 layers, Nonpooling: 2 layers, split by 0.5 min
rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.0001
Early stopping at best epoch:  700
best_loss = [47.039913, 22.07436, 106.33447]
time:  60.907230615615845



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.0001
Ea

Early stopping at best epoch:  609
best_loss = [35.72156, 53.91613, 148.62009]
time:  61.0880012512207



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.001
Early stopping at best epoch:  106
best_loss = [49.011395, 24.88248, 111.54147]
time:  31.224948406219482



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 3, lr = 0.001
Early stopping at best epoch:  465
best_loss = [60.74556, 13.0969, 108.069595]
time:  50.315157890319824



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 3, lr = 0.001
Early stopping at best epoch:  66
best_loss = [53.978966, 35.49444, 98.56274]
time:  29.806878566741943



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 3, lr = 0.001
Early stopping at best epoch:  142
best_loss = [55.080555, 15.659807, 72.04008]
time:  35.7099928855896



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 3, lr = 0.01
Early stopping at best epoch:  18
best_loss = [56.661186, 24.527

Early stopping at best epoch:  450
best_loss = [53.24391, 5.068868, 98.429504]
time:  53.91724634170532



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.0001
Ends at final epoch 3000
Best epoch: 3000
best_loss = [293.1574, 293.43405, 314.58868]
time:  166.8294129371643



rnn = GRU, bi = False, hid per head = 7, num_heads = 3, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  908
best_loss = [51.47674, 20.754068, 90.19801]
time:  78.02814412117004



rnn = GRU, bi = False, hid per head = 7, num_heads = 4, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  458
best_loss = [61.621666, 24.3302, 92.803635]
time:  53.49065899848938



rnn = GRU, bi = False, hid per head = 7, num_heads = 5, num_fil = 5, lr = 0.0001
Early stopping at best epoch:  587
best_loss = [43.820778, 18.042385, 79.69157]
time:  63.66238498687744



rnn = GRU, bi = False, hid per head = 7, num_heads = 2, num_fil = 5, lr = 0.001
Ends at final epoch 3000
Best epoch: 3000
best_loss 

Early stopping at best epoch:  424
best_loss = [38.628456, 16.265593, 102.82079]
time:  46.85869216918945



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 5, lr = 0.01
Early stopping at best epoch:  507
best_loss = [59.98549, 17.735626, 104.729675]
time:  50.30143404006958



rnn = RNN, bi = False, hid per head = 3, num_heads = 3, num_fil = 5, lr = 0.01
Early stopping at best epoch:  66
best_loss = [37.914963, 17.87546, 100.93974]
time:  28.1198091506958



rnn = RNN, bi = False, hid per head = 3, num_heads = 4, num_fil = 5, lr = 0.01
Early stopping at best epoch:  150
best_loss = [41.4485, 17.059765, 91.59106]
time:  32.65036582946777



rnn = RNN, bi = False, hid per head = 3, num_heads = 5, num_fil = 5, lr = 0.01
Early stopping at best epoch:  11
best_loss = [62.633404, 20.592411, 94.629036]
time:  25.905970096588135



rnn = RNN, bi = False, hid per head = 3, num_heads = 2, num_fil = 7, lr = 0.0001
Early stopping at best epoch:  421
best_loss = [60.84629, 24.455

In [None]:
# Hyperparameters
num_vars = globals()[f'x_tensor_{reg}'].size()[3]

modelname = 'RNN_TA_CA_1DCNN'
rnns = ['LSTM', 'GRU', 'RNN']
hids = [3, 5, 7]
num_time = max_time
num_cycless = [30, 50, 80]
ep_pats = [[1000, 10], [2000, 20]]
lrs = [1e-4, 1e-3, 1e-2]

num_fils = [3, 5, 7]
pools = [1, 2]
npools = [1, 2]
fsize2 = 3
psize2 = 2
mids = [8, 4, 1]

nh2s = [1, 2, 3, 4, 5]

batch_nums = [1]

import time

for batch_num in batch_nums:
    for [n_ep, patience] in ep_pats:
        for num_cycles in num_cycless:
            print(f"Batch: {batch_num + 1}, num_cycle = {num_cycles}, epoch = {n_ep}, patience = {patience}")
            trdl = globals()[f'b{batch_num+1}_train_dataloader_{num_cycles}']
            vdl = globals()[f'b{batch_num+1}_val_dataloader_{num_cycles}']
            tedl = globals()[f'b{batch_num+1}_test_dataloader_{num_cycles}']
            for rnn1, hid1 in itertools.product(rnns, hids):
                bi1 = False
                if rnn1.startswith('Bi'):
                    bi1 = True
                for pool2, npool2 in itertools.product(pools, npools):
                    print(f'Pooling: {pool2} layers, Nonpooling: {npool2} layers, split by {60/max_time} min')
                    for fil2, lr in itertools.product(num_fils, lrs):
                        for nh2 in nh2s:
                            print(f"rnn = {rnn1}, bi = {bi1}, hid per head = {hid1}, num_heads = {nh2}, num_fil = {fil2}, lr = {lr}")

                            all_hid1 = hid1 * nh2

                            start = time.time()

                            # Construct CNN
                            model = globals()[modelname](num_vars, num_time, num_cycles, rnn1, bi1, all_hid1, nh2, fil2, pool2, npool2, fsize2, psize2, mids).to(device)

                            # Train
                            model_train = train(model, trdl, vdl, tedl, n_ep, lr, patience, verbose = False)
                            best_model, best_loss, history, ta, ca = model_train.TrainModel()

                            print('best_loss = '+ str(best_loss))

                            history_add, state_dict_add, ta_add, ca_add = history_state_dict_add(num_cycles, modelname, n_ep, patience,
                                                                                        rnn1, hid1, nh2, fil2, pool2, npool2, fsize2, psize2, lr, batch_num+1)

                            # saving best_epoch, loss history, ta score
                            save_data(history, history_add)
                            save_data(best_model.state_dict(), state_dict_add)
                            save_data(ta, ta_add)
                            save_data(ca, ca_add)

                            print("time: ", time.time()-start)
                            print("\n\n")

In [None]:
modelname = 'RNN_TA_CA_1DCNN'
rnns = ['LSTM', 'GRU', 'RNN']
hids = [3, 5, 7]
num_time = max_time
num_cycles = 100
ep_pats = [[3000, 500]]
lrs = [1e-4, 1e-3, 1e-2]

num_fils = [3, 5, 7]
pools = [1, 2]
npools = [1, 2]
fsize2 = 3
psize2 = 2
mids = [8, 4, 1]

nh2s = [1, 2, 3, 4, 5]

batch_nums = [0, 2, 1]

for batch_num in batch_nums:
    for [n_ep, patience] in ep_pats:
        for nh2 in nh2s:
            for rnn1 in rnns:
                df = pd.DataFrame(columns = ['train rmse', 'val rmse', 'test rmse'])
                bi1 = False
                for hid1 in hids:
                    for pool2, npool2 in itertools.product(pools, npools):
                        for fil2, lr in itertools.product(num_fils, lrs):
                            history_add, state_dict_add, ta_add, ca_add = history_state_dict_add(num_cycles, modelname, n_ep, patience,
                                                                                        rnn1, hid1, nh2, fil2, pool2, npool2, fsize2, psize2, lr, batch_num+1)

                            history = load_data(history_add)

                            df.loc[f'Batch {batch_num+1}_{nh2}_{rnn1}_{hid1}_{fil2}_{pool2}_{npool2}_{fsize2}_{lr}'] = [history.iloc[-1, 0], history.iloc[-1, 1], history.iloc[-1, 2]]

                df
                df_add = find_directory(back_num = 0, foldername = [f'{num_cycles} cycles', f'Depth Test_col_{n_ep}_{patience}', modelname, 'Each batch', f'Batch {batch_num+1}'], 
                                filename = f'Batch {batch_num+1}_{nh2} heads_{modelname}_{rnn1}_{nh2}.csv')
                df.to_csv(df_add)