# Strats+Text Forecasting

- physiological features + clinical text

- clinical BERT for text embedding 

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [2]:
! export TF_CPP_MIN_LOG_LEVEL=2

## Hardware check

In [3]:
# gpu check
!nvidia-smi

Mon Oct 16 09:18:26 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  On   | 00000000:3A:00.0 Off |                    0 |
| N/A   33C    P0    41W / 300W |      9MiB / 32768MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-SXM2...  On   | 00000000:3B:00.0 Off |                    0 |
| N/A   34C    P0    41W / 300W |      9MiB / 32768MiB |      0%      Default |
|       

In [4]:
# check number of cores
import multiprocessing

# cpu_count() reports the CPUs present in the system, which may exceed what this job is allowed to use.
cores = multiprocessing.cpu_count() 
cores

80

## Environment Prep

In [5]:
cd /pfs/data5/home/hd/hd_hd/hd_nf283/MA_Thesis

/pfs/data5/home/hd/hd_hd/hd_nf283/MA_Thesis


In [6]:
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, Lambda
# from tensorflow.keras.models import Model
from tensorflow.keras import models
import pickle
import numpy as np
from tqdm import tqdm
tqdm.pandas()
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import Callback, EarlyStopping
import pandas as pd
import json
from torch.utils.data import Dataset
from transformers import AutoTokenizer, pipeline, AutoModel
import resources.smart_cond as sc
import gc
# from google.colab import files

## Version Check

In [7]:
# Print the TensorFlow version so the results below can be tied to a concrete environment.
import tensorflow as tf
print(tf. __version__)

# Last expression of the cell: displays the pickle module's format version string.
pickle.format_version

2.12.0


'4.0'

## Load Data

In [8]:
# Load the preprocessed dataset bundle.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
data_path = 'Data/sepsis_removed_0.pkl'
# Use a context manager so the file handle is closed as soon as the bundle is read.
with open(data_path, 'rb') as pkl_file:
    pkl = pickle.load(pkl_file)
# Bundle layout, by position:
#   0: long-format event frame, 1: per-stay outcome frame (has ts_ind / SUBJECT_ID),
#   2-4: train / valid / test identifiers (matched against ts_ind further down).
data = pkl[0]
oc = pkl[1]
train_ind = pkl[2]
valid_ind = pkl[3]
test_ind = pkl[4]
# Drop the container reference; the individual objects stay alive through the names above.
del pkl

In [9]:
data

Unnamed: 0,ts_ind,hour,variable,value,TABLE,mean,std
0,10223,467.816667,Text,Admission Date: [**2119-5-4**] D...,noteevents,1.000000,1.000000
1,18407,28.016667,Text,Admission Date: [**2112-12-8**] ...,noteevents,1.000000,1.000000
2,40300,155.166667,Text,Admission Date: [**2194-7-18**] ...,noteevents,1.000000,1.000000
3,23747,52.383333,Text,Admission Date: [**2194-1-7**] D...,noteevents,1.000000,1.000000
4,2357,73.133333,Text,Admission Date: [**2186-6-7**] Discharge ...,noteevents,1.000000,1.000000
...,...,...,...,...,...,...,...
82886223,57281,20.400000,MBP,0.195381,chart,78.552377,17.645628
82886224,57281,20.400000,O2 Saturation,-0.678068,chart,96.820961,4.160290
82886225,57281,20.400000,RR,0.179866,chart,26.278501,15.130729
82886226,57281,20.400000,SBP,-0.404061,chart,120.239648,25.341836


In [10]:
# Overwrite the raw note text of every 'Text' row with the constant 1, in place.
# After this the 'Text' rows only mark that a note exists at that time; the note content
# itself is presumably supplied via the precomputed text embeddings loaded later — confirm.
data.loc[data['variable'] == 'Text', 'value'] = 1
data

Unnamed: 0,ts_ind,hour,variable,value,TABLE,mean,std
0,10223,467.816667,Text,1,noteevents,1.000000,1.000000
1,18407,28.016667,Text,1,noteevents,1.000000,1.000000
2,40300,155.166667,Text,1,noteevents,1.000000,1.000000
3,23747,52.383333,Text,1,noteevents,1.000000,1.000000
4,2357,73.133333,Text,1,noteevents,1.000000,1.000000
...,...,...,...,...,...,...,...
82886223,57281,20.400000,MBP,0.195381,chart,78.552377,17.645628
82886224,57281,20.400000,O2 Saturation,-0.678068,chart,96.820961,4.160290
82886225,57281,20.400000,RR,0.179866,chart,26.278501,15.130729
82886226,57281,20.400000,SBP,-0.404061,chart,120.239648,25.341836


In [11]:
# Length of the forecast target window that follows each observation window.
pred_window = 2 # hours
# Observation-window end points w (in hours): 20, 24, ..., 120.
obs_windows = range(20, 124, 4)
# Remove test patients.
# Attach SUBJECT_ID to every event row so filtering can be done per patient rather than per stay.
data = data.merge(oc[['ts_ind', 'SUBJECT_ID']], on='ts_ind', how='left')
# Every subject owning at least one test ts_ind is excluded entirely (all of their stays).
test_sub = oc.loc[oc.ts_ind.isin(test_ind)].SUBJECT_ID.unique()
data = data.loc[~data.SUBJECT_ID.isin(test_sub)]
oc = oc.loc[~oc.SUBJECT_ID.isin(test_sub)]
# These helper columns are not needed past this point.
data.drop(columns=['SUBJECT_ID', 'TABLE'], inplace=True)
# Fix age.
# Ages above 200 are replaced by 91.4.
# NOTE(review): presumably these are de-identified ages of very old patients — confirm the source convention.
data.loc[(data.variable=='Age')&(data.value>200), 'value'] = 91.4
# data[data.variable=='Age'][data.value>200]['value'] = 91.4

In [12]:
# Get static data with mean fill and missingness indicator.
# NOTE(review): the code in this cell zero-initialises the static matrix and standardises it;
# no explicit mean fill or missingness indicator is visible — confirm the comment above still applies.
static_varis = ['Age', 'Gender']
# Boolean row selector for the static variables.
ii = data.variable.isin(static_varis)
# Static rows go to `static_data`; `data` keeps only the remaining (time-series) rows.
static_data = data.loc[ii]
data = data.loc[~ii]
def inv_list(l, start=0):
    """Build an element -> index lookup for the sequence `l`.

    The index of the first element is `start`; if an element occurs more than
    once, its last position wins.
    """
    return {item: pos + start for pos, item in enumerate(l)}
# Column index of each static variable inside the `demo` matrix.
static_var_to_ind = inv_list(static_varis)
D = len(static_varis)
# One row per ts_ind; N is taken from the largest ts_ind left in the non-static `data`.
N = data.ts_ind.max()+1
# Rows/columns without a static record keep the initial value 0.
demo = np.zeros((N, D))
for row in tqdm(static_data.itertuples()):
    demo[row.ts_ind, static_var_to_ind[row.variable]] = row.value
# Normalize static data.
means = demo.mean(axis=0, keepdims=True)
stds = demo.std(axis=0, keepdims=True)
# Replace zero standard deviations by 1 to avoid division by zero.
stds = (stds==0)*1 + (stds!=0)*stds
demo = (demo-means)/stds
# Get variable indices.
varis = sorted(list(set(data.variable)))
V = len(varis)
# Variable ids start at 1 so that 0 can be used as the padding id.
var_to_ind = inv_list(varis, start=1)
data['vind'] = data.variable.map(var_to_ind)
# Keep only the columns needed downstream, ordered by stay, then variable, then time.
data = data[['ts_ind', 'vind', 'hour', 'value']].sort_values(by=['ts_ind', 'vind', 'hour'])
# Find max_len.
# Hard-coded cap on the number of observations kept per stay and window.
fore_max_len = 880
# Get forecast inputs and outputs.
# Per-window arrays are collected in these lists and concatenated after the loop.
fore_times_ip = []
fore_values_ip = []
fore_varis_ip = []
fore_op = []
fore_inds = []
def f(x):
    """Turn a list of (vind, value) pairs into a flat target vector of length 2*V.

    The first V entries hold the value per variable (0 where absent), the last
    V entries hold a 0/1 flag telling whether that variable was present.
    `vind` is 1-based, so it is shifted down by one to obtain the slot.
    """
    values = [0] * V
    mask = [0] * V
    for pair in x:
        slot = int(pair[0]) - 1
        values[slot] = pair[1]
        mask[slot] = 1
    return values + mask
def pad(x):
    """Right-pad the list `x` with zeros until it has `fore_max_len` entries."""
    n_missing = fore_max_len - len(x)
    return x + [0] * n_missing
# Build one (input, target) sample per stay and observation window ending at hour w.
for w in tqdm(obs_windows):
    # Targets: first recorded value per (stay, variable) inside [w, w+pred_window].
    pred_data = data.loc[(data.hour>=w)&(data.hour<=w+pred_window)]
    pred_data = pred_data.groupby(['ts_ind', 'vind']).agg({'value':'first'}).reset_index()
    pred_data['vind_value'] = pred_data[['vind', 'value']].values.tolist()
    pred_data = pred_data.groupby('ts_ind').agg({'vind_value':list}).reset_index()
    # f() converts the pair list into [values..., mask...] of length 2*V.
    pred_data['vind_value'] = pred_data['vind_value'].apply(f)    
    # Inputs: observations from the 24 hours before w, [w-24, w).
    obs_data = data.loc[(data.hour<w)&(data.hour>=w-24)]
    # Only stays that have at least one target in this window are kept.
    obs_data = obs_data.loc[obs_data.ts_ind.isin(pred_data.ts_ind)]
    # Cap each stay at fore_max_len rows; rows are taken in the current row order of `data`.
    obs_data = obs_data.groupby('ts_ind').head(fore_max_len)
    obs_data = obs_data.groupby('ts_ind').agg({'vind':list, 'hour':list, 'value':list}).reset_index()
    obs_data = obs_data.merge(pred_data, on='ts_ind')
    # Zero-pad all three sequences to the common length fore_max_len (vind 0 marks padding).
    for col in ['vind', 'hour', 'value']:
        obs_data[col] = obs_data[col].apply(pad)
    fore_op.append(np.array(list(obs_data.vind_value)))
    fore_inds.append(np.array(list(obs_data.ts_ind)))
    fore_times_ip.append(np.array(list(obs_data.hour)))
    fore_values_ip.append(np.array(list(obs_data.value)))
    fore_varis_ip.append(np.array(list(obs_data.vind)))
# The long-format frame is no longer needed; free it before the large concatenations.
del data
# Stack the per-window arrays into single arrays with one row per sample.
fore_times_ip = np.concatenate(fore_times_ip, axis=0)
fore_values_ip = np.concatenate(fore_values_ip, axis=0)
fore_varis_ip = np.concatenate(fore_varis_ip, axis=0)
fore_op = np.concatenate(fore_op, axis=0)
fore_inds = np.concatenate(fore_inds, axis=0)
# Static features of the stay each sample belongs to.
fore_demo = demo[fore_inds]
# Get train and valid ts_ind for forecast task.
# train_sub = oc.loc[oc.ts_ind.isin(train_ind)].SUBJECT_ID.unique()
valid_sub = oc.loc[oc.ts_ind.isin(valid_ind)].SUBJECT_ID.unique()
# NOTE(review): train_ind / valid_ind are matched against ts_ind everywhere else in this cell,
# but here they are passed to SUBJECT_ID.isin(). This looks like it was meant to use the
# subject id arrays instead — confirm before changing, because any cached inputs that are
# row-aligned with the resulting validation split would have to be regenerated.
rem_sub = oc.loc[~oc.SUBJECT_ID.isin(np.concatenate((train_ind, valid_ind)))].SUBJECT_ID.unique()
# The last 20% of the remaining subjects are added to the validation subjects.
bp = int(0.8*len(rem_sub))
# train_sub = np.concatenate((train_sub, rem_sub[:bp]))
valid_sub = np.concatenate((valid_sub, rem_sub[bp:]))
# train_ind = oc.loc[oc.SUBJECT_ID.isin(train_sub)].ts_ind.unique() # Add remaining ts_ind s of train subjects.
valid_ind = oc.loc[oc.SUBJECT_ID.isin(valid_sub)].ts_ind.unique() # All ts_ind values belonging to the validation subjects.
# Generate 3 sets of inputs and outputs.
# train_ind = np.argwhere(np.in1d(fore_inds, train_ind)).flatten()
# From here on valid_ind holds row positions into the sample arrays, not ts_ind values.
valid_ind = np.argwhere(np.in1d(fore_inds, valid_ind)).flatten()
# fore_train_ip = [ip[train_ind] for ip in [fore_demo, fore_times_ip, fore_values_ip, fore_varis_ip]]
# Input order: [demo, times, values, varis].
fore_valid_ip = [ip[valid_ind] for ip in [fore_demo, fore_times_ip, fore_values_ip, fore_varis_ip]]
del fore_times_ip, fore_values_ip, fore_varis_ip, demo, fore_demo
# fore_train_op = fore_op[train_ind]
fore_valid_op = fore_op[valid_ind]
del fore_op

91626it [00:00, 784987.87it/s]
100%|██████████| 26/26 [04:56<00:00, 11.40s/it]


In [13]:
# add text features
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# A context manager closes the file handle right after the embeddings are read.
with open('Data/text_emb_input_train_val_1.pkl', 'rb') as text_file:
    text_ip = pickle.load(text_file)

# train_text_ip = text_ip[0]
# Index 1 is used as the validation part of the text inputs.
valid_text_ip = text_ip[1]

# fore_train_ip.append(train_text_ip)
# Appended last, so the input order becomes [demo, times, values, varis, texts].
# Re-running this cell appends a second copy; re-run the preparation cell first.
fore_valid_ip.append(valid_text_ip)

In [14]:
def get_res(y_true, y_pred):
    """Score binary predictions.

    Returns a list [roc_auc, pr_auc, minrp] where pr_auc is the area under the
    precision-recall curve and minrp is the largest value of
    min(precision, recall) along that curve.
    """
    prec, rec, _ = precision_recall_curve(y_true, y_pred)
    return [
        roc_auc_score(y_true, y_pred),
        auc(rec, prec),
        np.minimum(prec, rec).max(),
    ]

# ######################################################################################################## 
# ######################################################################################################## 
# class_weights = compute_class_weight(class_weight='balanced', classes=[0,1], y=train_op)
# def mortality_loss(y_true, y_pred):
#     sample_weights = (1-y_true)*class_weights[0] + y_true*class_weights[1]
#     bce = K.binary_crossentropy(y_true, y_pred)
#     return K.mean(sample_weights*bce, axis=-1)
# ######################################################################################################## 
# ######################################################################################################## 

# var_weights = np.sum(fore_train_op[:, V:], axis=0)
# var_weights[var_weights==0] = var_weights.max()
# var_weights = var_weights.max()/var_weights
# var_weights = var_weights.reshape((1, V))
def forecast_loss(y_true, y_pred):
    """Masked squared error, summed over variables for each sample.

    `y_true` carries the target values in its first V columns and the matching
    0/1 mask in the remaining columns; only masked-in variables contribute.
    """
    targets = y_true[:,:V]
    observed = y_true[:,V:]
    return K.sum(observed*(targets-y_pred)**2, axis=-1)

def get_min_loss(weight):
    """Return a Keras-style loss function that is just `weight` times the prediction."""
    def min_loss(y_true, y_pred):
        # y_true is accepted for signature compatibility only and is ignored.
        scaled = weight*y_pred
        return scaled
    return min_loss

class CustomCallback(Callback):
    """Compute PR-AUC and ROC-AUC on held-out data at the end of every epoch.

    The sum of both scores is stored in the epoch logs under 'custom_metric',
    so callbacks that run afterwards (e.g. one monitoring 'custom_metric') can read it.

    Args:
        validation_data: pair (inputs, labels) used for scoring.
        batch_size: batch size passed to `model.predict`.
    """

    def __init__(self, validation_data, batch_size):
        # Initialise the Keras base class properly. The previous
        # `super(Callback, self).__init__()` skipped Callback.__init__ altogether.
        super().__init__()
        self.val_x, self.val_y = validation_data
        self.batch_size = batch_size

    def on_epoch_end(self, epoch, logs=None):
        # Avoid a shared mutable default: this method writes into `logs`.
        if logs is None:
            logs = {}
        y_pred = self.model.predict(self.val_x, verbose=0, batch_size=self.batch_size)
        # Multi-output models return a list; the first output is scored.
        if isinstance(y_pred, list):
            y_pred = y_pred[0]
        precision, recall, _ = precision_recall_curve(self.val_y, y_pred)
        pr_auc = auc(recall, precision)
        roc_auc = roc_auc_score(self.val_y, y_pred)
        logs['custom_metric'] = pr_auc + roc_auc
        print ('val_aucs:', pr_auc, roc_auc)

In [15]:
import tensorflow as tf
import numpy as np
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Embedding, Activation, Dropout, Softmax, Layer, InputSpec, Input, Dense, Lambda, TimeDistributed, Concatenate, Add
from tensorflow.keras import initializers, regularizers, constraints, Model
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow import nn

    
class CVE(Layer):
    """Embed each scalar of the input with a small two-layer network.

    Every scalar is mapped scalar -> hid_units (tanh, with bias) -> output_dim
    (linear, no bias); the same weights are used for every position.
    """

    def __init__(self, hid_units, output_dim):
        self.hid_units = hid_units
        self.output_dim = output_dim
        super().__init__()

    def build(self, input_shape):
        # Creation order (W1, b1, W2) is kept so saved weight files stay loadable.
        self.W1 = self.add_weight(
            name='CVE_W1',
            shape=(1, self.hid_units),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b1 = self.add_weight(
            name='CVE_b1',
            shape=(self.hid_units,),
            initializer='zeros',
            trainable=True,
        )
        self.W2 = self.add_weight(
            name='CVE_W2',
            shape=(self.hid_units, self.output_dim),
            initializer='glorot_uniform',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # Add a trailing axis of size 1 so each scalar becomes a length-1 vector.
        scalars = K.expand_dims(x, axis=-1)
        hidden = K.tanh(K.bias_add(K.dot(scalars, self.W1), self.b1))
        return K.dot(hidden, self.W2)

    def compute_output_shape(self, input_shape):
        return input_shape + (self.output_dim,)
    
    
class Attention(Layer):
    """Additive attention that yields one weight per step of the second-to-last axis.

    Scores are u^T tanh(W x + b); masked-out steps receive `mask_value` before
    the softmax so they end up with (numerically) zero weight.
    """

    def __init__(self, hid_dim):
        self.hid_dim = hid_dim
        super().__init__()

    def build(self, input_shape):
        feat_dim = input_shape.as_list()[-1]
        # Creation order (W, b, u) is kept so saved weight files stay loadable.
        self.W = self.add_weight(
            name='Att_W',
            shape=(feat_dim, self.hid_dim),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b = self.add_weight(
            name='Att_b',
            shape=(self.hid_dim,),
            initializer='zeros',
            trainable=True,
        )
        self.u = self.add_weight(
            name='Att_u',
            shape=(self.hid_dim, 1),
            initializer='glorot_uniform',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x, mask, mask_value=-1e30):
        hidden = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        scores = K.dot(hidden, self.u)
        # Give the mask a trailing axis so it lines up with the score tensor.
        keep = K.expand_dims(mask, axis=-1)
        scores = keep*scores + (1-keep)*mask_value
        # Normalise over the step axis (second to last).
        return K.softmax(scores, axis=-2)

    def compute_output_shape(self, input_shape):
        return input_shape[:-1] + (1,)
    
    
class Transformer(Layer):
    """Stack of N encoder blocks implemented in a single layer.

    Each block is multi-head self-attention followed by a position-wise
    feed-forward network, each wrapped in dropout, a residual connection and a
    normalisation over the last axis. The weights of all blocks are stored
    together, with the block index as the leading axis.

    Args:
        N: number of blocks.
        h: number of attention heads per block.
        dk, dv: per-head query/key and value sizes; default to d//h in build().
        dff: hidden size of the feed-forward network; defaults to 2*d in build().
        dropout: dropout rate used for attention and sub-layer outputs.
    """
    
    def __init__(self, N=2, h=8, dk=None, dv=None, dff=None, dropout=0):
        self.N, self.h, self.dk, self.dv, self.dff, self.dropout = N, h, dk, dv, dff, dropout
        # Square of the backend epsilon; added to the variance inside the normalisation.
        self.epsilon = K.epsilon() * K.epsilon()
        super(Transformer, self).__init__()

    def build(self, input_shape):
        """Create the stacked weights; d is the size of the last input axis."""
        d = input_shape.as_list()[-1]
        if self.dk==None:
            self.dk = d//self.h
        if self.dv==None:
            self.dv = d//self.h
        if self.dff==None:
            self.dff = 2*d
        # Per-block, per-head projection matrices for queries, keys and values.
        self.Wq = self.add_weight(shape=(self.N, self.h, d, self.dk), name='Wq',
                                 initializer='glorot_uniform', trainable=True)
        self.Wk = self.add_weight(shape=(self.N, self.h, d, self.dk), name='Wk',
                                 initializer='glorot_uniform', trainable=True)
        self.Wv = self.add_weight(shape=(self.N, self.h, d, self.dv), name='Wv',
                                 initializer='glorot_uniform', trainable=True)
        # Output projection applied to the concatenated heads.
        self.Wo = self.add_weight(shape=(self.N, self.dv*self.h, d), name='Wo',
                                 initializer='glorot_uniform', trainable=True)
        # Feed-forward network weights.
        self.W1 = self.add_weight(shape=(self.N, d, self.dff), name='W1',
                                 initializer='glorot_uniform', trainable=True)
        self.b1 = self.add_weight(shape=(self.N, self.dff), name='b1',
                                 initializer='zeros', trainable=True)
        self.W2 = self.add_weight(shape=(self.N, self.dff, d), name='W2',
                                 initializer='glorot_uniform', trainable=True)
        self.b2 = self.add_weight(shape=(self.N, d), name='b2',
                                 initializer='zeros', trainable=True)
        # One scalar scale and one scalar shift per normalisation (two per block).
        self.gamma = self.add_weight(shape=(2*self.N,), name='gamma',
                                 initializer='ones', trainable=True)
        self.beta = self.add_weight(shape=(2*self.N,), name='beta',
                                 initializer='zeros', trainable=True)
        super(Transformer, self).build(input_shape)
        
    # NOTE(review): the default mask_value is -1e-30, i.e. practically 0, so masked attention
    # logits are set to ~0 rather than to a large negative number and masked steps still get
    # softmax weight. The Attention layer in this file uses -1e30; this looks like a sign typo
    # in the exponent — confirm. Changing it alters the behaviour of already trained checkpoints.
    def call(self, x, mask, mask_value=-1e-30):
        """Apply the N blocks to x; `mask` is 1 for real steps and 0 for padding.

        Assumes x is (batch, steps, d) and mask is (batch, steps) — TODO confirm against callers.
        """
        # Insert an axis so the mask broadcasts across the second-to-last axis of the logits.
        mask = K.expand_dims(mask, axis=-2)
        for i in range(self.N):
            # MHA
            mha_ops = []
            for j in range(self.h):
                q = K.dot(x, self.Wq[i,j,:,:])
                # Keys are transposed so batch_dot(q, k) yields step-by-step logits.
                k = K.permute_dimensions(K.dot(x, self.Wk[i,j,:,:]), (0,2,1))
                v = K.dot(x, self.Wv[i,j,:,:])
                # Raw dot-product logits; no 1/sqrt(dk) scaling is applied here.
                A = K.batch_dot(q,k)
                # Mask unobserved steps.
                A = mask*A + (1-mask)*mask_value
                # Mask for attention dropout.
                # Entries whose uniform draw falls below the dropout rate are replaced by mask_value.
                def dropped_A():
                    dp_mask = K.cast((K.random_uniform(shape=array_ops.shape(A))>=self.dropout), K.floatx())
                    return A*dp_mask + (1-dp_mask)*mask_value
                # sc.smart_cond presumably selects the first branch in training and the second otherwise — verify in resources.smart_cond.
                A = sc.smart_cond(K.learning_phase(), dropped_A, lambda: array_ops.identity(A))
                A = K.softmax(A, axis=-1)
                mha_ops.append(K.batch_dot(A,v))
            # Concatenate the head outputs along the feature axis and project back to d.
            conc = K.concatenate(mha_ops, axis=-1)
            proj = K.dot(conc, self.Wo[i,:,:])
            # Dropout.
            proj = sc.smart_cond(K.learning_phase(), lambda: array_ops.identity(nn.dropout(proj, rate=self.dropout)),\
                                       lambda: array_ops.identity(proj))
            # Add & LN
            # Residual connection, then standardise over the last axis with a scalar scale/shift.
            x = x+proj
            mean = K.mean(x, axis=-1, keepdims=True)
            variance = K.mean(K.square(x - mean), axis=-1, keepdims=True)
            std = K.sqrt(variance + self.epsilon)
            x = (x - mean) / std
            x = x*self.gamma[2*i] + self.beta[2*i]
            # FFN
            ffn_op = K.bias_add(K.dot(K.relu(K.bias_add(K.dot(x, self.W1[i,:,:]), self.b1[i,:])), 
                           self.W2[i,:,:]), self.b2[i,:,])
            # Dropout.
            ffn_op = sc.smart_cond(K.learning_phase(), lambda: array_ops.identity(nn.dropout(ffn_op, rate=self.dropout)),\
                                       lambda: array_ops.identity(ffn_op))
            # Add & LN
            x = x+ffn_op
            mean = K.mean(x, axis=-1, keepdims=True)
            variance = K.mean(K.square(x - mean), axis=-1, keepdims=True)
            std = K.sqrt(variance + self.epsilon)
            x = (x - mean) / std
            x = x*self.gamma[2*i+1] + self.beta[2*i+1]            
        return x
        
    def compute_output_shape(self, input_shape):
        """The output has the same shape as the input."""
        return input_shape


def build_strats(D, max_len, V, d, N, he, dropout, forecast=False, text_dim=33792):
    """Build the time-series + text model.

    Args:
        D: number of static (demographic) features.
        max_len: padded length of the observation sequences.
        V: number of time-series variables (variable id 0 is reserved for padding).
        d: embedding size shared by all branches.
        N: number of Transformer blocks.
        he: number of attention heads.
        dropout: dropout rate inside the Transformer.
        forecast: if True, also return the model that outputs the V forecast values.
        text_dim: length of the flattened text-embedding input (default 33792).

    Returns:
        The classification model, or [model, fore_model] when `forecast` is True.
        Both take the inputs [demo, times, values, varis, texts]. The
        classification output is a sigmoid unit stacked on the forecast output.
    """
    # Static branch.
    demo = Input(shape=(D,))
    demo_enc = Dense(2*d, activation='tanh')(demo)
    demo_enc = Dense(d, activation='tanh')(demo_enc)
    # Time-series branch: every observation is a (variable, value, time) triplet.
    varis = Input(shape=(max_len,))
    values = Input(shape=(max_len,))
    times = Input(shape=(max_len,))
    varis_emb = Embedding(V+1, d)(varis)
    cve_units = int(np.sqrt(d))
    values_emb = CVE(cve_units, d)(values)
    times_emb = CVE(cve_units, d)(times)
    comb_emb = Add()([varis_emb, values_emb, times_emb]) # b, L, d
#     demo_enc = Lambda(lambda x:K.expand_dims(x, axis=-2))(demo_enc) # b, 1, d
#     comb_emb = Concatenate(axis=-2)([demo_enc, comb_emb]) # b, L+1, d
    # Variable ids are >= 1 for real observations and 0 for padding, so clipping gives a 0/1 mask.
    mask = Lambda(lambda x:K.clip(x,0,1))(varis) # b, L
#     mask = Lambda(lambda x:K.concatenate((K.ones_like(x)[:,0:1], x), axis=-1))(mask) # b, L+1
    cont_emb = Transformer(N, he, dk=None, dv=None, dff=None, dropout=dropout)(comb_emb, mask=mask)
    attn_weights = Attention(2*d)(cont_emb, mask=mask)
    # Attention-weighted sum over the sequence axis.
    fused_emb = Lambda(lambda x:K.sum(x[0]*x[1], axis=-2))([cont_emb, attn_weights])
    # embed text input
    texts = Input(shape=(text_dim,))
    # The text branch is texts -> Dense(1000) -> Dense(d). Earlier revisions also created
    # Dense(22528), Dense(10000) and Dense(5000) layers on `texts` whose outputs were
    # overwritten immediately; they were never part of the model graph but each still
    # allocated a kernel with `text_dim` rows, so they are no longer instantiated.
    text_enc = Dense(1000, activation='relu')(texts)
    text_enc = Dense(d, activation='relu')(text_enc)
    conc = Concatenate(axis=-1)([fused_emb, text_enc, demo_enc])
    fore_op = Dense(V)(conc)
    op = Dense(1, activation='sigmoid')(fore_op)
    model = Model([demo, times, values, varis, texts], op)
    if forecast:
        fore_model = Model([demo, times, values, varis, texts], fore_op)
        return [model, fore_model]
    return model

# To tune:
# 1. Transformer parameters. (N, h, dropout)
# 2. Normalization

## Pretrain on forecasting

In [16]:
# fore_train_ip[-1].shape

In [17]:
fore_valid_ip[-1].shape

(136823, 33792)

In [18]:
# lr, batch_size, samples_per_epoch, patience = 0.0005, 8, 102400, 5
# # lr, batch_size, samples_per_epoch, patience = 0.0005, 1, 1024, 5
# d, N, he, dropout = 50, 2, 4, 0.2
# model, fore_model =  build_strats(D, fore_max_len, V, d, N, he, dropout, forecast=True)
# print (fore_model.summary())

# val_losses = []

# for i in list(range(45)):
#     gc.collect()
#     fore_path = 'Exp1/exp_arc_0/models/forecasting/forecasting_'+str(i+1)+'_epochs.h5'
#     fore_model.compile(loss=forecast_loss, optimizer=Adam(lr))
#     fore_model.load_weights(fore_path)

#     val_loss = fore_model.evaluate(fore_valid_ip, fore_valid_op, batch_size=batch_size, verbose=1)
#     val_losses.append(val_loss)
#     print(f'validation loss: {val_loss}')
#     gc.collect()

# print(val_losses)

In [19]:
# Model: "model_5"
# __________________________________________________________________________________________________
#  Layer (type)                   Output Shape         Param #     Connected to                     
# ==================================================================================================
#  input_12 (InputLayer)          [(None, 880)]        0           []                               
                                                                                                  
#  input_13 (InputLayer)          [(None, 880)]        0           []                               
                                                                                                  
#  input_14 (InputLayer)          [(None, 880)]        0           []                               
                                                                                                  
#  embedding_2 (Embedding)        (None, 880, 50)      6750        ['input_12[0][0]']               
                                                                                                  
#  cve_4 (CVE)                    (None, 880, 50)      364         ['input_13[0][0]']               
                                                                                                  
#  cve_5 (CVE)                    (None, 880, 50)      364         ['input_14[0][0]']               
                                                                                                  
#  add_2 (Add)                    (None, 880, 50)      0           ['embedding_2[0][0]',            
#                                                                   'cve_4[0][0]',                  
#                                                                   'cve_5[0][0]']                  
                                                                                                  
#  lambda_4 (Lambda)              (None, 880)          0           ['input_12[0][0]']               
                                                                                                  
#  transformer_2 (Transformer)    (None, 880, 50)      39508       ['add_2[0][0]',                  
#                                                                   'lambda_4[0][0]']               
                                                                                                  
#  input_15 (InputLayer)          [(None, 33792)]      0           []                               
                                                                                                  
#  input_11 (InputLayer)          [(None, 2)]          0           []                               
                                                                                                  
#  attention_2 (Attention)        (None, 880, 1)       5200        ['transformer_2[0][0]',          
#                                                                   'lambda_4[0][0]']               
                                                                                                  
#  dense_17 (Dense)               (None, 1000)         33793000    ['input_15[0][0]']               
                                                                                                  
#  dense_12 (Dense)               (None, 100)          300         ['input_11[0][0]']               
                                                                                                  
#  lambda_5 (Lambda)              (None, 50)           0           ['transformer_2[0][0]',          
#                                                                   'attention_2[0][0]']            
                                                                                                  
#  dense_18 (Dense)               (None, 50)           50050       ['dense_17[0][0]']               
                                                                                                  
#  dense_13 (Dense)               (None, 50)           5050        ['dense_12[0][0]']               
                                                                                                  
#  concatenate_2 (Concatenate)    (None, 150)          0           ['lambda_5[0][0]',               
#                                                                   'dense_18[0][0]',               
#                                                                   'dense_13[0][0]']               
                                                                                                  
#  dense_19 (Dense)               (None, 134)          20234       ['concatenate_2[0][0]']          
                                                                                                  
# ==================================================================================================
# Total params: 33,920,820
# Trainable params: 33,920,820
# Non-trainable params: 0
# __________________________________________________________________________________________________
# None
# 17103/17103 [==============================] - 118s 7ms/step - loss: 8.0106
# validation loss: 8.010640144348145
# 17103/17103 [==============================] - 116s 7ms/step - loss: 7.2779
# validation loss: 7.27788782119751
# 17103/17103 [==============================] - 116s 7ms/step - loss: 6.8495
# validation loss: 6.849523544311523
# 17103/17103 [==============================] - 115s 7ms/step - loss: 6.5644
# validation loss: 6.5644121170043945
# 17103/17103 [==============================] - 116s 7ms/step - loss: 6.3555
# validation loss: 6.3555426597595215
# 17103/17103 [==============================] - 119s 7ms/step - loss: 6.2333
# validation loss: 6.23325777053833
# 17103/17103 [==============================] - 117s 7ms/step - loss: 6.1807
# validation loss: 6.1807050704956055
# 17103/17103 [==============================] - 117s 7ms/step - loss: 6.1158
# validation loss: 6.1158246994018555
# 17103/17103 [==============================] - 115s 7ms/step - loss: 6.0120
# validation loss: 6.0119853019714355
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.9760
# validation loss: 5.97597599029541
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.9096
# validation loss: 5.909636974334717
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.9062
# validation loss: 5.906187534332275
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.8428
# validation loss: 5.8428473472595215
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.8966
# validation loss: 5.89660120010376
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.7367
# validation loss: 5.73671817779541
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.7826
# validation loss: 5.782601833343506
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.7171
# validation loss: 5.717074394226074
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.6666
# validation loss: 5.6666059494018555
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.6324
# validation loss: 5.632350921630859
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.6569
# validation loss: 5.656907558441162
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.6176
# validation loss: 5.617555618286133
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.6440
# validation loss: 5.644014358520508
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.6394
# validation loss: 5.639357566833496
#  1401/17103 [=>............................] - ETA: 1:44 - loss: 6.9063

In [20]:
# lr, batch_size, samples_per_epoch, patience = 0.0005, 8, 102400, 5
# # lr, batch_size, samples_per_epoch, patience = 0.0005, 1, 1024, 5
# d, N, he, dropout = 50, 2, 4, 0.2
# model, fore_model =  build_strats(D, fore_max_len, V, d, N, he, dropout, forecast=True)
# print (fore_model.summary())

# val_losses = []

# for i in list(range(25)):
#     gc.collect()
#     fore_path = 'Exp1/exp_arc_0/models/forecasting/forecasting_'+str(22+i)+'_epochs.h5'
#     fore_model.compile(loss=forecast_loss, optimizer=Adam(lr))
#     fore_model.load_weights(fore_path)

#     val_loss = fore_model.evaluate(fore_valid_ip, fore_valid_op, batch_size=batch_size, verbose=1)
#     val_losses.append(val_loss)
#     print(f'validation loss: {val_loss}')
#     gc.collect()

# print(val_losses)

In [21]:
# Model: "model_1"
# __________________________________________________________________________________________________
#  Layer (type)                   Output Shape         Param #     Connected to                     
# ==================================================================================================
#  input_2 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_3 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_4 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  embedding (Embedding)          (None, 880, 50)      6750        ['input_2[0][0]']                
                                                                                                  
#  cve (CVE)                      (None, 880, 50)      364         ['input_3[0][0]']                
                                                                                                  
#  cve_1 (CVE)                    (None, 880, 50)      364         ['input_4[0][0]']                
                                                                                                  
#  add (Add)                      (None, 880, 50)      0           ['embedding[0][0]',              
#                                                                   'cve[0][0]',                    
#                                                                   'cve_1[0][0]']                  
                                                                                                  
#  lambda (Lambda)                (None, 880)          0           ['input_2[0][0]']                
                                                                                                  
#  transformer (Transformer)      (None, 880, 50)      39508       ['add[0][0]',                    
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  input_5 (InputLayer)           [(None, 33792)]      0           []                               
                                                                                                  
#  input_1 (InputLayer)           [(None, 2)]          0           []                               
                                                                                                  
#  attention (Attention)          (None, 880, 1)       5200        ['transformer[0][0]',            
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  dense_5 (Dense)                (None, 1000)         33793000    ['input_5[0][0]']                
                                                                                                  
#  dense (Dense)                  (None, 100)          300         ['input_1[0][0]']                
                                                                                                  
#  lambda_1 (Lambda)              (None, 50)           0           ['transformer[0][0]',            
#                                                                   'attention[0][0]']              
                                                                                                  
#  dense_6 (Dense)                (None, 50)           50050       ['dense_5[0][0]']                
                                                                                                  
#  dense_1 (Dense)                (None, 50)           5050        ['dense[0][0]']                  
                                                                                                  
#  concatenate (Concatenate)      (None, 150)          0           ['lambda_1[0][0]',               
#                                                                   'dense_6[0][0]',                
#                                                                   'dense_1[0][0]']                
                                                                                                  
#  dense_7 (Dense)                (None, 134)          20234       ['concatenate[0][0]']            
                                                                                                  
# ==================================================================================================
# Total params: 33,920,820
# Trainable params: 33,920,820
# Non-trainable params: 0
# __________________________________________________________________________________________________
# None
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.6440
# validation loss: 5.644014358520508
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.6394
# validation loss: 5.639357566833496
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.5691
# validation loss: 5.569100856781006
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.6783
# validation loss: 5.678278923034668
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.6189
# validation loss: 5.618879795074463
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.6489
# validation loss: 5.648892402648926
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.5398
# validation loss: 5.539790630340576
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.5430
# validation loss: 5.543003082275391
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.5273
# validation loss: 5.527316570281982
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.5792
# validation loss: 5.579209804534912
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.5294
# validation loss: 5.529357433319092
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.4699
# validation loss: 5.469902038574219
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.4234
# validation loss: 5.423367023468018
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.5262
# validation loss: 5.5261759757995605
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.4463
# validation loss: 5.446327209472656
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.4604
# validation loss: 5.460381507873535
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.4880
# validation loss: 5.487956523895264
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.4336
# validation loss: 5.433576583862305
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3665
# validation loss: 5.366487979888916
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.4384
# validation loss: 5.438374996185303
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.4524
# validation loss: 5.452399730682373
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.4178
# validation loss: 5.417797565460205
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3783
# validation loss: 5.378251552581787
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3784
# validation loss: 5.3783979415893555
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.3372
# validation loss: 5.33723783493042
# [5.644014358520508, 5.639357566833496, 5.569100856781006, 5.678278923034668, 5.618879795074463, 5.648892402648926, 5.539790630340576, 5.543003082275391, 5.527316570281982, 5.579209804534912, 5.529357433319092, 5.469902038574219, 5.423367023468018, 5.5261759757995605, 5.446327209472656, 5.460381507873535, 5.487956523895264, 5.433576583862305, 5.366487979888916, 5.438374996185303, 5.452399730682373, 5.417797565460205, 5.378251552581787, 5.3783979415893555, 5.33723783493042]

In [22]:
# DISABLED CELL (everything below is commented out, kept for reference only).
# Purpose: validation sweep over saved forecasting checkpoints. Rebuilds the
# forecasting model with build_strats, then for i in 0..69 loads
# 'forecasting_{45+i}_epochs.h5' (checkpoints 45..114), evaluates it on
# (fore_valid_ip, fore_valid_op) and appends the loss to val_losses.
# NOTE(review): relies on names not defined in this cell (gc, build_strats,
# forecast_loss, D, V, fore_max_len, fore_valid_ip, fore_valid_op) -- confirm they
# are defined by earlier cells before re-enabling.
# NOTE(review): samples_per_epoch, patience and `model` are assigned but never used
# here; compile() is re-run on every iteration with identical arguments.
# NOTE(review): this cell is repeated further down with only the start index, the
# checkpoint count and batch_size changed -- a single parameterized helper would
# replace the copies.
# lr, batch_size, samples_per_epoch, patience = 0.0005, 8, 102400, 5
# # lr, batch_size, samples_per_epoch, patience = 0.0005, 1, 1024, 5
# d, N, he, dropout = 50, 2, 4, 0.2
# model, fore_model =  build_strats(D, fore_max_len, V, d, N, he, dropout, forecast=True)
# print (fore_model.summary())

# val_losses = []

# for i in list(range(70)):
#     gc.collect()
#     fore_path = 'Exp1/exp_arc_0/models/forecasting/forecasting_'+str(45+i)+'_epochs.h5'
#     fore_model.compile(loss=forecast_loss, optimizer=Adam(lr))
#     fore_model.load_weights(fore_path)

#     val_loss = fore_model.evaluate(fore_valid_ip, fore_valid_op, batch_size=batch_size, verbose=1)
#     val_losses.append(val_loss)
#     print(f'Model {45+i} epochs')
#     print(f'validation loss: {val_loss}')
#     gc.collect()

# print(val_losses)

In [23]:
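# Pasted output, kept for reference. The 15 completed evaluations below line up with
# checkpoints 45-59 of the sweep in the previous cell (the last two losses equal the
# values reported for "Model 58" and "Model 59" in the later, labelled run); the run
# was interrupted during the following evaluation (see the truncated progress bar).
# Model: "model_1"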
# __________________________________________________________________________________________________
#  Layer (type)                   Output Shape         Param #     Connected to                     
# ==================================================================================================
#  input_2 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_3 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_4 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  embedding (Embedding)          (None, 880, 50)      6750        ['input_2[0][0]']                
                                                                                                  
#  cve (CVE)                      (None, 880, 50)      364         ['input_3[0][0]']                
                                                                                                  
#  cve_1 (CVE)                    (None, 880, 50)      364         ['input_4[0][0]']                
                                                                                                  
#  add (Add)                      (None, 880, 50)      0           ['embedding[0][0]',              
#                                                                   'cve[0][0]',                    
#                                                                   'cve_1[0][0]']                  
                                                                                                  
#  lambda (Lambda)                (None, 880)          0           ['input_2[0][0]']                
                                                                                                  
#  transformer (Transformer)      (None, 880, 50)      39508       ['add[0][0]',                    
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  input_5 (InputLayer)           [(None, 33792)]      0           []                               
                                                                                                  
#  input_1 (InputLayer)           [(None, 2)]          0           []                               
                                                                                                  
#  attention (Attention)          (None, 880, 1)       5200        ['transformer[0][0]',            
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  dense_5 (Dense)                (None, 1000)         33793000    ['input_5[0][0]']                
                                                                                                  
#  dense (Dense)                  (None, 100)          300         ['input_1[0][0]']                
                                                                                                  
#  lambda_1 (Lambda)              (None, 50)           0           ['transformer[0][0]',            
#                                                                   'attention[0][0]']              
                                                                                                  
#  dense_6 (Dense)                (None, 50)           50050       ['dense_5[0][0]']                
                                                                                                  
#  dense_1 (Dense)                (None, 50)           5050        ['dense[0][0]']                  
                                                                                                  
#  concatenate (Concatenate)      (None, 150)          0           ['lambda_1[0][0]',               
#                                                                   'dense_6[0][0]',                
#                                                                   'dense_1[0][0]']                
                                                                                                  
#  dense_7 (Dense)                (None, 134)          20234       ['concatenate[0][0]']            
                                                                                                  
# ==================================================================================================
# Total params: 33,920,820
# Trainable params: 33,920,820
# Non-trainable params: 0
# __________________________________________________________________________________________________
# None
# 17103/17103 [==============================] - 123s 7ms/step - loss: 5.3784
# validation loss: 5.3783979415893555
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3372
# validation loss: 5.33723783493042
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.3703
# validation loss: 5.370321273803711
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.4134
# validation loss: 5.4133806228637695
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.5294
# validation loss: 5.529374122619629
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.4660
# validation loss: 5.466038227081299
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3115
# validation loss: 5.311540603637695
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3645
# validation loss: 5.364523887634277
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3708
# validation loss: 5.370822429656982
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.3405
# validation loss: 5.34048318862915
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3176
# validation loss: 5.31758451461792
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3034
# validation loss: 5.303376197814941
# 17103/17103 [==============================] - 115s 7ms/step - loss: 5.4280
# validation loss: 5.4279584884643555
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.3166
# validation loss: 5.316557884216309
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.3117
# validation loss: 5.311747074127197
# 11795/17103 [===================>..........] - ETA: 36s - loss: 5.5945

In [24]:
# DISABLED CELL (everything below is commented out, kept for reference only).
# Purpose: validation sweep over saved forecasting checkpoints. Rebuilds the
# forecasting model with build_strats, then for i in 0..49 loads
# 'forecasting_{58+i}_epochs.h5' (checkpoints 58..107), evaluates it on
# (fore_valid_ip, fore_valid_op) with batch_size=8 and appends the loss to val_losses.
# NOTE(review): relies on names not defined in this cell (gc, build_strats,
# forecast_loss, D, V, fore_max_len, fore_valid_ip, fore_valid_op) -- confirm they
# are defined by earlier cells before re-enabling.
# NOTE(review): samples_per_epoch, patience and `model` are assigned but never used
# here; compile() is re-run on every iteration with identical arguments.
# lr, batch_size, samples_per_epoch, patience = 0.0005, 8, 102400, 5
# # lr, batch_size, samples_per_epoch, patience = 0.0005, 1, 1024, 5
# d, N, he, dropout = 50, 2, 4, 0.2
# model, fore_model =  build_strats(D, fore_max_len, V, d, N, he, dropout, forecast=True)
# print (fore_model.summary())

# val_losses = []

# for i in list(range(50)):
#     gc.collect()
#     fore_path = 'Exp1/exp_arc_0/models/forecasting/forecasting_'+str(58+i)+'_epochs.h5'
#     fore_model.compile(loss=forecast_loss, optimizer=Adam(lr))
#     fore_model.load_weights(fore_path)

#     val_loss = fore_model.evaluate(fore_valid_ip, fore_valid_op, batch_size=batch_size, verbose=1)
#     val_losses.append(val_loss)
#     print(f'Model {58+i} epochs')
#     print(f'validation loss: {val_loss}')
#     gc.collect()

# print(val_losses)

In [25]:
# Model: "model_1"
# __________________________________________________________________________________________________
#  Layer (type)                   Output Shape         Param #     Connected to                     
# ==================================================================================================
#  input_2 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_3 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_4 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  embedding (Embedding)          (None, 880, 50)      6750        ['input_2[0][0]']                
                                                                                                  
#  cve (CVE)                      (None, 880, 50)      364         ['input_3[0][0]']                
                                                                                                  
#  cve_1 (CVE)                    (None, 880, 50)      364         ['input_4[0][0]']                
                                                                                                  
#  add (Add)                      (None, 880, 50)      0           ['embedding[0][0]',              
#                                                                   'cve[0][0]',                    
#                                                                   'cve_1[0][0]']                  
                                                                                                  
#  lambda (Lambda)                (None, 880)          0           ['input_2[0][0]']                
                                                                                                  
#  transformer (Transformer)      (None, 880, 50)      39508       ['add[0][0]',                    
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  input_5 (InputLayer)           [(None, 33792)]      0           []                               
                                                                                                  
#  input_1 (InputLayer)           [(None, 2)]          0           []                               
                                                                                                  
#  attention (Attention)          (None, 880, 1)       5200        ['transformer[0][0]',            
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  dense_5 (Dense)                (None, 1000)         33793000    ['input_5[0][0]']                
                                                                                                  
#  dense (Dense)                  (None, 100)          300         ['input_1[0][0]']                
                                                                                                  
#  lambda_1 (Lambda)              (None, 50)           0           ['transformer[0][0]',            
#                                                                   'attention[0][0]']              
                                                                                                  
#  dense_6 (Dense)                (None, 50)           50050       ['dense_5[0][0]']                
                                                                                                  
#  dense_1 (Dense)                (None, 50)           5050        ['dense[0][0]']                  
                                                                                                  
#  concatenate (Concatenate)      (None, 150)          0           ['lambda_1[0][0]',               
#                                                                   'dense_6[0][0]',                
#                                                                   'dense_1[0][0]']                
                                                                                                  
#  dense_7 (Dense)                (None, 134)          20234       ['concatenate[0][0]']            
                                                                                                  
# ==================================================================================================
# Total params: 33,920,820
# Trainable params: 33,920,820
# Non-trainable params: 0
# __________________________________________________________________________________________________
# None
# 17103/17103 [==============================] - 122s 7ms/step - loss: 5.3166
# Model 58 epochs
# validation loss: 5.316557884216309
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.3117
# Model 59 epochs
# validation loss: 5.311747074127197
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.3675
# Model 60 epochs
# validation loss: 5.367528915405273
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.2813
# Model 61 epochs
# validation loss: 5.281347274780273
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.2865
# Model 62 epochs
# validation loss: 5.286485195159912
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.2879
# Model 63 epochs
# validation loss: 5.2878828048706055
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.3748
# Model 64 epochs
# validation loss: 5.3747663497924805
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.3134
# Model 65 epochs
# validation loss: 5.313393592834473
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.3218
# Model 66 epochs
# validation loss: 5.321843147277832
# 17103/17103 [==============================] - 121s 7ms/step - loss: 5.3583
# Model 67 epochs
# validation loss: 5.358343601226807
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.3441
# Model 68 epochs
# validation loss: 5.344141006469727
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.2642
# Model 69 epochs
# validation loss: 5.264240741729736
# 17103/17103 [==============================] - 116s 7ms/step - loss: 5.2775
# Model 70 epochs
# validation loss: 5.277490139007568
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.2479
# Model 71 epochs
# validation loss: 5.2479448318481445
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.2424
# Model 72 epochs
# validation loss: 5.242390155792236
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.2851
# Model 73 epochs
# validation loss: 5.285145282745361
# 17103/17103 [==============================] - 122s 7ms/step - loss: 5.3125
# Model 74 epochs
# validation loss: 5.312472343444824
# 17103/17103 [==============================] - 209s 10ms/step - loss: 5.2839
# Model 75 epochs
# validation loss: 5.2838873863220215
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.2878
# Model 76 epochs
# validation loss: 5.287770748138428
# 17103/17103 [==============================] - 132s 8ms/step - loss: 5.2509
# Model 77 epochs
# validation loss: 5.250929832458496
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.5873
# Model 78 epochs
# validation loss: 5.587294101715088
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.2825
# Model 79 epochs
# validation loss: 5.282538414001465
# 17103/17103 [==============================] - 123s 7ms/step - loss: 5.2175
# Model 80 epochs
# validation loss: 5.21753454208374
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.2431
# Model 81 epochs
# validation loss: 5.243126392364502
# 17103/17103 [==============================] - 122s 7ms/step - loss: 5.3190
# Model 82 epochs
# validation loss: 5.318969249725342
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.2883
# Model 83 epochs
# validation loss: 5.28834867477417
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.2399
# Model 84 epochs
# validation loss: 5.23989725112915
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.2421
# Model 85 epochs
# validation loss: 5.242076873779297
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.2790
# Model 86 epochs
# validation loss: 5.279001235961914
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.2030
# Model 87 epochs
# validation loss: 5.203044414520264
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.2410
# Model 88 epochs
# validation loss: 5.241025447845459
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.2206
# Model 89 epochs
# validation loss: 5.220558166503906
# 17103/17103 [==============================] - 145s 8ms/step - loss: 5.2832
# Model 90 epochs
# validation loss: 5.283201217651367
# 17103/17103 [==============================] - 125s 7ms/step - loss: 5.8219
# Model 91 epochs
# validation loss: 5.821893215179443
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.3367
# Model 92 epochs
# validation loss: 5.336672306060791
# 17103/17103 [==============================] - 122s 7ms/step - loss: 5.2135
# Model 93 epochs
# validation loss: 5.213508129119873
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.3157
# Model 94 epochs
# validation loss: 5.3157057762146
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.2644
# Model 95 epochs
# validation loss: 5.264395236968994
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.2146
# Model 96 epochs
# validation loss: 5.214611053466797
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.2217
# Model 97 epochs
# validation loss: 5.221678733825684
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.1910
# Model 98 epochs
# validation loss: 5.190983772277832
# 17103/17103 [==============================] - 130s 8ms/step - loss: 5.3250
# Model 99 epochs
# validation loss: 5.325014114379883
# 17103/17103 [==============================] - 138s 8ms/step - loss: 5.2448
# Model 100 epochs
# validation loss: 5.24481201171875
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.1942
# Model 101 epochs
# validation loss: 5.194184303283691
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.1924
# Model 102 epochs
# validation loss: 5.192358493804932
# 17103/17103 [==============================] - 117s 7ms/step - loss: 5.1959
# Model 103 epochs
# validation loss: 5.195933818817139
# 17103/17103 [==============================] - 120s 7ms/step - loss: 5.2568
# Model 104 epochs
# validation loss: 5.256847858428955
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.1731
# Model 105 epochs
# validation loss: 5.173136234283447
# 17103/17103 [==============================] - 118s 7ms/step - loss: 5.2069
# Model 106 epochs
# validation loss: 5.206912040710449
# 17103/17103 [==============================] - 119s 7ms/step - loss: 5.1615
# Model 107 epochs
# validation loss: 5.161481857299805
# [5.316557884216309, 5.311747074127197, 5.367528915405273, 5.281347274780273, 5.286485195159912, 5.2878828048706055, 5.3747663497924805, 5.313393592834473, 5.321843147277832, 5.358343601226807, 5.344141006469727, 5.264240741729736, 5.277490139007568, 5.2479448318481445, 5.242390155792236, 5.285145282745361, 5.312472343444824, 5.2838873863220215, 5.287770748138428, 5.250929832458496, 5.587294101715088, 5.282538414001465, 5.21753454208374, 5.243126392364502, 5.318969249725342, 5.28834867477417, 5.23989725112915, 5.242076873779297, 5.279001235961914, 5.203044414520264, 5.241025447845459, 5.220558166503906, 5.283201217651367, 5.821893215179443, 5.336672306060791, 5.213508129119873, 5.3157057762146, 5.264395236968994, 5.214611053466797, 5.221678733825684, 5.190983772277832, 5.325014114379883, 5.24481201171875, 5.194184303283691, 5.192358493804932, 5.195933818817139, 5.256847858428955, 5.173136234283447, 5.206912040710449, 5.161481857299805]

In [26]:
# DISABLED CELL (everything below is commented out, kept for reference only).
# Purpose: validation sweep over saved forecasting checkpoints. Rebuilds the
# forecasting model with build_strats, then for i in 0..8 loads
# 'forecasting_{106+i}_epochs.h5' (checkpoints 106..114), evaluates it on
# (fore_valid_ip, fore_valid_op) and appends the loss to val_losses.
# Unlike the earlier sweeps in this notebook, batch_size is 4 here instead of 8.
# NOTE(review): relies on names not defined in this cell (gc, build_strats,
# forecast_loss, D, V, fore_max_len, fore_valid_ip, fore_valid_op) -- confirm they
# are defined by earlier cells before re-enabling.
# NOTE(review): samples_per_epoch, patience and `model` are assigned but never used
# here; compile() is re-run on every iteration with identical arguments.
# lr, batch_size, samples_per_epoch, patience = 0.0005, 4, 102400, 5
# # lr, batch_size, samples_per_epoch, patience = 0.0005, 1, 1024, 5
# d, N, he, dropout = 50, 2, 4, 0.2
# model, fore_model =  build_strats(D, fore_max_len, V, d, N, he, dropout, forecast=True)
# print (fore_model.summary())

# val_losses = []

# for i in list(range(9)):
#     gc.collect()
#     fore_path = 'Exp1/exp_arc_0/models/forecasting/forecasting_'+str(106+i)+'_epochs.h5'
#     fore_model.compile(loss=forecast_loss, optimizer=Adam(lr))
#     fore_model.load_weights(fore_path)

#     val_loss = fore_model.evaluate(fore_valid_ip, fore_valid_op, batch_size=batch_size, verbose=1)
#     val_losses.append(val_loss)
#     print(f'Model {106+i} epochs')
#     print(f'validation loss: {val_loss}')
#     gc.collect()

# print(val_losses)

In [27]:
# Model: "model_1"
# __________________________________________________________________________________________________
#  Layer (type)                   Output Shape         Param #     Connected to                     
# ==================================================================================================
#  input_2 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_3 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  input_4 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
#  embedding (Embedding)          (None, 880, 50)      6750        ['input_2[0][0]']                
                                                                                                  
#  cve (CVE)                      (None, 880, 50)      364         ['input_3[0][0]']                
                                                                                                  
#  cve_1 (CVE)                    (None, 880, 50)      364         ['input_4[0][0]']                
                                                                                                  
#  add (Add)                      (None, 880, 50)      0           ['embedding[0][0]',              
#                                                                   'cve[0][0]',                    
#                                                                   'cve_1[0][0]']                  
                                                                                                  
#  lambda (Lambda)                (None, 880)          0           ['input_2[0][0]']                
                                                                                                  
#  transformer (Transformer)      (None, 880, 50)      39508       ['add[0][0]',                    
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  input_5 (InputLayer)           [(None, 33792)]      0           []                               
                                                                                                  
#  input_1 (InputLayer)           [(None, 2)]          0           []                               
                                                                                                  
#  attention (Attention)          (None, 880, 1)       5200        ['transformer[0][0]',            
#                                                                   'lambda[0][0]']                 
                                                                                                  
#  dense_5 (Dense)                (None, 1000)         33793000    ['input_5[0][0]']                
                                                                                                  
#  dense (Dense)                  (None, 100)          300         ['input_1[0][0]']                
                                                                                                  
#  lambda_1 (Lambda)              (None, 50)           0           ['transformer[0][0]',            
#                                                                   'attention[0][0]']              
                                                                                                  
#  dense_6 (Dense)                (None, 50)           50050       ['dense_5[0][0]']                
                                                                                                  
#  dense_1 (Dense)                (None, 50)           5050        ['dense[0][0]']                  
                                                                                                  
#  concatenate (Concatenate)      (None, 150)          0           ['lambda_1[0][0]',               
#                                                                   'dense_6[0][0]',                
#                                                                   'dense_1[0][0]']                
                                                                                                  
#  dense_7 (Dense)                (None, 134)          20234       ['concatenate[0][0]']            
                                                                                                  
# ==================================================================================================
# Total params: 33,920,820
# Trainable params: 33,920,820
# Non-trainable params: 0
# __________________________________________________________________________________________________
# None
# 34206/34206 [==============================] - 169s 5ms/step - loss: 5.2069
# Model 106 epochs
# validation loss: 5.206903457641602
# 34206/34206 [==============================] - 160s 5ms/step - loss: 5.1615
# Model 107 epochs
# validation loss: 5.1614837646484375
# 34206/34206 [==============================] - 164s 5ms/step - loss: 5.1785
# Model 108 epochs
# validation loss: 5.178544521331787
# 34206/34206 [==============================] - 159s 5ms/step - loss: 5.1542
# Model 109 epochs
# validation loss: 5.154242038726807
# 34206/34206 [==============================] - 162s 5ms/step - loss: 5.2161
# Model 110 epochs
# validation loss: 5.216117858886719
# 34206/34206 [==============================] - 164s 5ms/step - loss: 5.1453
# Model 111 epochs
# validation loss: 5.145287036895752
# 34206/34206 [==============================] - 163s 5ms/step - loss: 5.1825
# Model 112 epochs
# validation loss: 5.18254280090332
# 34206/34206 [==============================] - 160s 5ms/step - loss: 5.2167
# Model 113 epochs
# validation loss: 5.216660499572754
# 34206/34206 [==============================] - 162s 5ms/step - loss: 5.2047
# Model 114 epochs
# validation loss: 5.204678535461426
# [5.206903457641602, 5.1614837646484375, 5.178544521331787, 5.154242038726807, 5.216117858886719, 5.145287036895752, 5.18254280090332, 5.216660499572754, 5.204678535461426]

In [30]:
# Evaluate a range of saved forecasting checkpoints on the validation set.
# The forecasting model is built and compiled once; each checkpoint's weights
# are then loaded in turn and its validation loss is recorded in `val_losses`.

# Hyperparameters. `samples_per_epoch` and `patience` are not referenced in
# this cell; they are kept so the notebook-level names stay defined.
lr, batch_size, samples_per_epoch, patience = 0.0005, 4, 102400, 5
# lr, batch_size, samples_per_epoch, patience = 0.0005, 1, 1024, 5
d, N, he, dropout = 50, 2, 4, 0.2
model, fore_model = build_strats(D, fore_max_len, V, d, N, he, dropout, forecast=True)
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
fore_model.summary()

# Compiling is independent of which checkpoint is loaded, so do it once
# instead of on every loop iteration.
fore_model.compile(loss=forecast_loss, optimizer=Adam(lr))

first_epoch = 114     # epoch number of the first checkpoint to evaluate
n_checkpoints = 50    # number of consecutive checkpoints to evaluate

val_losses = []

for i in range(n_checkpoints):
    gc.collect()
    epoch = first_epoch + i
    fore_path = 'Exp1/exp_arc_0/models/forecasting/forecasting_'+str(epoch)+'_epochs.h5'
    fore_model.load_weights(fore_path)

    val_loss = fore_model.evaluate(fore_valid_ip, fore_valid_op, batch_size=batch_size, verbose=1)
    val_losses.append(val_loss)
    print(f'Model {epoch} epochs')
    print(f'validation loss: {val_loss}')
    # Free per-iteration garbage before loading the next checkpoint.
    gc.collect()

print(val_losses)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
 input_5 (InputLayer)           [(None, 880)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 880, 50)      6750        ['input_3[0][0]']                
                                                                                            