This notebook use for tunning model using embeddings file and language model embedder

### Check GPU hardware

In [1]:
!nvidia-smi

Fri Mar 11 07:50:11 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
|  0%   52C    P8    16W / 170W |     15MiB / 12053MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Import libraries 

In [2]:
# Libraries for system and debug
import sys
import pdb
import os
import shutil
from datetime import datetime

# Class for converting sequences to tensors
from seq2tensor import s2t

# Libraries for neural network training
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GRU, LSTM, Bidirectional, Input, Conv1D, Conv2D
from tensorflow.keras.layers import Add, Flatten, subtract, multiply, concatenate
from tensorflow.keras.layers import MaxPooling1D, AveragePooling1D, GlobalAveragePooling1D, MaxPooling2D
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.layers import Dropout, BatchNormalization
from tensorflow.keras.utils import Sequence
from tensorflow.keras import mixed_precision
from tensorflow import keras
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.utils import get_custom_objects
from tensorflow.keras.layers import Activation
from keras.callbacks import ModelCheckpoint
from tensorflow.keras import regularizers
import tensorflow_addons as tfa
from sklearn.model_selection import KFold, ShuffleSplit
from sklearn.model_selection import train_test_split


# Import accessory modules
import numpy as np
import h5py
import gc
from tqdm import tqdm
from pathlib import Path
import pandas as pd

### Set CUDA environment variables

In [3]:
os.chdir('../')
os.getcwd()

'/home/wmbio/WORK/gitworking/PIPR'

In [4]:
### Setting RAM GPU for training growth 
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


2022-03-11 07:50:22.339284: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-11 07:50:22.585017: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10243 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060, pci bus id: 0000:01:00.0, compute capability: 8.6


In [5]:
# ============================================
# Optimisation Flags - Do not remove
# ============================================

# Disables caching (when set to 1) or enables caching (when set to 0) for just-in-time-compilation. When disabled,
# no binary code is added to or retrieved from the cache.
os.environ['CUDA_CACHE_DISABLE'] = '0' # orig is 0

# When set to 1, forces the device driver to ignore any binary code embedded in an application 
# (see Application Compatibility) and to just-in-time compile embedded PTX code instead.
# If a kernel does not have embedded PTX code, it will fail to load. This environment variable can be used to
# validate that PTX code is embedded in an application and that its just-in-time compilation works as expected to guarantee application 
# forward compatibility with future architectures.
os.environ['CUDA_FORCE_PTX_JIT'] = '1'# no orig


os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
os.environ['TF_GPU_THREAD_COUNT']='1'

os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'

os.environ['TF_ADJUST_HUE_FUSED'] = '1'
os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

os.environ['TF_SYNC_ON_FINISH'] = '0'
os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
os.environ['TF_DISABLE_NVTX_RANGES'] = '1'
os.environ["TF_ENABLE_AUTO_MIXED_PRECISION_GRAPH_REWRITE"] = "1"



# =================================================
# mixed_precision.set_global_policy('mixed_float16')

### Define custom function

In [None]:
def generator_pair(seq_tensor, class_labels, pair_index):
    for index in pair_index:
        yield {"seq1": seq_tensor[seq_index1[index]], "seq2": seq_tensor[seq_index2[index]]}, class_labels[index]

def generator_pair_predict(seq_tensor, class_labels, pair_index):
    for index in pair_index:
        yield {"seq1": seq_tensor[seq_index1[index]], "seq2": seq_tensor[seq_index2[index]]}

def input_preprocess(id2seq_file, ds_file, use_emb):
    id2index = {}
    seqs = []
    index = 0
    sid1_index = 0
    sid2_index = 1
    label_index = 2
    
    for line in open(id2seq_file):
        line = line.strip().split('\t')
        id2index[line[0]] = index
        seqs.append(line[1])
        index += 1

    seq_array = []
    id2_aid = {}
    sid = 0

    seq2t = s2t(use_emb)
    max_data = -1
    limit_data = max_data > 0
    raw_data = []
    skip_head = True
    x = None
    count = 0
    
    # Create sequence array as a list of protein strings
    for line in tqdm(open(ds_file)):
        if skip_head:
            skip_head = False
            continue
        line = line.rstrip('\n').rstrip('\r').split('\t')
        if id2index.get(line[sid1_index]) is None or id2index.get(line[sid2_index]) is None:
            continue
        if id2_aid.get(line[sid1_index]) is None:
            id2_aid[line[sid1_index]] = sid
            sid += 1
            seq_array.append(seqs[id2index[line[sid1_index]]])
        line[sid1_index] = id2_aid[line[sid1_index]]
        if id2_aid.get(line[sid2_index]) is None:
            id2_aid[line[sid2_index]] = sid
            sid += 1
            seq_array.append(seqs[id2index[line[sid2_index]]])
        line[sid2_index] = id2_aid[line[sid2_index]]
        raw_data.append(line)
        if limit_data:
            count += 1
            if count >= max_data:
                break

    len_m_seq = np.array([len(line.split()) for line in seq_array])
    avg_m_seq = int(np.average(len_m_seq)) + 1
    max_m_seq = max(len_m_seq)
    dim = seq2t.dim

    # seq_tensor is tensor representation of dataset having shape of (number_of_sequences, padding_length, embedding_dim_of_aa)
    # Random for distribution of class labels
    seq_tensor = np.array([seq2t.embed_normalized(line, seq_size) for line in tqdm(seq_array)]).astype('float16')

    # Extract index of 1st and 2nd sequences in pairs
    seq_index1 = np.array([line[sid1_index] for line in tqdm(raw_data)])
    seq_index2 = np.array([line[sid2_index] for line in tqdm(raw_data)])

    # Assign labels for pairs of sequences
    class_map = {'0': 1, '1': 0}
    class_labels = np.zeros((len(raw_data), 2))
    for i in range(len(raw_data)):
        class_labels[i][class_map[raw_data[i][label_index]]] = 1
        
    return seq_tensor, seq_index1, seq_index2, class_labels, dim

def leaky_relu(x, alpha = .3):
    return tf.keras.backend.maximum(alpha*x, x)

def build_model(hparams):
    # Input of sequence tensor representations 
    seq_input1 = Input(shape=(seq_size, dim), name='seq1')
    seq_input2 = Input(shape=(seq_size, dim), name='seq2')

    # Define Conv1D and Bi-RNN (GRU/LSTM) use in architecture
    l1=Conv1D(hparams[HP_CONV_HIDDEN_DIM], hparams[HP_KERNEL_SIZE], activation=hparams[HP_ACTIVATION_CONV], padding=hparams[HP_CONV_PADDING])
    r1=Bidirectional(GRU(hparams[HP_RNN_HIDDEN_DIM], return_sequences=True))
    l2=Conv1D(hparams[HP_CONV_HIDDEN_DIM], hparams[HP_KERNEL_SIZE], activation=hparams[HP_ACTIVATION_CONV], padding=hparams[HP_CONV_PADDING])
    r2=Bidirectional(GRU(hparams[HP_RNN_HIDDEN_DIM], return_sequences=True))
    l3=Conv1D(hparams[HP_CONV_HIDDEN_DIM], hparams[HP_KERNEL_SIZE], activation=hparams[HP_ACTIVATION_CONV], padding=hparams[HP_CONV_PADDING])
    r3=Bidirectional(GRU(hparams[HP_RNN_HIDDEN_DIM], return_sequences=True))
    l4=Conv1D(hparams[HP_CONV_HIDDEN_DIM], hparams[HP_KERNEL_SIZE], activation=hparams[HP_ACTIVATION_CONV], padding=hparams[HP_CONV_PADDING])
    r4=Bidirectional(GRU(hparams[HP_RNN_HIDDEN_DIM], return_sequences=True))
    l5=Conv1D(hparams[HP_CONV_HIDDEN_DIM], hparams[HP_KERNEL_SIZE], activation=hparams[HP_ACTIVATION_CONV], padding=hparams[HP_CONV_PADDING])
    r5=Bidirectional(GRU(hparams[HP_RNN_HIDDEN_DIM], return_sequences=True))
    l6=Conv1D(hparams[HP_CONV_HIDDEN_DIM], hparams[HP_KERNEL_SIZE], activation=hparams[HP_ACTIVATION_CONV], padding=hparams[HP_CONV_PADDING])
    
    # Siamese architecture

    ### 1st sibling

    # 1st Block RCNN 
    s1=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l1(seq_input1))
    s1=concatenate([r1(s1), s1])

    # 2nd Block RCNN
    s1=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l2(s1))
    s1=concatenate([r2(s1), s1])

    # 3rd Block RCNN
    s1=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l3(s1))
    s1=concatenate([r3(s1), s1])

    # 4th Block RCNN 
    s1=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l4(s1))
    s1=concatenate([r4(s1), s1])

    # 5th Block RCNN
    s1=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l5(s1))
    s1=concatenate([r5(s1), s1])
    
    # Last convolution
    s1=l6(s1)
    s1=GlobalAveragePooling1D()(s1)

    ### 2nd sibling

    # 1st block RCNN
    s2=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l1(seq_input2))
    s2=concatenate([r1(s2), s2])

    # 2nd block RCNN
    s2=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l2(s2))
    s2=concatenate([r2(s2), s2])

    # 3rd block RCNN
    s2=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l3(s2))
    s2=concatenate([r3(s2), s2])

    # 4th block RCNN
    s2=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l4(s2))
    s2=concatenate([r4(s2), s2])

    # 5th block RCNN
    s2=MaxPooling1D(hparams[HP_POOLING_KERNEL])(l5(s2))
    s2=concatenate([r5(s2), s2])

    # Last convolution
    s2=l6(s2)
    s2=GlobalAveragePooling1D()(s2)

    ### Combine two siblings of siamese architecture
    merge_text = multiply([s1, s2])
    

    #### MLP Part
    # Set initializer
    
    # First dense
    x = Dense(hparams[HP_FIRST_DENSE], activation=hparams[HP_ACTIVATION])(merge_text)
    # x = tf.keras.layers.LeakyReLU(alpha=.3)(x)
    x = Dropout(hparams[HP_DROPOUT])(x)

    # Second dense
    x = Dense(int((hparams[HP_CONV_HIDDEN_DIM]+7)/2), activation=hparams[HP_ACTIVATION])(x)
    # x = tf.keras.layers.LeakyReLU(alpha=.3)(x)
    x = Dropout(hparams[HP_DROPOUT])(x)

    # Last softmax
    main_output = Dense(2, activation='softmax')(x)

    # Combine to form functional model
    merge_model = Model(inputs=[seq_input1, seq_input2], outputs=[main_output])
    return merge_model

def seq_max(id2seq_file):
    seqs = []
    for line in open(id2seq_file):
        line = line.strip().split('\t')
        seqs.append(len(line[1]))
    
    return max(seqs)

### Hyperparameter set by default

In [9]:
# Default hyperparameters
CONV_HIDDEN_DIM = 50
RNN_HIDDEN = 50
N_EPOCHS = 50
HIDDEN_DIM=50
BATCH_SIZE = 32
DTYPE='float16'
LEARNING_RATE=.001
EPSILON=1e-6
adam = Adam(learning_rate=LEARNING_RATE, amsgrad=True, epsilon=EPSILON)
MAX_DATASET_SIZE = 11187
DATASET_SIZE = MAX_DATASET_SIZE
KERNEL_SIZE = 3
POOLING_KERNEL = 3
seq_size = seq_max('data/wmbio_set/Train_set/human_custom_seq.tsv')

### Split the dataset

In [10]:
# human custom
seq_tensor, seq_index1, seq_index2, class_labels, dim = input_preprocess(id2seq_file='data/wmbio_set/Train_set/human_custom_seq.tsv',
                                                                         ds_file='data/wmbio_set/Train_set/human_custom_ppi_pair.tsv', 
                                                                         use_emb = 'data/ac5_aph.txt')



0it [00:00, ?it/s][A
108156it [00:00, 767612.69it/s][A

  0%|                                                                                               | 0/8992 [00:00<?, ?it/s][A
  9%|███████▎                                                                           | 794/8992 [00:00<00:01, 7938.85it/s][A
 18%|██████████████▌                                                                   | 1602/8992 [00:00<00:00, 8020.83it/s][A
 27%|██████████████████████                                                            | 2413/8992 [00:00<00:00, 8060.86it/s][A
 36%|█████████████████████████████▎                                                    | 3220/8992 [00:00<00:00, 8032.55it/s][A
 45%|████████████████████████████████████▋                                             | 4024/8992 [00:00<00:00, 7936.17it/s][A
 54%|███████████████████████████████████████████▉                                      | 4818/8992 [00:00<00:00, 7891.17it/s][A
 62%|██████████████████████████████████

### Search for optimal configurations

### Define callbacks for monitor

In [26]:
### Learning rate schedule for optimization during training
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.4,
    patience=4,
    verbose=0,
    mode="auto",
    min_lr=1e-5)

# Schedule early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    verbose=1,
    patience=6,
    mode='min',
    restore_best_weights=True)

final_reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="loss",
    factor=0.4,
    patience=4,
    verbose=0,
    mode="auto",
    min_lr=1e-5)

final_early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy', 
    verbose=1,
    patience=7,
    mode='max',
    restore_best_weights=True)

### Define performance metrics

In [13]:
METRICS = [
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      # tfa.metrics.MatthewsCorrelationCoefficient(num_classes=2, name='mcc'),
      tfa.metrics.F1Score(num_classes=2, threshold=0.5, name='f1-score'),
      # keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
]

### Summary of model architecture

In [14]:
HP_EPSILON = hp.HParam('epsilon', hp.Discrete([1e-6]))
HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([1e-3]))
HP_FIRST_DENSE = hp.HParam('first_dense', hp.Discrete([100]))
HP_KERNEL_SIZE = hp.HParam('kernel_size', hp.Discrete([3]))
HP_POOLING_KERNEL = hp.HParam('pooling_kernel', hp.Discrete([3]))
HP_CONV_HIDDEN_DIM = hp.HParam('conv_hidden_dim', hp.Discrete([50]))
HP_RNN_HIDDEN_DIM = hp.HParam('rnn_hidden_dim', hp.Discrete([50]))
HP_ACTIVATION = hp.HParam('activation', hp.Discrete(['leaky_relu']))
HP_ACTIVATION_CONV = hp.HParam('activation_conv', hp.Discrete(['linear']))
HP_REGULARIZER = hp.HParam('regularizer', hp.Discrete([0]))
HP_CONV_PADDING = hp.HParam('conv_padding', hp.Discrete(['valid']))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0e-1]))
HP_BATCH_SIZE = hp.HParam('batch_size', hp.Discrete([256]))
HP_LEAKY_RELU = hp.HParam('leaky_relu', hp.Discrete([3e-1]))
METRIC_ACCURACY = 'accuracy'

hparams = {
  HP_EPSILON: EPSILON,
  HP_LEARNING_RATE: LEARNING_RATE,
  HP_FIRST_DENSE: 100,
  HP_KERNEL_SIZE: 3,
  HP_POOLING_KERNEL: 3,
  HP_CONV_HIDDEN_DIM: 50,
  HP_RNN_HIDDEN_DIM: 50,
  HP_ACTIVATION: 'leaky_relu',
  HP_ACTIVATION_CONV: 'relu',
  HP_REGULARIZER: 0,
  HP_CONV_PADDING: 'valid',
  HP_DROPOUT: 3e-1,
  HP_BATCH_SIZE: 256,
  HP_LEAKY_RELU: 3e-1
}

### K-FOLD VALIDATION

In [None]:
from sklearn.model_selection import ShuffleSplit
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cnt = 0

# save models
SAVE_MODEL = 'save_model_baseline/'
Path(SAVE_MODEL).mkdir(parents=True, exist_ok=True)

# log
with tf.summary.create_file_writer(SAVE_MODEL + 'logs/hparam_tuning').as_default():
    hp.hparams_config(
    hparams=[HP_EPSILON,HP_LEARNING_RATE,HP_FIRST_DENSE, HP_KERNEL_SIZE, HP_POOLING_KERNEL, HP_CONV_HIDDEN_DIM, HP_RNN_HIDDEN_DIM, HP_ACTIVATION, HP_ACTIVATION_CONV, HP_REGULARIZER, HP_CONV_PADDING, HP_DROPOUT, HP_BATCH_SIZE, HP_LEAKY_RELU],
    metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
  )


for train, test in kf.split(class_labels):
    cnt+=1
    merge_model = None
    merge_model = build_model(hparams)  
    tf.keras.utils.plot_model(merge_model, to_file=SAVE_MODEL + 'model.png', show_shapes=True)


    merge_model.compile(optimizer=Adam(learning_rate=hparams[HP_LEARNING_RATE], amsgrad=True, epsilon=hparams[HP_EPSILON]), 
                      loss='categorical_crossentropy', metrics=METRICS)
    
    # Create train
    # from generator
    train_dataset = tf.data.Dataset.from_generator(generator_pair, 
                                                   args=[seq_tensor, class_labels, train], 
                                                   output_types=({"seq1": DTYPE, "seq2": DTYPE}, DTYPE), 
                                                   output_shapes = ({"seq1": (seq_size, dim), "seq2": (seq_size, dim)}, (2,)) )
    train_dataset = train_dataset.shuffle(1024).repeat(N_EPOCHS).batch(hparams[HP_BATCH_SIZE])
    train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)
    
    # Create test
    test_dataset = tf.data.Dataset.from_generator(generator_pair, args=[seq_tensor, class_labels, test], 
                                                  output_types=({"seq1": DTYPE, "seq2": DTYPE}, DTYPE), 
                                                  output_shapes = ({"seq1": (seq_size, dim), "seq2": (seq_size, dim)}, (2,)) )
    test_dataset = test_dataset.batch(hparams[HP_BATCH_SIZE])
    
    # Save the best model base on val_accuracy
    checkpoint = ModelCheckpoint(filepath=SAVE_MODEL + str(cnt)+'-fold_best_model.hdf5', 
                                 monitor='val_accuracy',verbose=1, save_best_only=True, mode='max')
    
    # Fit model
    print(f'==================== Training time =====================')
    history_model = merge_model.fit(train_dataset, 
                                    epochs=N_EPOCHS, 
                                    steps_per_epoch=len(train) // 128, 
                                    validation_data=test_dataset,
                                    callbacks=[checkpoint, reduce_lr, early_stopping,                                               
                                              tf.keras.callbacks.CSVLogger(SAVE_MODEL + 'history.csv')])
    # file rename
    shutil.move(SAVE_MODEL + 'history.csv', SAVE_MODEL + str(cnt) + '-fold_history.csv')

Epoch 1/50


2022-03-08 09:00:31.716999: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-03-08 09:00:34.528182: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8204
2022-03-08 09:00:35.906688: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.



Epoch 00001: val_accuracy improved from -inf to 0.93764, saving model to save_model/1-fold_best_model.hdf5
Epoch 2/50





Epoch 00002: val_accuracy improved from 0.93764 to 0.94979, saving model to save_model/1-fold_best_model.hdf5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.94979 to 0.96870, saving model to save_model/1-fold_best_model.hdf5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.96870 to 0.97286, saving model to save_model/1-fold_best_model.hdf5
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.97286
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.97286 to 0.97314, saving model to save_model/1-fold_best_model.hdf5
Epoch 7/50

Epoch 00007: val_accuracy did not improve from 0.97314
Epoch 8/50

Epoch 00008: val_accuracy improved from 0.97314 to 0.97541, saving model to save_model/1-fold_best_model.hdf5
Epoch 9/50

Epoch 00009: val_accuracy improved from 0.97541 to 0.97564, saving model to save_model/1-fold_best_model.hdf5
Epoch 10/50

Epoch 00010: val_accuracy improved from 0.97564 to 0.97804, saving model to save_model/1-fold_best_model.hdf5
Restoring model weight




Epoch 00002: val_accuracy improved from 0.93801 to 0.96339, saving model to save_model/2-fold_best_model.hdf5
Epoch 3/50

Epoch 00003: val_accuracy did not improve from 0.96339
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.96339 to 0.96380, saving model to save_model/2-fold_best_model.hdf5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.96380 to 0.97351, saving model to save_model/2-fold_best_model.hdf5
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.97351 to 0.97471, saving model to save_model/2-fold_best_model.hdf5
Epoch 7/50

Epoch 00007: val_accuracy improved from 0.97471 to 0.97550, saving model to save_model/2-fold_best_model.hdf5
Epoch 8/50

Epoch 00008: val_accuracy improved from 0.97550 to 0.97707, saving model to save_model/2-fold_best_model.hdf5
Epoch 9/50

Epoch 00009: val_accuracy improved from 0.97707 to 0.97762, saving model to save_model/2-fold_best_model.hdf5
Epoch 10/50

Epoch 00010: val_accuracy improved from 0.97762 to 0.97776, saving model to s




Epoch 00002: val_accuracy improved from 0.94189 to 0.96205, saving model to save_model/3-fold_best_model.hdf5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.96205 to 0.96671, saving model to save_model/3-fold_best_model.hdf5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.96671 to 0.96981, saving model to save_model/3-fold_best_model.hdf5
Epoch 5/50

Epoch 00005: val_accuracy improved from 0.96981 to 0.97388, saving model to save_model/3-fold_best_model.hdf5
Epoch 6/50

Epoch 00006: val_accuracy improved from 0.97388 to 0.97647, saving model to save_model/3-fold_best_model.hdf5
Epoch 7/50

Epoch 00007: val_accuracy improved from 0.97647 to 0.97762, saving model to save_model/3-fold_best_model.hdf5
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.97762
Epoch 9/50

Epoch 00009: val_accuracy did not improve from 0.97762
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.97762
Epoch 11/50

Epoch 00011: val_accuracy improved from 0.97762 to 0.98007, savi




Epoch 00002: val_accuracy improved from 0.92881 to 0.95645, saving model to save_model/4-fold_best_model.hdf5
Epoch 3/50

Epoch 00003: val_accuracy improved from 0.95645 to 0.96445, saving model to save_model/4-fold_best_model.hdf5
Epoch 4/50

Epoch 00004: val_accuracy improved from 0.96445 to 0.96963, saving model to save_model/4-fold_best_model.hdf5
Epoch 5/50

Epoch 00005: val_accuracy did not improve from 0.96963
Epoch 6/50

Epoch 00006: val_accuracy did not improve from 0.96963
Epoch 7/50

Epoch 00007: val_accuracy improved from 0.96963 to 0.97499, saving model to save_model/4-fold_best_model.hdf5
Epoch 8/50

Epoch 00008: val_accuracy did not improve from 0.97499
Epoch 9/50

Epoch 00009: val_accuracy improved from 0.97499 to 0.97675, saving model to save_model/4-fold_best_model.hdf5
Epoch 10/50

Epoch 00010: val_accuracy did not improve from 0.97675
Epoch 11/50

Epoch 00011: val_accuracy improved from 0.97675 to 0.97762, saving model to save_model/4-fold_best_model.hdf5
Epoch 12/

## Final Modeling

In [28]:
merge_model = None
merge_model = build_model(hparams)  

# ADAM
merge_model.compile(optimizer=Adam(learning_rate=hparams[HP_LEARNING_RATE], 
                                   amsgrad=True, epsilon=hparams[HP_EPSILON]), 
                    loss='categorical_crossentropy', metrics=METRICS)

# Create train
train_dataset = tf.data.Dataset.from_generator(generator_pair, 
                                               args=[seq_tensor, class_labels, np.arange(len(class_labels))], 
                                               output_types=({"seq1": DTYPE, "seq2": DTYPE}, DTYPE), 
                                               output_shapes = ({"seq1": (seq_size, dim), "seq2": (seq_size, dim)}, (2,)) )
train_dataset = train_dataset.shuffle(1024).repeat(N_EPOCHS).batch(hparams[HP_BATCH_SIZE])
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

# Fit model
history = merge_model.fit(train_dataset, 
                steps_per_epoch=len(seq_tensor) // 128, 
                epochs=30,
                callbacks=[final_reduce_lr, final_early_stopping])

# model save
SAVE_MODEL = 'final_model/'
Path(SAVE_MODEL).mkdir(parents=True, exist_ok=True)
merge_model.save(SAVE_MODEL + 'PIPR_baseline_final.h5')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
