In [1]:
import torch
import numpy as np
import sentencepiece as spm
import torch.nn as nn
import torch.optim as optim
import random
import json
from datasets import load_dataset
from torch.utils.data import DataLoader
import os
import time
import pandas as pd
import tensorflow as tf
import keras
from tensorflow.keras.layers import Bidirectional, LSTM, Input, Dense, TimeDistributed, Embedding, Concatenate
from tensorflow.keras.models import Model

In [2]:
tf.keras.backend.clear_session()
# from tensorflow.keras.mixed_precision import set_global_policy
# set_global_policy('mixed_float16')

In [3]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
       tf.config.experimental.set_memory_growth(gpu,True)

In [4]:
sp = spm.SentencePieceProcessor()
sp.load("/kaggle/input/nepali-summarization-tokenizer/summarization_50000.model")

True

In [5]:
parameters = {
    'VOC_SIZE': 50_000,

    'ENCODER_SEQUENCE_LENGTH': 256,
    'DECODER_SEQUENCE_LENGTH': 12,
    
    'EMBEDDING_DIMENSION': 100,
    
    'ENCODER_HIDDEN_DIM': 64,
    'DECODER_HIDDEN_DIM': 128,
    
    'DROPOUT': 0.3,

    'BATCH_SIZE': 128,
    'EPOCHS': 16,
    'EARLY_STOPPING': 3,
    'L2_REG': 0.01,
    
    'LEARNING_RATE': 1e-3,
    'GRAD_CLIP': 1.0,

    'PAD_TOKEN': '<pad>',
    'UNK_TOKEN': '<unk>',
    'SOS_TOKEN': '<s>',
    'EOS_TOKEN': '</s>',

    'PAD_TOKEN_ID': sp.pad_id(),
    'UNK_TOKEN_ID': sp.unk_id(),
    'SOS_TOKEN_ID': sp.bos_id(),
    'EOS_TOKEN_ID': sp.eos_id(),

    'ATTENTION_TYPE': 'bahdanau',

    'COVERAGE_WEIGHT': 1.0,
}

In [6]:
df_eval = pd.read_csv('/kaggle/input/nepali-summarization-set-cleaned/summarization_set_cleaned_val.csv')

In [7]:
dataset_files = {
    "train": "/kaggle/input/nepali-summarization-set-cleaned/summarization_set_cleaned_train.csv",
    "test": "/kaggle/input/nepali-summarization-set-cleaned/summarization_set_cleaned_test.csv",
    "eval": "/kaggle/input/nepali-summarization-set-cleaned/summarization_set_cleaned_val.csv"
}

In [8]:
def collate_fn(batch: torch.tensor):
    data = {
        'encoder_inputs': [],
        'decoder_targets': [],
    }

    for row in batch:
        news_encoded = sp.encode(row['news'])
        title_encoded = sp.encode(row['title']) + [sp.eos_id()]

        if len(news_encoded) >= parameters['ENCODER_SEQUENCE_LENGTH']:
            data['encoder_inputs'].append(news_encoded[:parameters['ENCODER_SEQUENCE_LENGTH']])
        else:
            data['encoder_inputs'].append(news_encoded + [sp.pad_id()] * (parameters['ENCODER_SEQUENCE_LENGTH'] - len(news_encoded)))

        if len(title_encoded) >= parameters['DECODER_SEQUENCE_LENGTH']:
            data['decoder_targets'].append(title_encoded[:parameters['DECODER_SEQUENCE_LENGTH']])
        else:
            data['decoder_targets'].append(title_encoded + [sp.pad_id()] * (parameters['DECODER_SEQUENCE_LENGTH'] - len(title_encoded)))

    return data

In [9]:
dataset = load_dataset("csv", data_files=dataset_files, streaming=True)
train_dataset = dataset["train"]
test_dataset = dataset["test"]
eval_dataset = dataset["eval"]

In [10]:
train_dataloader = DataLoader(train_dataset, batch_size=parameters['BATCH_SIZE'], collate_fn=collate_fn)
test_dataloader = DataLoader(test_dataset, batch_size=parameters['BATCH_SIZE'], collate_fn=collate_fn)
eval_dataloader = DataLoader(eval_dataset, batch_size=parameters['BATCH_SIZE'], collate_fn=collate_fn)

In [11]:
train_inputs = None
train_targets = None
for batch in train_dataloader:
    if train_inputs is None:
        train_inputs = batch['encoder_inputs']
        train_targets = batch['decoder_targets']
    else:
        train_inputs = train_inputs + batch['encoder_inputs']
        train_targets = train_targets + batch['decoder_targets']
train_inputs = np.array(train_inputs)
train_targets = np.array(train_targets)

In [12]:
test_inputs = None
test_targets = None
for batch in test_dataloader:
    if test_inputs is None:
        test_inputs = batch['encoder_inputs']
        test_targets = batch['decoder_targets']
    else:
        test_inputs = test_inputs + batch['encoder_inputs']
        test_targets = test_targets + batch['decoder_targets']
test_inputs = np.array(test_inputs)
test_targets = np.array(test_targets)

In [13]:
eval_inputs = None
eval_targets = None
for batch in eval_dataloader:
    if eval_inputs is None:
        eval_inputs = batch['encoder_inputs']
        eval_targets = batch['decoder_targets']
    else:
        eval_inputs = eval_inputs + batch['encoder_inputs']
        eval_targets = eval_targets + batch['decoder_targets']
eval_inputs = np.array(eval_inputs)
eval_targets = np.array(eval_targets)

In [14]:
train = tf.data.Dataset.from_tensor_slices((train_inputs, train_targets)).batch(parameters["BATCH_SIZE"], drop_remainder=False)
test = tf.data.Dataset.from_tensor_slices((test_inputs, test_targets)).batch(parameters["BATCH_SIZE"], drop_remainder=False)
val = tf.data.Dataset.from_tensor_slices((eval_inputs, eval_targets)).batch(parameters["BATCH_SIZE"], drop_remainder=False)

In [15]:
train_inputs.shape, train_targets.shape, test_inputs.shape, test_targets.shape, eval_inputs.shape, eval_targets.shape

((260787, 256),
 (260787, 12),
 (30788, 256),
 (30788, 12),
 (15246, 256),
 (15246, 12))

In [16]:
@tf.keras.utils.register_keras_serializable()
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super(BahdanauAttention, self).__init__(**kwargs)
        self.units = units

    # def build(self, input_shape):
        self.W = tf.keras.layers.Dense(self.units, use_bias=False)
        self.U = tf.keras.layers.Dense(self.units, use_bias=False)
        self.Wc = tf.keras.layers.Dense(self.units)
        self.V = tf.keras.layers.Dense(1, use_bias=False)

    def build(self, input_shape):
        encoder_outputs, decoder_hidden_state = input_shape
        self.W.build((decoder_hidden_state[0], 1, decoder_hidden_state[1]))
        self.U.build(encoder_outputs)
        self.Wc.build((encoder_outputs[0], encoder_outputs[1], 1, 1))
        self.V.build(encoder_outputs)

    def compute_output_shape(self, input_shape):
        encoder_outputs, decoder_hidden_state = input_shape
        return decoder_hidden_state, decoder_hidden_state, (decoder_hidden_state[0], decoder_hidden_state[1], 1, 1)
    
    def call(self, inputs):
        encoder_outputs, encoder_mask, decoder_hidden_state, coverage_vector = inputs  # Select the output for the current time step

        score = tf.reduce_sum(self.V(tf.nn.tanh(self.W(tf.expand_dims(tf.expand_dims(decoder_hidden_state, 1), 1)) + self.U(tf.expand_dims(encoder_outputs, 2)) + self.Wc(coverage_vector))), (2, 3))
        attention_weights = tf.nn.softmax(score, axis=1)

        # attention_weights = tf.squeeze(attention_weights, 2) * encoder_mask
        attention_weights = attention_weights * encoder_mask

        for_renorm = tf.reduce_sum(attention_weights, 1)

        attention_weights = attention_weights / tf.reshape(for_renorm, (-1, 1))

        coverage_vector += tf.reshape(attention_weights, [tf.shape(encoder_outputs)[0], -1, 1, 1])

        # Calculate the context vector
        context_vector = tf.reduce_sum(tf.reshape(attention_weights, [tf.shape(encoder_outputs)[0], -1, 1, 1]) * tf.expand_dims(encoder_outputs, 2), (1, 2))
        # print(context_vector, "BEF")
        # context_vector = tf.reshape(context_vector, [-1, tf.shape(encoder_outputs)[-1]])
        # print(context_vector, "AFT")
        return context_vector, attention_weights, coverage_vector
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'units': self.units
        })
        return config

In [17]:
@tf.keras.utils.register_keras_serializable()
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_rate, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate

        self.encoder_embedding = Embedding(self.vocab_size, self.embedding_dim, mask_zero=True, name='News_Embedding')
        self.encoder_lstm = Bidirectional(LSTM(self.hidden_dim, return_sequences=True, return_state=True, dropout=self.dropout_rate), name='Encoder_BiLSTM')

    def build(self, input_shape):
        self.encoder_embedding.build(input_shape)
        lstm_input = self.encoder_embedding.compute_output_shape(input_shape)
        self.encoder_lstm.build(lstm_input)

    def compute_output_shape(self, input_shape):
        lstm_input = self.encoder_embedding.compute_output_shape(input_shape)
        out, hf, cf, hb, cb = self.encoder_lstm.compute_output_shape(lstm_input)
        return out, out, (hf[0], hf[1]+hb[1]), (cf[0], cf[1]+cb[1]) 
    
    def call(self, encoder_input, training=False):
        encoder_embedding = self.encoder_embedding(encoder_input)

        encoder_mask = tf.cast(self.encoder_embedding.compute_mask(encoder_input), tf.float32)
        
        encoder_output, state_h_fwd, state_c_fwd, state_h_bwd, state_c_bwd = self.encoder_lstm(encoder_embedding, training=training)
        
        state_h = tf.concat([state_h_fwd, state_h_bwd], -1)
        state_c = tf.concat([state_c_fwd, state_c_bwd], -1)
        
        return encoder_output, encoder_mask, state_h, state_c
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'vocab_size': self.vocab_size,
            'embedding_dim': self.embedding_dim,
            'hidden_dim': self.hidden_dim,
            'dropout_rate': self.dropout_rate,
        })
        return config

In [18]:
@tf.keras.utils.register_keras_serializable()
class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_rate, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate

        self.decoder_embedding = Embedding(self.vocab_size, self.embedding_dim, mask_zero=True, name='Title_Embedding')
        self.decoder_lstm = LSTM(self.hidden_dim, return_sequences=True, return_state=True, dropout=self.dropout_rate, name='Decoder_LSTM')
        self.bahdanau_attention = BahdanauAttention(units=self.hidden_dim, name="Bahdanau_Attention")  
        self.decoder_dense = Dense(self.vocab_size, activation = 'softmax', name="Softmax_Layer")

    def build(self, input_shape):
        decoder_input, encoder_outputs = input_shape
        self.decoder_embedding.build(decoder_input)
        lstm_input = self.decoder_embedding.compute_output_shape(decoder_input)
        self.decoder_lstm.build(lstm_input)
        lstm_output, h, c = self.decoder_lstm.compute_output_shape(lstm_input)
        self.bahdanau_attention.build((encoder_outputs, h))
        cxt, attn, cvg = self.bahdanau_attention.compute_output_shape((encoder_outputs, h))
        self.decoder_dense.build((cxt[0], cxt[1]+h[1]))

    def compute_output_shape(self, input_shape):
        decoder_input, encoder_outputs = input_shape
        lstm_input = self.decoder_embedding.compute_output_shape(decoder_input)
        lstm_output, h, c = self.decoder_lstm.compute_output_shape(lstm_input)
        cxt, attn, cvg = self.bahdanau_attention.compute_output_shape((encoder_outputs, h))
        dist = self.decoder_dense.compute_output_shape((cxt[0], cxt[1]+h[1]))
        return dist, h, c, attn, cvg
    
    def call(self, decoder_input, encoder_output, encoder_mask, previous_states, coverage_vector, training=False):
        
        decoder_embedding = self.decoder_embedding(decoder_input)

        decoder_output, state_h, state_c, = self.decoder_lstm(decoder_embedding, initial_state=previous_states, training=training)
        context_vector, attention_weights, coverage_vector = self.bahdanau_attention([encoder_output, encoder_mask, state_h, coverage_vector])
        final_decoder_output = self.decoder_dense(tf.concat([context_vector, state_h], -1))
        
        return final_decoder_output, state_h, state_c, attention_weights, coverage_vector

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'vocab_size': self.vocab_size,
            'embedding_dim': self.embedding_dim,
            'hidden_dim': self.hidden_dim,
            'dropout_rate': self.dropout_rate,

        })
        return config

In [19]:
# enc_in = tf.keras.random.randint((4, 4), 0, 9)

# dec_in = tf.keras.random.randint((4, 1), 0, 3)
# dec_t = tf.keras.random.randint((4, 1), 0, 9)

In [20]:
# vocab_size, embedding_dim, hidden_dim, dropout_rate, enc_seq_len, batch = 10, 50, 10, 0.1, 4, 4
# # enc_in = tf.keras.random.randint((batch, enc_seq_len), 0, 9)
# enc = Encoder(vocab_size, embedding_dim, hidden_dim, dropout_rate)
# x, y, z, zz = enc(enc_in)

# vocab_size, embedding_dim, hidden_dim, dropout_rate, dec_seq_len, batch = 10, 50, 20, 0.1, 7, 4
# # dec_in = tf.keras.random.randint((batch, 1), 0, 3)
# # dec_t = tf.keras.random.randint((batch, 1), 0, 9)
# dec = Decoder(vocab_size, embedding_dim, hidden_dim, dropout_rate)
# dec.build(((None,1), x.shape))
# # print(dec.compute_output_shape(((None,1), x.shape)))
# a, b, c, d, e = dec(dec_in, x, y, [z, zz], np.zeros((enc_in.shape[0], enc_in.shape[1], 1, 1)))
# f, g, h, i, j = dec(tf.keras.random.randint((batch, 1), 0, 3), x, y, [b, c], e)
# f, g, h, i, j = dec(tf.keras.random.randint((batch, 1), 0, 3), x, y, [g, h], j)
# # dec_t

In [21]:
@tf.keras.utils.register_keras_serializable()
def sparse_categorical_and_coverage_loss(targets, output_dist, attn_wts, coverage, coverage_weight=1):
    scce_loss = tf.keras.losses.SparseCategoricalCrossentropy(reduction=None)
    step_scce_loss = scce_loss(targets, output_dist)
    step_cov_loss = tf.reduce_sum(tf.minimum(attn_wts, coverage), 1)

    return tf.expand_dims(step_scce_loss, 1), tf.expand_dims(step_cov_loss, 1)

In [22]:
# s1, sc1 = sparse_categorical_and_coverage_loss(tf.keras.random.randint((4, 1), 0, 10), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)))
# s2, sc2 = sparse_categorical_and_coverage_loss(tf.keras.random.randint((4, 1), 0, 10), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)))
# s3, sc3 = sparse_categorical_and_coverage_loss(tf.keras.random.randint((4, 1), 0, 10), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)))
# s4, sc4 = sparse_categorical_and_coverage_loss(tf.keras.random.randint((4, 1), 0, 10), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)), tf.keras.random.uniform((4, 10)))
# loss = tf.concat([s1, s2, s3, s4], 1)
# cov_loss = tf.concat([sc1, sc2, sc3, sc4], 1)
# mask = tf.cast(tf.math.not_equal(tf.keras.random.randint((4, 4), 0, 9), 0), tf.float32)
# print(tf.reduce_mean(tf.reduce_sum(loss*mask, 1) / tf.reduce_sum(mask, 1)))
# print(tf.reduce_mean(tf.reduce_sum(cov_loss*mask, 1) / tf.reduce_sum(mask, 1)))

In [23]:
@tf.keras.utils.register_keras_serializable()
class AttentiveSeq2Seq(tf.keras.Model):
    def __init__(self, encoder, decoder, sos_token_id, teacher_forcing_ratio=0.5, coverage_weight=1.0, **kwargs):
        super(AttentiveSeq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        # self.loss_fn = loss
        self.sos_token_id = sos_token_id
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.coverage_weight = coverage_weight

        self.loss_tracker = tf.keras.metrics.Mean(name="loss")
        self.coverage_tracker = tf.keras.metrics.Mean(name="coverage_loss")
        self.val_loss_tracker = tf.keras.metrics.Mean(name="loss")
        self.val_coverage_tracker = tf.keras.metrics.Mean(name="coverage_loss")

    def build(self, input_shape):
        encoder_input, decoder_input = input_shape
        self.encoder.build(encoder_input)
        encoder_outputs = self.encoder.compute_output_shape(encoder_input)
        self.decoder.build((decoder_input, encoder_outputs[0]))

    def compute_output_shape(self, input_shape):
        encoder_input, decoder_input = input_shape
        encoder_outputs = self.encoder.compute_output_shape(encoder_input)
        decoder_outputs = self.decoder.compute_output_shape((decoder_input, encoder_outputs))
        return encoder_outputs, decoder_outputs
    
    def call(self):
        """
        Decoder Inputs: <sos>.....
        Target: .....<eos>
        """

        return self.encoder, self.decoder

    @tf.function
    def train_step(self, inputs):

        encoder_inputs, targets = inputs
        loss = 0.
        cov_loss = 0.
        
        with tf.GradientTape() as tape:
            batch_loss = None
            batch_cov_loss = None
            target_mask = tf.cast(tf.math.not_equal(targets, 0), tf.float32)
            
            coverage_vector_next_t = tf.zeros([tf.shape(encoder_inputs)[0], tf.shape(encoder_inputs)[1], 1, 1], dtype=tf.float32)
            decoder_input = tf.fill([tf.shape(encoder_inputs)[0],], self.sos_token_id)

            encoder_outputs, encoder_mask, hidden, cell = self.encoder(encoder_inputs, training=True)
            
            for t in range(targets.shape[1]):
                final_dist, hidden, cell, attn_weights, coverage_vector = self.decoder(tf.expand_dims(decoder_input, 1), encoder_outputs, encoder_mask, [hidden, cell], coverage_vector_next_t, training=True)
                decoder_input = targets[:, t] if random.random() < self.teacher_forcing_ratio else tf.argmax(final_dist, 1)  

                step_loss, step_cov_loss = self.loss(targets[:,t], final_dist, attn_weights, tf.squeeze(tf.squeeze(coverage_vector_next_t, 3), 2), self.coverage_weight)

                if batch_loss is None:
                    batch_loss = step_loss
                    batch_cov_loss = step_cov_loss
                else:
                    batch_loss =tf.concat([batch_loss, step_loss], 1)
                    batch_cov_loss =tf.concat([batch_cov_loss, step_cov_loss], 1)
        
                coverage_vector_next_t = coverage_vector                

            loss = tf.reduce_mean(tf.reduce_sum(batch_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))
            cov_loss = tf.reduce_mean(tf.reduce_sum(batch_cov_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))
    
            loss = loss + self.coverage_weight * cov_loss

        variables = self.encoder.trainable_variables + self.decoder.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))

        self.loss_tracker.update_state(loss)
        self.coverage_tracker.update_state(cov_loss)

        return {'loss': self.loss_tracker.result(), 'coverage_loss': self.coverage_tracker.result()}    

    @tf.function
    def test_step(self, inputs):
        encoder_inputs, targets = inputs

        batch_loss = None
        batch_cov_loss = None
        
        target_mask = tf.cast(tf.math.not_equal(targets, 0), tf.float32)
        
        coverage_vector_next_t = tf.zeros([tf.shape(encoder_inputs)[0], tf.shape(encoder_inputs)[1], 1, 1], dtype=tf.float32)
        decoder_input = tf.fill([tf.shape(encoder_inputs)[0],], self.sos_token_id)

        encoder_outputs, encoder_mask, hidden, cell = self.encoder(encoder_inputs, training=False)
        
        for t in range(targets.shape[1]):
            final_dist, hidden, cell, attn_weights, coverage_vector = self.decoder(tf.expand_dims(decoder_input, 1), encoder_outputs, encoder_mask, [hidden, cell], coverage_vector_next_t, training=False)
            decoder_input = tf.argmax(final_dist, 1)  

            step_loss, step_cov_loss = self.loss(targets[:,t], final_dist, attn_weights, tf.squeeze(tf.squeeze(coverage_vector_next_t, 3), 2), self.coverage_weight)

            if batch_loss is None:
                batch_loss = step_loss
                batch_cov_loss = step_cov_loss
            else:
                batch_loss =tf.concat([batch_loss, step_loss], 1)
                batch_cov_loss =tf.concat([batch_cov_loss, step_cov_loss], 1)
    
            coverage_vector_next_t = coverage_vector                
                    
        loss = tf.reduce_mean(tf.reduce_sum(batch_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))
        cov_loss = tf.reduce_mean(tf.reduce_sum(batch_cov_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))

        final_loss = loss + self.coverage_weight * cov_loss

        self.val_loss_tracker.update_state(final_loss)
        self.val_coverage_tracker.update_state(cov_loss)

        return {'loss': self.val_loss_tracker.result(), 'coverage_loss': self.val_coverage_tracker.result()}    
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'encoder': self.encoder,
            'decoder': self.decoder,
            'sos_token_id': self.sos_token_id,
            'teacher_forcing_ratio': self.teacher_forcing_ratio,
            'coverage_weight': self.coverage_weight,
            'loss_tracker': self.loss_tracker,
            'coverage_tracker': self.coverage_tracker,
            'val_loss_tracker': self.val_loss_tracker,
            'val_coverage_tracker': self.val_coverage_tracker
        })
        return config

In [24]:
tf.config.run_functions_eagerly(True)

In [25]:
encoder = Encoder(parameters['VOC_SIZE'], parameters['EMBEDDING_DIMENSION'], parameters['ENCODER_HIDDEN_DIM'], parameters['DROPOUT'])
decoder = Decoder(parameters['VOC_SIZE'], parameters['EMBEDDING_DIMENSION'], parameters['DECODER_HIDDEN_DIM'], parameters['DROPOUT'])    
model = AttentiveSeq2Seq(encoder, decoder,parameters['SOS_TOKEN_ID'])
model.build(((None, parameters['ENCODER_SEQUENCE_LENGTH']), (None, parameters['DECODER_SEQUENCE_LENGTH'])))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=parameters['LEARNING_RATE'], weight_decay=parameters['L2_REG']), loss=sparse_categorical_and_coverage_loss)

In [26]:
model.summary()

In [27]:
cpk = keras.callbacks.ModelCheckpoint(
    filepath='/kaggle/working/best.weights.h5',
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
)
es = keras.callbacks.EarlyStopping(monitor='val_loss', mode="min", patience=parameters['EARLY_STOPPING'])

In [28]:
history = model.fit(train, epochs=parameters["EPOCHS"], batch_size=parameters["BATCH_SIZE"], validation_data=test, callbacks=[cpk, es])

Epoch 1/16




[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1792s[0m 876ms/step - coverage_loss: 0.2387 - loss: 8.3596 - val_coverage_loss: 0.0115 - val_loss: 7.3359
Epoch 2/16
[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1780s[0m 873ms/step - coverage_loss: 0.0148 - loss: 7.0556 - val_coverage_loss: 0.0219 - val_loss: 7.0232
Epoch 3/16
[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1789s[0m 878ms/step - coverage_loss: 0.0126 - loss: 6.6009 - val_coverage_loss: 0.0283 - val_loss: 6.8730
Epoch 4/16
[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1780s[0m 873ms/step - coverage_loss: 0.0438 - loss: 6.5153 - val_coverage_loss: 0.0055 - val_loss: 6.7170
Epoch 5/16
[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1761s[0m 864ms/step - coverage_loss: 0.0079 - loss: 6.0776 - val_coverage_loss: 0.0059 - val_loss: 6.6057
Epoch 6/16
[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1760s[0m 864ms/step - coverage_loss: 0.0062

In [29]:
import pickle
model.save_weights('/kaggle/working/model.weights.h5')
with open('/kaggle/working/model_history', 'wb') as file:
    pickle.dump(history.history, file)