## Importing + Installs

In [None]:
!pip install -q sumeval

In [None]:
from tqdm import tqdm
import re
import numpy as np
import pandas as pd
from tensorflow.keras import backend as K 
from nltk.corpus import stopwords
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed,Add,AdditiveAttention
from tensorflow.keras.models import Model,load_model
from tensorflow.keras.callbacks import EarlyStopping
from keras.preprocessing.text import text_to_word_sequence
import tensorflow as tf
import random as rn
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer 
from keras_preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer 
from keras_preprocessing.sequence import pad_sequences
from tqdm.auto import tqdm, trange
from sumeval.metrics.rouge import RougeCalculator

import warnings
warnings.filterwarnings("ignore")

# Seed the three random number generators used by this notebook:
# NumPy, TensorFlow and Python's built-in `random` (imported as `rn`).
# NOTE(review): each library gets a different seed (42 / 32 / 12) — presumably
# intentional; confirm.
np.random.seed(42)
tf.random.set_seed(32)
rn.seed(12)



In [None]:
# Absolute locations of the dataset artefacts.
# Only `csv_with_ent` (loaded into `post_pre`) and `glov_file` (GloVe vectors)
# are read in the visible part of this notebook; the remaining paths are not
# referenced here.
data_path = "/CNN/stories"
csv_path = "/CNN/CNN_with_summary.csv"
clean_csv_path = "/CNN/data_cleaned1.csv"
csv_with_ent = "/CNN/data_with_ents.csv"
glov_file = "/CNN/glove_embeddings/glove.6B.txt"
logs_dir = "/CNN/logs_dir"
model_path = "/CNN/"

In [None]:
# Load the dataset; its 'text' and 'summary' columns are used for the split below.
post_pre = pd.read_csv(csv_with_ent)

In [None]:
# Fixed sequence lengths (in tokens) used as `maxlen` when padding the
# article bodies and the summaries, and as the decoding length limit.
max_text_len = 330
max_summary_len = 41

In [None]:
# Hold out 10% of the (text, summary) pairs for validation; fixed random_state
# keeps the split reproducible.
x_train, x_validation, y_train, y_validation = train_test_split(np.array(post_pre['text']), np.array(post_pre['summary']), random_state = 33 , test_size = 0.1)

### Replacing rare words with the unknown token (`ukn`)

In [None]:
# First pass over the training texts: fitted only to obtain per-word counts
# (`word_counts`) for the rare-word detection that follows.
x_tokenizer = Tokenizer() 
x_tokenizer.fit_on_texts(list(x_train))

In [None]:
# Source-side words that occur fewer than 2 times in the training texts.
rare_word = [
    token
    for token, occurrences in x_tokenizer.word_counts.items()
    if occurrences < 2
]

In [None]:
# One 'ukn' placeholder per rare word (paired with `rare_word` via zip below).
tokenrare = ['ukn'] * len(rare_word)

In [None]:
# Lookup table: rare source word -> 'ukn'.
dictionary_1 = dict(zip(rare_word, tokenrare)) 

In [None]:
# Replace every rare word of each training text with 'ukn'.
# The substitution is done token by token: a substring replace on the whole
# text would also rewrite longer words that merely contain a rare word.
# Tokens are re-joined with single spaces, which is equivalent for the
# whitespace-splitting Tokenizer fitted on `x_trunk` afterwards.
x_trunk = []
for i in x_train:
    replaced_words = [dictionary_1.get(word.lower(), word) for word in i.split()]
    x_trunk.append(' '.join(replaced_words))

In [None]:
# Second pass: re-fit on the texts whose rare words were replaced, and use the
# same 'ukn' string as the out-of-vocabulary token for unseen words.
x_tokenizer = Tokenizer(oov_token = 'ukn') 
x_tokenizer.fit_on_texts(list(x_trunk))

In [None]:
# 1) Convert each text into a sequence of integer word indices
#    (index encoding via the tokenizer's vocabulary, not one-hot vectors)
x_tr_seq    =   x_tokenizer.texts_to_sequences(x_trunk) 
x_val_seq   =   x_tokenizer.texts_to_sequences(x_validation)
# 2) Pad with trailing zeros up to max_text_len
# NOTE(review): `truncating` is left at its default, which is believed to drop
# tokens from the START of texts longer than max_text_len — confirm this is intended.
x_tr    =   pad_sequences(x_tr_seq,  maxlen=max_text_len, padding = 'post')
x_val   =   pad_sequences(x_val_seq, maxlen=max_text_len, padding = 'post')
# 3) Size of vocabulary (+1 because index 0 is reserved for padding)
x_voc   =  len(x_tokenizer.word_index) + 1

In [None]:
# First pass over the training summaries: fitted only to obtain per-word
# counts (`word_counts`) for the rare-word detection that follows.
y_tokenizer = Tokenizer() 
y_tokenizer.fit_on_texts(list(y_train))

In [None]:
# Target-side words that occur fewer than 2 times in the training summaries.
rare_word = [
    token
    for token, occurrences in y_tokenizer.word_counts.items()
    if occurrences < 2
]

In [None]:
# One 'ukn' placeholder per rare word (paired with `rare_word` via zip below).
tokenrare = ['ukn'] * len(rare_word)

In [None]:
# Lookup table: rare target word -> 'ukn' (overwrites the source-side table).
dictionary_1 = dict(zip(rare_word, tokenrare)) 

In [None]:
# Replace every rare word of each training summary with 'ukn'.
# The substitution is done token by token: a substring replace on the whole
# summary would also rewrite longer words that merely contain a rare word.
# Tokens are re-joined with single spaces, which is equivalent for the
# whitespace-splitting Tokenizer fitted on `y_trunk` afterwards.
y_trunk = []
for i in y_train:
    replaced_words = [dictionary_1.get(word.lower(), word) for word in i.split()]
    y_trunk.append(' '.join(replaced_words))

In [None]:
# Second pass: re-fit on the summaries whose rare words were replaced, using
# 'ukn' as the out-of-vocabulary token as well.
y_tokenizer = Tokenizer(oov_token = 'ukn') 
y_tokenizer.fit_on_texts(list(y_trunk))
# Convert each summary into a sequence of integer word indices
# (index encoding via the tokenizer's vocabulary, not one-hot vectors)
y_tr_seq    =   y_tokenizer.texts_to_sequences(y_trunk) 
y_val_seq   =   y_tokenizer.texts_to_sequences(y_validation) 
# Pad with trailing zeros up to max_summary_len
# NOTE(review): `truncating` is left at its default, which is believed to drop
# tokens from the START of over-long summaries (i.e. the first token) — confirm.
y_tr    =   pad_sequences(y_tr_seq, maxlen = max_summary_len, padding = 'post')
y_val   =   pad_sequences(y_val_seq, maxlen = max_summary_len, padding = 'post')
# Size of vocabulary (+1 because index 0 is reserved for padding)
y_voc  =   len(y_tokenizer.word_index) + 1

## Glove File for Word Embeddings

In [None]:
# Parse the GloVe text file into {word: float32 vector}.
# Each non-empty line is "<word> <v1> <v2> ...". The context manager guarantees
# the file is closed even if a malformed line raises during parsing.
embeddings_dictionary = dict()
with open(glov_file, encoding = "utf8") as glove_file:
    for line in glove_file:
        records = line.split()
        if not records:
            # Skip blank lines instead of failing on records[0].
            continue
        word = records[0]
        vector_dimensions = np.asarray(records[1:], dtype = 'float32')
        embeddings_dictionary[word] = vector_dimensions

In [None]:
# Build the source-side embedding matrix: row `index` holds the pre-trained
# GloVe vector of the word with that tokenizer index; words without a GloVe
# entry (and the unused rows) stay all-zero.
embedding_matrix_x = np.zeros((x_voc + 1, 300))
for word, index in x_tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is None:
        continue
    embedding_matrix_x[index] = embedding_vector
embedding_matrix_x.shape

In [None]:
# Build the target-side embedding matrix: row `index` holds the pre-trained
# GloVe vector of the word with that tokenizer index; words without a GloVe
# entry (and the unused rows) stay all-zero.
embedding_matrix_y = np.zeros((y_voc + 1, 300))
for word, index in y_tokenizer.word_index.items():
    embedding_vector = embeddings_dictionary.get(word)
    if embedding_vector is None:
        continue
    embedding_matrix_y[index] = embedding_vector
embedding_matrix_y.shape

## Encoder

In [None]:
class Encoder(tf.keras.Model):
    """Embedding layer followed by a bidirectional GRU.

    The embedding is initialised from `embedding_matrix_x`. The GRU returns the
    full output sequence as well as the final forward and backward states.
    """

    def __init__(self, vocab_size, embedding_dim, embedding_matrix_x, hidden_units):
        super().__init__()
        self.hidden_units = hidden_units
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            weights = [embedding_matrix_x],
        )
        recurrent_cell = tf.keras.layers.GRU(
            hidden_units,
            return_sequences = True,
            return_state = True,
            recurrent_initializer = 'glorot_uniform',
            dropout = 0.08,
            recurrent_dropout = 0.05,
        )
        self.bi_gru = tf.keras.layers.Bidirectional(recurrent_cell)

    def call(self, encoder_input, encoder_states):
        """Encode a batch of token-id sequences.

        `encoder_states` is the pair of initial states handed to the
        bidirectional GRU. Returns the per-token outputs and the list
        [final forward state, final backward state].
        """
        embedded = self.embedding(encoder_input)
        outputs, forward_state, backward_state = self.bi_gru(embedded, initial_state = encoder_states)
        return outputs, [forward_state, backward_state]

## Attention mechanism with a coverage vector and coverage weights (`wc`)

In [None]:
class additiveAttention(tf.keras.layers.AdditiveAttention):
    """Additive attention over the encoder outputs with an optional coverage vector.

    Scores are computed as V(tanh(Wh(encoder outputs) + Ws(decoder state) + wc(coverage))).
    When `is_coverage` is true the attention weights of every call are
    accumulated into the coverage vector that is returned to the caller;
    otherwise the coverage is returned unchanged and `wc` is frozen.
    """

    def __init__(self, hidden_units, is_coverage = False):
        super().__init__()
        self.Wh = tf.keras.layers.Dense(hidden_units)
        self.Ws = tf.keras.layers.Dense(hidden_units)
        self.wc = tf.keras.layers.Dense(1)
        self.V = tf.keras.layers.Dense(1)
        self.coverage = is_coverage
        if not self.coverage:
            # Coverage disabled: keep the coverage projection out of training.
            self.wc.trainable = False

    def call(self, keys):
        """Compute one attention step.

        Args:
            keys: sequence of three tensors, in this order:
                [decoder state, encoder outputs, coverage vector].
                As called in this file they have shapes (batch, hidden),
                (batch, time, features) and (batch, time).

        Returns:
            (context_vector, attention_weights, coverage): the attention-weighted
            sum of the encoder outputs, the weights with shape (batch, time, 1),
            and the (possibly updated) coverage vector.
        """
        decoder_state, encoder_outputs, ct = keys[0], keys[1], keys[2]
        # Add a time axis so the state broadcasts against every encoder position.
        decoder_state = tf.expand_dims(decoder_state, 1)
        # NOTE(review): the coverage is expanded to (batch, 1, time) and wc maps
        # its last axis to 1, so the coverage contributes one scalar per example
        # that is shared by all positions — confirm this is the intended design.
        coverage_input = tf.expand_dims(ct, 1)
        score = self.V(tf.nn.tanh(self.Wh(encoder_outputs) + self.Ws(decoder_state) + self.wc(coverage_input)))
        attention_weights = tf.nn.softmax(score, axis = 1)
        if self.coverage:
            # Squeeze only the trailing score axis; a bare tf.squeeze would also
            # drop a batch or time axis of size 1 and break the addition.
            ct = ct + tf.squeeze(attention_weights, axis = -1)
        context_vector = tf.reduce_sum(attention_weights * encoder_outputs, axis = 1)
        return context_vector, attention_weights, ct

### Decoder

In [None]:
class Decoder(tf.keras.Model):
    """Single-step GRU decoder producing log-probabilities over the target vocabulary.

    The embedding is initialised from `embedding_matrix_y`. The new GRU state is
    concatenated with the attention context and passed through two dense layers.
    """

    def __init__(self, vocab_size, embedding_dim, embedding_matrix_y, hidden_units):
        super().__init__()
        self.hidden_units = hidden_units
        self.embedding = tf.keras.layers.Embedding(
            vocab_size,
            embedding_dim,
            weights = [embedding_matrix_y],
        )
        self.gru = tf.keras.layers.GRU(
            hidden_units,
            return_sequences = True,
            return_state = True,
            recurrent_initializer = 'glorot_uniform',
        )
        self.W1 = tf.keras.layers.Dense(hidden_units)
        self.W2 = tf.keras.layers.Dense(vocab_size)

    def call(self, decoder_input, decoder_state, encoder_output, context_vector):
        """Run one decoding step.

        `encoder_output` is accepted for interface compatibility but is not
        read here; the encoder information arrives through `context_vector`.
        Returns (log-probabilities over the vocabulary, new decoder state).
        """
        embedded = self.embedding(decoder_input)
        # Only the final state is used; the per-step output sequence is discarded.
        _, decoder_state = self.gru(embedded, initial_state = decoder_state)
        features = tf.concat([context_vector, decoder_state], axis = -1)
        features = tf.reshape(features, (-1, features.shape[1]))
        hidden = self.W1(features)
        logits = self.W2(hidden)
        return tf.nn.log_softmax(logits), decoder_state

In [None]:
# Embedding table sizes. These match the row counts of embedding_matrix_x /
# embedding_matrix_y, which are also allocated with (x_voc + 1) / (y_voc + 1) rows.
input_vocab_size = x_voc + 1
output_vocab_size = y_voc + 1

### Data generator

In [None]:
def data_generator(X, y, BATCH_SIZE, BUFFER_ZISE, shuffle = True):
    """Wrap (X, y) into a cached, batched and prefetched tf.data pipeline.

    When `shuffle` is true the examples are shuffled with a buffer of
    len(X) * BUFFER_ZISE elements before batching. Incomplete final batches
    are dropped, so every batch has exactly BATCH_SIZE examples.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((X, y)).cache()
    if shuffle:
        pipeline = pipeline.shuffle(len(X) * BUFFER_ZISE)
    batches = pipeline.batch(BATCH_SIZE, drop_remainder = True)
    return batches.prefetch(buffer_size = tf.data.experimental.AUTOTUNE)

In [None]:
# Aliases for the padded integer sequences used by the data pipeline and by
# the progress-bar totals in the training loop.
body_seqs = x_tr
target_seqs = y_tr
body_seqs_val = x_val
target_seqs_val = y_val

In [None]:
# Training batches are shuffled; validation batches keep their original order.
# NOTE(review): the batch size is written as the literal 64 here and must stay
# equal to the global BATCH_SIZE that the train/validation steps rely on.
train_dataset = data_generator(body_seqs, target_seqs, BATCH_SIZE = 64, BUFFER_ZISE = 10, shuffle = True)
val_dataset = data_generator(body_seqs_val, target_seqs_val, BATCH_SIZE = 64, BUFFER_ZISE = 10, shuffle = False)

In [None]:
# Model hyper-parameters. embedding_dim matches the 300 columns of the
# embedding matrices built from GloVe above.
embedding_dim = 300
hidden_units = 128
BATCH_SIZE = 64

# Instantiate the three model parts; the attention layer runs with coverage enabled.
encoder = Encoder(input_vocab_size, embedding_dim, embedding_matrix_x, hidden_units) 
attention = additiveAttention(hidden_units, is_coverage = True)
decoder = Decoder(output_vocab_size, embedding_dim, embedding_matrix_y, hidden_units)



In [None]:
# Run one batch through encoder -> attention -> decoder eagerly. This builds
# the layers (creates their variables) and serves as a shape sanity check.
encoder_input, decoder_target = next(iter(train_dataset))
encoder_init_states = [tf.zeros((BATCH_SIZE, encoder.hidden_units)) for i in range(2)]
encoder_output, encoder_states = encoder(encoder_input, encoder_init_states)
# The decoder starts from the encoder's final forward state.
decoder_state = encoder_states[0] 
# Use BATCH_SIZE rather than a literal 64 so the shapes stay consistent.
coverage_vector = tf.zeros((BATCH_SIZE, encoder_input.shape[1]))
decoder_input_vals = decoder_target[:, 0]
context_vector, attention_weights, coverage_vector = attention([decoder_state, encoder_output, coverage_vector])
p_vocab, decoder_state = decoder(tf.expand_dims(decoder_input_vals, 1), decoder_state, encoder_output, context_vector)

### Loss Functions

In [None]:
# Adam with default hyper-parameters; shared by every training step.
optimizer = tf.keras.optimizers.Adam()

def nll_loss(p_vocab, target):
    """Return the negated `p_vocab`, zeroed wherever `target` is the pad id 0.

    `p_vocab` is expected to hold the log-probability assigned to each target
    token, so the result is the per-example negative log-likelihood with
    padding positions contributing nothing.
    """
    is_real_token = tf.math.not_equal(target, 0)
    negative_log_prob = -p_vocab
    keep = tf.cast(is_real_token, dtype = negative_log_prob.dtype)
    return negative_log_prob * keep

def attention_coverage_loss(attention_weights, coverage_vector, target):
    """Coverage penalty: sum over source positions of min(attention, coverage).

    `attention_weights` carries a trailing axis of size 1 and `coverage_vector`
    gets one added so the two can be compared element-wise. The per-example
    sum is zeroed wherever `target` is the pad id 0.

    Note: if `coverage_vector` already includes `attention_weights`, the
    minimum is always the attention itself and the penalty carries no signal.
    """
    coverage_column = tf.expand_dims(coverage_vector, axis = 2)
    stacked = tf.concat([attention_weights, coverage_column], axis = 2)
    per_position_min = tf.reduce_min(stacked, axis = 2)
    per_example = tf.reduce_sum(per_position_min, axis = 1)
    keep = tf.cast(tf.math.not_equal(target, 0), dtype = per_example.dtype)
    return per_example * keep

### Model Training

### Training Function

In [None]:
@tf.function
def train_step(encoder_input, decoder_target):
    """Run one teacher-forced training step on a batch and apply the gradients.

    Args:
        encoder_input: padded source token ids for one batch of BATCH_SIZE rows.
        decoder_target: padded target token ids for the same batch; column dt
            is fed to the decoder and column dt + 1 is the prediction target.

    Returns:
        Scalar batch loss: the per-example sum of negative log-likelihood and
        coverage loss, divided by the number of non-pad target tokens and
        averaged over the batch.
    """
    loss_value = tf.zeros(BATCH_SIZE)
    lambda_cov = 1
    with tf.GradientTape() as gradient_tape:
        # Run input through encoder
        encoder_init_states = [tf.zeros((BATCH_SIZE, encoder.hidden_units)) for _ in range(2)]
        encoder_output, encoder_states = encoder(encoder_input, encoder_init_states)
        # Initialize decoder with encoder forward state
        decoder_state = encoder_states[0]
        coverage_vector = tf.zeros((BATCH_SIZE, encoder_input.shape[1]))
        # Loop over each word
        for dt in range(decoder_target.shape[1] - 1):
            decoder_input_vals = decoder_target[:, dt]
            decoder_target_vals = decoder_target[:, dt + 1]
            # Keep the coverage accumulated over the PREVIOUS steps: the coverage
            # loss must compare the new attention against it. Comparing against
            # the updated coverage (previous + current attention) makes the
            # minimum equal the attention itself, i.e. a constant penalty.
            previous_coverage = coverage_vector
            # Get attention scores
            context_vector, attention_weights, coverage_vector = attention([decoder_state, encoder_output, previous_coverage])
            # Get vocabulary distribution for each example at time dt
            p_vocabulary, decoder_state = decoder(tf.expand_dims(decoder_input_vals, 1), decoder_state, encoder_output, context_vector)
            # Log-probability of the target word at time dt + 1 for every example,
            # selected in one batched gather instead of a per-example Python loop.
            p_vocabulary_target = tf.gather(p_vocabulary, decoder_target_vals, axis = 1, batch_dims = 1)
            # Accumulate both loss terms for this time step
            loss_value += nll_loss(p_vocabulary_target, decoder_target_vals) + lambda_cov * attention_coverage_loss(attention_weights, previous_coverage, decoder_target_vals)
        # Get the non-padded length of each target sequence in the batch
        sequence_len_mask = tf.cast(tf.math.logical_not(tf.math.equal(decoder_target, 0)), tf.float32)
        batch_sequence_len = tf.reduce_sum(sequence_len_mask, axis = 1)
        # Guard against an all-padding row, which would otherwise divide by zero.
        batch_sequence_len = tf.maximum(batch_sequence_len, 1.0)
        # Normalise each example by its target length, then average over the batch
        batch_loss = tf.reduce_mean(loss_value / batch_sequence_len)
    # Update all trainable variables, including those of the attention layer
    # (which otherwise would stay at their random initial values).
    variables = encoder.trainable_variables + attention.trainable_variables + decoder.trainable_variables
    gradients = gradient_tape.gradient(batch_loss, variables)
    # Variables that do not take part in the loss get a None gradient; skip them.
    grads_and_vars = [(grad, var) for grad, var in zip(gradients, variables) if grad is not None]
    optimizer.apply_gradients(grads_and_vars)
    return batch_loss

### Validation Step

In [None]:
@tf.function
def val_step(encoder_input, decoder_target):
    """Compute the teacher-forced loss of one validation batch (no weight update).

    Args:
        encoder_input: padded source token ids for one batch of BATCH_SIZE rows.
        decoder_target: padded target token ids for the same batch; column dt
            is fed to the decoder and column dt + 1 is the prediction target.

    Returns:
        Scalar batch loss: the per-example sum of negative log-likelihood and
        coverage loss, divided by the number of non-pad target tokens and
        averaged over the batch.
    """
    loss_value = tf.zeros(BATCH_SIZE)
    lambda_cov = 1
    encoder_init_states = [tf.zeros((BATCH_SIZE, encoder.hidden_units)) for _ in range(2)]
    encoder_output, encoder_states = encoder(encoder_input, encoder_init_states)
    # Initialize decoder with encoder forward state
    decoder_state = encoder_states[0]
    coverage_vector = tf.zeros((BATCH_SIZE, encoder_input.shape[1]))
    for dt in range(decoder_target.shape[1] - 1):
        decoder_input_vals = decoder_target[:, dt]
        decoder_target_vals = decoder_target[:, dt + 1]
        # Keep the coverage accumulated over the PREVIOUS steps: the coverage
        # loss must compare the new attention against it. Comparing against the
        # updated coverage makes the minimum equal the attention itself.
        previous_coverage = coverage_vector
        # get attention scores
        context_vector, attention_weights, coverage_vector = attention([decoder_state, encoder_output, previous_coverage])
        # get vocabulary distribution for each example at time dt
        p_vocabulary, decoder_state = decoder(tf.expand_dims(decoder_input_vals, 1), decoder_state, encoder_output, context_vector)
        # Log-probability of the target word at time dt + 1 for every example,
        # selected in one batched gather instead of a per-example Python loop.
        p_vocabulary_target = tf.gather(p_vocabulary, decoder_target_vals, axis = 1, batch_dims = 1)
        # Accumulate both loss terms for this time step
        loss_value += nll_loss(p_vocabulary_target, decoder_target_vals) + lambda_cov * attention_coverage_loss(attention_weights, previous_coverage, decoder_target_vals)
    sequence_len_mask = tf.cast(tf.math.logical_not(tf.math.equal(decoder_target, 0)), tf.float32)
    batch_sequence_len = tf.reduce_sum(sequence_len_mask, axis = 1)
    # Guard against an all-padding row, which would otherwise divide by zero.
    batch_sequence_len = tf.maximum(batch_sequence_len, 1.0)
    # Normalise each example by its target length, then average over the batch
    val_batch_loss = tf.reduce_mean(loss_value / batch_sequence_len)
    return val_batch_loss

## Training loop

In [None]:
# Train for `epochs` epochs. Each epoch runs every training batch through
# train_step, then every validation batch through val_step, and prints the
# mean of the per-batch losses.
epochs = 10
epoch_loss = tf.keras.metrics.Mean()
epoch_val_loss = tf.keras.metrics.Mean()

with tqdm(total = epochs) as epoch_progress:
    for epoch in range(epochs):
        # Start each epoch with empty running means.
        epoch_loss.reset_states()
        epoch_val_loss.reset_states()
        with tqdm(total = len(body_seqs) // BATCH_SIZE) as batch_progress:
            for batch, (encoder_input, decoder_target) in enumerate(train_dataset):
                batch_loss = train_step(encoder_input, decoder_target)
                epoch_loss(batch_loss)
                if (batch % 10) == 0:
                    batch_progress.set_description(f'Epoch {epoch + 1}')
                    batch_progress.set_postfix(Batch = batch, Loss = batch_loss.numpy())
                batch_progress.update()
        with tqdm(total = len(body_seqs_val) // BATCH_SIZE) as batch_progress:
            for batch, (encoder_input_val, decoder_target_val) in enumerate(val_dataset):
                val_batch_loss = val_step(encoder_input_val, decoder_target_val)
                epoch_val_loss(val_batch_loss)
                if (batch % 10) == 0:
                    batch_progress.set_description(f'Epoch {epoch + 1}')
                    # Show the validation loss here, not the last training batch loss.
                    batch_progress.set_postfix(Batch = batch, Loss = val_batch_loss.numpy())
                batch_progress.update()
        epoch_progress.set_description(f'Epoch {epoch + 1}')
        epoch_progress.set_postfix(Loss=epoch_loss.result().numpy())
        print('train loss_loss = ',epoch_loss.result().numpy())
        print('val_loss = ',epoch_val_loss.result().numpy())
        epoch_progress.update() 

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  7.6916122
val_loss =  7.060702


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  7.1392636
val_loss =  6.815719


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  6.811021
val_loss =  6.5126762


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  6.496533
val_loss =  6.361263


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  6.283117
val_loss =  6.282677


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  6.090431
val_loss =  6.184033


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  5.885233
val_loss =  6.1360183


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  5.698744
val_loss =  6.103146


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  5.505176
val_loss =  6.0680346


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  5.3156657
val_loss =  6.0721254


In [None]:
# Continue training the same model for 4 more epochs (reported as epochs
# 11-14). Each epoch runs every training batch through train_step, then every
# validation batch through val_step, and prints the mean per-batch losses.
epochs = 4
epoch_loss = tf.keras.metrics.Mean()
epoch_val_loss = tf.keras.metrics.Mean()

with tqdm(total = epochs) as epoch_progress:
    for epoch in range(10, 14):
        # Start each epoch with empty running means.
        epoch_loss.reset_states()
        epoch_val_loss.reset_states()
        with tqdm(total = len(body_seqs) // BATCH_SIZE) as batch_progress:
            for batch, (encoder_input, decoder_target) in enumerate(train_dataset):
                batch_loss = train_step(encoder_input, decoder_target)
                epoch_loss(batch_loss)
                if (batch % 10) == 0:
                    batch_progress.set_description(f'Epoch {epoch + 1}')
                    batch_progress.set_postfix(Batch = batch, Loss = batch_loss.numpy())
                batch_progress.update()
        with tqdm(total = len(body_seqs_val) // BATCH_SIZE) as batch_progress:
            for batch, (encoder_input_val, decoder_target_val) in enumerate(val_dataset):
                val_batch_loss = val_step(encoder_input_val, decoder_target_val)
                epoch_val_loss(val_batch_loss)
                if (batch % 10) == 0:
                    batch_progress.set_description(f'Epoch {epoch + 1}')
                    # Show the validation loss here, not the last training batch loss.
                    batch_progress.set_postfix(Batch = batch, Loss = val_batch_loss.numpy())
                batch_progress.update()
        epoch_progress.set_description(f'Epoch {epoch + 1}')
        epoch_progress.set_postfix(Loss=epoch_loss.result().numpy())
        print('train loss_loss = ',epoch_loss.result().numpy())
        print('val_loss = ',epoch_val_loss.result().numpy())
        epoch_progress.update() 

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  5.1457057
val_loss =  6.1048384


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  4.9855485
val_loss =  6.151639


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  4.830365
val_loss =  6.2151847


  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/14 [00:00<?, ?it/s]

train loss_loss =  4.6750774
val_loss =  6.3006034


In [None]:
# Target file names for the trained weights.
# NOTE(review): the names say "15epochs" while the loops above run 10 + 4 = 14
# epochs, and no save call is visible in this part of the notebook — confirm.
encoder_save_name = "/CNN/encoder_15epochs.h5"
decoder_save_name = "/CNN/decoder_15epochs.h5"
attention_save_name = "/CNN/attention_15epochs.h5"

In [None]:
# First weight array of the encoder, flattened into a single column.
# NOTE(review): `en` is not used in the visible part of the notebook.
en = (encoder.get_weights()[0]).reshape(-1, 1)

In [None]:
# Second weight array of the encoder, flattened into a single column.
# NOTE(review): `en1` is not used in the visible part of the notebook.
en1=(encoder.get_weights()[1]).reshape(-1,1)

## Testing the model

In [None]:
# Lookup tables used when turning id sequences back into text:
# id -> word for both vocabularies, and word -> id for the target vocabulary.
reverse_target_word_index = y_tokenizer.index_word
reverse_source_word_index = x_tokenizer.index_word
target_word_index = y_tokenizer.word_index

In [None]:
def decode_sequence(encoder_input):
    """Greedily decode a summary for a single (batch of one) encoded article.

    Starting from the 'start' token, the most probable next token is chosen at
    every step and fed back into the decoder. Decoding stops once the 'end'
    token is produced or the summary holds max_summary_len ids.

    Returns the list of generated target ids, beginning with the 'start' id.
    """
    start_id = target_word_index['start']
    end_id = target_word_index['end']

    # Encode the article, starting from all-zero GRU states.
    zero_states = [tf.zeros((1, encoder.hidden_units)) for _ in range(2)]
    encoder_output, encoder_states = encoder(encoder_input, zero_states)

    # The decoder starts from the encoder's forward state and the start token.
    decoder_state = encoder_states[0]
    decoder_input_vals = tf.ones(1) * start_id
    coverage_vector = tf.zeros((1, encoder_input.shape[1]))

    summary = [start_id]
    # summary[-1] always mirrors the id currently held in decoder_input_vals.
    while summary[-1] != end_id and len(summary) < max_summary_len:
        context_vector, attention_weights, coverage_vector = attention([decoder_state, encoder_output, coverage_vector])
        p_vocab, decoder_state = decoder(tf.expand_dims(decoder_input_vals, 1), decoder_state, encoder_output, context_vector)
        decoder_input_vals = tf.argmax(p_vocab, axis = 1)
        summary.append(int(decoder_input_vals[0].numpy()))
    return summary

In [None]:
def sequence2text(input_seq):
    """Map source ids back to words, skipping the pad id 0.

    Every word is followed by a single space (including the last one).
    """
    return ''.join(
        reverse_source_word_index[token_id] + ' '
        for token_id in input_seq
        if token_id != 0
    )

def sequence2summary(input_seq, ukn_token):
    """Map target ids back to words, dropping pad, 'start' and 'end' ids.

    Every occurrence of the 'ukn' id is rendered as `ukn_token`. Each emitted
    word is followed by a single space (including the last one).
    """
    pieces = []
    for token_id in input_seq:
        if token_id == 0 or token_id == target_word_index['start'] or token_id == target_word_index['end']:
            continue
        if token_id == target_word_index['ukn']:
            pieces.append(ukn_token + ' ')
        else:
            pieces.append(reverse_target_word_index[token_id] + ' ')
    return ''.join(pieces)
  
def sequence_search(list, platform):
    """Return True if any element of `list` equals `platform`, else False."""
    return any(element == platform for element in list)

In [None]:
# Progress marker: confirms the helper definitions above were executed.
print("DONE")

DONE


In [None]:
# Print the reference and the greedily decoded summary for a few validation
# examples. Every 'ukn' in a prediction is rendered as the first
# out-of-vocabulary word of the corresponding source article.
ukn_id = x_tokenizer.word_index['ukn']
for i in range(31, min(50, len(x_val))):
    encoder_input_sum = tf.expand_dims(x_val[i], 0)
    summary = decode_sequence(encoder_input_sum)
    # Recover the word behind the first 'ukn' id. The tokenizer emits exactly
    # one id per word produced by text_to_word_sequence, so ids and words line
    # up one-to-one. Keeping the last max_text_len words mirrors the padding
    # step, which is believed to truncate from the front by default — confirm.
    source_tokens = text_to_word_sequence(x_validation[i])[-max_text_len:]
    ukn_positions = np.flatnonzero(x_val[i][:len(source_tokens)] == ukn_id)
    if ukn_positions.size:
        ukn_token = source_tokens[ukn_positions[0]]
    else:
        ukn_token = 'ukn'
    print("Original summary:", y_validation[i])
    print("Predicted summary:", sequence2summary(summary, ukn_token)) 
    print("\n")

### Model Evaluation

In [None]:
# Score greedy predictions against the references with ROUGE-1/2/L on (up to)
# the first 750 validation examples, then report the mean scores.
rouge = RougeCalculator(stopwords = True, lang = "en")
Rouge_1 = []
Rouge_2 = []
Rouge_l = []
ukn_id = x_tokenizer.word_index['ukn']

for i in range(0, min(750, len(x_val))):
    encoder_input_sum = tf.expand_dims(x_val[i], 0)
    summary = decode_sequence(encoder_input_sum)
    # Recover the word behind the first 'ukn' id. The tokenizer emits exactly
    # one id per word produced by text_to_word_sequence, so ids and words line
    # up one-to-one. Keeping the last max_text_len words mirrors the padding
    # step, which is believed to truncate from the front by default — confirm.
    source_tokens = text_to_word_sequence(x_validation[i])[-max_text_len:]
    ukn_positions = np.flatnonzero(x_val[i][:len(source_tokens)] == ukn_id)
    if ukn_positions.size:
        ukn_token = source_tokens[ukn_positions[0]]
    else:
        ukn_token = 'ukn'
    # Build both strings once. The model output is the candidate `summary`,
    # the gold text is the `references` argument.
    # NOTE(review): the reference is rebuilt from y_val ids, so its own 'ukn'
    # ids are rendered with the same source word — confirm this is intended.
    predicted_text = sequence2summary(summary, ukn_token)
    reference_text = sequence2summary(y_val[i], ukn_token)
    # rouge_1
    rouge_1 = rouge.rouge_n(summary = predicted_text, references = reference_text, n = 1)
    Rouge_1.append(rouge_1)
    # rouge_2
    rouge_2 = rouge.rouge_n(summary = predicted_text, references = reference_text, n = 2)
    Rouge_2.append(rouge_2)
    # rouge_l
    rouge_l = rouge.rouge_l(summary = predicted_text, references = reference_text)
    Rouge_l.append(rouge_l)

# Report the averages (skipped when nothing was scored).
if Rouge_1:
    print(f"ROUGE-1: {np.mean(Rouge_1):.3f}, ROUGE-2: {np.mean(Rouge_2):.3f}, ROUGE-L: {np.mean(Rouge_l):.3f}")

ROUGE-1: 0.892, ROUGE-2: 0.727, ROUGE-L: 0.856