In [1]:
import torch
import numpy as np
import sentencepiece as spm
import torch.nn as nn
import torch.optim as optim
import random
import json
from datasets import load_dataset
from torch.utils.data import DataLoader
import os
import time
import pandas as pd
import tensorflow as tf
import keras
from tensorflow.keras.layers import Bidirectional, LSTM, Input, Dense, TimeDistributed, Embedding, Concatenate
from tensorflow.keras.models import Model

2025-05-05 20:07:27.288876: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746475647.540994      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746475647.618672      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
sp = spm.SentencePieceProcessor()
sp.load("/kaggle/input/nepali-summarization-tokenizer/summarization_50000.model")

True

In [3]:
parameters = {
    'VOC_SIZE': 50_000,

    'ENCODER_SEQUENCE_LENGTH': 256,
    'DECODER_SEQUENCE_LENGTH': 12,
    
    'EMBEDDING_DIMENSION': 100,
    
    'ENCODER_HIDDEN_DIM': 64,
    'DECODER_HIDDEN_DIM': 128,
    
    'DROPOUT': 0.3,

    'BATCH_SIZE': 128,
    'EPOCHS': 16,
    'EARLY_STOPPING': 3,
    'L2_REG': 0.01,
    
    'LEARNING_RATE': 1e-3,
    'GRAD_CLIP': 1.0,

    'PAD_TOKEN': '<pad>',
    'UNK_TOKEN': '<unk>',
    'SOS_TOKEN': '<s>',
    'EOS_TOKEN': '</s>',

    'PAD_TOKEN_ID': sp.pad_id(),
    'UNK_TOKEN_ID': sp.unk_id(),
    'SOS_TOKEN_ID': sp.bos_id(),
    'EOS_TOKEN_ID': sp.eos_id(),

    'ATTENTION_TYPE': 'bahdanau',

    'COVERAGE_WEIGHT': 1.0,
}

In [4]:
df_val = pd.read_csv("/kaggle/input/nepali-summarization-set-cleaned/summarization_set_cleaned_val.csv")

In [5]:
@tf.keras.utils.register_keras_serializable()
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super(BahdanauAttention, self).__init__(**kwargs)
        self.units = units

    # def build(self, input_shape):
        self.W = tf.keras.layers.Dense(self.units, use_bias=False)
        self.U = tf.keras.layers.Dense(self.units, use_bias=False)
        self.Wc = tf.keras.layers.Dense(self.units)
        self.V = tf.keras.layers.Dense(1, use_bias=False)

    def build(self, input_shape):
        encoder_outputs, decoder_hidden_state = input_shape
        self.W.build((decoder_hidden_state[0], 1, decoder_hidden_state[1]))
        self.U.build(encoder_outputs)
        self.Wc.build((encoder_outputs[0], encoder_outputs[1], 1, 1))
        self.V.build(encoder_outputs)

    def compute_output_shape(self, input_shape):
        encoder_outputs, decoder_hidden_state = input_shape
        return decoder_hidden_state, decoder_hidden_state, (decoder_hidden_state[0], decoder_hidden_state[1], 1, 1)
    
    def call(self, inputs):
        encoder_outputs, encoder_mask, decoder_hidden_state, coverage_vector = inputs  # Select the output for the current time step

        score = tf.reduce_sum(self.V(tf.nn.tanh(self.W(tf.expand_dims(tf.expand_dims(decoder_hidden_state, 1), 1)) + self.U(tf.expand_dims(encoder_outputs, 2)) + self.Wc(coverage_vector))), (2, 3))
        attention_weights = tf.nn.softmax(score, axis=1)

        # attention_weights = tf.squeeze(attention_weights, 2) * encoder_mask
        attention_weights = attention_weights * encoder_mask

        for_renorm = tf.reduce_sum(attention_weights, 1)

        attention_weights = attention_weights / tf.reshape(for_renorm, (-1, 1))

        coverage_vector += tf.reshape(attention_weights, [tf.shape(encoder_outputs)[0], -1, 1, 1])

        # Calculate the context vector
        context_vector = tf.reduce_sum(tf.reshape(attention_weights, [tf.shape(encoder_outputs)[0], -1, 1, 1]) * tf.expand_dims(encoder_outputs, 2), (1, 2))
        # print(context_vector, "BEF")
        # context_vector = tf.reshape(context_vector, [-1, tf.shape(encoder_outputs)[-1]])
        # print(context_vector, "AFT")
        return context_vector, attention_weights, coverage_vector
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'units': self.units
        })
        return config

In [6]:
@tf.keras.utils.register_keras_serializable()
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_rate, **kwargs):
        super(Encoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate

        self.encoder_embedding = Embedding(self.vocab_size, self.embedding_dim, mask_zero=True, name='News_Embedding')
        self.encoder_lstm = Bidirectional(LSTM(self.hidden_dim, return_sequences=True, return_state=True, dropout=self.dropout_rate), name='Encoder_BiLSTM')

    def build(self, input_shape):
        self.encoder_embedding.build(input_shape)
        lstm_input = self.encoder_embedding.compute_output_shape(input_shape)
        self.encoder_lstm.build(lstm_input)

    def compute_output_shape(self, input_shape):
        lstm_input = self.encoder_embedding.compute_output_shape(input_shape)
        out, hf, cf, hb, cb = self.encoder_lstm.compute_output_shape(lstm_input)
        return out, out, (hf[0], hf[1]+hb[1]), (cf[0], cf[1]+cb[1]) 
    
    def call(self, encoder_input, training=False):
        encoder_embedding = self.encoder_embedding(encoder_input)

        encoder_mask = tf.cast(self.encoder_embedding.compute_mask(encoder_input), tf.float32)
        
        encoder_output, state_h_fwd, state_c_fwd, state_h_bwd, state_c_bwd = self.encoder_lstm(encoder_embedding, training=training)
        
        state_h = tf.concat([state_h_fwd, state_h_bwd], -1)
        state_c = tf.concat([state_c_fwd, state_c_bwd], -1)
        
        return encoder_output, encoder_mask, state_h, state_c
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'vocab_size': self.vocab_size,
            'embedding_dim': self.embedding_dim,
            'hidden_dim': self.hidden_dim,
            'dropout_rate': self.dropout_rate,
        })
        return config

In [7]:
@tf.keras.utils.register_keras_serializable()
class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, dropout_rate, **kwargs):
        super(Decoder, self).__init__(**kwargs)
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.dropout_rate = dropout_rate

        self.decoder_embedding = Embedding(self.vocab_size, self.embedding_dim, mask_zero=True, name='Title_Embedding')
        self.decoder_lstm = LSTM(self.hidden_dim, return_sequences=True, return_state=True, dropout=self.dropout_rate, name='Decoder_LSTM')
        self.bahdanau_attention = BahdanauAttention(units=self.hidden_dim, name="Bahdanau_Attention")  
        self.decoder_dense = Dense(self.vocab_size, activation = 'softmax', name="Softmax_Layer")

    def build(self, input_shape):
        decoder_input, encoder_outputs = input_shape
        self.decoder_embedding.build(decoder_input)
        lstm_input = self.decoder_embedding.compute_output_shape(decoder_input)
        self.decoder_lstm.build(lstm_input)
        lstm_output, h, c = self.decoder_lstm.compute_output_shape(lstm_input)
        self.bahdanau_attention.build((encoder_outputs, h))
        cxt, attn, cvg = self.bahdanau_attention.compute_output_shape((encoder_outputs, h))
        self.decoder_dense.build((cxt[0], cxt[1]+h[1]))

    def compute_output_shape(self, input_shape):
        decoder_input, encoder_outputs = input_shape
        lstm_input = self.decoder_embedding.compute_output_shape(decoder_input)
        lstm_output, h, c = self.decoder_lstm.compute_output_shape(lstm_input)
        cxt, attn, cvg = self.bahdanau_attention.compute_output_shape((encoder_outputs, h))
        dist = self.decoder_dense.compute_output_shape((cxt[0], cxt[1]+h[1]))
        return dist, h, c, attn, cvg
    
    def call(self, decoder_input, encoder_output, encoder_mask, previous_states, coverage_vector, training=False):
        
        decoder_embedding = self.decoder_embedding(decoder_input)

        decoder_output, state_h, state_c, = self.decoder_lstm(decoder_embedding, initial_state=previous_states, training=training)
        context_vector, attention_weights, coverage_vector = self.bahdanau_attention([encoder_output, encoder_mask, state_h, coverage_vector])
        final_decoder_output = self.decoder_dense(tf.concat([context_vector, state_h], -1))
        
        return final_decoder_output, state_h, state_c, attention_weights, coverage_vector

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'vocab_size': self.vocab_size,
            'embedding_dim': self.embedding_dim,
            'hidden_dim': self.hidden_dim,
            'dropout_rate': self.dropout_rate,

        })
        return config

In [8]:
@tf.keras.utils.register_keras_serializable()
def sparse_categorical_and_coverage_loss(targets, output_dist, attn_wts, coverage, coverage_weight=1):
    scce_loss = tf.keras.losses.SparseCategoricalCrossentropy(reduction=None)
    step_scce_loss = scce_loss(targets, output_dist)
    step_cov_loss = tf.reduce_sum(tf.minimum(attn_wts, coverage), 1)

    return tf.expand_dims(step_scce_loss, 1), tf.expand_dims(step_cov_loss, 1)

In [9]:
@tf.keras.utils.register_keras_serializable()
class AttentiveSeq2Seq(tf.keras.Model):
    def __init__(self, encoder, decoder, sos_token_id, teacher_forcing_ratio=0.5, coverage_weight=1.0, **kwargs):
        super(AttentiveSeq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        # self.loss_fn = loss
        self.sos_token_id = sos_token_id
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.coverage_weight = coverage_weight

        self.loss_tracker = tf.keras.metrics.Mean(name="loss")
        self.coverage_tracker = tf.keras.metrics.Mean(name="coverage_loss")
        self.val_loss_tracker = tf.keras.metrics.Mean(name="loss")
        self.val_coverage_tracker = tf.keras.metrics.Mean(name="coverage_loss")

    def build(self, input_shape):
        encoder_input, decoder_input = input_shape
        self.encoder.build(encoder_input)
        encoder_outputs = self.encoder.compute_output_shape(encoder_input)
        self.decoder.build((decoder_input, encoder_outputs[0]))

    def compute_output_shape(self, input_shape):
        encoder_input, decoder_input = input_shape
        encoder_outputs = self.encoder.compute_output_shape(encoder_input)
        decoder_outputs = self.decoder.compute_output_shape((decoder_input, encoder_outputs))
        return encoder_outputs, decoder_outputs
    
    def call(self):
        """
        Decoder Inputs: <sos>.....
        Target: .....<eos>
        """

        return self.encoder, self.decoder

    @tf.function
    def train_step(self, inputs):

        encoder_inputs, targets = inputs
        loss = 0.
        cov_loss = 0.
        
        with tf.GradientTape() as tape:
            batch_loss = None
            batch_cov_loss = None
            target_mask = tf.cast(tf.math.not_equal(targets, 0), tf.float32)
            
            coverage_vector_next_t = tf.zeros([tf.shape(encoder_inputs)[0], tf.shape(encoder_inputs)[1], 1, 1], dtype=tf.float32)
            decoder_input = tf.fill([tf.shape(encoder_inputs)[0],], self.sos_token_id)

            encoder_outputs, encoder_mask, hidden, cell = self.encoder(encoder_inputs, training=True)
            
            for t in range(targets.shape[1]):
                final_dist, hidden, cell, attn_weights, coverage_vector = self.decoder(tf.expand_dims(decoder_input, 1), encoder_outputs, encoder_mask, [hidden, cell], coverage_vector_next_t, training=True)
                decoder_input = targets[:, t] if random.random() < self.teacher_forcing_ratio else tf.argmax(final_dist, 1)  

                step_loss, step_cov_loss = self.loss(targets[:,t], final_dist, attn_weights, tf.squeeze(tf.squeeze(coverage_vector_next_t, 3), 2), self.coverage_weight)

                if batch_loss is None:
                    batch_loss = step_loss
                    batch_cov_loss = step_cov_loss
                else:
                    batch_loss =tf.concat([batch_loss, step_loss], 1)
                    batch_cov_loss =tf.concat([batch_cov_loss, step_cov_loss], 1)
        
                coverage_vector_next_t = coverage_vector                

            loss = tf.reduce_mean(tf.reduce_sum(batch_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))
            cov_loss = tf.reduce_mean(tf.reduce_sum(batch_cov_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))
    
            loss = loss + self.coverage_weight * cov_loss

        variables = self.encoder.trainable_variables + self.decoder.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))

        self.loss_tracker.update_state(loss)
        self.coverage_tracker.update_state(cov_loss)

        return {'loss': self.loss_tracker.result(), 'coverage_loss': self.coverage_tracker.result()}    

    @tf.function
    def test_step(self, inputs):
        encoder_inputs, targets = inputs

        batch_loss = None
        batch_cov_loss = None
        
        target_mask = tf.cast(tf.math.not_equal(targets, 0), tf.float32)
        
        coverage_vector_next_t = tf.zeros([tf.shape(encoder_inputs)[0], tf.shape(encoder_inputs)[1], 1, 1], dtype=tf.float32)
        decoder_input = tf.fill([tf.shape(encoder_inputs)[0],], self.sos_token_id)

        encoder_outputs, encoder_mask, hidden, cell = self.encoder(encoder_inputs, training=False)
        
        for t in range(targets.shape[1]):
            final_dist, hidden, cell, attn_weights, coverage_vector = self.decoder(tf.expand_dims(decoder_input, 1), encoder_outputs, encoder_mask, [hidden, cell], coverage_vector_next_t, training=False)
            decoder_input = tf.argmax(final_dist, 1)  

            step_loss, step_cov_loss = self.loss(targets[:,t], final_dist, attn_weights, tf.squeeze(tf.squeeze(coverage_vector_next_t, 3), 2), self.coverage_weight)

            if batch_loss is None:
                batch_loss = step_loss
                batch_cov_loss = step_cov_loss
            else:
                batch_loss =tf.concat([batch_loss, step_loss], 1)
                batch_cov_loss =tf.concat([batch_cov_loss, step_cov_loss], 1)
    
            coverage_vector_next_t = coverage_vector                
                    
        loss = tf.reduce_mean(tf.reduce_sum(batch_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))
        cov_loss = tf.reduce_mean(tf.reduce_sum(batch_cov_loss*target_mask, 1) / tf.reduce_sum(target_mask, 1))

        final_loss = loss + self.coverage_weight * cov_loss

        self.val_loss_tracker.update_state(final_loss)
        self.val_coverage_tracker.update_state(cov_loss)

        return {'loss': self.val_loss_tracker.result(), 'coverage_loss': self.val_coverage_tracker.result()}    
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'encoder': self.encoder,
            'decoder': self.decoder,
            'sos_token_id': self.sos_token_id,
            'teacher_forcing_ratio': self.teacher_forcing_ratio,
            'coverage_weight': self.coverage_weight,
            'loss_tracker': self.loss_tracker,
            'coverage_tracker': self.coverage_tracker,
            'val_loss_tracker': self.val_loss_tracker,
            'val_coverage_tracker': self.val_coverage_tracker
        })
        return config

In [10]:
tf.config.run_functions_eagerly(True)

In [11]:
encoder = Encoder(parameters['VOC_SIZE'], parameters['EMBEDDING_DIMENSION'], parameters['ENCODER_HIDDEN_DIM'], parameters['DROPOUT'])
decoder = Decoder(parameters['VOC_SIZE'], parameters['EMBEDDING_DIMENSION'], parameters['DECODER_HIDDEN_DIM'], parameters['DROPOUT'])    
model = AttentiveSeq2Seq(encoder, decoder,parameters['SOS_TOKEN_ID'])
model.build(((None, parameters['ENCODER_SEQUENCE_LENGTH']), (None, parameters['DECODER_SEQUENCE_LENGTH'])))
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=parameters['LEARNING_RATE'], weight_decay=parameters['L2_REG']), loss=sparse_categorical_and_coverage_loss)

2025-05-05 20:07:44.488392: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [12]:
model.summary()

In [13]:
model.load_weights('/kaggle/input/headline-generator-outputs/seq2seq_best.weights.h5')

  saveable.load_own_variables(weights_store.get(inner_path))


In [14]:
def greedy_generate(model, news, encoder_seq_len, decoder_seq_len, tokenizer, sos_id, eos_id, pad_id):
    news_encoded = tokenizer.encode(news)
    
    if len(news_encoded) >= encoder_seq_len:
        news_encoded = news_encoded[:encoder_seq_len]
    else:
        news_encoded = news_encoded + [pad_id] * (encoder_seq_len - len(news_encoded))

    output_seq = []

    encoder_outputs, encoder_mask, hidden, cell = model.encoder(tf.convert_to_tensor([news_encoded]), training=False)

    decoder_input = tf.fill([1,], sos_id)
    coverage_vector = tf.zeros([1, encoder_seq_len,1, 1])
    
    for t in range(decoder_seq_len):
        final_dist, hidden, cell, _, coverage_vector = model.decoder(tf.expand_dims(decoder_input, 1), encoder_outputs, encoder_mask, [hidden, cell], coverage_vector, training=False)
        decoder_input = tf.argmax(final_dist, -1)
        if decoder_input[0] == eos_id:
            break
        output_seq += decoder_input.numpy().tolist()
    return tokenizer.decode(output_seq)

In [15]:
df_val['title'][10], greedy_generate(model, df_val['news'][10], 256, 12, sp, parameters['SOS_TOKEN_ID'], parameters['EOS_TOKEN_ID'], parameters['PAD_TOKEN_ID'])




('आफूले निष्पक्ष र पारदर्शी रूपमा काम गरेको धितोपत्र बोर्डका अध्यक्षको दाबी',
 'पुँजी बजारका सबै सूचक')

In [16]:
def beam_generate(model, news, encoder_seq_len, decoder_seq_len, tokenizer, sos_id, eos_id, pad_id, beam_width=3, length_penalty=0.6, coverage_penalty=0.2):
    news_encoded = tokenizer.encode(news)
    
    if len(news_encoded) >= encoder_seq_len:
        news_encoded = news_encoded[:encoder_seq_len]
    else:
        news_encoded = news_encoded + [pad_id] * (encoder_seq_len - len(news_encoded))

    output_seq = []

    encoder_outputs, encoder_mask, hidden, cell = model.encoder(tf.convert_to_tensor([news_encoded]), training=False)

    decoder_input = tf.fill([1,], sos_id)
    coverage_vector = tf.zeros([1, encoder_seq_len,1,1])
    beams = [(0.0, decoder_input, hidden, cell, coverage_vector)]
    
    for t in range(decoder_seq_len):
        expanded_beam = []

        for log_prob_score, sequence, hid, cell, cov in beams:
            if sequence[-1] == eos_id:
                expanded_beam.append((log_prob_score, sequence, hid, cell, cov))
            final_dist, hidden, cell, _, coverage_vector = model.decoder(tf.expand_dims(sequence[-1:], 0), encoder_outputs, encoder_mask, [hid, cell], cov, training=False)
            log_prob_scores = tf.nn.log_softmax(final_dist[0])
            top_k_probs, top_k_idxs = tf.math.top_k(log_prob_scores, k=beam_width)

            for i in range(beam_width):
                new_seq = tf.concat([sequence, [top_k_idxs[i]]], axis=0)
                expanded_beam.append((log_prob_score+top_k_probs[i], new_seq, hidden, cell, coverage_vector))

        def normalized_score(candidate):
            score, seq, _, _, cov = candidate
            length = tf.cast(tf.shape(seq)[0], tf.float32)

            # coverage = tf.squeeze(cov, axis=[0, 2, 3]) 
            # cov_penalty = tf.reduce_sum(tf.math.log(tf.minimum(coverage, 1.0) + 1e-8))

            total_score = score / ((5.0 + length) ** length_penalty) / ((5.0 + 1.0) ** length_penalty)
            return total_score

        beams = sorted(expanded_beam, key=normalized_score, reverse=True)
        beams = beams[:beam_width]

        if all(seq[-1].numpy() == eos_id for _, seq, _, _, _ in beams):
            break

    return tokenizer.decode(beams[0][1][1:].numpy().tolist())

In [17]:
df_val['title'][21], beam_generate(model, df_val['news'][21], 256, 12, sp, parameters['SOS_TOKEN_ID'], parameters['EOS_TOKEN_ID'], parameters['PAD_TOKEN_ID'])


('भारतीय प्रसिद्ध गायक एसपी बालासुब्रमण्यमको निधन',
 'भारतका चर्चित कलाकार भारतीय निधन')

In [18]:
df_val = df_val.sample(n=1000)

In [19]:
ref_titles = []
pred_titles_greedy = []
pred_titles_beam3 = []
for news, title in zip(df_val['news'], df_val['title']):
    greedy_pred = greedy_generate(model, news, parameters['ENCODER_SEQUENCE_LENGTH'], parameters['DECODER_SEQUENCE_LENGTH'], sp, parameters['SOS_TOKEN_ID'], parameters['EOS_TOKEN_ID'], parameters['PAD_TOKEN_ID'])
    beam_pred = beam_generate(model, news, parameters['ENCODER_SEQUENCE_LENGTH'], parameters['DECODER_SEQUENCE_LENGTH'], sp, parameters['SOS_TOKEN_ID'], parameters['EOS_TOKEN_ID'], parameters['PAD_TOKEN_ID'])
    ref_titles.append(title)
    pred_titles_greedy.append(greedy_pred)
    pred_titles_beam3.append(beam_pred)

In [20]:
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction
smoothie = SmoothingFunction().method4

def compute_bleu_metric(reference: list, predicted: list) -> float:
    '''
    Computes the BLEU metric given a set of refraence and predicted sequences of words
    
    :param list(str) refrence: List of refrence sequences. Eg: ["Hi I am jane doe", "i am from italy"]    
    :param list(str) predicted: List of predicted sequences. Eg: ["i jane doe", "i was born and raised in sicily"]
    :return: The BLEU score for the predicted sequences.
    '''
    return corpus_bleu([[value.split()] for value in reference], [value.split() for value in predicted], smoothing_function=smoothie, weights=(0.25, 0.25, 0.25, 0.25))

In [21]:
print(f"Greedy BLEU Score: {compute_bleu_metric(ref_titles, pred_titles_greedy):.4f}")

Greedy BLEU Score: 0.0317


In [22]:
print(f"Beam BLEU Score: {compute_bleu_metric(ref_titles, pred_titles_beam3):.4f}")

Beam BLEU Score: 0.0260


In [23]:
!pip install rouge --quiet

In [24]:
from rouge import Rouge

def compute_rouge_metric(reference: list, predicted: list) -> dict:
    '''
    Computes Rogue-1, Rouge-2 and Rouge-L metric given a set of refraence and predicted sequences of words
    
    :param list(str) refrence: List of refrence sequences. Eg: ["Hi I am jane doe", "i am from italy"]    
    :param list(str) predicted: List of predicted sequences. Eg: ["i jane doe", "i was born and raised in sicily"]
    :return: The rouge-1,2,L scores for the predicted sequences.
    '''
    rouge = Rouge()
    scores = rouge.get_scores(predicted, reference, avg=True)
    return {
        "Rouge-1": scores['rouge-1']['f'],
        "Rouge-2": scores['rouge-2']['f'],
        "Rouge-L": scores['rouge-l']['f']
    }

In [25]:
compute_rouge_metric(ref_titles, pred_titles_greedy)

{'Rouge-1': 0.19154094099732077,
 'Rouge-2': 0.04900627078774634,
 'Rouge-L': 0.1867987962139995}

In [26]:
print(f"Greedy Rouge Score:") 
compute_rouge_metric(ref_titles, pred_titles_greedy)

Greedy Rouge Score:


{'Rouge-1': 0.19154094099732077,
 'Rouge-2': 0.04900627078774634,
 'Rouge-L': 0.1867987962139995}

In [27]:
print(f"Beam Rouge Score:") 
compute_rouge_metric(ref_titles, pred_titles_beam3)

Beam Rouge Score:


{'Rouge-1': 0.18098599260407947,
 'Rouge-2': 0.04581363570384322,
 'Rouge-L': 0.17680485903176935}