In [None]:
# Installation of necessary libraries
!pip install transformers
!pip install git+https://github.com/openai/CLIP.git

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-f5y1yp2y
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-f5y1yp2y
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Preparing metadata (setup.py) ... [?25l[?25hdone
Mounted at /content/drive


In [None]:
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

class Downloader(object):
    def __init__(self, use_pydrive):
        self.use_pydrive = use_pydrive
        if self.use_pydrive:
            self.authenticate()

    def authenticate(self):
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        self.drive = GoogleDrive(gauth)

    def download_file(self, file_id, file_dst):
        if self.use_pydrive:
            downloaded = self.drive.CreateFile({'id': file_id})
            downloaded.FetchMetadata(fetch_all=True)
            downloaded.GetContentFile(file_dst)
        else:
            !gdown --id $file_id -O $file_dst

# Initialize downloader
# With download_with_pydrive = True the constructor calls authenticate() right away,
# so running this cell triggers the Colab authentication flow.
download_with_pydrive = True
downloader = Downloader(download_with_pydrive)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random

# Define constants
# MAX_LENGTH is only used as the default of train()'s `max_length` argument in this file.
MAX_LENGTH = 10  # Define the maximum length of the input/output sequences
SOS_token = 0    # Start-of-sequence token
EOS_token = 1    # End-of-sequence token

# Define your encoder class
class Encoder(nn.Module):
    """Single-step GRU encoder: embeds one token and advances the hidden state.

    Args:
        input_size: number of rows in the embedding table.
        hidden_size: embedding width and GRU hidden width.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        """Return `(output, hidden)` from one GRU step on the embedded `input`."""
        # The embedding is reshaped to (1, 1, -1) before being fed to the GRU.
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

# Define your decoder class
class Decoder(nn.Module):
    """Single-step GRU decoder producing log-probabilities over the output vocabulary.

    Args:
        hidden_size: embedding width and GRU hidden width.
        output_size: size of the output vocabulary (embedding rows and logits).
    """

    def __init__(self, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=output_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)
        self.out = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Return `(log_probs, hidden)` after one decoding step."""
        # Embed, reshape to (1, 1, -1) and apply ReLU before the GRU step.
        activated = F.relu(self.embedding(input).view(1, 1, -1))
        gru_out, hidden = self.gru(activated, hidden)
        # Project the first (only) time step and normalise with log-softmax.
        log_probs = self.softmax(self.out(gru_out[0]))
        return log_probs, hidden

# Function to prune the encoder model using Random Pruning
def prune_encoder(encoder, sparsity=0.9):
    """Randomly zero a fraction of every weight tensor of `encoder`, in place.

    Each parameter whose name contains 'weight' is pruned independently:
    `int(sparsity * numel)` randomly chosen entries are set to zero. Parameters
    whose name does not contain 'weight' (e.g. biases) are left untouched.

    The previous implementation computed the prune count over all layers
    combined and subtracted it from each layer's size, which left nothing to
    keep and therefore pruned nothing on multi-layer models.

    Args:
        encoder: an `nn.Module` whose weights are pruned in place.
        sparsity: fraction of entries to zero in each weight tensor, in [0, 1].

    Raises:
        ValueError: if `sparsity` is outside [0, 1].
    """
    if not 0.0 <= sparsity <= 1.0:
        raise ValueError(f"sparsity must be in [0, 1], got {sparsity}")

    with torch.no_grad():
        for name, param in encoder.named_parameters():
            if 'weight' not in name:
                continue
            layer_size = param.numel()
            num_to_prune = int(sparsity * layer_size)
            if num_to_prune == 0:
                continue
            # `random.sample` keeps the selection reproducible through `random.seed`.
            pruned = random.sample(range(layer_size), num_to_prune)
            pruned_idx = torch.tensor(pruned, dtype=torch.long, device=param.device)
            mask = torch.ones_like(param)
            mask.view(-1)[pruned_idx] = 0
            param.mul_(mask)

# Define the training function
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    """Run one optimisation step of the encoder/decoder on a single sequence pair.

    The encoder consumes `input_tensor` one element at a time; the decoder then
    starts from `SOS_token` and is fed its own top-1 prediction at each step (no
    teacher forcing), stopping early once it predicts `EOS_token`.

    Args:
        input_tensor: indexable sequence of input token tensors.
        target_tensor: indexable sequence of target tensors passed to `criterion`.
        encoder, decoder: the modules to train.
        encoder_optimizer, decoder_optimizer: their optimizers.
        criterion: loss called as `criterion(decoder_output, target_tensor[di])`.
        max_length: accepted for interface compatibility; not used by this function.

    Returns:
        The summed loss divided by the full target length (even when decoding
        stopped early on EOS).

    Raises:
        ValueError: if `target_tensor` is empty (there is nothing to train on).
    """
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    if target_length == 0:
        raise ValueError("target_tensor must contain at least one token")

    # Create auxiliary tensors on the same device as the data so the function
    # also works when the inputs and models live on a GPU.
    device = input_tensor.device
    encoder_hidden = torch.zeros(1, 1, encoder.hidden_size, device=device)

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    loss = 0

    for ei in range(input_length):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)

    decoder_input = torch.tensor([[SOS_token]], device=device)

    # The decoder starts from the encoder's final hidden state.
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        _, topi = decoder_output.topk(1)
        # Detach so the next step's input does not carry gradient history.
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[di])
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

# Define other hyperparameters
hidden_size = 256
input_size = 10  # Example value, replace with your actual input size
output_size = 10  # Example value, replace with your actual output size

# Initialize your encoder and decoder
encoder = Encoder(input_size, hidden_size)
decoder = Decoder(hidden_size, output_size)

# Prune the encoder using Random Pruning
# NOTE(review): pruning is applied once, before the optimizers are built; nothing
# in this cell keeps the zeroed weights at zero during later SGD updates.
prune_encoder(encoder)

# Define your loss function and optimizer
# NLLLoss matches the LogSoftmax output of Decoder.forward.
criterion = nn.NLLLoss()
encoder_optimizer = optim.SGD(encoder.parameters(), lr=0.01)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=0.01)


In [None]:

# Main training loop
# trainIters(encoder, decoder, n_iters, print_every=1000)

import os

# Function to download the file
def download_file(file_id, destination_path):
    # Ensure that the directory for the destination file exists
    os.makedirs(os.path.dirname(destination_path), exist_ok=True)
    !gdown --id $file_id -O "$destination_path"

In [None]:


#@title Imports

import clip
import os
from torch import nn
import numpy as np
import torch
import torch.nn.functional as nnf
import sys
from typing import Tuple, List, Union, Optional
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm, trange
from google.colab import files
import skimage.io as io
import PIL.Image
from IPython.display import Image


# Shorthand aliases used in type annotations throughout the notebook.
N = type(None)
V = np.array  # note: np.array is a factory function, not a class; used here only as an alias
ARRAY = np.ndarray
ARRAYS = Union[Tuple[ARRAY, ...], List[ARRAY]]
VS = Union[Tuple[V, ...], List[V]]
VN = Union[V, N]
VNS = Union[VS, N]
T = torch.Tensor
TS = Union[Tuple[T, ...], List[T]]
TN = Optional[T]
TNS = Union[Tuple[TN, ...], List[TN]]
TSN = Optional[TS]
TA = Union[T, ARRAY]


# Device alias and a ready-made CPU device (returned by get_device when CUDA is unavailable).
D = torch.device
CPU = torch.device('cpu')


def get_device(device_id: int) -> D:
    """Return the CUDA device for `device_id`, or `CPU` when CUDA is unavailable.

    The index is capped at the highest available CUDA device index.
    """
    if torch.cuda.is_available():
        highest_index = torch.cuda.device_count() - 1
        device_id = min(highest_index, device_id)
        return torch.device(f'cuda:{device_id}')
    return CPU


# Alias so call sites can write CUDA(0).
CUDA = get_device

# The weights are stored in a "pretrained_models" directory next to (not inside)
# the current working directory; the directory is created on first run.
current_directory = os.getcwd()
save_path = os.path.join(os.path.dirname(current_directory), "pretrained_models")
os.makedirs(save_path, exist_ok=True)
# NOTE(review): the filename is spelled 'wieghts'; it is only used as a local path
# by the download and load cells, so it works, but it is worth keeping in mind.
model_path = os.path.join(save_path, 'model_wieghts.pt')


In [None]:

#@title Model

class MLP(nn.Module):
    """Stack of `nn.Linear` layers with an activation between consecutive layers.

    Args:
        sizes: layer widths; `len(sizes) - 1` linear layers are created.
        bias: forwarded to every `nn.Linear`.
        act: activation class instantiated after every layer except the last.
    """

    def __init__(self, sizes: Tuple[int, ...], bias=True, act=nn.Tanh):
        super().__init__()
        last_linear = len(sizes) - 2
        modules = []
        for idx, (width_in, width_out) in enumerate(zip(sizes[:-1], sizes[1:])):
            modules.append(nn.Linear(width_in, width_out, bias=bias))
            # No activation after the final linear layer.
            if idx < last_linear:
                modules.append(act())
        self.model = nn.Sequential(*modules)

    def forward(self, x: T) -> T:
        """Apply the layer stack to `x`."""
        return self.model(x)


class ClipCaptionModel(nn.Module):
    """GPT-2 language model conditioned on a projected prefix vector.

    `clip_project` maps a `prefix_size` vector to `prefix_length` GPT-2
    embeddings, which are prepended to the token embeddings.
    """

    def __init__(self, prefix_length: int, prefix_size: int = 512):
        super().__init__()
        self.prefix_length = prefix_length
        self.gpt = GPT2LMHeadModel.from_pretrained('gpt2')
        self.gpt_embedding_size = self.gpt.transformer.wte.weight.shape[1]
        projected_width = self.gpt_embedding_size * prefix_length
        if prefix_length > 10:  # not enough memory
            self.clip_project = nn.Linear(prefix_size, projected_width)
        else:
            self.clip_project = MLP((prefix_size, projected_width // 2, projected_width))

    #@functools.lru_cache #FIXME
    def get_dummy_token(self, batch_size: int, device: D) -> T:
        """Return an int64 zero tensor of shape (batch_size, prefix_length) on `device`."""
        shape = (batch_size, self.prefix_length)
        return torch.zeros(*shape, dtype=torch.int64, device=device)

    def forward(self, tokens: T, prefix: T, mask: Optional[T] = None, labels: Optional[T] = None):
        """Run GPT-2 on the projected prefix followed by the token embeddings.

        When `labels` is not None its value is not used directly: the labels
        passed to GPT-2 are the dummy prefix tokens concatenated with `tokens`.
        """
        token_embeds = self.gpt.transformer.wte(tokens)
        projected = self.clip_project(prefix)
        prefix_embeds = projected.view(-1, self.prefix_length, self.gpt_embedding_size)
        #print(embedding_text.size()) #torch.Size([5, 67, 768])
        #print(prefix_projections.size()) #torch.Size([5, 1, 768])
        inputs = torch.cat((prefix_embeds, token_embeds), dim=1)
        if labels is not None:
            prefix_labels = self.get_dummy_token(tokens.shape[0], tokens.device)
            labels = torch.cat((prefix_labels, tokens), dim=1)
        return self.gpt(inputs_embeds=inputs, labels=labels, attention_mask=mask)


class ClipCaptionPrefix(ClipCaptionModel):
    """Variant that exposes only the prefix projection as trainable parameters."""

    def parameters(self, recurse: bool = True):
        """Return only `clip_project`'s parameters (`recurse` is ignored)."""
        projection = self.clip_project
        return projection.parameters()

    def train(self, mode: bool = True):
        """Set the training mode, then force the GPT-2 submodule back to eval mode."""
        super().train(mode)
        self.gpt.eval()
        return self

In [None]:
import torch
import torch.nn.functional as nnf
from tqdm import tqdm, trange

#@title Caption prediction

def generate_beam(model, tokenizer, beam_size: int = 5, prompt=None, embed=None,
                  entry_length=67, temperature=1., stop_token: str = '.'):
    """Generate `beam_size` candidate texts with beam search, best first.

    Decoding starts from `embed` when given, otherwise from the embedded
    `prompt` (so `prompt` must be provided when `embed` is None). At most
    `entry_length` tokens are generated; a beam stops once it emits the first
    token id of `tokenizer.encode(stop_token)`.

    Returns:
        A list of decoded strings ordered by length-normalised score, highest first.

    NOTE(review): the body references `np`, which is not imported in this cell;
    it relies on `import numpy as np` from an earlier cell.
    """

    model.eval()
    # Only the first token id of the encoded stop string is used as the stop marker.
    stop_token_index = tokenizer.encode(stop_token)[0]
    tokens = None
    scores = None
    device = next(model.parameters()).device
    # Per-beam bookkeeping: generated length (starts at 1) and a finished flag.
    seq_lengths = torch.ones(beam_size, device=device)
    is_stopped = torch.zeros(beam_size, device=device, dtype=torch.bool)
    with torch.no_grad():
        if embed is not None:
            generated = embed
        else:
            if tokens is None:
                tokens = torch.tensor(tokenizer.encode(prompt))
                tokens = tokens.unsqueeze(0).to(device)
                generated = model.gpt.transformer.wte(tokens)
        for i in range(entry_length):
            outputs = model.gpt(inputs_embeds=generated)
            logits = outputs.logits
            # Keep only the last position; a non-positive temperature is treated as 1.0.
            logits = logits[:, -1, :] / (temperature if temperature > 0 else 1.0)
            # Convert to log-probabilities.
            logits = logits.softmax(-1).log()
            if scores is None:
                # First step: seed the beams with the top `beam_size` tokens and
                # replicate the context once per beam.
                scores, next_tokens = logits.topk(beam_size, -1)
                generated = generated.expand(beam_size, *generated.shape[1:])
                next_tokens, scores = next_tokens.permute(1, 0), scores.squeeze(0)
                if tokens is None:
                    tokens = next_tokens
                else:
                    tokens = tokens.expand(beam_size, *tokens.shape[1:])
                    tokens = torch.cat((tokens, next_tokens), dim=1)
            else:
                # A stopped beam may only be continued with token 0 at zero cost,
                # so its cumulative score stays unchanged.
                logits[is_stopped] = -float(np.inf)
                logits[is_stopped, 0] = 0
                scores_sum = scores[:, None] + logits
                seq_lengths[~is_stopped] += 1
                # Rank candidates by length-normalised score over all (beam, token) pairs.
                scores_sum_average = scores_sum / seq_lengths[:, None]
                scores_sum_average, next_tokens = scores_sum_average.view(-1).topk(beam_size, -1)
                # Flat index -> source beam (row) and token id (column).
                next_tokens_source = next_tokens // scores_sum.shape[1]
                seq_lengths = seq_lengths[next_tokens_source]
                next_tokens = next_tokens % scores_sum.shape[1]
                next_tokens = next_tokens.unsqueeze(1)
                # Reorder all per-beam state to follow the selected source beams.
                tokens = tokens[next_tokens_source]
                tokens = torch.cat((tokens, next_tokens), dim=1)
                generated = generated[next_tokens_source]
                # Undo the normalisation so `scores` again holds summed log-probabilities.
                scores = scores_sum_average * seq_lengths
                is_stopped = is_stopped[next_tokens_source]
            next_token_embed = model.gpt.transformer.wte(next_tokens.squeeze()).view(generated.shape[0], 1, -1)
            generated = torch.cat((generated, next_token_embed), dim=1)
            # `+` on bool tensors acts as logical OR: a beam stays stopped once stopped.
            is_stopped = is_stopped + next_tokens.eq(stop_token_index).squeeze()
            if is_stopped.all():
                break
    # Final ranking uses the length-normalised score.
    scores = scores / seq_lengths
    output_list = tokens.cpu().numpy()
    # Decode each beam only up to its recorded length.
    output_texts = [tokenizer.decode(output[:int(length)]) for output, length in zip(output_list, seq_lengths)]
    order = scores.argsort(descending=True)
    output_texts = [output_texts[i] for i in order]
    return output_texts

def generate2(
        model,
        tokenizer,
        tokens=None,
        prompt=None,
        embed=None,
        entry_count=1,
        entry_length=67,  # maximum number of words
        top_p=0.8,
        temperature=1.,
        stop_token: str = '.',
):
    """Greedy decoding with nucleus (top-p) filtering; returns the first generated text.

    Decoding starts from `embed` when given, otherwise from the embedding of
    `tokens` (or of the encoded `prompt` when `tokens` is None). At each step
    tokens outside the top-p nucleus are masked and the arg-max token is taken.
    Generation stops after `entry_length` tokens or when the first token id of
    `tokenizer.encode(stop_token)` is produced.

    Fixes over the previous version:
      * with `embed=` given and `tokens=None` the first `torch.cat` received
        `None` and raised TypeError;
      * every entry now starts from the caller's `tokens` instead of the
        tokens accumulated by the previous entry;
      * a single generated token no longer breaks the conversion to a list.

    Returns:
        The decoded text of the first entry (`generated_list[0]`).
    """
    model.eval()
    generated_list = []
    stop_token_index = tokenizer.encode(stop_token)[0]
    filter_value = -float("Inf")
    device = next(model.parameters()).device

    with torch.no_grad():
        for _ in trange(entry_count):
            # Per-entry token buffer; never mutate the caller's `tokens`.
            entry_tokens = tokens
            if embed is not None:
                generated = embed
            else:
                if entry_tokens is None:
                    entry_tokens = torch.tensor(tokenizer.encode(prompt))
                    entry_tokens = entry_tokens.unsqueeze(0).to(device)
                generated = model.gpt.transformer.wte(entry_tokens)

            for i in range(entry_length):
                outputs = model.gpt(inputs_embeds=generated)
                logits = outputs.logits
                # Keep only the last position; a non-positive temperature is treated as 1.0.
                logits = logits[:, -1, :] / (temperature if temperature > 0 else 1.0)
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(nnf.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the first token crossing the threshold is kept,
                # and always keep the most likely token.
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0

                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                logits[:, indices_to_remove] = filter_value
                next_token = torch.argmax(logits, -1).unsqueeze(0)
                next_token_embed = model.gpt.transformer.wte(next_token)
                if entry_tokens is None:
                    # Started from `embed` only: the first generated token opens the buffer.
                    entry_tokens = next_token
                else:
                    entry_tokens = torch.cat((entry_tokens, next_token), dim=1)
                generated = torch.cat((generated, next_token_embed), dim=1)
                if stop_token_index == next_token.item():
                    break

            # reshape(-1) keeps a 1-D sequence even when only one token was produced.
            output_list = [] if entry_tokens is None else entry_tokens.reshape(-1).tolist()
            output_text = tokenizer.decode(output_list)
            generated_list.append(output_text)

    return generated_list[0]


In [None]:


#@title Choose pretrained model - COCO or Conceptual captions

pretrained_model = 'COCO'  # @param ['COCO', 'Conceptual captions']

# Download the selected checkpoint (by Google Drive file id) to model_path.
# Both choices write to the same path, so switching overwrites the previous file.
if pretrained_model == 'Conceptual captions':
  downloader.download_file("14pXWwB4Zm82rsDdvbGguLfx9F8aM7ovT", model_path)
else:
  downloader.download_file("1IdaBtMSvtyzF0ByVaBHtvM0JYSXRExRX", model_path)


In [None]:
#@title GPU/CPU

# When true, the next cell asks for CUDA(0); get_device falls back to the CPU
# device if CUDA is not available.
is_gpu = True #@param {type:"boolean"}

In [None]:
#@title CLIP model + GPT2 tokenizer

device = CUDA(0) if is_gpu else "cpu"
clip_model, preprocess = clip.load("ViT-B/32", device=device, jit=False)
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

prefix_length = 10

model = ClipCaptionModel(prefix_length)

# NOTE(review): torch.load unpickles the downloaded file; only load checkpoints
# from a trusted source.
state_dict = torch.load(model_path, map_location=device)
# Manually filter out unexpected keys (build the expected key set once instead of
# rebuilding model.state_dict() for every checkpoint key).
expected_keys = set(model.state_dict())
filtered_state_dict = {key: value for key, value in state_dict.items() if key in expected_keys}
dropped_keys = sorted(set(state_dict) - expected_keys)
if dropped_keys:
    # Make the filtering visible instead of silently discarding checkpoint entries.
    print(f"Ignoring {len(dropped_keys)} checkpoint keys not present in the model:", dropped_keys)
# Load the filtered state dictionary
load_result = model.load_state_dict(filtered_state_dict, strict=False)

# Put the caption model in eval mode on the same device as CLIP; without this the
# model stays on the CPU and fails with a device mismatch when a GPU is used.
model = model.eval().to(device)

# Display the missing/unexpected key report as the cell output.
load_result



100%|███████████████████████████████████████| 338M/338M [00:05<00:00, 59.3MiB/s]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

<All keys matched successfully>

In [None]:
import os
from google.colab import drive
from datetime import datetime
import torch
import PIL
import io
from skimage import io

drive.mount('/content/drive')

use_beam_search = True

# Path to the folder containing images in your Google Drive
folder_path = "/content/drive/MyDrive/NLP/Flicker8k_Dataset/Images1"

# Get the current timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Create a new output text file with timestamp in the filename
output_file = f"/content/drive/MyDrive/NLP/Flicker8k_Dataset/Clip_output_{timestamp}.txt"

# Initialize an empty list to store generated captions
captions = []

# File extensions treated as images (matched case-insensitively).
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Iterate over files in batches of 100. Captions are written to the output file
# as they are produced and flushed after every batch, so an interrupted run
# keeps everything generated so far.
# NOTE(review): files are processed in os.listdir order, and the evaluation cell
# pairs captions with references by line position — confirm both files share
# the same ordering.
batch_size = 100
file_list = os.listdir(folder_path)
num_files = len(file_list)
with open(output_file, "w", encoding="utf-8") as f:
    for i in range(0, num_files, batch_size):
        batch_files = file_list[i:i+batch_size]

        for filename in batch_files:
            if not filename.lower().endswith(IMAGE_EXTENSIONS):
                continue

            # Load and process the image
            image_path = os.path.join(folder_path, filename)
            image = io.imread(image_path)
            pil_image = PIL.Image.fromarray(image)

            # Preprocess the image
            image = preprocess(pil_image).unsqueeze(0).to(device)

            # Generate caption for the image
            with torch.no_grad():
                prefix = clip_model.encode_image(image).to(device, dtype=torch.float32)
                prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)

            if use_beam_search:
                generated_text_prefix = generate_beam(model, tokenizer, embed=prefix_embed)[0]
            else:
                generated_text_prefix = generate2(model, tokenizer, embed=prefix_embed)

            # Record the filename and corresponding caption, in memory and on disk
            caption_line = f"{filename},{generated_text_prefix}\n"
            captions.append(caption_line)
            f.write(caption_line)

            print(f"{filename},{generated_text_prefix}")

        # Persist the finished batch before starting the next one.
        f.flush()

# Print message indicating successful extraction
print("Captions extracted and saved to:", output_file)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
3175434849_859f09fe07.jpg,A group of teddy bears sitting on top of a carnival float.
2505988632_9541f15583.jpg,A young girl jumping into the air on a skateboard.
2662816021_ac474e0fde.jpg,A dog with a stick in its mouth.
3174228611_6cf9d2266b.jpg,A basketball player dribbles the ball during a game.
2054308369_f9c6ec7815.jpg,A little girl hugging a brown dog on top of a bed.
2502905671_c6039804ab.jpg,A man standing on top of a sandy beach next to a dog.
2340206885_58754a799a.jpg,A couple of dogs playing with each other in the snow.
2054869561_ff723e9eab.jpg,A dog jumping in the air to catch a frisbee.
3173215794_6bdd1f72d4.jpg,A group of people standing in front of a refrigerator.
2500826039_165e75b20c.jpg,A man kissing a woman on the forehead.
2062607137_dac194ad02.jpg,A couple of dogs standing in a grassy field.
3172369593_eb4d787ffb.jpg,A woman standing in 

In [None]:
!pip install nltk
!pip install rouge-score
!pip install pycocoevalcap

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=589d422492c544e41b1cd1706e27fe871847ef787ed734b325c5a0609fbb9bff
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
Collecting pycocoevalcap
  Downloading pycocoevalcap-1.2-py3-none-any.whl (104.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.3/104.3 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pycocoevalcap
Successfully installed pycocoevalcap-1.2


In [None]:
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer
from pycocoevalcap.cider.cider import Cider
from nltk.tokenize import word_tokenize
import nltk

# NLTK data used by the evaluation cell below — presumably 'punkt' for
# word_tokenize and 'wordnet' for meteor_score; newer NLTK releases may need
# additional resources (TODO confirm against the installed version).
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Function to read captions from a file
def read_captions_from_file(file_path):
    """Read the captions from a file of `filename,caption` lines.

    Only the first comma separates the filename from the caption, so captions
    may themselves contain commas. Blank lines are skipped.

    Args:
        file_path: path of the text file to read.

    Returns:
        List of caption strings in file order.

    Raises:
        ValueError: if a non-blank line contains no comma.
    """
    captions = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = raw_line.strip()
            if not line:
                continue  # tolerate blank/trailing lines instead of crashing
            _filename, separator, caption = line.partition(',')
            if not separator:
                raise ValueError(
                    f"{file_path}:{line_number}: expected 'filename,caption', got {line!r}"
                )
            captions.append(caption)
    return captions

# Function to convert captions to dictionary format
def convert_to_dict(captions):
    """Map each caption to a one-element list keyed by a positional id `img_<index>`."""
    return {f"img_{i}": [caption] for i, caption in enumerate(captions)}


reference_file =  "/content/drive/MyDrive/NLP/Flicker8k_Dataset/output_captions_20240424_032923.txt" # Replace this with the correct path

# Read generated captions from the output file
generated_captions = read_captions_from_file(output_file)

# Read ground truth captions from the reference file
reference_captions = read_captions_from_file(reference_file)

# Captions are paired by line position, so both files must have the same length.
# NOTE(review): filenames are discarded by read_captions_from_file; confirm that
# both files list the images in the same order.
if len(generated_captions) != len(reference_captions):
    raise ValueError(
        f"Caption count mismatch: {len(generated_captions)} generated vs "
        f"{len(reference_captions)} reference captions"
    )

# Tokenize both sides: BLEU and METEOR operate on token lists. Passing raw
# strings makes BLEU score individual characters.
tokenized_reference_captions = [word_tokenize(caption) for caption in reference_captions]
tokenized_generated_captions = [word_tokenize(caption) for caption in generated_captions]

# Compute BLEU score (one reference per hypothesis)
bleu_score = corpus_bleu(
    [[reference] for reference in tokenized_reference_captions],
    tokenized_generated_captions,
)

# Compute METEOR score: meteor_score scores a single hypothesis against its
# references, so score each pair and report the corpus average.
meteor_scores = [
    meteor_score([reference], hypothesis)
    for reference, hypothesis in zip(tokenized_reference_captions, tokenized_generated_captions)
]
meteor_score_value = sum(meteor_scores) / len(meteor_scores) if meteor_scores else 0.0

# Function to compute ROUGE scores
def compute_rouge_scores(reference_captions, generated_captions):
    """Compute per-caption ROUGE-1/2/L scores with stemming.

    Each caption may be either a plain string or a sequence of tokens; token
    sequences are joined with spaces. (Joining a plain string, as the previous
    version did, inserted a space between every character.)

    Args:
        reference_captions: reference captions, paired by position.
        generated_captions: generated captions, paired by position.

    Returns:
        List with one `scorer.score(reference, generated)` result per pair.
    """
    def _as_text(caption):
        # Strings are used as-is; token sequences are joined into one string.
        return caption if isinstance(caption, str) else ' '.join(caption)

    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    return [
        scorer.score(_as_text(ref_caption), _as_text(gen_caption))
        for ref_caption, gen_caption in zip(reference_captions, generated_captions)
    ]

# Compute ROUGE scores
# Compute ROUGE scores
# NOTE(review): rouge_scores is computed here but not printed in this cell.
rouge_scores = compute_rouge_scores(tokenized_reference_captions, generated_captions)

# Convert reference and generated captions to dictionary format
# Both dictionaries use positional ids (img_0, img_1, ...), so captions are
# matched by their position in the two files.
reference_dict = convert_to_dict(reference_captions)
generated_dict = convert_to_dict(generated_captions)

# Compute CIDEr score
cider_scorer = Cider()
cider_score, _ = cider_scorer.compute_score(reference_dict, generated_dict)

# Print or log the evaluation results
print("Evaluation Metrics:")
print("BLEU Score:", bleu_score)
print("METEOR Score:", meteor_score_value)
print("CIDEr Score:", cider_score)


Evaluation Metrics:
BLEU Score: 0.18857111752511
METEOR Score: 0.0
CIDEr Score: 0.07000170858466405


In [None]:
from rouge_score import rouge_scorer

def compute_overall_rouge_scores(generated_captions, reference_captions):
    """Average the ROUGE-1, ROUGE-2 and ROUGE-L F1 scores over all caption pairs.

    Captions are paired by position. Returns zeros for empty input instead of
    dividing by zero.

    Args:
        generated_captions: list of generated caption strings.
        reference_captions: list of reference caption strings; must contain at
            least as many items as `generated_captions`.

    Returns:
        Tuple `(rouge1_f1_avg, rouge2_f1_avg, rougeL_f1_avg)`.

    Raises:
        ValueError: if there are fewer references than generated captions.
    """
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])
    num_captions = len(generated_captions)
    if num_captions == 0:
        return 0.0, 0.0, 0.0
    if len(reference_captions) < num_captions:
        raise ValueError(
            f"Need a reference for every generated caption: got {len(reference_captions)} "
            f"references for {num_captions} captions"
        )

    totals = {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}
    for generated, reference in zip(generated_captions, reference_captions):
        # RougeScorer.score expects (target, prediction): reference first. The F1
        # value is symmetric, but precision/recall are only meaningful in this order.
        scores = rouge.score(reference, generated)
        for metric in totals:
            totals[metric] += scores[metric].fmeasure

    rouge_1_f1_avg = totals['rouge1'] / num_captions
    rouge_2_f1_avg = totals['rouge2'] / num_captions
    rouge_l_f1_avg = totals['rougeL'] / num_captions

    return rouge_1_f1_avg, rouge_2_f1_avg, rouge_l_f1_avg

# Average ROUGE F1 over all caption pairs (generated_captions and
# reference_captions come from the evaluation cell above).
rouge_1_f1_avg, rouge_2_f1_avg, rouge_l_f1_avg = compute_overall_rouge_scores(generated_captions, reference_captions)

print("ROUGE-1 F1 Score:", rouge_1_f1_avg)
print("ROUGE-2 F1 Score:", rouge_2_f1_avg)
print("ROUGE-L F1 Score:", rouge_l_f1_avg)


ROUGE-1 F1 Score: 0.14788043665386547
ROUGE-2 F1 Score: 0.01264034304288713
ROUGE-L F1 Score: 0.13371012634629487
