In [None]:
!pip install rouge_score bertscore
!pip install evaluate data peft datasets

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from peft import LoraConfig, get_peft_model
import os
from transformers.trainer_utils import get_last_checkpoint


from evaluate import load
import numpy as np

# Load the ROUGE and F1 metrics
rouge_metric = load('rouge')
f1_metric = load('f1')
# Load the SQuAD v2 dataset
squad_v2 = load_dataset("squad_v2")

# Load the tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Define LoRA configuration.
# task_type tells PEFT that the base model is an encoder-decoder LM, so
# get_peft_model wraps it in the seq2seq-specific PEFT class instead of the
# generic wrapper (which exposes no explicit `labels` argument in its forward).
from peft import TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

# Preprocess and tokenize data
# Preprocessing Function
def preprocess_function(examples):
    """Tokenize a batch of SQuAD v2 rows for context -> "question ... answer ..." generation.

    Args:
        examples: batch dict with parallel lists under "context", "question"
            and "answers" (each answers entry has a "text" list).

    Returns:
        The tokenizer output for the inputs, with an added "labels" entry that
        has exactly one label sequence per input row.

    Each label is built at the same index as its context. Rows without any
    answer get an empty target string, so every other row keeps its own
    question/answer (dropping rows and padding at the end would shift labels
    onto the wrong contexts). Consider filtering unanswerable rows out of the
    dataset before mapping if empty targets are not wanted.
    """
    # Use only the context as the input
    inputs = [f"context: {context}" for context in examples["context"]]

    # One target per row, aligned by position with `inputs`
    labels = [
        f"question: {question} answer: {answer['text'][0]}" if len(answer["text"]) > 0 else ""
        for question, answer in zip(examples["question"], examples["answers"])
    ]

    # Tokenize inputs
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")

    # Tokenize labels
    labels_tokenized = tokenizer(labels, max_length=128, truncation=True, padding="max_length")

    # Replace padding positions with -100 so the loss ignores them
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token if token != pad_id else -100 for token in sequence]
        for sequence in labels_tokenized["input_ids"]
    ]

    return model_inputs

# Tokenize every split of SQuAD v2 in batches with preprocess_function
tokenized_squad = squad_v2.map(preprocess_function, batched=True)


# Pick the data handed to the Trainer.
# Note: despite its name, small_train_dataset is the entire "train" split (no
# .select is applied); only the validation set is cut to its first 100 rows.
small_train_dataset = tokenized_squad["train"]
small_validation_dataset = tokenized_squad["validation"].select(range(100))

# Training configuration; checkpoints are written to the mounted Google Drive.
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/latest1',  # Directory to save checkpoints
    evaluation_strategy="steps",    # Evaluate on a step schedule (no eval_steps is set here)
    save_steps=500,                 # Save a checkpoint every 500 steps
    save_total_limit=2,             # Only keep the last 2 checkpoints to save space
    logging_dir='./logs',           # Directory for logging
    logging_steps=100,              # Log every 100 steps
    num_train_epochs=1,             # Number of training epochs
    per_device_train_batch_size=4,  # Batch size for training
    per_device_eval_batch_size=4,   # Batch size for evaluation
    resume_from_checkpoint=True,    # NOTE(review): resumption below is driven by trainer.train(resume_from_checkpoint=...); confirm this flag has any effect on its own
)

from transformers import TrainerCallback

class SaveTokenizerCallback(TrainerCallback):
    """Trainer callback that periodically writes the tokenizer to disk.

    Whenever the global step is a multiple of ``save_steps`` the tokenizer is
    saved into ``output_dir`` and a short message is printed.
    """

    def __init__(self, tokenizer, save_steps, output_dir):
        # Tokenizer to persist, the step interval, and the destination folder
        self.tokenizer, self.save_steps, self.output_dir = tokenizer, save_steps, output_dir

    def on_step_end(self, args, state, control, **kwargs):
        """Save the tokenizer on interval steps; always hand `control` back unchanged."""
        on_interval = state.global_step % self.save_steps == 0
        if not on_interval:
            return control

        self.tokenizer.save_pretrained(self.output_dir)
        print(f"Tokenizer saved to {self.output_dir} at step {state.global_step}")
        return control

# Initialize the custom callback; reuse the checkpoint interval from the
# training arguments so tokenizer saves stay in step with model checkpoints.
tokenizer_save_callback = SaveTokenizerCallback(
    tokenizer=tokenizer,
    save_steps=training_args.save_steps,
    output_dir=training_args.output_dir
)

# Look for the most recent checkpoint in the output directory (None if the
# directory is missing or holds no checkpoint yet).
last_checkpoint = None
if os.path.isdir(training_args.output_dir):
    last_checkpoint = get_last_checkpoint(training_args.output_dir)

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_validation_dataset,
    data_collator=data_collator,
    callbacks=[tokenizer_save_callback]

)

# Start training, resuming from the detected checkpoint if there is one.
# The detected path is passed through instead of a hard-coded checkpoint
# directory, so a newer (or differently numbered) checkpoint is picked up.
if last_checkpoint:
    print(f"Resuming training from checkpoint: {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)
else:
    trainer.train()




KeyboardInterrupt: 

In [None]:
# Save the trained model and the tokenizer to Google Drive.
# NOTE(review): `model` was wrapped by get_peft_model, so this directory
# presumably holds the LoRA adapter rather than full weights; confirm.

trainer.save_model("/content/drive/MyDrive/latest1/model")
tokenizer.save_pretrained("/content/drive/MyDrive/latest1/tokenizer")

In [None]:
from peft import PeftModel, PeftConfig
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the base t5-base tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Attach the saved adapter to the base model, then merge it into the base
# weights and drop the PEFT wrapper so `model` is a plain T5 model again.
model = PeftModel.from_pretrained(model, "/content/drive/MyDrive/latest1/model")
model = model.merge_and_unload()


## **`Testing the model`**

In [None]:
!pip install transformers



In [None]:
from transformers import pipeline

# Initialize the summarization pipeline with the Falconsai/text_summarization
# checkpoint (the recorded cell output shows its files being downloaded).
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

# Example texts for summarization
text1 = """A lawyer for several of Sean “Diddy” Combs’ accusers revealed “graphic” and “gruesome” details about an alleged rape committed by the rapper amid his ongoing legal woes.
Ariel Mitchell-Kidd detailed her client’s alleged attack during an appearance on the “Banfield” podcast on Monday, saying she plans to file a new lawsuit against the jailed music mogul “sometime this week.”
“My client was raped by Mr. Combs, his bodyguard and a friend who invited my client to his home to set up this whole situation,” Mitchell-Kidd alleged.
The lawyer alleged that Combs threatened her client “with a knife” and made “her take off her clothes” before dousing her with an oil-like substance, which she claimed may have been infused with a date rape drug like “GHB.”
“And then he takes what she believes is some type of liquid substance out of a bag, out of a fanny pack, to be specific, and he squirts it at her,” she alleged. “And she originally thought it was like acid or something, but then she realised that it was some type of lubricant or oil."""

text2 = """Once the woman was allegedly “covered in this oil, that’s when the attack began with Mr. Combs and then the friend,” according to Mitchell-Kidd.
“In between the two of them, she was squirted some more with this oil, and then the bodyguard then attacks her and raped her as well,” the lawyer claimed.
As the alleged attack went on, the woman’s “body got more and more limp;” however, she “couldn’t figure out what was causing it.”
“It wasn’t as if she was forced any drugs,” the lawyer claimed. “She said she had a cup of water that she took a sip of, and she knew immediately it wasn’t just water, but she only took a sip, and she felt that whatever the liquid was being squirted on her had something in it which essentially debilitated her and her faculties.”
Mitchell-Kidd told guest host Brian Entin that oil can be used “topically” as a “conduit for specific drugs, like, for instance, GHB.”
“So, it seems to me that there were some types of drugs mixed into the oil, which is why he was dousing her in that oil prior, not only to make it easier to assault her, but that was what was lowering her defences,” she alleged."""

text3 = """Mitchell-Kidd claimed her client was eventually able to make a “harrowing escape” and “get to safety after the gruesome attack.”
Reps for Combs did not immediately respond to Page Six’s request for comment. However, in an Oct. 1 statement, they noted it was impossible “to address every meritless allegation” levied against the rapper.
The “I’ll Be Missing You” rapper was arrested last month and charged with sex trafficking, racketeering and prostitution.
He was taken into custody six months after feds raided his homes in Los Angeles and Miami, where they found firearms and over 1,000 bottles of lubricant.
In the last month, over 120 people have come forward with accusations of sexual assault and abuse dating back 30 years.
Combs pleaded not guilty to all charges and is being held in a Brooklyn jail."""

# Run the summarizer over the three passages in order, one call per passage
summary1, summary2, summary3 = [
    summarizer(passage, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
    for passage in (text1, text2, text3)
]

# Stitch the partial summaries together, separated by single spaces
final_summary = " ".join((summary1, summary2, summary3))

# Show the combined summary
print("Final Summary:", final_summary)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Final Summary: Ariel Mitchell-Kidd detailed her client’s alleged rape during an appearance on the “Banfield” podcast on Monday . She said she plans to file a new lawsuit against the jailed music mogul “sometime this week” Mitchell-Kidd told guest host Brian Entin that oil can be used “topically” as a “conduit for specific drugs, like, for instance, GHB” . Combs was arrested last month and charged with sex trafficking, racketeering and prostitution . He was taken into custody six months after feds raided his homes in Los Angeles and Miami . In the last month, over 120 people have come forward with accusations dating back 30 years .


In [None]:
!pip install git+https://github.com/boudinfl/pke.git

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, DataCollatorForSeq2Seq

# Load the fine-tuned model and tokenizer
# NOTE(review): the recorded output of this cell reports "unexpected keys not
# found in the model" for the LoRA weights, so the adapter may not actually be
# applied here; verify. This path (latest/checkpoint-12500) also differs from
# the latest1 directory used by the other cells.

model = T5ForConditionalGeneration.from_pretrained('/content/drive/MyDrive/latest/checkpoint-12500/model')
tokenizer = T5Tokenizer.from_pretrained('/content/drive/MyDrive/latest/checkpoint-12500/tokenizer')
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

def generate_qa_pairs(context, num_questions=3):
    """Sample questions for a context, then generate an answer for each one.

    Args:
        context: passage text to generate from.
        num_questions: how many sequences to sample in the question step.

    Returns:
        A list of (question, answer) string tuples, one per sampled sequence.
    """

    def _encode(text):
        # Tokenize a single prompt, truncated to 512 tokens, as PyTorch tensors
        return tokenizer(text, return_tensors='pt', max_length=512, truncation=True).input_ids

    # Step 1: sample `num_questions` candidate sequences from the context prompt
    sampled_sequences = model.generate(
        _encode(f"context: {context}"),
        max_length=50,
        num_return_sequences=num_questions,
        do_sample=True,    # sampling gives diverse candidates
        top_k=50,          # keep the 50 most likely tokens
        top_p=0.9,         # nucleus sampling threshold
        temperature=1.0,   # unscaled logits
    )
    questions = tokenizer.batch_decode(sampled_sequences, skip_special_tokens=True)

    def _answer(question):
        # Step 2: default (non-sampling) generation on a question + context prompt
        prompt_ids = _encode(f"question: {question} context: {context}")
        generated = model.generate(prompt_ids, max_length=50)
        return tokenizer.decode(generated[0], skip_special_tokens=True)

    return [(question, _answer(question)) for question in questions]

# Example context
context = """The International Institute of Information Technology Hyderabad (IIITH) has introduced a new online course titled 'AI for Medical Professionals,' in collaboration with the National Academy of Medical Sciences (NAMS) and IHub-Data. NAMS, an autonomous organization under the Ministry of Health & Family Welfare, Government of India, joined forces with IIITH to design this 12-week orientation program aimed at equipping medical professionals with essential AI skills."""
# Generate five question/answer pairs for the example context

qa_pairs = generate_qa_pairs(context, num_questions=5)
# Print the raw list of (question, answer) tuples first ...
print(qa_pairs)
# ... then each generated question on its own line (answers are not printed here)
for q, a in qa_pairs:
    print(f"{q}")

# Print the generated question and answer pairs



Loading adapter weights from /content/drive/MyDrive/latest/checkpoint-12500/model led to unexpected keys not found in the model:  ['base_model.decoder.block.0.layer.0.SelfAttention.q.lora_A.default.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.q.lora_A.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.q.lora_B.default.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.q.lora_B.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.v.lora_A.default.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.v.lora_A.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.v.lora_B.default.default.weight', 'base_model.decoder.block.0.layer.0.SelfAttention.v.lora_B.default.weight', 'base_model.decoder.block.0.layer.1.EncDecAttention.q.lora_A.default.default.weight', 'base_model.decoder.block.0.layer.1.EncDecAttention.q.lora_A.default.weight', 'base_model.decoder.block.0.layer.1.EncDecAttention.q.lora_B.defa

[('AI for Medical Professionals', "International Institute of Information Technology Hyderabad (IIITH) has introduced a new online course titled 'AI for Medical Professionals,' in collaboration with the National Academy of Medical Sciences (NAMS) and IHub-Data"), ("'AI for Medical Professionals'", 'online course'), ('the National Academy of Medical Sciences (NAMS) and IHub-Data', "The International Institute of Information Technology Hyderabad (IIITH) has introduced a new online course titled 'AI for Medical Professionals,' in collaboration with the National Academy of Medical Sciences"), ("AI for Medical Professionals,' in collaboration with the National Academy of Medical Sciences (NAMS) and IHub-Data", "The International Institute of Information Technology Hyderabad (IIITH) has introduced a new online course titled 'AI for Medical Professionals,' in collaboration with the National Academy of Medical Sciences"), ('National Academy of Medical Sciences (NAMS) and IHub-Data', "Internati

# **`Evaluation (BLEU Score)`**

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
from evaluate import load
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

# Load the SQuAD v2 dataset
squad_v2 = load_dataset("squad_v2")

# Smoothing function (NLTK SmoothingFunction method1) passed to sentence_bleu below
smooth = SmoothingFunction().method1

# Load the saved model and tokenizer from Google Drive
model = T5ForConditionalGeneration.from_pretrained('/content/drive/MyDrive/latest1/model')
tokenizer = T5Tokenizer.from_pretrained('/content/drive/MyDrive/latest1/tokenizer')

# Generate one question and one answer for a context
def generate_question_answer(context):
    """Sample a single question for `context`, then generate an answer to it.

    Args:
        context: passage text to generate from.

    Returns:
        Tuple ``(question, answer)`` of decoded strings.
    """

    def _encode(text):
        # Tokenize one prompt, truncated to 512 tokens, as PyTorch tensors
        return tokenizer(text, return_tensors='pt', max_length=512, truncation=True).input_ids

    # Question step: one sampled sequence (top-k / nucleus sampling)
    sampled = model.generate(
        _encode(f"context: {context}"),
        max_length=50,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        temperature=1.0,
    )
    question = tokenizer.decode(sampled[0], skip_special_tokens=True)

    # Answer step: default generation on the question + context prompt
    generated = model.generate(_encode(f"question: {question} context: {context}"), max_length=50)
    answer = tokenizer.decode(generated[0], skip_special_tokens=True)

    return question, answer

# Calculate BLEU score
def calculate_bleu(predictions, references):
    """Average smoothed sentence-level BLEU over prediction/reference pairs.

    Args:
        predictions: list of predicted strings.
        references: list of reference strings, paired with `predictions` by position.

    Returns:
        Mean sentence BLEU over the paired items, or 0.0 when there are no
        pairs (instead of dividing by zero). Both sides are tokenized by
        whitespace; if the lists differ in length, the extra items are ignored.
    """
    bleu_scores = [
        sentence_bleu([ref.split()], pred.split(), smoothing_function=smooth)
        for pred, ref in zip(predictions, references)
    ]
    if not bleu_scores:
        return 0.0
    return sum(bleu_scores) / len(bleu_scores)

# Select 10 samples from the dataset
def evaluate_bleu_on_samples(squad_data):
    """Build generated and ground-truth Q&A strings for the first 10 validation rows.

    Args:
        squad_data: dataset mapping with a 'validation' split supporting `.select`.

    Returns:
        Tuple ``(generated_pairs, ground_truth_pairs)``: two lists of equal
        length where entry i of each list refers to the same example.

    The reference is collected inside the per-example loop so that every
    generated pair gets exactly one reference from the same row. The first
    listed answer is used, or "No answer" when the row has none.
    """
    generated_pairs = []
    ground_truth_pairs = []

    for example in squad_data['validation'].select(range(10)):
        context = example['context']

        # Generate question and answer
        generated_question, generated_answer = generate_question_answer(context)
        generated_pairs.append(f"Q: {generated_question} A: {generated_answer}")

        # Collect this example's ground truth question and answer
        ground_truth_question = example['question']
        answer_texts = example['answers']['text']
        ground_truth_answer = answer_texts[0] if answer_texts else "No answer"
        ground_truth_pairs.append(f"Q: {ground_truth_question}\nA: {ground_truth_answer}")

    return generated_pairs, ground_truth_pairs

# Build generated and ground-truth Q&A strings from the validation samples
generated_qas, ground_truth_qas = evaluate_bleu_on_samples(squad_v2)

# Print the generated pairs, then the ground-truth pairs, for inspection
for qa in generated_qas:
    print(qa)

for qa in ground_truth_qas:
    print(qa)
# Average sentence-level BLEU between generated and ground-truth strings
bleu_score = calculate_bleu(generated_qas, ground_truth_qas)
print(f"Average BLEU score for generated Q&A pairs: {bleu_score:.4f}")


Generated QA pairs:
Q:  A: question: What is the name of the Normans? answer: Normandy
Q: question: What was the word of the Roman-Gaulish population? answer: Frankish A: question: What was the name of the region in France? answer: Normandy
Q: question: How many people did it take to enter the Norman provinces? answer: seven million A: question: What was the name of the Normans? answer: Normandy
Q: question: What did the Normans do during the 10th and 11th centuries? answer: give their name to Normandy A: question: What was the name of the Normans? answer: Normandy
Q: question: Who was King Charles III's slave? answer: Rollo A: question: What was the name of the Normans? answer: Normandy
Q:  A: question: What was the name of the Normans? answer: Normandy
Q: question: The Normans are an ethnic group in the United States? answer: Normans A: question: What is the name of the Normans? answer: Normandy
Q: question: How did the Normans develop and become independent from Germany in the 8th c

## **``BERTScore``**

In [None]:
# Install bert_score, then load the BERTScore metric plus the saved model and tokenizer
!pip install bert_score
from evaluate import load
bertscore_metric = load("bertscore")
# T5ForConditionalGeneration / T5Tokenizer are not imported in this cell; it relies on an earlier cell's import
model = T5ForConditionalGeneration.from_pretrained('/content/drive/MyDrive/latest1/model')
tokenizer = T5Tokenizer.from_pretrained('/content/drive/MyDrive/latest1/tokenizer')
def generate_question_answer(context):
    """Sample a single question for `context`, then generate an answer to it.

    Args:
        context: passage text to generate from.

    Returns:
        Tuple ``(question, answer)`` of decoded strings.
    """

    def _encode(text):
        # Tokenize one prompt, truncated to 512 tokens, as PyTorch tensors
        return tokenizer(text, return_tensors='pt', max_length=512, truncation=True).input_ids

    # Question step: one sampled sequence (top-k / nucleus sampling)
    sampled = model.generate(
        _encode(f"context: {context}"),
        max_length=50,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        temperature=1.0,
    )
    question = tokenizer.decode(sampled[0], skip_special_tokens=True)

    # Answer step: default generation on the question + context prompt
    generated = model.generate(_encode(f"question: {question} context: {context}"), max_length=50)
    answer = tokenizer.decode(generated[0], skip_special_tokens=True)

    return question, answer

# Mean BERTScore F1 over all pairs
def calculate_bertscore(predictions, references):
    """Return the average BERTScore F1 between predictions and references (English)."""
    f1_scores = bertscore_metric.compute(
        predictions=predictions,
        references=references,
        lang="en",
    )['f1']
    return sum(f1_scores) / len(f1_scores)
# Build generated / ground-truth Q&A strings for the first 10 validation rows
def evaluate_bleu_on_samples(squad_data):
    """Return ``(generated_pairs, ground_truth_pairs)`` for 10 validation samples.

    Each generated string comes from generate_question_answer on the row's
    context; each reference uses the row's question and its first listed
    answer, or "No answer" when the row has none.
    """
    samples = squad_data['validation'].select(range(10))
    generated_pairs, ground_truth_pairs = [], []

    for example in samples:
        # Model side: question and answer generated from the context
        generated_question, generated_answer = generate_question_answer(example['context'])

        # Reference side: dataset question plus first answer (or the placeholder)
        ground_truth_question = example['question']
        answer_texts = example['answers']['text']
        ground_truth_answer = answer_texts[0] if answer_texts else "No answer"

        generated_pairs.append(f"Q: {generated_question} A: {generated_answer}")
        ground_truth_pairs.append(f"Q: {ground_truth_question} A: {ground_truth_answer}")

    return generated_pairs, ground_truth_pairs

# Build generated and ground-truth Q&A strings from the validation samples
# (the helper keeps its BLEU-era name but is reused here for BERTScore)
generated_qas, ground_truth_qas = evaluate_bleu_on_samples(squad_v2)

# Now calculate BERTScore
bertscore_f1 = calculate_bertscore(generated_qas, ground_truth_qas)
print(f"Average BERT F1 Score for generated Q&A pairs: {bertscore_f1:.4f}")


# **`ROUGE Score`**

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
from evaluate import load

# Load the SQuAD v2 dataset
squad_v2 = load_dataset("squad_v2")

# Load ROUGE metric
rouge_metric = load('rouge')

# Load the saved tokenizer and model from Google Drive
tokenizer = T5Tokenizer.from_pretrained('/content/drive/MyDrive/latest1/tokenizer')
model = T5ForConditionalGeneration.from_pretrained('/content/drive/MyDrive/latest1/model')

# Collect questions, reference answers and contexts from the validation split
def extract_ground_truth(dataset, num_samples=10):
    """Return ``(questions, ground_truths, contexts)`` for the first `num_samples` validation rows.

    The reference answer is the first listed answer text, or the placeholder
    "No answer" for rows that have no answers.
    """
    questions, ground_truths, contexts = [], [], []
    for example in dataset['validation'].select(range(num_samples)):
        answer_texts = example['answers']['text']
        questions.append(example['question'])
        contexts.append(example['context'])
        ground_truths.append(answer_texts[0] if answer_texts else "No answer")
    return questions, ground_truths, contexts

# Extract questions, reference answers, and contexts using the default sample count (10)
questions, ground_truth_answers, contexts = extract_ground_truth(squad_v2)

# Answer the given questions for one context and score them with ROUGE
def generate_qa_and_evaluate(context, questions, ground_truth_answers):
    """Generate an answer per question and compute ROUGE against the references.

    Args:
        context: passage text shared by all questions.
        questions: list of question strings.
        ground_truth_answers: reference answers paired with `questions` by position;
            entries equal to "No answer" are excluded from scoring.

    Returns:
        The ROUGE metric result for the answerable pairs, or a dict of zeros
        for 'rouge1', 'rouge2' and 'rougeL' when nothing is left to score.
    """

    def _answer(question):
        # Build the prompt, generate, decode and trim surrounding whitespace
        answer_input = f"question: {question} context: {context}"
        prompt_ids = tokenizer(answer_input, return_tensors='pt', max_length=512, truncation=True).input_ids
        generated = model.generate(prompt_ids, max_length=50)
        return tokenizer.decode(generated[0], skip_special_tokens=True).strip()

    generated_answers = [_answer(question) for question in questions]

    # Keep only pairs whose reference is a real answer
    scorable = [
        (prediction, reference)
        for prediction, reference in zip(generated_answers, ground_truth_answers)
        if reference != "No answer"
    ]
    valid_generated_answers = [prediction for prediction, _ in scorable]
    valid_ground_truth_answers = [reference for _, reference in scorable]

    # Nothing answerable: report zeros instead of calling the metric
    if not (valid_generated_answers and valid_ground_truth_answers):
        return {'rouge1': 0, 'rouge2': 0, 'rougeL': 0}

    return rouge_metric.compute(predictions=valid_generated_answers, references=valid_ground_truth_answers)

# Calculate average ROUGE score for answerable questions only
def calculate_average_rouge(contexts, questions, ground_truth_answers):
    """Print the mean ROUGE-1/2/L over all answerable examples.

    Args:
        contexts: list of passages.
        questions: list of questions, paired with `contexts` by position.
        ground_truth_answers: list of reference answers, paired by position;
            entries equal to "No answer" mark unanswerable examples.

    Returns:
        None. The averages (or a notice that nothing could be scored) are printed.

    An example counts toward the average whenever its reference is a real
    answer, including when the generated answer scores 0 on every ROUGE
    variant. Deciding inclusion by "score > 0" would drop the complete misses
    and inflate the reported averages.
    """
    totals = {'rouge1': 0.0, 'rouge2': 0.0, 'rougeL': 0.0}
    count = 0

    for context, question, truth in zip(contexts, questions, ground_truth_answers):
        # Unanswerable examples are skipped up front (no generation needed)
        if truth == "No answer":
            continue

        rouge_results = generate_qa_and_evaluate(context, [question], [truth])
        count += 1
        for key in totals:
            totals[key] += rouge_results[key]

    # Average ROUGE scores
    if count > 0:
        print(f"Average ROUGE-1 F1: {totals['rouge1'] / count:.4f}")
        print(f"Average ROUGE-2 F1: {totals['rouge2'] / count:.4f}")
        print(f"Average ROUGE-L F1: {totals['rougeL'] / count:.4f}")
    else:
        print("No valid answers for evaluation.")

# Print the average ROUGE scores for the extracted samples
calculate_average_rouge(contexts, questions, ground_truth_answers)


Average ROUGE-1 F1: 0.2644
Average ROUGE-2 F1: 0.1429
Average ROUGE-L F1: 0.2644
