In [None]:
# Shell escape: install the evaluate, rouge and rouge_score packages with pip.
!pip install evaluate rouge rouge_score

In [None]:
# Shell escape: install the huggingface_hub package with pip.
!pip install huggingface_hub

In [None]:
from huggingface_hub import notebook_login

# Run the Hugging Face Hub login helper for notebooks.
# NOTE(review): presumably required so the later push_to_hub calls can authenticate — confirm.
notebook_login()

In [None]:
import torch,nltk,spacy,string,transformers,json,evaluate,warnings
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, RandomSampler
from sklearn.model_selection import train_test_split
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, T5TokenizerFast
from nltk.translate.bleu_score import sentence_bleu
from sklearn.metrics import f1_score

warnings.filterwarnings("ignore")

In [None]:
# nltk.download('punkt')          # Tokenizer models
# nltk.download('wordnet')        # WordNet lexical database
# nltk.download('omw-1.4')        # Open Multilingual WordNet
# nltk.download('averaged_perceptron_tagger')  # POS tagger
# nltk.download('stopwords')      # Common stop words
# nltk.download('vader_lexicon')  # Sentiment analysis lexicon

In [None]:
# --- Hyperparameters and output locations ---
Q_LEN = 256   # max_length used when tokenizing the input context
T_LEN = 32    # max_length used when tokenizing the target question
BATCH_SIZE = 4
EPOCHS = 5
OUTPUT_DIR = '/kaggle/tmp/'
OUTPUT_MODEL_NAME = 'T5-QG-finetuned-squad'

# --- Pretrained t5-small tokenizer and model, moved to the GPU when one is available ---
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
TOKENIZER = T5TokenizerFast.from_pretrained("t5-small")
MODEL = T5ForConditionalGeneration.from_pretrained("t5-small", return_dict=True)
MODEL.to(DEVICE)

# The optimizer is built after the model has been moved to DEVICE.
OPTIMIZER = Adam(MODEL.parameters(), lr=1e-5)

In [None]:
# Loading the data
# Decode explicitly as UTF-8 rather than relying on the platform's default
# text encoding, which is not UTF-8 on every system.
with open('/kaggle/input/squad-20/train-v2.0.json', encoding='utf-8') as f:
    data = json.load(f)

In [None]:
def prepare_data(data):
    """Flatten a SQuAD-style nested dict into a list of flat records.

    Args:
        data: dict with a "data" list of articles; each article has a
            "paragraphs" list, and each paragraph has a "context" string and a
            "qas" list of question entries.

    Returns:
        A list of dicts with keys "context", "question" and "answer", one per
        question entry, in source order. "answer" is the text of the first
        listed answer, or "" when the entry is flagged "is_impossible" or lists
        no answers.
    """
    articles = []

    for article in data["data"]:
        for paragraph in article["paragraphs"]:
            for qa in paragraph["qas"]:
                # We use context as the input and question as the output (label)
                context = paragraph["context"]
                question = qa["question"]

                # "is_impossible" may be missing (records without unanswerable
                # questions) and "answers" may be empty; neither should crash
                # the flattening, so both cases fall back to an empty answer.
                answers = qa.get("answers") or []
                if qa.get("is_impossible", False) or not answers:
                    answer = ""
                else:
                    answer = answers[0]["text"]

                articles.append({"context": context, "question": question, "answer": answer})

    return articles

In [None]:
# Flatten the raw SQuAD dict into records and wrap them in a DataFrame
# (one row per question, with "context", "question" and "answer" columns).
data = pd.DataFrame(prepare_data(data))
data

In [None]:
# Optional quick-run subsample (disabled):
# data = data.sample(n=1000, random_state=42)
# data = data.reset_index(drop=True)

# Hold out 10% of the rows as the test set; the remaining 90% stays in `data`.
kept_rows, held_out_rows = train_test_split(data, test_size=0.1, random_state=42)

# Renumber both frames from 0 so later positional lookups line up with the index.
data = kept_rows.reset_index(drop=True)
test_data = held_out_rows.reset_index(drop=True)

print(f"Data size: {len(data)}")
print(f"Testing data size: {len(test_data)}")

In [None]:
class QA_Dataset(Dataset):
    """Dataset of tokenized (context -> question) pairs for seq2seq training.

    Args:
        tokenizer: callable tokenizer returning "input_ids" and
            "attention_mask" lists; must expose `pad_token_id`.
        dataframe: pandas DataFrame with "question" and "context" columns.
        q_len: max_length used when tokenizing the context (model input).
        t_len: max_length used when tokenizing the question (labels).
    """

    def __init__(self, tokenizer, dataframe, q_len, t_len):
        self.tokenizer = tokenizer
        self.q_len = q_len
        self.t_len = t_len
        self.data = dataframe
        self.questions = self.data["question"]
        self.context = self.data["context"]

    def __len__(self):
        """Number of (context, question) rows."""
        return len(self.questions)

    def __getitem__(self, idx):
        """Return the tokenized tensors for the row at position `idx`.

        Returns a dict with "input_ids", "attention_mask", "labels" and
        "decoder_attention_mask", all `torch.long` tensors. Padding positions
        in "labels" are set to -100.
        """
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame's index has been reset.
        question = self.questions.iloc[idx]
        context = self.context.iloc[idx]

        # Tokenizing context only (no answer included).
        # padding="max_length" already pads to max_length; the deprecated
        # pad_to_max_length flag was redundant and is dropped.
        input_tokenized = self.tokenizer(context, max_length=self.q_len, padding="max_length",
                                         truncation=True, add_special_tokens=True)

        # Tokenizing the question to use as labels
        question_tokenized = self.tokenizer(question, max_length=self.t_len, padding="max_length",
                                            truncation=True, add_special_tokens=True)

        labels = torch.tensor(question_tokenized["input_ids"], dtype=torch.long)
        # Ignore padding tokens, using the tokenizer's own pad id rather than a literal 0
        labels[labels == self.tokenizer.pad_token_id] = -100

        return {
            "input_ids": torch.tensor(input_tokenized["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(input_tokenized["attention_mask"], dtype=torch.long),
            "labels": labels,
            "decoder_attention_mask": torch.tensor(question_tokenized["attention_mask"], dtype=torch.long)
        }

In [None]:
# Dataloader
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# RandomSampler(source) yields positions 0..len(source)-1, not the elements of
# `source`. Feeding it `train_data.index` / `val_data.index` and sampling from a
# dataset over the full frame made both loaders read the head of `data`: every
# validation row was also a training row and the tail was never used.
# Each split therefore gets its own dataset, built on a 0-based copy of its frame.
train_dataset = QA_Dataset(TOKENIZER, train_data.reset_index(drop=True), Q_LEN, T_LEN)
val_dataset = QA_Dataset(TOKENIZER, val_data.reset_index(drop=True), Q_LEN, T_LEN)

# Dataset over the full (train + validation) frame, kept available under its original name.
qa_dataset = QA_Dataset(TOKENIZER, data, Q_LEN, T_LEN)

train_sampler = RandomSampler(train_dataset)
val_sampler = RandomSampler(val_dataset)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, sampler=val_sampler)

In [None]:
# Number of batches per epoch in each loader
n_train_batches, n_val_batches = len(train_loader), len(val_loader)
print(n_train_batches, n_val_batches)

In [None]:
# Lists to store the average loss of each epoch (read by the plotting cell)
train_losses = []
val_losses = []

for epoch in range(EPOCHS):
    # ---- Training pass ----
    MODEL.train()
    train_loss = 0
    train_batch_count = 0

    for batch in tqdm(train_loader, desc="Training batches"):
        input_ids = batch["input_ids"].to(DEVICE)
        attention_mask = batch["attention_mask"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)
        decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

        # Clear gradients left over from the previous step before the new forward/backward
        OPTIMIZER.zero_grad()

        outputs = MODEL(
                          input_ids=input_ids,
                          attention_mask=attention_mask,
                          labels=labels,
                          decoder_attention_mask=decoder_attention_mask
                        )

        outputs.loss.backward()
        OPTIMIZER.step()
        train_loss += outputs.loss.item()
        train_batch_count += 1

    avg_train_loss = train_loss / train_batch_count
    train_losses.append(avg_train_loss)

    # ---- Evaluation pass ----
    MODEL.eval()
    val_loss = 0
    val_batch_count = 0

    # No parameter update happens here, so skip autograd bookkeeping:
    # the loss values are unchanged, but no graph is built or kept in memory.
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validation batches"):
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)
            decoder_attention_mask = batch["decoder_attention_mask"].to(DEVICE)

            outputs = MODEL(
                              input_ids=input_ids,
                              attention_mask=attention_mask,
                              labels=labels,
                              decoder_attention_mask=decoder_attention_mask
                            )

            val_loss += outputs.loss.item()
            val_batch_count += 1

    avg_val_loss = val_loss / val_batch_count
    val_losses.append(avg_val_loss)

    print(f"{epoch+1}/{EPOCHS} -> Train loss: {avg_train_loss}\tValidation loss: {avg_val_loss}")

In [None]:
# Plotting the loss
# The x-axis follows the losses actually recorded, not EPOCHS, so the plot still
# works when training was interrupted or EPOCHS was edited after the run.
train_epochs = range(1, len(train_losses) + 1)
val_epochs = range(1, len(val_losses) + 1)

plt.figure(figsize=(10, 5))
plt.plot(train_epochs, train_losses, marker='o', label='Train Loss')
plt.plot(val_epochs, val_losses, marker='o', label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss Over Epochs')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Write the fine-tuned model and its tokenizer into one directory,
# relative to the current working directory.
local_model_dir = "T5-QG-finetuned-squad"
MODEL.save_pretrained(local_model_dir)
TOKENIZER.save_pretrained(local_model_dir)

In [None]:
from transformers import T5ForConditionalGeneration

# Upload the fine-tuned model and tokenizer to the same Hugging Face Hub repository.
hub_repo_id = "meowwShoo/T5-QG-finetuned-squad"
MODEL.push_to_hub(hub_repo_id, use_temp_dir=False)
TOKENIZER.push_to_hub(hub_repo_id, use_temp_dir=False)

In [None]:
# Second copy of the model and tokenizer, under OUTPUT_DIR
export_path = f'{OUTPUT_DIR}{OUTPUT_MODEL_NAME}'
MODEL.save_pretrained(export_path)
TOKENIZER.save_pretrained(export_path)

In [None]:
import shutil

# Zip the saved model directory.
# The model was written to OUTPUT_DIR/OUTPUT_MODEL_NAME; the previous base_dir
# ('AG_T5_Model') is a directory no cell in this notebook creates.
shutil.make_archive(f'{OUTPUT_DIR}T5_Model', 'zip', root_dir=OUTPUT_DIR, base_dir=OUTPUT_MODEL_NAME)

In [None]:
def predict_question(context, max_length=None):
    """Generate a question for `context` with the module-level MODEL and TOKENIZER.

    Args:
        context: passage text; tokenized with truncation and padding to Q_LEN tokens.
        max_length: upper bound on the generated sequence length passed to
            `MODEL.generate`. Defaults to T_LEN, the length the target
            questions were tokenized to for training; without an explicit
            bound the library's default generation length applies, which can
            cut questions short.

    Returns:
        dict with "Context" (the input text) and "Generated Question"
        (the decoded output with special tokens removed).
    """
    if max_length is None:
        max_length = T_LEN

    # Tokenize the input context (without answers); return_tensors="pt" yields
    # tensors that already carry the batch dimension of size 1.
    inputs = TOKENIZER(context, max_length=Q_LEN, padding="max_length", truncation=True,
                       add_special_tokens=True, return_tensors="pt")

    # Move to the device (GPU/CPU)
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs["attention_mask"].to(DEVICE)

    # Generate the question based on the context
    outputs = MODEL.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=max_length)

    # Decode the generated tokens to obtain the question
    generated_question = TOKENIZER.decode(outputs[0], skip_special_tokens=True)

    # Return the context and the generated question
    return {
        "Context": context,
        "Generated Question": generated_question
    }

In [None]:
# Short two-sentence passage used as a smoke test for predict_question
context = "The Eiffel Tower is located in Paris, France. It was built in 1889."

result = predict_question(context)  # dict with the context and the generated question
print(result)

In [None]:
# Longer narrative passage used as a second smoke test for predict_question
context = "Once upon a time, in a small village nestled between rolling hills and deep forests, there lived a young girl named Elara. She was curious and adventurous, always wandering off to explore the hidden corners of the village and beyond. Her favorite place was the ancient forest, where she often found mysterious plants, animals, and strange sounds that whispered secrets of old."

result = predict_question(context)  # dict with the context and the generated question
print(result)

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Reload the published checkpoint and its tokenizer from the Hugging Face Hub
hub_checkpoint = "meowwShoo/T5-QG-finetuned-squad"
model = T5ForConditionalGeneration.from_pretrained(hub_checkpoint)
tokenizer = T5Tokenizer.from_pretrained(hub_checkpoint)


In [None]:
input_text = "Once upon a time, in a small village nestled between rolling hills and deep forests, there lived a young girl named Elara. She was curious and adventurous, always wandering off to explore the hidden corners of the village and beyond. Her favorite place was the ancient forest, where she often found mysterious plants, animals, and strange sounds that whispered secrets of old."

# Encode the passage as a batch of one, generate with the hub-loaded model,
# then decode the first (only) generated sequence.
encoded = tokenizer(input_text, return_tensors="pt")
input_ids = encoded["input_ids"]
output_ids = model.generate(input_ids)
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(generated_text)


In [None]:
input_text = "Most competitive neural sequence transduction models have an encoder-decoder structure [5, 2, 29].Here, the encoder maps an input sequence of symbol representations (x1, ..., xn) to a sequence of continuous representations z = (z1, ..., zn). Given z, the decoder then generates an outputsequence (y1, ..., ym) of symbols one element at a time. At each step the model is auto-regressive, consuming the previously generated symbols as additional input when generating the next"

# Encode the passage as a batch of one, generate with the hub-loaded model,
# then decode the first (only) generated sequence.
encoded = tokenizer(input_text, return_tensors="pt")
input_ids = encoded["input_ids"]
output_ids = model.generate(input_ids)
generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(generated_text)


In [None]:
# Spot check on the first training row.
# The previous call went to `predict_answer`, which no cell in this notebook
# defines (NameError on a fresh run). The notebook's own inference helper is
# `predict_question`, which only needs the context; the reference question is
# shown alongside the generated one for comparison.
context = data.iloc[0]["context"]
question = data.iloc[0]["question"]
answer = data.iloc[0]["answer"]
{"Reference Question": question, **predict_question(context)}

In [None]:
# Spot check on the first held-out test row.
# The previous call went to `predict_answer`, which no cell in this notebook
# defines (NameError on a fresh run). The notebook's own inference helper is
# `predict_question`, which only needs the context; the reference question is
# shown alongside the generated one for comparison.
context = test_data.iloc[0]["context"]
question = test_data.iloc[0]["question"]
answer = test_data.iloc[0]["answer"]
{"Reference Question": question, **predict_question(context)}

In [None]:
# One record per evaluated example, plus one score list per metric
results_list = []
bleu_scores, rouge1_scores, rouge2_scores = [], [], []
rougeL_scores, rougeW_scores, rougeS_scores, rougeSU_scores = [], [], [], []
f1_scores, meteor_scores = [], []

# Metric key looked up in the result of predict_answer -> list that collects it.
# Insertion order here is also the column order of the per-example record / CSV.
score_sinks = {
    "Sentence BLEU Score": bleu_scores,
    "ROUGE-1 Score": rouge1_scores,
    "ROUGE-2 Score": rouge2_scores,
    "ROUGE-L Score": rougeL_scores,
    "ROUGE-W Score": rougeW_scores,
    "ROUGE-S Score": rougeS_scores,
    "ROUGE-SU Score": rougeSU_scores,
    "METEOR Score": meteor_scores,
    "GLUE Score (F1)": f1_scores,
}

# NOTE(review): `predict_answer` is not defined in any cell visible in this
# notebook; it must exist in the kernel (returning an object with `.get`)
# before this cell can run — confirm where it is supposed to come from.
for idx in tqdm(range(len(test_data))):
    row = test_data.iloc[idx]
    context, question, ref_answer = row["context"], row["question"], row["answer"]

    metrics = predict_answer(context, question, ref_answer)

    # Rows with an empty reference answer are predicted on but not scored.
    if not ref_answer:
        continue

    record = {
        "ID": idx,
        "Context": context,
        "Question": question,
        "Ref Answer": ref_answer,
        "Predicted Answer": metrics.get("Predicted Answer", ""),
    }
    for metric_key, sink in score_sinks.items():
        value = metrics.get(metric_key, 0)  # a missing metric counts as 0
        sink.append(value)
        record[metric_key] = value
    results_list.append(record)

# Persist the per-example table
results_df = pd.DataFrame(results_list)
results_df.to_csv('model_evaluation_results.csv', index=False)

# Calculate average, std, max, and min
def compute_stats(scores):
    """Return (mean, std, max, min) of a sequence of numeric scores.

    Args:
        scores: sequence of numbers (anything `len()` and NumPy reductions accept).

    Returns:
        A 4-tuple (mean, population std, max, min). For an empty sequence all
        four values are NaN; `np.max` / `np.min` would otherwise raise
        ValueError on empty input.
    """
    if len(scores) == 0:
        return np.nan, np.nan, np.nan, np.nan
    return np.mean(scores), np.std(scores), np.max(scores), np.min(scores)

# Summary statistics per metric: each call yields (mean, std, max, min).
# The tuple is kept whole for the report loop and also unpacked into named values.
avg_bleu, std_bleu, max_bleu, min_bleu = bleu_stats = compute_stats(bleu_scores)
avg_rouge1, std_rouge1, max_rouge1, min_rouge1 = rouge1_stats = compute_stats(rouge1_scores)
avg_rouge2, std_rouge2, max_rouge2, min_rouge2 = rouge2_stats = compute_stats(rouge2_scores)
avg_rougeL, std_rougeL, max_rougeL, min_rougeL = rougeL_stats = compute_stats(rougeL_scores)
avg_rougeW, std_rougeW, max_rougeW, min_rougeW = rougeW_stats = compute_stats(rougeW_scores)
avg_rougeS, std_rougeS, max_rougeS, min_rougeS = rougeS_stats = compute_stats(rougeS_scores)
avg_rougeSU, std_rougeSU, max_rougeSU, min_rougeSU = rougeSU_stats = compute_stats(rougeSU_scores)
avg_f1, std_f1, max_f1, min_f1 = f1_stats = compute_stats(f1_scores)
avg_meteor, std_meteor, max_meteor, min_meteor = meteor_stats = compute_stats(meteor_scores)

# Print results, one line per metric, in report order
summary_rows = [
    ("BLEU", bleu_stats),
    ("ROUGE-1", rouge1_stats),
    ("ROUGE-2", rouge2_stats),
    ("ROUGE-L", rougeL_stats),
    ("ROUGE-W", rougeW_stats),
    ("ROUGE-S", rougeS_stats),
    ("ROUGE-SU", rougeSU_stats),
    ("METEOR", meteor_stats),
    ("F1", f1_stats),
]
for metric_label, (mean_value, std_value, max_value, min_value) in summary_rows:
    print(f"Average {metric_label} Score: {mean_value:.4f}, Std Dev: {std_value:.4f}, Max: {max_value:.4f}, Min: {min_value:.4f}")