In [None]:
# Install the third-party packages this notebook imports below:
#   datasets    -> load_dataset / load_metric
#   rouge-score -> imported as rouge_score
#   py7zr       -> not imported directly; presumably needed by
#                  load_dataset("samsum") to unpack the dataset archive - TODO confirm
# NOTE(review): versions are unpinned. `load_metric` is reported to be missing
# from recent `datasets` releases, so a version pin may be required - confirm.
!pip install datasets
!pip install py7zr
!pip install rouge-score

In [None]:
# Mount Google Drive at /content/drive (Colab only); the checkpoint loaded
# later in this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import torch.nn as nn
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset, load_metric
from rouge_score import rouge_scorer, scoring

# Load the best fine-tuned checkpoint on top of the pretrained FLAN-T5-base weights.
model_path = r'/content/drive/MyDrive/LLM Models/FLAN-T5-base x SAMsum/best_model.pth'
# map_location="cpu" makes the load independent of the device the checkpoint
# was saved from: it works on CPU-only runtimes and does not leave an extra
# copy of the weights on the GPU. The model is moved to its target device
# explicitly further down.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(model_path, map_location="cpu")
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# The checkpoint is a dict; the weights live under 'model_state_dict'.
model.load_state_dict(checkpoint['model_state_dict'])

# Function to evaluate the model and compute validation loss and ROUGE score
def evaluate_model(model, dataloader, device):
    """Compute the mean per-batch loss and ROUGE scores of ``model``.

    Padding positions in the labels are excluded from the loss: label ids equal
    to the tokenizer's pad id are replaced by -100 (the index the model's loss
    ignores) before the forward pass. Labels that already use -100 are also
    accepted; they are mapped back to the pad id before being decoded to text.

    Uses the module-level ``tokenizer`` for decoding and ``load_metric("rouge")``
    for scoring.

    Args:
        model: Seq2seq model providing ``eval()``, a forward call that accepts
            ``input_ids``, ``attention_mask`` and ``labels`` and returns an
            object with ``.loss``, and ``generate()``. It must already be on
            ``device``; this function does not move it.
        dataloader: Iterable of dict batches holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_eval_loss, rouge_score)``: the loss averaged over batches
        and the value returned by the ROUGE metric's ``compute()``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()
    pad_token_id = tokenizer.pad_token_id
    rouge = load_metric("rouge")
    eval_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            if pad_token_id is not None:
                # Loss must not be dominated by padding: ignore pad positions.
                loss_labels = labels.masked_fill(labels == pad_token_id, -100)
                # -100 is not a valid token id, so restore pad ids for decoding.
                text_labels = labels.masked_fill(labels == -100, pad_token_id)
            else:
                loss_labels = text_labels = labels

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=loss_labels)
            eval_loss += outputs.loss.item()
            num_batches += 1

            # Generate predictions
            predictions = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=128)
            decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
            decoded_labels = tokenizer.batch_decode(text_labels, skip_special_tokens=True)

            # Accumulate this batch for the ROUGE computation
            rouge.add_batch(predictions=decoded_preds, references=decoded_labels)

    if num_batches == 0:
        raise ValueError("evaluate_model received an empty dataloader")

    # Counting batches (rather than len(dataloader)) also supports iterables
    # that do not define __len__.
    avg_eval_loss = eval_loss / num_batches
    rouge_score = rouge.compute()
    return avg_eval_loss, rouge_score

# Measure the size of a model (works before and after quantization)
def get_model_size(model):
    """Return the size of ``model`` in megabytes (1 MB = 1e6 bytes).

    The size is the byte length of the model's ``state_dict()`` serialized in
    memory with ``torch.save``. This counts every stored tensor once, including
    buffers and the packed weights of dynamically quantized ``nn.Linear``
    layers, which ``model.parameters()`` does not expose. A small, constant
    serialization overhead is included in the result.

    Args:
        model: Any ``torch.nn.Module``.

    Returns:
        float: Serialized size in MB.
    """
    import io  # local import: the notebook's import cell does not provide it

    buffer = io.BytesIO()
    torch.save(model.state_dict(), buffer)
    return buffer.getbuffer().nbytes / 1e6  # Convert to MB

# Prepare the evaluation dataset and dataloader
# Load the "samsum" dataset and keep only its "validation" split for evaluation.
dataset = load_dataset("samsum")
eval_dataset = dataset["validation"]

# Preprocessing for the SAMsum dataset
def preprocess_function(examples):
    """Tokenize a batch of dialogue/summary pairs for the seq2seq model.

    Each dialogue is prefixed with "summarize: " and tokenized to exactly 512
    ids; each summary is tokenized to exactly 128 ids. Both are truncated and
    padded to their maximum length. Uses the module-level ``tokenizer``.

    Args:
        examples: Mapping with "dialogue" and "summary" sequences of strings.

    Returns:
        The tokenizer output for the prompts, with the summary token ids added
        under the "labels" key.
    """
    prompts = []
    for dialogue in examples["dialogue"]:
        prompts.append(f"summarize: {dialogue}")

    # Both sides use the same truncation / padding policy.
    shared_options = dict(truncation=True, padding="max_length")
    encoded = tokenizer(prompts, max_length=512, **shared_options)
    target_ids = tokenizer(examples["summary"], max_length=128, **shared_options)["input_ids"]

    encoded["labels"] = target_ids
    return encoded

# Apply the preprocessing function
# batched=True passes preprocess_function batches of examples (lists per column);
# the raw "dialogue", "summary" and "id" columns are removed from the result.
tokenized_eval_dataset = eval_dataset.map(preprocess_function, batched=True, remove_columns=["dialogue", "summary", "id"])

def collate_fn(batch):
    """Stack a list of tokenized examples into int64 tensors.

    Args:
        batch: List of dicts, each with "input_ids", "attention_mask" and
            "labels" holding equal-length lists of ints.

    Returns:
        Dict with the same three keys, each a ``torch.long`` tensor whose first
        dimension indexes the examples in ``batch``.
    """
    collated = {}
    for field in ("input_ids", "attention_mask", "labels"):
        rows = [example[field] for example in batch]
        collated[field] = torch.tensor(rows, dtype=torch.long)
    return collated

from torch.utils.data import DataLoader
# Batches of up to 64 tokenized examples, converted to tensors by collate_fn.
# No shuffle argument is passed.
eval_dataloader = DataLoader(tokenized_eval_dataset, batch_size=64, collate_fn=collate_fn)

# Move the original model to GPU and evaluate
# Falls back to the CPU when no CUDA device is available.
gpu_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(gpu_device)

# Baseline size of the unquantized model, in MB.
original_model_size = get_model_size(model)
print(f"Original model size: {original_model_size:.2f} MB")

# Baseline loss and ROUGE on the validation dataloader.
original_eval_loss, original_rouge_score = evaluate_model(model, eval_dataloader, gpu_device)
print(f"Original Model Evaluation Loss: {original_eval_loss:.4f}")
print(f"Original Model ROUGE Score: {original_rouge_score}")

# Perform dynamic quantization and move the model to CPU
# model.cpu() moves `model` itself to the CPU before it is handed to
# quantize_dynamic; only nn.Linear modules are targeted, with dtype qint8.
quantized_model = torch.quantization.quantize_dynamic(
    model.cpu(), {nn.Linear}, dtype=torch.qint8
)

# Evaluate the size of the quantized model
quantized_model_size = get_model_size(quantized_model)
print(f"Quantized model size: {quantized_model_size:.2f} MB")

# Evaluate the quantized model on the CPU
cpu_device = torch.device("cpu")
quantized_eval_loss, quantized_rouge_score = evaluate_model(quantized_model, eval_dataloader, cpu_device)
print(f"Quantized Model Evaluation Loss: {quantized_eval_loss:.4f}")
print(f"Quantized Model ROUGE Score: {quantized_rouge_score}")

# Compare model sizes and evaluation losses
# Both figures are relative changes in percent with the original model as the
# baseline: positive size_reduction means the quantized model is smaller,
# positive loss_increase means its loss is higher.
size_reduction = 100 * (original_model_size - quantized_model_size) / original_model_size
print(f"Size reduction: {size_reduction:.2f}%")
loss_increase = 100 * (quantized_eval_loss - original_eval_loss) / original_eval_loss
print(f"Loss increase: {loss_increase:.2f}%")


In [None]:
import torch
import torch.nn as nn
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset, load_metric
from rouge_score import rouge_scorer, scoring

# Load the best fine-tuned checkpoint on top of the pretrained FLAN-T5-base weights.
model_path = r'/content/drive/MyDrive/LLM Models/FLAN-T5-base x SAMsum/best_model.pth'
# map_location="cpu" makes the load independent of the device the checkpoint
# was saved from, so this cell also works on a CPU-only runtime. The model
# itself is created on the CPU by from_pretrained.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(model_path, map_location="cpu")
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# The checkpoint is a dict; the weights live under 'model_state_dict'.
model.load_state_dict(checkpoint['model_state_dict'])

# Function to evaluate the model and compute validation loss and ROUGE score
def evaluate_model(model, dataloader, device):
    """Compute the mean per-batch loss and ROUGE scores of ``model``.

    Padding positions in the labels are excluded from the loss: label ids equal
    to the tokenizer's pad id are replaced by -100 (the index the model's loss
    ignores) before the forward pass. Labels that already use -100 are also
    accepted; they are mapped back to the pad id before being decoded to text.

    Uses the module-level ``tokenizer`` for decoding and ``load_metric("rouge")``
    for scoring.

    Args:
        model: Seq2seq model providing ``eval()``, a forward call that accepts
            ``input_ids``, ``attention_mask`` and ``labels`` and returns an
            object with ``.loss``, and ``generate()``. It must already be on
            ``device``; this function does not move it.
        dataloader: Iterable of dict batches holding ``"input_ids"``,
            ``"attention_mask"`` and ``"labels"`` tensors.
        device: Device the batch tensors are moved to.

    Returns:
        Tuple ``(avg_eval_loss, rouge_score)``: the loss averaged over batches
        and the value returned by the ROUGE metric's ``compute()``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()
    pad_token_id = tokenizer.pad_token_id
    rouge = load_metric("rouge")
    eval_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            if pad_token_id is not None:
                # Loss must not be dominated by padding: ignore pad positions.
                loss_labels = labels.masked_fill(labels == pad_token_id, -100)
                # -100 is not a valid token id, so restore pad ids for decoding.
                text_labels = labels.masked_fill(labels == -100, pad_token_id)
            else:
                loss_labels = text_labels = labels

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=loss_labels)
            eval_loss += outputs.loss.item()
            num_batches += 1

            # Generate predictions
            predictions = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=128)
            decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
            decoded_labels = tokenizer.batch_decode(text_labels, skip_special_tokens=True)

            # Accumulate this batch for the ROUGE computation
            rouge.add_batch(predictions=decoded_preds, references=decoded_labels)

    if num_batches == 0:
        raise ValueError("evaluate_model received an empty dataloader")

    # Counting batches (rather than len(dataloader)) also supports iterables
    # that do not define __len__.
    avg_eval_loss = eval_loss / num_batches
    rouge_score = rouge.compute()
    return avg_eval_loss, rouge_score

# Measure the size of a model (works before and after quantization)
def get_model_size(model):
    """Return the size of ``model`` in megabytes (1 MB = 1e6 bytes).

    The size is the byte length of the model's ``state_dict()`` serialized in
    memory with ``torch.save``. This counts every stored tensor once, including
    buffers and the packed weights of dynamically quantized ``nn.Linear``
    layers, which ``model.parameters()`` does not expose. A small, constant
    serialization overhead is included in the result.

    Args:
        model: Any ``torch.nn.Module``.

    Returns:
        float: Serialized size in MB.
    """
    import io  # local import: the notebook's import cell does not provide it

    buffer = io.BytesIO()
    torch.save(model.state_dict(), buffer)
    return buffer.getbuffer().nbytes / 1e6  # Convert to MB

# Prepare the evaluation dataset and dataloader
# Load the "samsum" dataset and keep only its "validation" split for evaluation.
dataset = load_dataset("samsum")
eval_dataset = dataset["validation"]

# Preprocessing for the SAMsum dataset
def preprocess_function(examples):
    """Tokenize a batch of dialogue/summary pairs for the seq2seq model.

    Each dialogue is prefixed with "summarize: " and tokenized to exactly 512
    ids; each summary is tokenized to exactly 128 ids. Both are truncated and
    padded to their maximum length. Uses the module-level ``tokenizer``.

    Args:
        examples: Mapping with "dialogue" and "summary" sequences of strings.

    Returns:
        The tokenizer output for the prompts, with the summary token ids added
        under the "labels" key.
    """
    prompts = []
    for dialogue in examples["dialogue"]:
        prompts.append(f"summarize: {dialogue}")

    # Both sides use the same truncation / padding policy.
    shared_options = dict(truncation=True, padding="max_length")
    encoded = tokenizer(prompts, max_length=512, **shared_options)
    target_ids = tokenizer(examples["summary"], max_length=128, **shared_options)["input_ids"]

    encoded["labels"] = target_ids
    return encoded

# Apply the preprocessing function
# batched=True passes preprocess_function batches of examples (lists per column);
# the raw "dialogue", "summary" and "id" columns are removed from the result.
tokenized_eval_dataset = eval_dataset.map(preprocess_function, batched=True, remove_columns=["dialogue", "summary", "id"])

def collate_fn(batch):
    """Stack a list of tokenized examples into int64 tensors.

    Args:
        batch: List of dicts, each with "input_ids", "attention_mask" and
            "labels" holding equal-length lists of ints.

    Returns:
        Dict with the same three keys, each a ``torch.long`` tensor whose first
        dimension indexes the examples in ``batch``.
    """
    collated = {}
    for field in ("input_ids", "attention_mask", "labels"):
        rows = [example[field] for example in batch]
        collated[field] = torch.tensor(rows, dtype=torch.long)
    return collated

from torch.utils.data import DataLoader
# Batches of up to 64 tokenized examples, converted to tensors by collate_fn.
# No shuffle argument is passed.
eval_dataloader = DataLoader(tokenized_eval_dataset, batch_size=64, collate_fn=collate_fn)

In [None]:
import random
# Device string handed to evaluate_random_input at the end of this cell;
# the tokenized inputs are placed on it.
device = "cpu"
def evaluate_random_input(model, tokenizer, eval_dataset, device):
    """Summarize one randomly chosen example and print it beside its reference.

    Args:
        model: Model providing ``eval()`` and ``generate()``; it is not moved
            to ``device`` by this function.
        tokenizer: Tokenizer used to encode the prompt and decode ids to text.
        eval_dataset: Indexable collection whose items have "dialogue" and
            "summary" entries.
        device: Device the tokenized prompt is moved to.

    Returns:
        None. The decoded input, the generated summary and the ground-truth
        summary are printed.
    """
    model.eval()

    # Draw one example uniformly at random (both bounds inclusive).
    last_index = len(eval_dataset) - 1
    example = eval_dataset[random.randint(0, last_index)]
    dialogue_text = example["dialogue"]
    reference_summary = example["summary"]

    # Build the task prompt and tokenize it (truncated to 512 tokens).
    encoded = tokenizer(
        f"summarize: {dialogue_text}",
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).to(device)

    # Generation needs no gradients.
    with torch.no_grad():
        generated = model.generate(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_length=128,
        )

    # Turn the (possibly truncated) prompt ids and the generated ids back into text.
    shown_input = tokenizer.decode(encoded["input_ids"][0], skip_special_tokens=True)
    shown_output = tokenizer.decode(generated[0], skip_special_tokens=True)

    print(f"Input Dialogue: {shown_input}")
    print(f"Model Output: {shown_output}")
    print(f"Ground Truth Summary: {reference_summary}")

# Evaluate and print a random input from the SAMsum validation set
# NOTE(review): the inputs are placed on `device` ("cpu") but the call does not
# move the model, so `model` must already be on the CPU here - confirm.
evaluate_random_input(model, tokenizer, eval_dataset, device)