In [None]:
!pip install datasets

In [None]:
# Mount Google Drive at /content/drive (Colab only). Later cells read and
# write checkpoint files under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
def save_model(model, optimizer, scheduler, epoch, train_losses, eval_losses, filepath):
    """Write a training checkpoint to ``filepath`` using ``torch.save``.

    The checkpoint is a dict with the keys ``epoch``, ``model_state_dict``,
    ``optimizer_state_dict``, ``scheduler_state_dict``, ``train_losses`` and
    ``eval_losses``.

    Args:
        model: Object exposing ``state_dict()`` (the network being trained).
        optimizer: Object exposing ``state_dict()``.
        scheduler: Object exposing ``state_dict()``.
        epoch: Value stored under ``'epoch'``.
        train_losses: Training-loss history, stored as given.
        eval_losses: Evaluation-loss history, stored as given.
        filepath: Destination passed to ``torch.save``. When it is a path
            (``str`` or ``os.PathLike``), missing parent directories are
            created first.

    Returns:
        None. Prints a confirmation line after the file has been written.
    """
    import os  # local import: this cell is defined before the notebook's import cell

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_losses': train_losses,
        'eval_losses': eval_losses,
    }

    # torch.save does not create directories; without this a missing folder
    # would discard the checkpoint after an entire epoch of training.
    if isinstance(filepath, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filepath))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    torch.save(checkpoint, filepath)
    print(f"Checkpoint saved at {filepath}")

In [None]:
def load_model(model, optimizer, scheduler, filepath, map_location="cpu"):
    """Restore model, optimizer and scheduler state from a checkpoint file.

    Expects the dict layout written by ``save_model``: ``model_state_dict``,
    ``optimizer_state_dict``, ``scheduler_state_dict``, ``epoch``,
    ``train_losses`` and ``eval_losses``.

    Args:
        model: Object whose ``load_state_dict`` receives ``model_state_dict``.
        optimizer: Object whose ``load_state_dict`` receives
            ``optimizer_state_dict``.
        scheduler: Object whose ``load_state_dict`` receives
            ``scheduler_state_dict``.
        filepath: Checkpoint location passed to ``torch.load``.
        map_location: Forwarded to ``torch.load``. Defaults to ``"cpu"`` so a
            checkpoint written on a GPU runtime can still be opened on a
            CPU-only one; ``load_state_dict`` then copies the values into the
            objects passed in.

    Returns:
        Tuple ``(model, optimizer, scheduler, epoch, train_losses,
        eval_losses)``; the first three are the same objects that were
        passed in, updated in place.

    Raises:
        KeyError: If the checkpoint lacks one of the expected keys.
    """
    # NOTE: torch.load unpickles the file; only open checkpoints you trust.
    checkpoint = torch.load(filepath, map_location=map_location)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

    epoch = checkpoint['epoch']
    train_losses = checkpoint['train_losses']
    eval_losses = checkpoint['eval_losses']

    print(f"Checkpoint loaded from {filepath}")
    return model, optimizer, scheduler, epoch, train_losses, eval_losses

In [None]:
import torch
from torch.utils.data import DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW, get_linear_schedule_with_warmup
from datasets import load_dataset
import re

# Load the GSM8K dataset ("main" configuration)
dataset = load_dataset("gsm8k", "main")

# Load the FLAN-T5 base checkpoint and its tokenizer (the name below is the
# "base" variant, not "small")
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)


# Preprocessing function for the dataset
def preprocess_function(examples):
    """Tokenize a batch of rows into encoder inputs and training labels.

    Uses the module-level ``tokenizer``.

    Args:
        examples: Batched mapping with a ``"question"`` sequence and an
            ``"answer"`` sequence of strings.

    Returns:
        The tokenizer output for the questions (padded/truncated to 512
        tokens) with an added ``"labels"`` entry: the answers'
        ``input_ids`` (padded/truncated to 128 tokens) in which every
        padding position is replaced by -100.
    """
    questions = list(examples["question"])
    answers = list(examples["answer"])

    model_inputs = tokenizer(questions, max_length=512, truncation=True, padding="max_length")
    target_encoding = tokenizer(answers, max_length=128, truncation=True, padding="max_length")

    # -100 is the ignore index of the model's cross-entropy loss. Leaving the
    # pad token id in the labels would make the loss (and the reported
    # train/eval losses) count the padding positions as targets.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [-100 if token_id == pad_id else token_id for token_id in sequence]
        for sequence in target_encoding["input_ids"]
    ]
    return model_inputs

# Apply the preprocessing function to the dataset in batched mode; the raw
# "question" and "answer" text columns are dropped from the result.
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=["question", "answer"])

# Custom collate function to handle padding and conversion to tensors
def collate_fn(batch):
    """Turn a list of tokenized rows into a dict of batched ``torch.long`` tensors.

    Args:
        batch: Sequence of mappings, each holding equal-length integer lists
            under ``"input_ids"``, ``"attention_mask"`` and ``"labels"``.

    Returns:
        Dict with those three keys; each value is a tensor built from the
        corresponding lists of every row, one row per batch item.
    """
    field_names = ("input_ids", "attention_mask", "labels")
    return {
        name: torch.tensor([row[name] for row in batch], dtype=torch.long)
        for name in field_names
    }

# Set up DataLoader: the "train" split is shuffled for training, the "test"
# split is used as the evaluation set.
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn=collate_fn)
eval_dataloader = DataLoader(eval_dataset, batch_size=8, collate_fn=collate_fn)

# Number of passes over the training set. Defined before the scheduler so the
# schedule length and the training loop are driven by the same value.
epochs = 10

# Training setup
optimizer = AdamW(model.parameters(), lr=5e-5)

# Total number of training steps: one scheduler step per batch, every epoch
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler; the first 10% of the steps are warmup
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=int(0.1 * total_steps), num_training_steps=total_steps)

# Early stopping setup
best_eval_loss = float('inf')
patience_counter = 0
patience = 3  # Epochs without eval-loss improvement tolerated before stopping

# Lists to store training and evaluation losses (one entry per epoch)
train_losses = []
eval_losses = []

# Run on the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# To resume from a checkpoint, call
# load_model(model, optimizer, scheduler, <checkpoint path>) here, before the
# model is moved to the device.
model.to(device)

# Training loop: each epoch runs one pass over train_dataloader, then one
# evaluation pass over eval_dataloader. The checkpoint is saved whenever the
# evaluation loss improves; training stops once it has failed to improve for
# `patience` consecutive epochs.
for epoch in range(epochs):
    model.train()
    train_loss = 0
    num_batches = len(train_dataloader)

    for step, batch in enumerate(train_dataloader):
        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backpropagation
        loss.backward()

        # Gradient clipping has to run after backward(): before it, the
        # gradients were just cleared by zero_grad() and there is nothing
        # to clip.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()
        scheduler.step()

        train_loss += loss.item()

    avg_train_loss = train_loss / num_batches
    train_losses.append(avg_train_loss)
    print(f"Epoch: {epoch+1}, Average Train Loss: {avg_train_loss}")

    # Evaluation loop (no gradients needed)
    model.eval()
    eval_loss = 0
    with torch.no_grad():
        for batch in eval_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            eval_loss += outputs.loss.item()

    avg_eval_loss = eval_loss / len(eval_dataloader)
    eval_losses.append(avg_eval_loss)
    print(f"Epoch: {epoch+1}, Evaluation Loss: {avg_eval_loss}")

    # Check for early stopping and save the best model
    if avg_eval_loss < best_eval_loss:
        best_eval_loss = avg_eval_loss
        patience_counter = 0  # Reset patience counter
        # Note: `epoch` is stored zero-based, while the log lines are one-based.
        save_model(
            epoch=epoch,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
            train_losses=train_losses,
            eval_losses=eval_losses,
            filepath = r'/content/drive/MyDrive/LLM Models/FLAN-T5-base x GSM8K/best_model.pth'
        )
        print(f"Best model saved with eval loss {avg_eval_loss:.4f} at epoch {epoch+1}")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered")
            break

In [None]:
import torch
import random
from transformers import T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
from torch.utils.data import DataLoader

# Load the GSM8K dataset ("main" configuration)
dataset = load_dataset("gsm8k", "main")

# Load the FLAN-T5 base checkpoint and its tokenizer (the name below is the
# "base" variant, not "small"). This rebinds `model` and `tokenizer`.
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Preprocessing function for the dataset
def preprocess_function(examples):
    """Tokenize a batch of test rows for inspection and generation.

    Uses the module-level ``tokenizer``. Questions are tokenized as-is, with
    no prompt prefix, so they have the same form as the inputs the training
    cell of this notebook feeds the model; a prefix here would make the
    evaluation prompts differ from the fine-tuning prompts.

    Args:
        examples: Batched mapping with a ``"question"`` sequence and an
            ``"answer"`` sequence of strings.

    Returns:
        The tokenizer output for the questions (padded/truncated to 512
        tokens) with an added ``"labels"`` entry holding the answers'
        ``input_ids`` (padded/truncated to 128 tokens). Padding ids are kept
        unchanged so the labels can be decoded back to text.
    """
    inputs = list(examples["question"])
    targets = list(examples["answer"])
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    labels = tokenizer(targets, max_length=128, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply the preprocessing function to the test dataset
tokenized_test_dataset = dataset['test'].map(preprocess_function, batched=True, remove_columns=["question", "answer"])

# Set up DataLoader for the test dataset
test_dataloader = DataLoader(tokenized_test_dataset, batch_size=1)  # Batch size 1 for single evaluation

# Load the fine-tuned weights from the saved checkpoint.
# map_location="cpu" lets a checkpoint written on a GPU runtime open on a
# CPU-only one, and keeps the optimizer state stored in the same file off
# the GPU. NOTE: torch.load unpickles the file; only open trusted checkpoints.
checkpoint_path = '/content/drive/MyDrive/LLM Models/FLAN-T5-base x GSM8K/best_model.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu")['model_state_dict'])

# Set the model to evaluation mode and move it to the GPU when available
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Function to evaluate a single random example from the test dataset
def evaluate_random_example(model, tokenizer, dataset):
    """Print question, reference answer and model prediction for one random row.

    Args:
        model: Module providing ``eval()``, ``parameters()`` and
            ``generate(input_ids, attention_mask=..., max_length=...)``.
            It is switched to evaluation mode.
        tokenizer: Object providing ``decode(ids, skip_special_tokens=True)``.
        dataset: Indexable collection supporting ``len()``; each row must
            hold ``'input_ids'``, ``'attention_mask'`` and ``'labels'`` as
            lists of token ids.

    Returns:
        None. The three result lines are written with ``print``.

    Raises:
        ValueError: If ``dataset`` is empty (raised by ``random.randint``).
    """
    model.eval()

    # Build the inputs on the device the model itself lives on, rather than
    # relying on a notebook-global `device` being defined and in sync.
    model_device = next(model.parameters()).device

    # Select a random example from the dataset
    random_idx = random.randint(0, len(dataset) - 1)
    example = dataset[random_idx]

    # Token ids of the question and of the reference answer
    input_token_ids = example['input_ids']
    label_token_ids = example['labels']

    # Decode both back to text for display
    input_decoded = tokenizer.decode(input_token_ids, skip_special_tokens=True)
    ground_truth_decoded = tokenizer.decode(label_token_ids, skip_special_tokens=True)

    # Add a leading batch dimension of size one for generate()
    input_tensor = torch.tensor([input_token_ids]).to(model_device)
    attention_mask = torch.tensor([example['attention_mask']]).to(model_device)

    with torch.no_grad():
        output_ids = model.generate(input_tensor, attention_mask=attention_mask, max_length=128)
        prediction = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Print the results
    print(f"Question: {input_decoded}")
    print(f"Ground Truth Answer: {ground_truth_decoded}")
    print(f"Model's Predicted Answer: {prediction}")

# Spot-check the loaded model on one randomly chosen test example; the call
# prints the question, the reference answer and the generated answer.
evaluate_random_example(model, tokenizer, tokenized_test_dataset)

In [None]:
from datasets import list_datasets

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Quick generation check with the pretrained FLAN-T5 base checkpoint.
# NOTE: this rebinds `tokenizer` and `model`, replacing any fine-tuned model
# loaded by an earlier cell.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base", device_map="auto")

# NOTE(review): the prompt is an empty string — presumably a placeholder to
# be filled in before running; confirm.
input_text = ""

# Put the prompt on the device the model was placed on by device_map="auto"
# instead of assuming a GPU is present.
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0]))

In [None]:
import torch

def load_losses(filepath):
    """Read the stored loss histories out of a checkpoint file.

    Args:
        filepath: Checkpoint location passed to ``torch.load``. The file must
            hold a dict with ``'train_losses'`` and ``'eval_losses'`` entries.

    Returns:
        Tuple ``(train_losses, eval_losses)`` exactly as stored.

    Raises:
        KeyError: If either entry is missing from the checkpoint.
    """
    # Only the loss lists are needed, so map every stored tensor to the CPU:
    # the file then opens on a runtime without a GPU and nothing is
    # allocated on one. NOTE: torch.load unpickles the file; only open
    # checkpoints you trust.
    checkpoint = torch.load(filepath, map_location="cpu")

    # Extract the train and eval losses
    train_losses = checkpoint['train_losses']
    eval_losses = checkpoint['eval_losses']

    print(f"Checkpoint loaded from {filepath}")

    return train_losses, eval_losses

# Usage
# NOTE(review): this path points at the "FLAN-T5-base x IMDB" folder, while
# the training cell of this notebook saves its checkpoint under
# "FLAN-T5-base x GSM8K" — confirm which checkpoint is intended here.
train_losses, eval_losses = load_losses('/content/drive/MyDrive/LLM Models/FLAN-T5-base x IMDB/best_model.pth')

In [None]:
# Display the training-loss history currently bound to `train_losses`
train_losses

In [None]:
# Display the evaluation-loss history currently bound to `eval_losses`
eval_losses