# Seq2Seq Model Fine-Tuning

This notebook fine-tunes a Seq2Seq model using Hugging Face Transformers.
It loads a dataset, tokenizes the data, sets up training arguments, and trains the model.
Mixed precision and checkpoint management are included.

In [None]:
import os
import shutil
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM,
    Trainer, TrainingArguments, AdamW, get_scheduler
)
from torch.cuda.amp import autocast

# Reaching this line means every import statement earlier in this cell succeeded.
print("Libraries imported successfully!")

## Step 1: Load Dataset

In [None]:
# Load the fitness chat prompt/completion dataset by its identifier and
# display the resulting object so its splits and columns can be inspected.
dataset = load_dataset(
    path="chibbss/fitness-chat-prompt-completion-dataset",
)
print(dataset)

## Step 2: Load Tokenizer

In [None]:
# Location handed to ``from_pretrained``.
# Change this to your model path.
model_path = "D:/cuda/final_model"
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_path,
)

## Step 3: Tokenization Function

In [None]:
def tokenize_function(examples):
    """Tokenize instruction/output pairs into model inputs and labels.

    Args:
        examples: Mapping with an ``'instruction'`` entry (source text) and an
            ``'output'`` entry (target text). Each entry may be a single
            string or a list of strings (batched mapping).

    Returns:
        The tokenizer output for the instructions, truncated/padded to 512
        tokens, with a ``'labels'`` entry holding the target token ids.
        Padding positions in the labels are replaced by -100 so that they are
        ignored by the loss instead of being scored as real targets.
    """
    inputs = examples['instruction']
    targets = examples['output']

    # ``text_target`` replaces the deprecated ``as_target_tokenizer()``
    # context manager; the encoded targets are returned under 'labels'.
    model_inputs = tokenizer(
        inputs,
        text_target=targets,
        truncation=True,
        padding='max_length',
        max_length=512,
    )

    pad_id = tokenizer.pad_token_id

    def mask_padding(label_ids):
        # -100 is the index the loss skips; real token ids are kept as-is.
        return [-100 if token_id == pad_id else token_id for token_id in label_ids]

    labels = model_inputs['labels']
    if labels and isinstance(labels[0], int):
        # Single (non-batched) example: labels is one flat list of ids.
        model_inputs['labels'] = mask_padding(labels)
    else:
        model_inputs['labels'] = [mask_padding(label_ids) for label_ids in labels]
    return model_inputs

## Step 4: Tokenize Dataset

In [None]:
# Tokenize the training split in batches; the raw text columns are dropped
# once they have been consumed by tokenize_function.
tokenized_datasets = dataset['train'].map(
    tokenize_function,
    batched=True,
    remove_columns=['output', 'instruction'],
)

# Hold out 10% for evaluation. The fixed seed keeps the split identical
# between runs, so a run resumed from a checkpoint (Step 11) is never
# evaluated on examples it was trained on before the restart.
tokenized_datasets = tokenized_datasets.train_test_split(test_size=0.1, seed=42)

## Step 5: Load Model

In [None]:
# Choose the GPU when CUDA is usable, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the seq2seq weights from model_path and move them to that device.
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
model.to(device)

print(f"Model is running on: {device}")

## Step 6: Training Arguments

In [None]:
# Training configuration: evaluate and checkpoint every 1000 steps, keep at
# most 5 checkpoints, and reload the checkpoint with the lowest eval loss
# when training finishes.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",
    eval_steps=1000,
    save_steps=1000,
    save_total_limit=5,
    learning_rate=5e-5,
    warmup_steps=500,
    lr_scheduler_type="linear",
    per_device_train_batch_size=2,
    # 2 examples per step * 4 accumulated steps = 8 examples per update.
    gradient_accumulation_steps=4,
    num_train_epochs=10,
    weight_decay=0.01,
    # fp16 mixed precision needs a GPU. Step 5 falls back to CPU when CUDA is
    # missing, so only request fp16 when CUDA is actually available instead
    # of failing while building the arguments.
    fp16=torch.cuda.is_available(),
    logging_dir='./logs',
    logging_steps=10,
    report_to="tensorboard",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    disable_tqdm=False
)

## Step 7: Custom Trainer with Checkpoint Cleanup

In [None]:
class CustomTrainer(Trainer):
    """Trainer that removes old ``checkpoint-<step>`` directories after a save.

    The base ``Trainer`` does not expose a public ``save_checkpoint`` method,
    so an override under that name alone is never called by the training
    loop. The cleanup is therefore also attached to ``_save_checkpoint``.

    NOTE(review): ``_save_checkpoint`` is a private hook whose signature
    differs between transformers releases, which is why its arguments are
    passed through untouched — confirm against the installed version.
    """

    # Number of most recent numbered checkpoints left on disk.
    max_checkpoints_to_keep = 5

    def _save_checkpoint(self, *args, **kwargs):
        """Save a checkpoint through the base class, then prune old ones."""
        result = super()._save_checkpoint(*args, **kwargs)
        self._prune_old_checkpoints()
        return result

    def save_checkpoint(self, output_dir=None):
        """Prune old checkpoints, delegating the save itself when possible.

        Args:
            output_dir: Forwarded to the parent's ``save_checkpoint`` when the
                parent class defines one; otherwise unused.
        """
        # Guard the delegation: calling a method the parent does not define
        # would raise AttributeError before any cleanup could run.
        parent_save = getattr(super(), "save_checkpoint", None)
        if parent_save is not None:
            parent_save(output_dir)
        self._prune_old_checkpoints()

    def _prune_old_checkpoints(self):
        """Delete all but the newest ``max_checkpoints_to_keep`` checkpoints.

        Only directories named exactly ``checkpoint-<digits>`` inside
        ``self.args.output_dir`` are considered; they are ordered by step
        number. The checkpoint recorded as best in the trainer state, if any,
        is never deleted.
        """
        root = self.args.output_dir
        if not os.path.isdir(root):
            return

        numbered = []
        for name in os.listdir(root):
            prefix, _, step = name.rpartition("-")
            # Skip entries such as "checkpoint-tmp" or stray files, which
            # would otherwise break the integer conversion or rmtree.
            if prefix != "checkpoint" or not step.isdecimal():
                continue
            if os.path.isdir(os.path.join(root, name)):
                numbered.append((int(step), name))
        numbered.sort()

        surplus = len(numbered) - self.max_checkpoints_to_keep
        if surplus <= 0:
            return

        state = getattr(self, "state", None)
        best = getattr(state, "best_model_checkpoint", None)
        best_path = os.path.abspath(best) if best else None

        for _, name in numbered[:surplus]:
            path = os.path.join(root, name)
            if best_path is not None and os.path.abspath(path) == best_path:
                # Keep the best checkpoint so it can still be reloaded.
                continue
            shutil.rmtree(path)
            print(f"Deleted old checkpoint: {name}")

## Step 8: Mixed Precision Trainer

In [None]:
class MixedPrecisionTrainer(CustomTrainer):
    """CustomTrainer whose training step runs inside a CUDA autocast context."""

    def training_step(self, model, inputs, *args, **kwargs):
        """Run the parent training step under autocast.

        Args:
            model: Model being trained, forwarded to the parent step.
            inputs: Batch of inputs, forwarded to the parent step.
            *args: Extra positional arguments forwarded unchanged, so the
                override keeps working if the parent step receives more
                arguments than ``model`` and ``inputs``.
            **kwargs: Extra keyword arguments forwarded unchanged.

        Returns:
            Whatever the parent ``training_step`` returns.
        """
        # The device type and the on/off switch are passed by keyword:
        # ``torch.cuda.amp.autocast("cuda")`` would bind the string to its
        # ``enabled`` parameter. Autocast is switched off when CUDA is absent.
        with torch.autocast(device_type="cuda", enabled=torch.cuda.is_available()):
            return super().training_step(model, inputs, *args, **kwargs)

## Step 9: Optimizer and Scheduler

In [None]:
import math

# torch.optim.AdamW replaces the deprecated transformers.AdamW. The weight
# decay configured in training_args is passed explicitly, because a
# hand-built optimizer does not pick it up on its own.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=training_args.learning_rate,
    weight_decay=training_args.weight_decay,
)

# Count optimizer updates rather than raw examples: a partial last batch
# still counts as a batch, and every gradient_accumulation_steps batches
# form one update (at least one per epoch).
# NOTE(review): intended to match the step count the Trainer derives itself —
# confirm against the installed transformers version.
batches_per_epoch = math.ceil(
    len(tokenized_datasets['train']) / training_args.train_batch_size
)
updates_per_epoch = max(
    batches_per_epoch // training_args.gradient_accumulation_steps, 1
)
# num_train_epochs may be fractional; round up to a whole number of steps.
num_training_steps = math.ceil(updates_per_epoch * training_args.num_train_epochs)

lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=training_args.warmup_steps,
    num_training_steps=num_training_steps
)

## Step 10: Initialize Trainer

In [None]:
# Gather the constructor arguments first: the model, its training
# configuration, both dataset splits, and the hand-built optimizer/scheduler
# pair from Step 9.
trainer_kwargs = {
    "model": model,
    "args": training_args,
    "train_dataset": tokenized_datasets['train'],
    "eval_dataset": tokenized_datasets['test'],
    "optimizers": (optimizer, lr_scheduler),
}
trainer = MixedPrecisionTrainer(**trainer_kwargs)

## Step 11: Resume from Checkpoint (if available)

In [None]:
# Find the checkpoint with the highest step number in the output directory.
# Step numbers are used instead of filesystem timestamps because creation /
# metadata-change times do not reliably follow training order (for example
# after directories were copied or modified).
last_checkpoint = None
if os.path.isdir(training_args.output_dir):
    latest_step = -1
    for entry in os.listdir(training_args.output_dir):
        prefix, _, step = entry.rpartition("-")
        candidate = os.path.join(training_args.output_dir, entry)
        # Accept only real "checkpoint-<digits>" directories; stray files or
        # unnumbered names starting with "checkpoint" cannot be resumed from.
        if prefix != "checkpoint" or not step.isdecimal():
            continue
        if os.path.isdir(candidate) and int(step) > latest_step:
            latest_step = int(step)
            last_checkpoint = candidate

if last_checkpoint:
    print(f"Resuming training from checkpoint: {last_checkpoint}")
else:
    print("No checkpoint found. Starting training from scratch.")

## Step 12: Train the Model

In [None]:
# Run training, passing the checkpoint path selected in Step 11
# (None when no checkpoint was found).
trainer.train(resume_from_checkpoint=last_checkpoint)