In [None]:
!pip install -q transformers datasets evaluate

In [None]:
import math
import random
import numpy as np
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Trainer,
    TrainingArguments,
    set_seed
)

In [None]:
# ---------------------------
# Settings
# ---------------------------
# Fix the random seed before anything stochastic runs.
set_seed(42)

# Base checkpoint to fine-tune and the directory the result is written to.
MODEL_NAME = "google/flan-t5-small"
OUTPUT_DIR = "./sft_flan_t5_formal_30"

# Tokenization limits (in tokens) for instructions and responses.
MAX_INPUT_LEN = 128
MAX_TARGET_LEN = 64

# Optimisation hyper-parameters.
BATCH_SIZE = 8
EPOCHS = 4
LR = 5e-5

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Device:", DEVICE)

Device: cpu


In [None]:
# ---------------------------
# 30 instruction -> formal response examples
# ---------------------------
# Each tuple is (instruction, response). Keeping the two halves of an example
# side by side makes it easy to see which target belongs to which instruction.
_example_pairs = [
    (
        "Respond to customer asking about delayed shipment.",
        "We apologize for the delay in your shipment; our team is working diligently to ensure delivery at the earliest opportunity.",
    ),
    (
        "Summarize the benefits of renewable energy in one sentence.",
        "Renewable energy reduces emissions, lowers long-term costs, and fosters local employment opportunities.",
    ),
    (
        "Convert to informal tone: 'We appreciate your feedback.'",
        "We appreciate your feedback and thank you for taking the time to share your thoughts.",
    ),
    (
        "Explain blockchain briefly for a friend.",
        "Blockchain is a distributed ledger technology that enables tamper-resistant records across a network of participants.",
    ),
    (
        "Write a welcome message for new users.",
        "Welcome to our platform; we are pleased to have you and look forward to supporting your success.",
    ),
    (
        "Apologize for an incorrect invoice and offer to fix it.",
        "We apologize for the error on your invoice; please provide the invoice number and we will correct it promptly.",
    ),
    (
        "Shortly explain why backups are important.",
        "Regular backups protect data integrity and ensure rapid recovery in the event of system failure.",
    ),
    (
        "Turn into a polite follow-up email: 'Have you seen my last message?'",
        "I am following up regarding my previous message and would appreciate any updates at your convenience.",
    ),
    (
        "Rewrite as a professional subject line for a meeting change.",
        "Meeting rescheduled — please consult the updated calendar invitation for the revised time.",
    ),
    (
        "Give a concise compliment about someone's code quality.",
        "Your code demonstrates clear structure and maintainability; well done.",
    ),
    (
        "Make the sentence more formal: 'Can we meet tomorrow?'",
        "Would it be possible for us to schedule a meeting tomorrow?",
    ),
    (
        "Draft a brief policy-notice about scheduled maintenance.",
        "Please note that scheduled maintenance will occur during the stated window; services may be intermittently unavailable.",
    ),
    (
        "Condense and formalize: 'We're running out of stock — order soon!'",
        "We are experiencing limited stock; please place your order soon to avoid disappointment.",
    ),
    (
        "Reply politely to a refund request.",
        "Thank you for your request; we will review it and respond regarding the refund process shortly.",
    ),
    (
        "Explain in one line why documentation matters.",
        "Comprehensive documentation enhances knowledge transfer and reduces onboarding time.",
    ),
    (
        "Create a short professional sign-off for customer emails.",
        "Thank you, and best regards,",
    ),
    (
        "Rephrase to be more formal: 'Hey team, quick heads up.'",
        "Please be advised of the following update to the team.",
    ),
    (
        "Write a formal one-sentence status update on a project delay.",
        "We regret to inform you that the project has been delayed; a revised timeline will be provided shortly.",
    ),
    (
        "Turn into a formal instruction: 'Please send the report by Friday.'",
        "Please submit the report by Friday to ensure timely review and processing.",
    ),
    (
        "Craft a professional apology for a service interruption.",
        "We sincerely apologize for the service interruption and appreciate your patience while we resolve the issue.",
    ),
    (
        "Convert to formal: 'I'll get back to you ASAP.'",
        "I will respond to you as soon as possible; thank you for your patience.",
    ),
    (
        "Summarize company values in a single sentence.",
        "Our organization prioritizes integrity, innovation, and customer-focused service.",
    ),
    (
        "Create a formal welcome for a new partner organization.",
        "We welcome your organization as a partner and anticipate a productive collaboration.",
    ),
    (
        "Make this sentence formal: 'Thanks for your help!'",
        "Thank you for your assistance; your support is greatly appreciated.",
    ),
    (
        "Provide a one-line formal response declining an invitation.",
        "We must respectfully decline the invitation at this time; thank you for understanding.",
    ),
    (
        "Shortly explain the value of code reviews.",
        "Code reviews improve quality by identifying defects early and promoting shared knowledge.",
    ),
    (
        "Rewrite: 'This feature is broken — fix it' into a formal bug report.",
        "Please file a detailed bug report including steps to reproduce, observed behavior, and expected behavior.",
    ),
    (
        "Create a succinct formal reminder about policy compliance.",
        "This is a reminder to adhere to company policy; please review the guidelines and ensure compliance.",
    ),
    (
        "Turn into a formal customer reply for a pricing inquiry.",
        "Thank you for your interest; please find our pricing details attached and contact us for further clarification.",
    ),
    (
        "Make the phrase formal: 'Let me know what you think.'",
        "Please let me know your thoughts at your earliest convenience.",
    ),
]

# Column-oriented view of the pairs: two parallel lists keyed by column name.
examples = {
    "instruction": [instruction for instruction, _ in _example_pairs],
    "response": [response for _, response in _example_pairs],
}


In [None]:
# ---------------------------
# Create dataset and split
# ---------------------------
# Build the dataset from the column dict and shuffle it with a fixed seed.
dataset = Dataset.from_dict(examples).shuffle(seed=42)

# Hold out 20% for evaluation (80/20 -> 24 train / 6 eval).
split = dataset.train_test_split(test_size=0.2, seed=42)
train_ds, eval_ds = split["train"], split["test"]

# ---------------------------
# Tokenizer & model
# ---------------------------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the pretrained seq2seq weights, then move them to the selected device.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
model = model.to(DEVICE)

In [None]:
# ---------------------------
# Preprocess: encode inputs & labels; mask pad tokens in labels
# ---------------------------
def preprocess(batch):
    """Tokenize a batch of instruction/response pairs for seq2seq training.

    Uses the module-level ``tokenizer`` together with ``MAX_INPUT_LEN`` and
    ``MAX_TARGET_LEN``.

    Args:
        batch: Mapping with an ``"instruction"`` and a ``"response"`` entry,
            each a list of strings of the same length.

    Returns:
        A dict with ``"input_ids"`` and ``"attention_mask"`` for the
        instructions and ``"labels"`` for the responses. Every sequence is
        truncated/padded to its maximum length, and padding positions in
        ``"labels"`` are replaced by ``-100``.
    """
    inputs = tokenizer(
        batch["instruction"],
        truncation=True,
        padding="max_length",
        max_length=MAX_INPUT_LEN,
    )
    # `text_target=` replaces the deprecated `tokenizer.as_target_tokenizer()`
    # context manager for encoding the target side.
    targets = tokenizer(
        text_target=batch["response"],
        truncation=True,
        padding="max_length",
        max_length=MAX_TARGET_LEN,
    )

    # Mask pad token ids in the labels: -100 is the label value the training
    # loss is expected to ignore, so padding does not count towards it.
    pad_id = tokenizer.pad_token_id
    labels = [
        [tok if tok != pad_id else -100 for tok in seq]
        for seq in targets["input_ids"]
    ]

    return {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "labels": labels,
    }

# ---------------------------
# Utility: generate text
# ---------------------------
def generate_text(prompt, max_new_tokens=64, num_beams=4):
    """Generate a completion for ``prompt`` with the module-level ``model``.

    The model is switched to evaluation mode for the duration of the call so
    that generation does not depend on whether the model was left in training
    mode (e.g. right after ``trainer.train()``); its previous mode is restored
    afterwards.

    Args:
        prompt: Input text (or list of texts) passed to the tokenizer. Only the
            first generated sequence is decoded and returned.
        max_new_tokens: Upper bound on the number of generated tokens.
        num_beams: Beam width used for beam search.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        removed.
    """
    input_tok = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(DEVICE)

    # Remember the current mode so this helper has no lasting effect on it.
    was_training = model.training
    model.eval()
    try:
        out = model.generate(
            **input_tok,
            max_new_tokens=max_new_tokens,
            num_beams=num_beams,
            early_stopping=True,
        )
    finally:
        model.train(was_training)

    return tokenizer.decode(out[0], skip_special_tokens=True)

In [None]:
# Prompts used to compare generations before and after fine-tuning.
demo_prompts = [
    "Respond to customer asking about delayed shipment.",
    "Convert to informal tone: 'We appreciate your feedback.'",
    "Write a welcome message for new users.",
]

# Baseline: what the pretrained checkpoint produces before any training.
print("=== Baseline generations (before fine-tuning) ===")
for prompt in demo_prompts:
    generated = generate_text(prompt)
    print("PROMPT:", prompt)
    print("GEN:", generated)
    print()


=== Baseline generations (before fine-tuning) ===
PROMPT: Respond to customer asking about delayed shipment.
GEN: Customer asked about delayed shipment.

PROMPT: Convert to informal tone: 'We appreciate your feedback.'
GEN: We appreciate your feedback.

PROMPT: Write a welcome message for new users.
GEN: I'm a new user. I'm a new user. I'm a new user.



In [None]:
# Tokenize both splits and drop the raw text columns so that only the model
# inputs produced by `preprocess` remain.
train_tok = train_ds.map(preprocess, batched=True, remove_columns=["instruction", "response"])
eval_tok  = eval_ds.map(preprocess, batched=True, remove_columns=["instruction", "response"])

# Have the datasets return torch tensors for the columns the model consumes.
train_tok.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
eval_tok.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# ---------------------------
# Training
# ---------------------------
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    logging_steps=10,
    seed=42,
    fp16=False,
    save_total_limit=1
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=eval_tok,
    # `tokenizer=` is deprecated on Trainer (it triggers a warning pointing at
    # this call); `processing_class=` is its replacement.
    processing_class=tokenizer,
)

trainer.train()

Map:   0%|          | 0/24 [00:00<?, ? examples/s]



Map:   0%|          | 0/6 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
10,3.2857


TrainOutput(global_step=12, training_loss=3.2563681999842324, metrics={'train_runtime': 143.592, 'train_samples_per_second': 0.669, 'train_steps_per_second': 0.084, 'total_flos': 4461372112896.0, 'train_loss': 3.2563681999842324, 'epoch': 4.0})

In [None]:
# ---------------------------
# Eval and perplexity
# ---------------------------
metrics = trainer.evaluate()
eval_loss = metrics.get("eval_loss")

# Perplexity is exp(loss); report it only when the evaluation produced a loss.
if eval_loss is not None:
    try:
        ppl = math.exp(eval_loss)
    except OverflowError:
        # A very large loss overflows exp(); report infinite perplexity instead.
        ppl = float("inf")
    print(f"Eval loss: {eval_loss:.4f}, Perplexity: {ppl:.2f}")
print("Eval metrics:", metrics)

# ---------------------------
# Generations after fine-tuning
# ---------------------------
print("\n=== Generations (after fine-tuning) ===")
for prompt in demo_prompts:
    generated = generate_text(prompt)
    print("PROMPT:", prompt)
    print("GEN:", generated)
    print()

# Persist the fine-tuned weights and the tokenizer side by side.
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("Saved model to", OUTPUT_DIR)




Eval loss: 2.8588, Perplexity: 17.44
Eval metrics: {'eval_loss': 2.8588368892669678, 'eval_runtime': 1.5946, 'eval_samples_per_second': 3.763, 'eval_steps_per_second': 0.627, 'epoch': 4.0}

=== Generations (after fine-tuning) ===
PROMPT: Respond to customer asking about delayed shipment.
GEN: Waited for delivery.

PROMPT: Convert to informal tone: 'We appreciate your feedback.'
GEN: We appreciate your feedback.

PROMPT: Write a welcome message for new users.
GEN: I'm a new user and I'm looking forward to it. I'm looking forward to it. I'm looking forward to it.

Saved model to ./sft_flan_t5_formal_30
