In [None]:
# Cell 1 — Load CSV, parse 'conversation' column into prompt-response pairs
import os, ast, json, re, textwrap
import pandas as pd
DATA_PATH = "/content/lmsys_chat_train_part_5.csv"
assert os.path.exists(DATA_PATH), f"CSV not found: {DATA_PATH}"

df = pd.read_csv(DATA_PATH, low_memory=False)
df=df.head(5000)
print("Raw shape:", df.shape)
print("Columns:", df.columns.tolist())



def safe_eval_list(s):
    if not isinstance(s, str):
        return []
    s = s.strip()
    if s == "":
        return []
    try:
        return ast.literal_eval(s)
    except Exception:
        try:
            return json.loads(s.replace("'", '"'))
        except Exception:
            return []

def clean_text(s):
    if s is None:
        return ""
    s = str(s)
    s = re.sub(r'[\r\n\t]+', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()
    s = re.sub(r'\S+@\S+\.\w+', '', s)
    s = re.sub(r'http\S+|www\.\S+', '', s)
    s = ''.join(ch for ch in s if ord(ch) >= 32)
    return s.strip()

pairs = []
for idx, row in df.iterrows():
    convo_raw = row.get("conversation", "")
    convo = safe_eval_list(convo_raw)
    if not isinstance(convo, list):
        continue
    # normalize into list of dicts with 'role' and 'content'
    msgs = []
    for item in convo:
        if not isinstance(item, dict):
            continue
        role = item.get('role') or item.get('from') or item.get('actor') or ''
        content = item.get('content') or item.get('message') or item.get('text') or ''
        msgs.append({'role': str(role).lower(), 'content': str(content)})
    # collect user -> assistant pairs
    for i in range(len(msgs)-1):
        a, b = msgs[i], msgs[i+1]
        if a['role'].startswith('user') and b['role'].startswith('assistant'):
            p = clean_text(a['content'])
            r = clean_text(b['content'])
            if p and r:
                pairs.append({'prompt': p, 'response': r})

pairs_df = pd.DataFrame(pairs)
print("Extracted pairs:", len(pairs_df))



# dedupe, length limits
pairs_df.drop_duplicates(subset=['prompt','response'], inplace=True)
MAX_CHARS = 30000
pairs_df['prompt'] = pairs_df['prompt'].str.slice(0, MAX_CHARS)
pairs_df['response'] = pairs_df['response'].str.slice(0, MAX_CHARS)

pairs_df.to_csv("/clean_pairs.csv", index=False)



Raw shape: (5000, 3)
Columns: ['conversation', 'openai_moderation', 'redacted']
Extracted pairs: 8167


In [None]:
# Cell 2 — Build HF dataset and tokenize (static pad + labels -> -100)
from datasets import Dataset, DatasetDict
from transformers import AutoTokenizer
import pandas as pd

clean_path = "/clean_pairs.csv"
assert os.path.exists(clean_path), "Run Cell 1 first."

df = pd.read_csv(clean_path)
print("Loaded cleaned pairs:", len(df))
# drop bad rows
df["prompt"] = df["prompt"].astype(str).fillna("").str.strip()
df["response"] = df["response"].astype(str).fillna("").str.strip()
df = df[(df["prompt"] != "") & (df["response"] != "")]
print("After dropping empties:", len(df))

ds = Dataset.from_pandas(df[['prompt','response']])
if len(ds) > 20:
    split = ds.train_test_split(test_size=0.05, seed=42)
    dataset = DatasetDict({"train": split["train"], "validation": split["test"]})
else:
    dataset = DatasetDict({"train": ds, "validation": ds})

# Create text template for causal LM
def to_text(ex):
    prompt = ex['prompt'].strip()
    response = ex['response'].strip()
    text = f"### Instruction:\n{prompt}\n\n### Response:\n{response}"
    return {"text": text}

dataset = dataset.map(to_text, remove_columns=['prompt','response'], batched=False)

# Tokenizer settings
BASE_TOKENIZER = "gpt2"   # change if you prefer another base
tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZER, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

MAX_LENGTH = 512   # lower if VRAM tight
def tokenize_and_mask(ex):
    enc = tokenizer(ex['text'], truncation=True, padding="max_length", max_length=MAX_LENGTH)
    pad_id = tokenizer.pad_token_id
    input_ids = enc['input_ids']
    labels = [[(tid if tid != pad_id else -100) for tid in seq] for seq in input_ids]
    enc['labels'] = labels
    return enc

tokenized = dataset.map(tokenize_and_mask, batched=True, remove_columns=['text'])
print("Tokenized dataset:", tokenized)
# Save tokenized dataset to disk
tokenized.save_to_disk("tokenized_dataset")
print("Saved tokenized dataset -> /mnt/data/tokenized_dataset")


Loaded cleaned pairs: 7962
After dropping empties: 7962


Map:   0%|          | 0/7563 [00:00<?, ? examples/s]

Map:   0%|          | 0/399 [00:00<?, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/7563 [00:00<?, ? examples/s]

Map:   0%|          | 0/399 [00:00<?, ? examples/s]

Tokenized dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 7563
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 399
    })
})


Saving the dataset (0/1 shards):   0%|          | 0/7563 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/399 [00:00<?, ? examples/s]

Saved tokenized dataset -> /mnt/data/tokenized_dataset


In [None]:
! pip install -U transformers datasets accelerate evaluate tqdm sentencepiece tokenizers safetensors

Collecting datasets
  Downloading datasets-4.3.0-py3-none-any.whl.metadata (18 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading datasets-4.3.0-py3-none-any.whl (506 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.8/506.8 kB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl (47.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, datasets, evaluate
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 18.1.0
    Uninst

In [None]:
! pip install -U peft



In [None]:
!  pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [None]:
# ---------------- Cell 3: Train LoRA + KL (reference model ON GPU) ----------------
import os, inspect, torch
os.environ["WANDB_MODE"] = "disabled"
os.environ["WANDB_API_KEY"] = ""
os.environ["WANDB_SILENT"] = "true"

from datasets import load_from_disk
from transformers import AutoModelForCausalLM, TrainingArguments, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch.nn.functional as F

# load tokenized dataset
if "tokenized" not in globals():
    tokenized = load_from_disk("/tokenized_dataset")
print("Train size:", len(tokenized["train"]))

# CONFIG
BASE_MODEL = "gpt2"            # change if you want a different base
OUT_DIR = "./ft-out-lora"
KL_COEF = 0.1
NUM_EPOCHS = 3

# detect bitsandbytes (8-bit) availability
use_bnb_8bit = False
try:
    import bitsandbytes as bnb  # noqa
    use_bnb_8bit = True
except Exception:
    use_bnb_8bit = False
print("bitsandbytes available:", use_bnb_8bit)

# tokenizer (assumes it's in memory from previous cells, else load)
try:
    tokenizer  # noqa
except NameError:
    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Training device:", device)

# ---------- REFERENCE MODEL (FROZEN) on GPU ----------
# Put the reference model on same device (GPU) for faster KL computation per step.
ref_device = torch.device(device)
ref_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL).to(ref_device)
ref_model.eval()
for p in ref_model.parameters():
    p.requires_grad = False
# ensure no use_cache when training with checkpointing
try:
    ref_model.config.use_cache = False
except Exception:
    pass
print("Reference model device:", next(ref_model.parameters()).device)

# ---------- POLICY MODEL (trainable with LoRA) ----------
if use_bnb_8bit:
    # 8-bit load saves VRAM if supported (may not be supported on Windows)
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True, device_map="auto")
    model = prepare_model_for_kbit_training(model)
    # ensure no use_cache flag
    try:
        model.config.use_cache = False
    except Exception:
        pass
else:
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL).to(device)
    try:
        model.config.use_cache = False
    except Exception:
        pass

# ---------- LoRA config ----------
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["c_attn", "c_proj"],  # GPT2 common targets; change if model differs
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# data collator (tokenization used static padding)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# ---------- Custom KLTrainer (compatible with HF Trainer API) ----------
from transformers import Trainer

class KLTrainer(Trainer):
    def __init__(self, ref_model, kl_coef=0.1, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # ensure ref_model is on same device if desired
        self.ref_model = ref_model
        self.kl_coef = kl_coef

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        # accepts extra kwargs for HF compatibility
        labels = inputs.get("labels", None)
        outputs = model(**inputs)
        logits = outputs.logits  # (B, L, V)

        # CE on next-token (shifted)
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous() if labels is not None else None
        loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100, reduction="mean")
        if shift_labels is not None:
            ce_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
        else:
            ce_loss = torch.tensor(0.0, device=logits.device)

        # compute reference logits on ref_device (ref_model is on GPU)
        with torch.no_grad():
            # assume inputs are on device; pass them directly to ref_model (same device)
            ref_inputs = {}
            for k, v in inputs.items():
                if k in ("input_ids", "attention_mask"):
                    # move to ref_model device if needed
                    ref_inputs[k] = v.to(self.ref_model.device)
            ref_out = self.ref_model(**ref_inputs)
            ref_logits = ref_out.logits.to(logits.device)

        # compute per-token KL: sum p*(log p - log q)
        logp = torch.nn.functional.log_softmax(logits, dim=-1)
        logp_ref = torch.nn.functional.log_softmax(ref_logits, dim=-1)
        p = logp.exp()
        kl_per_token = (p * (logp - logp_ref)).sum(dim=-1)  # (B, L)

        if shift_labels is not None:
            mask = (shift_labels != -100).float()  # (B, L-1)
            kl_shift = kl_per_token[..., :-1]
            kl_sum = (kl_shift * mask).sum()
            denom = mask.sum().clamp_min(1.0)
            kl_mean = kl_sum / denom
        else:
            kl_mean = kl_per_token.mean()

        total_loss = ce_loss + self.kl_coef * kl_mean
        return (total_loss, outputs) if return_outputs else total_loss

# ---------- TrainingArguments (defensive by signature) ----------
out_dir = OUT_DIR
per_device_train_batch_size = 2 if torch.cuda.is_available() else 1
args_dict = {
    "output_dir": out_dir,
    "num_train_epochs": NUM_EPOCHS,
    "per_device_train_batch_size": per_device_train_batch_size,
    "gradient_accumulation_steps": 2,
    "logging_steps": 100,
    "save_steps": 1000,
    "learning_rate": 2e-4,    # often larger for LoRA
    "fp16": torch.cuda.is_available(),
    "weight_decay": 0.0,
    "warmup_steps": 50,
    "remove_unused_columns": False,
    "push_to_hub": False,
}
sig = inspect.signature(TrainingArguments.__init__)
# add evaluation/reporting options if supported
if "evaluation_strategy" in sig.parameters:
    args_dict["evaluation_strategy"] = "steps"
    args_dict["eval_steps"] = 500
if "report_to" in sig.parameters:
    args_dict["report_to"] = []
# filter to supported args
args_filtered = {k: v for k, v in args_dict.items() if k in sig.parameters}
training_args = TrainingArguments(**args_filtered)

# ---------- Trainer instantiation ----------
trainer = KLTrainer(
    model=model,
    ref_model=ref_model,
    kl_coef=KL_COEF,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized.get("validation", None),
    data_collator=data_collator,
)

# Start training
print("Starting training ...")
trainer.train()

# Save LoRA adapters & a checkpoint (adapter files saved in adapter_dir)
adapter_dir = os.path.join(out_dir, "adapters")
os.makedirs(adapter_dir, exist_ok=True)
# Save adapters (PEFT)
model.save_pretrained(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
# also save trainer checkpoint
trainer.save_model(os.path.join(out_dir, "checkpoint-after-train"))

print("Training complete. Adapters saved to:", adapter_dir)
# ---------------- end Cell 3 ----------------


Train size: 7563
bitsandbytes available: True
Training device: cuda
Reference model device: cuda:0


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


trainable params: 811,008 || all params: 125,250,816 || trainable%: 0.6475
Starting training ...


  return fn(*args, **kwargs)


Step,Training Loss
100,6.1513
200,5.5147
300,5.6273
400,5.4026
500,5.6453
600,5.4105
700,5.5486
800,5.3511
900,5.2872
1000,5.3676


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Training complete. Adapters saved to: ./ft-out-lora/adapters


In [None]:
# ---------------- Cell 4: quick check of saved adapters / checkpoint ----------------
import os
adapter_dir = "./ft-out-lora/adapters"
ckpt_dir = "./ft-out-lora/checkpoint-after-train"
print("Adapter dir exists:", os.path.isdir(adapter_dir))
if os.path.isdir(adapter_dir):
    print("Adapter files:", os.listdir(adapter_dir))
else:
    print("No adapters found. Run Cell 3 first.")

print("Checkpoint dir exists:", os.path.isdir(ckpt_dir))
if os.path.isdir(ckpt_dir):
    print("Checkpoint files:", os.listdir(ckpt_dir))
else:
    print("No checkpoint found.")

# Show how to load the fine-tuned model for inference (example)
print("\nExample: to load for inference you can do:")
print("from transformers import AutoModelForCausalLM, AutoTokenizer")
print("from peft import PeftModel")
print("base = AutoModelForCausalLM.from_pretrained(BASE_MODEL).to(device)")
print("model_ft = PeftModel.from_pretrained(base, adapter_dir).to(device)")
# ---------------- end Cell 4 ----------------


Adapter dir exists: True
Adapter files: ['adapter_config.json', 'adapter_model.safetensors', 'special_tokens_map.json', 'merges.txt', 'tokenizer.json', 'README.md', 'tokenizer_config.json', 'vocab.json']
Checkpoint dir exists: True
Checkpoint files: ['adapter_config.json', 'adapter_model.safetensors', 'special_tokens_map.json', 'merges.txt', 'training_args.bin', 'tokenizer.json', 'README.md', 'tokenizer_config.json', 'vocab.json']

Example: to load for inference you can do:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL).to(device)
model_ft = PeftModel.from_pretrained(base, adapter_dir).to(device)


In [None]:
# ---------------- Cell 5: Compare base model vs fine-tuned (LoRA) outputs ----------------
import os, torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "gpt2"
ADAPTER_DIR = "./ft-out-lora/adapters"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

# load tokenizer (prefer saved adapter tokenizer if present)
if os.path.isdir(ADAPTER_DIR):
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR, local_files_only=True)
else:
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# load base model
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL).to(device)

# load fine-tuned model by applying LoRA adapters onto base
if os.path.isdir(ADAPTER_DIR):
    model_ft = PeftModel.from_pretrained(base, ADAPTER_DIR).to(device)
else:
    raise FileNotFoundError(f"Adapter dir not found: {ADAPTER_DIR} - run Cell 3 first")

# simple generator
def generate_text(model, tokenizer, prompt, max_new_tokens=128):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(out[0], skip_special_tokens=True)

# pick sample prompts (from cleaned pairs if available)
sample_prompts = []
if os.path.exists("/mnt/data/clean_pairs.csv"):
    import pandas as pd
    cdf = pd.read_csv("/mnt/data/clean_pairs.csv")
    sample_prompts = cdf['prompt'].dropna().astype(str).head(3).tolist()
if not sample_prompts:
    sample_prompts = [

        "Summarize the importance of data privacy.",
        "Write a short motivational statement about learning.",

    ]

print("\n=== Base vs Fine-Tuned (LoRA) ===\n")
for i, prompt in enumerate(sample_prompts, 1):
    print(f"--- Prompt {i} ---\n{prompt}\n")
    print("Base model output:\n", generate_text(base, tokenizer, prompt), "\n")
    print("Fine-tuned (LoRA+KL) output:\n", generate_text(model_ft, tokenizer, prompt), "\n")
    print("="*80)
print("Done.")
# ---------------- end Cell 5 ----------------


Device: cuda

=== Base vs Fine-Tuned (LoRA) ===

--- Prompt 1 ---
Summarize the importance of data privacy.

Base model output:
 Summarize the importance of data privacy.

###
The importance of data privacy is not limited to data stored on computers or mobile devices. It can be used to improve the quality of information stored on computers, smartphones, and tablets. It can also be used to improve the security of data stored on computers and mobile devices. Data privacy is also important for companies that have data collection and retention policies. Companies must ensure that data is collected and retained in a way that protects the data's privacy and confidentiality. Companies should ensure that data is collected and retained in a way that protects the data's privacy and confidentiality. Companies should ensure that data is collected and retained in a way that 

Fine-tuned (LoRA+KL) output:
 Summarize the importance of data privacy.

Data privacy is a fundamental right that is enshrin

In [None]:
# !zip -r myfolder.zip /content/ft-out-lora
# from google.colab import files
# files.download("myfolder.zip")


  adding: content/ft-out-lora/ (stored 0%)
  adding: content/ft-out-lora/checkpoint-2000/ (stored 0%)
  adding: content/ft-out-lora/checkpoint-2000/adapter_config.json (deflated 56%)
  adding: content/ft-out-lora/checkpoint-2000/scheduler.pt (deflated 61%)
  adding: content/ft-out-lora/checkpoint-2000/adapter_model.safetensors (deflated 7%)
  adding: content/ft-out-lora/checkpoint-2000/special_tokens_map.json (deflated 60%)
  adding: content/ft-out-lora/checkpoint-2000/merges.txt (deflated 53%)
  adding: content/ft-out-lora/checkpoint-2000/training_args.bin (deflated 53%)
  adding: content/ft-out-lora/checkpoint-2000/rng_state.pth (deflated 26%)
  adding: content/ft-out-lora/checkpoint-2000/trainer_state.json (deflated 71%)
  adding: content/ft-out-lora/checkpoint-2000/scaler.pt (deflated 64%)
  adding: content/ft-out-lora/checkpoint-2000/optimizer.pt (deflated 9%)
  adding: content/ft-out-lora/checkpoint-2000/tokenizer.json (deflated 82%)
  adding: content/ft-out-lora/checkpoint-2000/

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>