Install

In [1]:
!pip -q install "transformers>=4.43" datasets peft accelerate bitsandbytes gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25h

Imports & config

In [2]:
from datasets import load_dataset, Dataset
from transformers import (AutoTokenizer, AutoModelForCausalLM,
                          Trainer, TrainingArguments, DataCollatorForLanguageModeling)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import pandas as pd, torch, os, json, random
from pathlib import Path

BASE_MODEL = "distilgpt2"          # small/fast
HF_REPO    = "your-username/support-reply-gpt-lora"   # <- change this
SEED       = 42
random.seed(SEED); torch.manual_seed(SEED)

# Where to save the LoRA adapter
ADAPTER_DIR = "lora_adapter"
Path(ADAPTER_DIR).mkdir(exist_ok=True)

Load dataset (Twitter Customer Support)

In [3]:
seed_pairs = [
  {"user":"My order hasn’t arrived yet, can you help?",
   "agent":"I’m sorry for the delay! Could you share your order ID so I can check the status right away?"},
  {"user":"I was charged twice for the same purchase.",
   "agent":"That shouldn’t happen—sorry about this. Please DM your last 4 digits and the transaction date; I’ll arrange a refund for the duplicate charge."},
  {"user":"The app keeps crashing on login.",
   "agent":"Thanks for flagging this! Please try clearing cache and updating to the latest version. If it persists, send your device model and OS version so we can investigate."},
  {"user":"How do I cancel my subscription?",
   "agent":"You can cancel anytime from Settings → Subscription → Cancel. If you prefer, I can cancel it for you—just confirm the email on file."},
  {"user":"Package says delivered but I didn’t get it.",
   "agent":"I’m sorry about that. Sometimes carriers mark early. Could you confirm your address and check with neighbors? If not found in 24h, I’ll ship a replacement or refund—your choice."},
  {"user":"Promo code isn’t working at checkout.",
   "agent":"Got it—please share the code and cart items. Some codes exclude sale items or require a minimum. I can add a one-time credit if you qualify."}
]

df = pd.DataFrame(seed_pairs)
df.head()

Unnamed: 0,user,agent
0,"My order hasn’t arrived yet, can you help?",I’m sorry for the delay! Could you share your ...
1,I was charged twice for the same purchase.,That shouldn’t happen—sorry about this. Please...
2,The app keeps crashing on login.,Thanks for flagging this! Please try clearing ...
3,How do I cancel my subscription?,You can cancel anytime from Settings → Subscri...
4,Package says delivered but I didn’t get it.,I’m sorry about that. Sometimes carriers mark ...


Build training texts

In [5]:
from datasets import Dataset

PROMPT_TMPL = """You are a helpful, empathetic customer support agent.
Customer: {user}
Agent:"""

def to_lm(example):
    prompt = PROMPT_TMPL.format(user=example["user"])
    full = prompt + " " + example["agent"]
    return {"text": full}

# Convert full dataframe to HF Dataset first
hf_ds = Dataset.from_pandas(df)

# Apply transformation
train_texts = hf_ds.map(to_lm, remove_columns=["user", "agent"])
train_texts = train_texts.shuffle(seed=SEED)

len(train_texts), train_texts[0]

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

(6,
 {'text': 'You are a helpful, empathetic customer support agent.\nCustomer: How do I cancel my subscription?\nAgent: You can cancel anytime from Settings → Subscription → Cancel. If you prefer, I can cancel it for you—just confirm the email on file.'})

Tokenize + chunk

In [6]:
tok = AutoTokenizer.from_pretrained(BASE_MODEL)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token
BLOCK_SIZE = 256

def tokenize(batch):
    return tok(batch["text"], truncation=True, max_length=BLOCK_SIZE)

tok_ds = train_texts.map(tokenize, batched=True, remove_columns=["text"])
collator = DataCollatorForLanguageModeling(tok, mlm=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/762 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

Load base (4-bit) + LoRA

In [7]:
bnb_kwargs = dict(load_in_4bit=True, bnb_4bit_use_double_quant=True,
                  bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16)

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **bnb_kwargs)
model = prepare_model_for_kbit_training(model)

lora_cfg = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["c_attn","c_proj"],  # GPT2 family
    bias="none", task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/353M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

trainable params: 811,008 || all params: 82,723,584 || trainable%: 0.9804


Train

In [8]:
args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=5,      # small set → few epochs okay
    learning_rate=2e-4,
    fp16=True,
    save_strategy="epoch",
    logging_steps=10,
    report_to="none"
)

trainer = Trainer(model=model, args=args, train_dataset=tok_ds, data_collator=collator)
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Step,Training Loss


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=5, training_loss=4.32716064453125, metrics={'train_runtime': 4.9222, 'train_samples_per_second': 6.095, 'train_steps_per_second': 1.016, 'total_flos': 500314374144.0, 'train_loss': 4.32716064453125, 'epoch': 5.0})

Save LoRA adapter

In [9]:
model.save_pretrained(ADAPTER_DIR)
tok.save_pretrained(ADAPTER_DIR)
print("Saved:", ADAPTER_DIR, os.listdir(ADAPTER_DIR))

Saved: lora_adapter ['merges.txt', 'README.md', 'adapter_model.safetensors', 'vocab.json', 'tokenizer_config.json', 'tokenizer.json', 'adapter_config.json', 'special_tokens_map.json']


Quick generation test

In [10]:
from peft import PeftModel
from transformers import pipeline

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16, device_map="auto")
base = PeftModel.from_pretrained(base, ADAPTER_DIR)
pipe = pipeline("text-generation", model=base, tokenizer=tok, device_map="auto")

prompt = PROMPT_TMPL.format(user="I can’t log into my account. Can you help?")
print(pipe(prompt, max_new_tokens=120, temperature=0.7, top_p=0.9)[0]["generated_text"])

`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cuda:0


You are a helpful, empathetic customer support agent.
Customer: I can’t log into my account. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can’t. Can you help?
Agent: I can�


In [15]:
!pip -q install -U huggingface_hub

from huggingface_hub import notebook_login, create_repo, upload_folder, whoami
import os

# 1) Login with a token that has at least "Read and write" (repo) scope
notebook_login()              # paste token; tick "Add token as git credential?"

# 2) Build a valid repo id under YOUR namespace
me = whoami()                 # verifies the login and returns your username
HF_USERNAME = me["name"]      # e.g., "aakash-malhan"
REPO_NAME  = "customer-support-lora"   # choose any unique name
REPO_ID    = f"{HF_USERNAME}/{REPO_NAME}"   # full repo id

print("Will push to:", REPO_ID)

# 3) Create (or reuse) a model repo
create_repo(REPO_ID, repo_type="model", exist_ok=True, private=False)

# 4) Push your LoRA folder
ADAPTER_DIR = "lora_adapter"   # <-- whatever folder you saved your adapter to
assert os.path.isdir(ADAPTER_DIR), f"Folder not found: {ADAPTER_DIR}"

upload_folder(
    repo_id=REPO_ID,
    folder_path=ADAPTER_DIR,
    repo_type="model"
)

print("✅ Pushed LoRA to:", REPO_ID)

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/563.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m563.4/563.4 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25h

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Will push to: aakash-malhan/customer-support-lora


Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:  17%|#7        |  564kB / 3.25MB            

✅ Pushed LoRA to: aakash-malhan/customer-support-lora
