#Ensure you select T4 GPU to enable cuda here

#Install Dependency and then restart the session

In [1]:
!pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3,>=2.2->bitsandbytes)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-c

#Import all the libs here

In [1]:
import torch
import pandas as pd
import urllib.request, zipfile
from datasets import Dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments,
                          BitsAndBytesConfig)
from peft import (get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training,
                  PromptTuningConfig, PeftModel)

##Supress Warnings

In [2]:
import logging

logging.getLogger("transformers.modeling_utils").setLevel(logging.ERROR)
logging.getLogger("accelerate").setLevel(logging.ERROR)

#Load data after fetching it

In [3]:
def load_sms_data():
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
    urllib.request.urlretrieve(url, "smsspamcollection.zip")
    with zipfile.ZipFile("smsspamcollection.zip", 'r') as zip_ref:
        zip_ref.extractall(".")
    df = pd.read_csv("SMSSpamCollection", sep='\t', header=None, names=['label', 'message'])
    return df

def build_dataset(df):
    def to_qa(x):
        q = f"Is this SMS spam or ham? '{x['message']}'"
        a = "This is spam." if x['label'] == 'spam' else "This is ham."
        return {"question": q, "answer": a}
    ds = Dataset.from_pandas(df).map(to_qa)
    return ds.train_test_split(0.05)

#Training

##Load Base model

In [4]:
def load_base_model(model_id):
    quant_cfg = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )
    model = AutoModelForCausalLM.from_pretrained(model_id,
                                                 quantization_config=quant_cfg,
                                                 device_map="auto")
    tok = AutoTokenizer.from_pretrained(model_id)
    tok.pad_token = tok.eos_token
    return model, tok

##define Chat prompts and format

In [5]:
def apply_chat_format(ds, tok):
    def format_prompt(x):
        msgs = [{"role": "user", "content": x["question"]},
                {"role": "assistant", "content": x["answer"]}]
        return {"text": tok.apply_chat_template(msgs, tokenize=False)}
    return ds.map(format_prompt)

def tokenize_dataset(ds, tok):
    def tok_fn(batch):
        out = tok(batch["text"], padding="max_length", truncation=True, max_length=96)
        out["labels"] = [[-100 if tid == tok.pad_token_id else tid for tid in ids]
                         for ids in out["input_ids"]]
        return out
    return ds.map(tok_fn, batched=True, remove_columns=ds["train"].column_names)

## Finetune QLoRA here

In [6]:
def prepare_lora_model(model):
    model = prepare_model_for_kbit_training(model)
    lora_cfg = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
    return get_peft_model(model, lora_cfg)

##Train the model on LoRA

In [9]:
def run_training(model, tok_ds, tok, out_dir):
    args = TrainingArguments(
        output_dir=out_dir,
        eval_strategy="steps",
        eval_steps=500,
        logging_steps=500,
        save_steps=500,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=2,
        gradient_checkpointing=True,
        fp16=True,
        learning_rate=1e-4,
        max_grad_norm=1.0,
        report_to="none"
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tok_ds["train"],
        eval_dataset=tok_ds["test"],
        tokenizer=tok
    )
    trainer.train()
    return trainer


##Call for QLoRA training

In [10]:
df = load_sms_data()
ds = build_dataset(df)
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
base_model, tok = load_base_model(model_id)

lora_model = prepare_lora_model(base_model)
formatted_ds = apply_chat_format(ds, tok)
tok_ds = tokenize_dataset(formatted_ds, tok)
lora_trainer = run_training(lora_model, tok_ds, tok, "./results_lora")
lora_trainer.save_model("./trained/lora_model")
tok.save_pretrained("./trained/lora_model")

Map:   0%|          | 0/5572 [00:00<?, ? examples/s]

The following TP rules were not applied on any of the layers: {'layers.*.self_attn.q_proj': 'colwise', 'layers.*.self_attn.k_proj': 'colwise', 'layers.*.self_attn.v_proj': 'colwise', 'layers.*.self_attn.o_proj': 'rowwise', 'layers.*.mlp.gate_proj': 'colwise', 'layers.*.mlp.up_proj': 'colwise', 'layers.*.mlp.down_proj': 'rowwise'}
The following layers were not sharded: model.layers.*.input_layernorm.weight, model.norm.weight, model.layers.*.post_attention_layernorm.weight, lm_head.weight, model.embed_tokens.weight


Map:   0%|          | 0/5293 [00:00<?, ? examples/s]

Map:   0%|          | 0/279 [00:00<?, ? examples/s]

Map:   0%|          | 0/5293 [00:00<?, ? examples/s]

Map:   0%|          | 0/279 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
500,1.7934,1.718554
1000,1.6698,1.696676


('./trained/lora_model/tokenizer_config.json',
 './trained/lora_model/special_tokens_map.json',
 './trained/lora_model/chat_template.jinja',
 './trained/lora_model/tokenizer.model',
 './trained/lora_model/added_tokens.json',
 './trained/lora_model/tokenizer.json')

##Promp-tuning

In [11]:
def prepare_prompt_model(model_id):
    base = AutoModelForCausalLM.from_pretrained(model_id,
                                                device_map="auto", torch_dtype=torch.float16)
    base = prepare_model_for_kbit_training(base)
    prompt_cfg = PromptTuningConfig(
        task_type=TaskType.CAUSAL_LM,
        num_virtual_tokens=8,
        tokenizer_name_or_path=model_id
    )
    prompt_model = get_peft_model(base, prompt_cfg)
    prompt_model.print_trainable_parameters()
    return prompt_model

prompt_model = prepare_prompt_model(model_id)
prompt_trainer = run_training(prompt_model, tok_ds, tok, "./results_prompt")
prompt_trainer.save_model("./trained/prompt_model")
tok.save_pretrained("./trained/prompt_model")

The following TP rules were not applied on any of the layers: {'layers.*.self_attn.q_proj': 'colwise', 'layers.*.self_attn.k_proj': 'colwise', 'layers.*.self_attn.v_proj': 'colwise', 'layers.*.self_attn.o_proj': 'rowwise', 'layers.*.mlp.gate_proj': 'colwise', 'layers.*.mlp.up_proj': 'colwise', 'layers.*.mlp.down_proj': 'rowwise'}
The following layers were not sharded: model.layers.*.input_layernorm.weight, model.norm.weight, model.layers.*.post_attention_layernorm.weight, lm_head.weight, model.embed_tokens.weight
  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 16,384 || all params: 1,100,064,768 || trainable%: 0.0015


Step,Training Loss,Validation Loss
500,5.0272,4.442979
1000,4.2194,4.044755


('./trained/prompt_model/tokenizer_config.json',
 './trained/prompt_model/special_tokens_map.json',
 './trained/prompt_model/chat_template.jinja',
 './trained/prompt_model/tokenizer.model',
 './trained/prompt_model/added_tokens.json',
 './trained/prompt_model/tokenizer.json')

#Testing models

In [12]:
def classify_sms(text, model, tok):
    prompt = tok.apply_chat_template(
        [{"role": "user", "content": f"Is this SMS spam or ham? '{text}' Answer with 'Spam' or 'Ham' and explain."}],
        tokenize=False, add_generation_prompt=True
    )
    inputs = tok(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=80, do_sample=True, temperature=0.3, repetition_penalty=1.2)
    return tok.decode(out[0], skip_special_tokens=True).split("<|assistant|>")[-1].strip()

def spam_rules(text, llm_out):
    lower = text.lower()
    spam_words = ["free", "win", "urgent", "claim"]
    spam_patterns = ["click here", "call now"]
    has_kw = any(w in lower for w in spam_words)
    has_pat = any(p in lower for p in spam_patterns)
    caps_ratio = sum(1 for c in text if c.isupper()) / max(len(text),1)
    too_caps = caps_ratio > 0.3
    is_spam = "spam" in llm_out.lower() and "ham" not in llm_out.lower()

    if has_kw and (has_pat or too_caps):
        return True, 0.9, "Strong spam indicators"
    if has_kw or has_pat:
        return True, 0.8, "Moderate indicators"
    if is_spam:
        return True, 0.7, "LLM classified as spam"
    return False, 0.6, "Likely ham"


##Run on samples

In [13]:
examples = [
    "Congratulations! You've won a FREE iPhone! Click here now.",
    "Hey, are we still meeting for lunch?",
    "URGENT: Your account will be closed. Call immediately!",
    "Thanks for the birthday wishes.",
    "LIMITED TIME OFFER: Get 50% off. Act now!"
]

lora_loaded = AutoModelForCausalLM.from_pretrained("./trained/lora_model").to("cuda")
prompt_loaded = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16),
    "./trained/prompt_model"
)

for sms in examples:
    l_out = classify_sms(sms, lora_loaded, tok)
    is_spam, conf, reason = spam_rules(sms, l_out)
    print(f"\n[LoRA] {sms} -> {l_out} | Spam? {is_spam} ({conf}) {reason}")

    p_out = classify_sms(sms, prompt_loaded, tok)
    is_spam, conf, reason = spam_rules(sms, p_out)
    print(f"[Prompt] {sms} -> {p_out} | Spam? {is_spam} ({conf}) {reason}")


The following TP rules were not applied on any of the layers: {'layers.*.self_attn.q_proj': 'colwise', 'layers.*.self_attn.k_proj': 'colwise', 'layers.*.self_attn.v_proj': 'colwise', 'layers.*.self_attn.o_proj': 'rowwise', 'layers.*.mlp.gate_proj': 'colwise', 'layers.*.mlp.up_proj': 'colwise', 'layers.*.mlp.down_proj': 'rowwise'}
The following layers were not sharded: model.layers.*.input_layernorm.weight, model.norm.weight, model.layers.*.post_attention_layernorm.weight, lm_head.weight, model.embed_tokens.weight
The following TP rules were not applied on any of the layers: {'layers.*.self_attn.q_proj': 'colwise', 'layers.*.self_attn.k_proj': 'colwise', 'layers.*.self_attn.v_proj': 'colwise', 'layers.*.self_attn.o_proj': 'rowwise', 'layers.*.mlp.gate_proj': 'colwise', 'layers.*.mlp.up_proj': 'colwise', 'layers.*.mlp.down_proj': 'rowwise'}
The following layers were not sharded: model.layers.*.input_layernorm.weight, model.norm.weight, model.layers.*.post_attention_layernorm.weight, lm_h


[LoRA] Congratulations! You've won a FREE iPhone! Click here now. -> This is spam. It means you have not been selected for the prize. Please do not reply to it as it will be considered spam. If you are interested in claiming your free iphone, please visit http://www.iphonemasterclass.com/win-free-ipod-touch-and-phone/ . Thank you. HAM. | Spam? True (0.9) Strong spam indicators




[Prompt] Congratulations! You've won a FREE iPhone! Click here now. -> Their new home, their old. I have no idea what to do at all, but they did it for the first time. They also sent out a few e-mails about how to do again, because of that. The exact, not even, so you had already done once, which is on my behing, then, so I was there too, and then they were gone | Spam? True (0.9) Strong spam indicators

[LoRA] Hey, are we still meeting for lunch? -> This is ham.

Answer: Ham

Sent: 12/04/2019 at 3:56 pm

From: [Name]
To: [Recipient]
Subject: Hey, are we still meeting for lunch?

Reply-to: [Recipient]
Message: Spam

I | Spam? False (0.6) Likely ham
[Prompt] Hey, are we still meeting for lunch? -> This is the final message from my office to send me a few times. I will be getting paid to pay for an extra $100. And then, too. It was about 2-3 years ago, but now it was about 5. The last time was 6, so now, and so: 7. A new one, 8: more than that, | Spam? False (0.6) Likely ham

[LoRA] URGE

##Run on test set

In [14]:
def eval_with_metrics(model, data_split, tokenizer, original_ds):
    tp = fp = fn = tn = 0
    n = len(original_ds)

    for i in range(n):
        entry = original_ds[i]
        q_text = entry['question']
        true_cls = "spam" if "spam" in entry['answer'].lower() else "ham"

        llm_resp = classify_sms(q_text, model, tokenizer)
        is_spam, _, _ = spam_rules(q_text, llm_resp)
        pred_cls = "spam" if is_spam else "ham"

        if true_cls == "spam" and pred_cls == "spam":
            tp += 1
        elif true_cls == "ham" and pred_cls == "spam":
            fp += 1
        elif true_cls == "spam" and pred_cls == "ham":
            fn += 1
        else:
            tn += 1

        print(f"\nQuestion: {q_text}")
        print(f"True: {true_cls} | Pred: {pred_cls} | LLM: {llm_resp}")

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0
    acc = (tp + tn) / n

    print("\nEvaluation metrics on test set:")
    print(f"Accuracy:  {acc:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall:    {recall:.2f}")
    print(f"F1 Score:  {f1:.2f}")

In [15]:
print("Evaluating LoRA model\n")
eval_with_metrics(lora_loaded, tok_ds["test"], tok, formatted_ds["test"])

print("Evaluating Prompt Tuned model\n")
eval_with_metrics(prompt_loaded, tok_ds["test"], tok, formatted_ds["test"])


Evaluating LoRA model


Question: Is this SMS spam or ham? 'Sorry, I can't help you on this.'
True: ham | Pred: ham | LLM: This is ham.

Question: Is this SMS spam or ham? 'The fact that you're cleaning shows you know why i'm upset. Your priority is constantly "what i want to do," not "what i need to do."'
True: ham | Pred: ham | LLM: This is ham. It means the message has been sent as a result of an error in your network settings, rather than being spam. You can try resetting your phone's security settings by going into Settings > Security & Location Services > Reset Network Settings. If it doesn't work, please contact customer support for assistance.

Question: Is this SMS spam or ham? 'Yes I know the cheesy songs from frosty the snowman :)'
True: ham | Pred: ham | LLM: This is ham.

Question: Is this SMS spam or ham? 'You are not bothering me but you have to trust my answers. Pls.'
True: ham | Pred: ham | LLM: This is ham. It means that the sender has decided not to answer your quest