In [18]:
pip install torch transformers datasets peft accelerate bitsandbytes



In [19]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "prajjwal1/bert-mini"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)



config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-mini and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
from datasets import load_dataset

dataset = load_dataset("sms_spam", split="train")

def format_example(example):
    return {
        "text": example["sms"],
        "label": int(example["label"] == "spam")  # spam=1, ham=0
    }

dataset = dataset.map(format_example, remove_columns=dataset.column_names)

def tokenize(batch):
    out = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",   # or use dynamic padding via a DataCollator
        max_length=128,
    )
    out["labels"] = batch["label"]            # Trainer expects 'labels'
    return out

encoded_dataset = dataset.map(tokenize, batched=True)

model.safetensors:   0%|          | 0.00/45.1M [00:00<?, ?B/s]

Map:   0%|          | 0/5574 [00:00<?, ? examples/s]

In [21]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05
)

model = get_peft_model(model, lora_config)

In [22]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
import torch

print("bf16 supported:", torch.cuda.is_bf16_supported())  # should be True

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

training_args = TrainingArguments(
    output_dir="./falcon_spam_lora",
    per_device_train_batch_size=4,
    learning_rate=2e-4,
    num_train_epochs=3,
     eval_strategy="steps",
    eval_steps=100,
    save_strategy="epoch",
    logging_steps=50,
    fp16=False,          # <- turn off fp16
    bf16=True,           # <- use bf16 to match bnb_4bit_compute_dtype
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset,
    eval_dataset=encoded_dataset.select(range(500)),
    data_collator=data_collator,
)
trainer.train()

bf16 supported: True


Step,Training Loss,Validation Loss
100,0.0087,0.004234
200,0.0021,0.001439
300,0.0011,0.000773
400,0.0006,0.000496
500,0.0005,0.00035
600,0.0004,0.000261
700,0.0003,0.000204
800,0.0002,0.000165
900,0.0002,0.000137
1000,0.0002,0.000116


TrainOutput(global_step=4182, training_loss=0.001923435793887594, metrics={'train_runtime': 179.6545, 'train_samples_per_second': 93.079, 'train_steps_per_second': 23.278, 'total_flos': 42276315322368.0, 'train_loss': 0.001923435793887594, 'epoch': 3.0})

In [24]:
import torch

text = "Congratulations! You have won a free gift. Click here!"
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model(**inputs)
pred = torch.argmax(outputs.logits, dim=-1).item()

print("Spam" if pred == 1 else "Not Spam")

Not Spam
