In [1]:
pip install torch transformers datasets peft accelerate bitsandbytes

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os, shutil, pathlib

# 1) Put HF caches on a normal, writeable disk (not inside your repo folder)
CACHE_ROOT = "/tmp/hf_cache"     # or "/root/.cache/huggingface"
os.environ["HF_HOME"] = CACHE_ROOT
os.environ["TRANSFORMERS_CACHE"] = f"{CACHE_ROOT}/transformers"
os.environ["HF_DATASETS_CACHE"]  = f"{CACHE_ROOT}/datasets"
for p in (os.environ["TRANSFORMERS_CACHE"], os.environ["HF_DATASETS_CACHE"]):
    pathlib.Path(p).mkdir(parents=True, exist_ok=True)

# 2) (Optional) Nuke any corrupted partial downloads from the default place
shutil.rmtree(os.path.expanduser("~/.cache/huggingface"), ignore_errors=True)

# 3) Faster/more reliable downloads
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"   # uses rust-based downloader when available

In [5]:
!pip -q install hf_transfer

In [3]:
!nvidia-smi


Mon Sep 22 11:00:07 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.172.08             Driver Version: 570.172.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:1E.0 Off |                    0 |
| N/A   34C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_id = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    num_labels=2,              # spam vs not spam
    load_in_4bit=True,         # fits in 16GB VRAM
    device_map="auto"
)


tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-7b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# 0) After you create tokenizer & model
from transformers import DataCollatorWithPadding

# If the tokenizer has no pad token, use EOS or add a new one
if tokenizer.pad_token is None:
    if tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token
    else:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        model.resize_token_embeddings(len(tokenizer))  # only needed when we *add* a token

# Make sure model knows the pad id
model.config.pad_token_id = tokenizer.pad_token_id

# (optional) set padding side for decoder-style models
tokenizer.padding_side = "right"  # or "left" if your model prefers it

In [8]:
from datasets import load_dataset

dataset = load_dataset("sms_spam", split="train")

def format_example(example):
    return {
        "text": example["sms"],
        "label": int(example["label"] == "spam")  # spam=1, ham=0
    }

dataset = dataset.map(format_example, remove_columns=dataset.column_names)

def tokenize(batch):
    out = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",   # or use dynamic padding via a DataCollator
        max_length=128,
    )
    out["labels"] = batch["label"]            # Trainer expects 'labels'
    return out

encoded_dataset = dataset.map(tokenize, batched=True)

README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/359k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5574 [00:00<?, ? examples/s]

Map:   0%|          | 0/5574 [00:00<?, ? examples/s]

Map:   0%|          | 0/5574 [00:00<?, ? examples/s]

In [9]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05
)

model = get_peft_model(model, lora_config)

In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
import torch

print("bf16 supported:", torch.cuda.is_bf16_supported())  # should be True

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

training_args = TrainingArguments(
    output_dir="./falcon_spam_lora",
    per_device_train_batch_size=4,
    learning_rate=2e-4,
    num_train_epochs=3,
     eval_strategy="steps",
    eval_steps=100,
    save_strategy="epoch",
    logging_steps=50,
    fp16=False,          # <- turn off fp16
    bf16=True,           # <- use bf16 to match bnb_4bit_compute_dtype
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset,
    eval_dataset=encoded_dataset.select(range(500)),
    data_collator=data_collator,
)
trainer.train()

bf16 supported: True


Step,Training Loss,Validation Loss
100,0.0,


In [None]:
import torch

text = "Congratulations! You have won a free gift. Click here!"
inputs = tokenizer(text, return_tensors="pt").to("cuda")
outputs = model(**inputs)
pred = torch.argmax(outputs.logits, dim=-1).item()

print("Spam" if pred == 1 else "Not Spam")