In [2]:
import torch
from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
)
from datasets import load_dataset

In [3]:
# 1. Load SST-2 (a binary sentiment task)
raw_ds = load_dataset("glue", "sst2")  

# 2. Tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def preprocess(batch):
    return tokenizer(batch["sentence"], truncation=True)

tokenized_ds = raw_ds.map(preprocess, batched=True)
tokenized_ds.set_format(type="torch", columns=["input_ids","attention_mask","label"])

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [4]:
from peft import LoraConfig, get_peft_model

# Wrap a pretrained BERT for classification
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# LoRA config: low-rank adapters on Q/K/V projections
lora_cfg = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query","value"],
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_CLS"
)
model = get_peft_model(model, lora_cfg)  # LoRA injects adapters :contentReference[oaicite:2]{index=2}

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
# DataLoader
train_dl = DataLoader(
    tokenized_ds["train"],
    batch_size=128,
    shuffle=True,
    collate_fn=DataCollatorWithPadding(tokenizer),
    num_workers=4
)

# Optimizer & device
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
model.to(device)
optim = torch.optim.AdamW(model.parameters(), lr=1e-6)

# Training loop
model.train()
for epoch in range(3):
    for batch in train_dl:
        batch = {k:v.to(device) for k,v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        optim.zero_grad()
        loss.backward()
        optim.step()
        print(f"Epoch {epoch+1} loss: {loss.item():.4f}")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Epoch 1 loss: 0.5548
Epoch 1 loss: 0.6330
Epoch 1 loss: 0.6144
Epoch 1 loss: 0.5789
Epoch 1 loss: 0.5505
Epoch 1 loss: 0.5664
Epoch 1 loss: 0.6254
Epoch 1 loss: 0.5369
Epoch 1 loss: 0.5965
Epoch 1 loss: 0.5712
Epoch 1 loss: 0.5932
Epoch 1 loss: 0.5161
Epoch 1 loss: 0.6289
Epoch 1 loss: 0.6240
Epoch 1 loss: 0.5633
Epoch 1 loss: 0.5984
Epoch 1 loss: 0.6162
Epoch 1 loss: 0.6140
Epoch 1 loss: 0.5860
Epoch 1 loss: 0.5713
Epoch 1 loss: 0.5792
Epoch 1 loss: 0.6137
Epoch 1 loss: 0.5647
Epoch 1 loss: 0.5743
Epoch 1 loss: 0.6161
Epoch 1 loss: 0.5847
Epoch 1 loss: 0.6302
Epoch 1 loss: 0.5491
Epoch 1 loss: 0.6148
Epoch 1 loss: 0.5971
Epoch 1 loss: 0.5849
Epoch 1 loss: 0.5906
Epoch 1 loss: 0.5455
Epoch 1 loss: 0.5599
Epoch 1 loss: 0.5633
Epoch 1 loss: 0.6059
Epoch 1 loss: 0.5798
Epoch 1 loss: 0.5977
Epoch 1 loss: 0.5595
Epoch 1 loss: 0.5454
Epoch 1 loss: 0.5690
Epoch 1 loss: 0.5665
Epoch 1 loss: 0.6240
Epoch 1 loss: 0.5675
Epoch 1 loss: 0.5675
Epoch 1 loss: 0.5667
Epoch 1 loss: 0.6122
Epoch 1 loss:

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Epoch 2 loss: 0.5382
Epoch 2 loss: 0.5330
Epoch 2 loss: 0.5558
Epoch 2 loss: 0.5407
Epoch 2 loss: 0.5258
Epoch 2 loss: 0.5708
Epoch 2 loss: 0.5544
Epoch 2 loss: 0.5077
Epoch 2 loss: 0.5598
Epoch 2 loss: 0.5517
Epoch 2 loss: 0.5361
Epoch 2 loss: 0.5298
Epoch 2 loss: 0.5488
Epoch 2 loss: 0.5323
Epoch 2 loss: 0.5587
Epoch 2 loss: 0.5505
Epoch 2 loss: 0.5039
Epoch 2 loss: 0.5585
Epoch 2 loss: 0.5214
Epoch 2 loss: 0.5340
Epoch 2 loss: 0.5456
Epoch 2 loss: 0.5607
Epoch 2 loss: 0.5222
Epoch 2 loss: 0.5495
Epoch 2 loss: 0.5253
Epoch 2 loss: 0.5140
Epoch 2 loss: 0.5191
Epoch 2 loss: 0.5536
Epoch 2 loss: 0.5180
Epoch 2 loss: 0.5791
Epoch 2 loss: 0.5574
Epoch 2 loss: 0.4667
Epoch 2 loss: 0.5426
Epoch 2 loss: 0.5482
Epoch 2 loss: 0.6102
Epoch 2 loss: 0.5304
Epoch 2 loss: 0.5245
Epoch 2 loss: 0.5152
Epoch 2 loss: 0.5138
Epoch 2 loss: 0.5357
Epoch 2 loss: 0.5107
Epoch 2 loss: 0.4903
Epoch 2 loss: 0.4697
Epoch 2 loss: 0.5194
Epoch 2 loss: 0.5456
Epoch 2 loss: 0.5268
Epoch 2 loss: 0.5597
Epoch 2 loss:

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Epoch 3 loss: 0.4784
Epoch 3 loss: 0.4720
Epoch 3 loss: 0.5012
Epoch 3 loss: 0.5288
Epoch 3 loss: 0.4397
Epoch 3 loss: 0.5294
Epoch 3 loss: 0.4346
Epoch 3 loss: 0.4964
Epoch 3 loss: 0.4996
Epoch 3 loss: 0.5161
Epoch 3 loss: 0.4610
Epoch 3 loss: 0.4833
Epoch 3 loss: 0.4662
Epoch 3 loss: 0.4588
Epoch 3 loss: 0.4414
Epoch 3 loss: 0.5200
Epoch 3 loss: 0.4402
Epoch 3 loss: 0.5021
Epoch 3 loss: 0.5199
Epoch 3 loss: 0.5036
Epoch 3 loss: 0.4618
Epoch 3 loss: 0.5520
Epoch 3 loss: 0.4046
Epoch 3 loss: 0.4573
Epoch 3 loss: 0.4834
Epoch 3 loss: 0.5029
Epoch 3 loss: 0.4898
Epoch 3 loss: 0.4871
Epoch 3 loss: 0.4923
Epoch 3 loss: 0.5338
Epoch 3 loss: 0.4848
Epoch 3 loss: 0.4407
Epoch 3 loss: 0.4416
Epoch 3 loss: 0.5547
Epoch 3 loss: 0.4530
Epoch 3 loss: 0.4785
Epoch 3 loss: 0.4599
Epoch 3 loss: 0.4828
Epoch 3 loss: 0.4413
Epoch 3 loss: 0.4099
Epoch 3 loss: 0.4506
Epoch 3 loss: 0.5017
Epoch 3 loss: 0.5517
Epoch 3 loss: 0.4694
Epoch 3 loss: 0.5008
Epoch 3 loss: 0.4683
Epoch 3 loss: 0.4850
Epoch 3 loss: