In [None]:
!pip install torch
!pip install safetensors



In [None]:
from lora_transformer import LoraConfig, LoraModel
from typing import Literal
import torch
from transformers import AutoTokenizer,AutoModelForSequenceClassification, get_linear_schedule_with_warmup, DataCollatorWithPadding
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from torch.cuda.amp import autocast, GradScaler
import os

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs (slowly)
# on machines without CUDA instead of failing when tensors are moved to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Echo the selected device so the run log records where training executed.
print(str(device))

cuda


In [None]:
# --- Backbone and task ------------------------------------------------------
MODEL_NAME = "roberta-base"  # checkpoint used for both the tokenizer and the classifier
TASK = "sst2"                # GLUE configuration name passed to load_dataset

# --- Reproducibility --------------------------------------------------------
# Seed PyTorch here, before the classifier head is initialised and before the
# shuffled training DataLoader is built, so that Restart & Run All is
# repeatable (up to any non-deterministic GPU kernels).
SEED = 42
torch.manual_seed(SEED)

# --- Optimisation -----------------------------------------------------------
BATCH_SIZE = 16  # used by both the training and the validation DataLoader
LR = 5e-4        # learning rate handed to AdamW (peak value after warm-up)
EPOCHS = 60
MAX_LEN = 512    # tokenizer truncation length

# --- LoRA (forwarded unchanged to LoraConfig) -------------------------------
RANK = 8
ALPHA = 16
BIAS = "none"
DROPOUT = 0.0
# NOTE(review): the recorded run reports 592,130 trainable parameters, which is
# exactly the size of the classification head alone
# (768*768 + 768 + 768*2 + 2). With TARGET_MODULES empty it looks like no LoRA
# adapters are attached to the encoder -- confirm this baseline is intended.
TARGET_MODULES = []
EXCLUDE_MODULES =  ["classifier"]  # presumably kept out of LoRA wrapping; verify against lora_transformer

In [None]:
# Fetch the tokenizer that matches the backbone checkpoint, then download the
# GLUE configuration selected by TASK (all of its splits).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
dataset = load_dataset("glue", name=TASK)

In [None]:
def preprocess_function(examples):
    """Tokenize the ``sentence`` field of a batch of examples.

    Args:
        examples: Mapping with a ``"sentence"`` entry holding the raw texts
            (a batch, since it is applied through ``map(..., batched=True)``).

    Returns:
        Whatever the module-level ``tokenizer`` returns for those texts, with
        truncation enabled and sequences capped at ``MAX_LEN`` tokens. No
        padding is applied here.
    """
    sentences = examples["sentence"]
    encoded = tokenizer(sentences, truncation=True, max_length=MAX_LEN)
    return encoded

In [None]:
# Tokenize every split in batches, drop the columns the model does not consume,
# and rename the target column to `labels`, which the training loop reads.
tokenized_datasets = (
    dataset.map(preprocess_function, batched=True)
    .remove_columns(["sentence", "idx"])
    .rename_column("label", "labels")
)
# Switch the output format in place so indexing yields torch tensors.
tokenized_datasets.set_format("torch")

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

In [None]:
# Collator that pads the examples of a batch using the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# os.cpu_count() returns None when the CPU count cannot be determined; fall
# back to 0 (load in the main process) so DataLoader never receives None.
NUM_WORKERS = os.cpu_count() or 0

# Training batches are reshuffled every epoch and loaded by worker processes.
train_dataloader = DataLoader(
    tokenized_datasets["train"],
    shuffle=True,
    batch_size=BATCH_SIZE,
    collate_fn=data_collator,
    num_workers=NUM_WORKERS,
)
# Validation batches keep dataset order and use DataLoader's default workers.
eval_dataloader = DataLoader(
    tokenized_datasets["validation"],
    batch_size=BATCH_SIZE,
    collate_fn=data_collator,
)

In [None]:
# Load the backbone with a two-label classification head; the load log reports
# the classifier weights as newly initialised, i.e. they are not pretrained.
pretrained_model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Bundle the LoRA hyper-parameters from the configuration cell into a single
# config object for the project's LoraModel wrapper.
lora_config = LoraConfig(
    rank=RANK,
    alpha=ALPHA,
    dropout=DROPOUT,
    bias=BIAS,
    target_modules=TARGET_MODULES,
    exclude_modules=EXCLUDE_MODULES,
)


In [None]:
# Wrap the pretrained classifier in the project's LoraModel, configured by
# lora_config.
model = LoraModel(pretrained_model, lora_config)
# Move the wrapped model to the selected device. As the last expression of the
# cell, the call's result is also displayed (the module tree shown below).
model.to(device)

LoraModel(
  (model): RobertaForSequenceClassification(
    (roberta): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0-11): 12 x RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): 

In [None]:
def count_parameters(model):
    """Print how many parameters are trainable versus the wrapped model's total.

    Args:
        model: Object exposing ``get_n_trainable()`` and a ``model`` attribute
            whose ``parameters()`` yields items with ``numel()``.

    Returns:
        None. A single summary line is written to stdout; the percentage is
        the trainable count relative to the wrapped model's parameter total.
    """
    n_trainable = model.get_n_trainable()
    # pretrained model total
    n_total = 0
    for param in model.model.parameters():
        n_total += param.numel()
    share = 100 * n_trainable / n_total
    print(f"Trainable Params: {n_trainable:,} || Total Params: {n_total:,} || %: {share:.2f}%")

In [None]:
# Report the trainable-parameter share of the wrapped model.
count_parameters(model=model)

Trainable Params: 592,130 || Total Params: 124,647,170 || %: 0.48%


In [None]:
# Hand AdamW only the parameters that still require gradients.
optimizer = AdamW(
    (param for param in model.parameters() if param.requires_grad),
    lr=LR,
)
# The scheduler is stepped once per batch, so the schedule spans every batch
# of every epoch; the first 6% of those steps are warm-up.
num_training_steps = len(train_dataloader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_training_steps=num_training_steps,
    num_warmup_steps=int(0.06 * num_training_steps),
)

In [None]:
for epoch in range(EPOCHS):
    # ---- Training pass over the full training set --------------------------
    model.train()
    total_loss = 0

    progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch + 1}/{EPOCHS}")

    for batch in progress_bar:
        # Move every tensor of the batch to the training device.
        batch = {name: tensor.to(device) for name, tensor in batch.items()}

        outputs = model(**batch)
        loss = outputs.loss

        # Backward pass, parameter update, LR schedule step, then clear the
        # gradients for the next batch.
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        # Read the scalar once and reuse it for the running sum and the bar.
        step_loss = loss.item()
        total_loss += step_loss
        progress_bar.set_postfix({"loss": step_loss})

    avg_train_loss = total_loss / len(train_dataloader)

    # ---- Validation pass ---------------------------------------------------
    model.eval()
    preds, labels = [], []

    # Gradients are not needed anywhere in evaluation.
    with torch.no_grad():
        for batch in eval_dataloader:
            batch = {name: tensor.to(device) for name, tensor in batch.items()}
            outputs = model(**batch)

            logits = outputs.logits
            predictions = logits.argmax(dim=-1)

            preds.extend(predictions.cpu().numpy())
            labels.extend(batch["labels"].cpu().numpy())

    acc = accuracy_score(labels, preds)
    print(f"\nEpoch {epoch + 1}. Train Loss: {avg_train_loss:.4f} | Val Accuracy: {acc:.4f}\n")

# Persist the weights as they stand after the final epoch (not the best epoch).
model.save_model("roberta_lora_sst2_withoutting.safetensors", merge_weights=False)
print("Model Saved.")

Epoch 1/60: 100%|██████████| 4210/4210 [02:31<00:00, 27.88it/s, loss=0.79]



Epoch 1. Train Loss: 0.6314 | Val Accuracy: 0.7638



Epoch 2/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.92it/s, loss=0.363]



Epoch 2. Train Loss: 0.5348 | Val Accuracy: 0.7236



Epoch 3/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.93it/s, loss=0.466]



Epoch 3. Train Loss: 0.5166 | Val Accuracy: 0.7420



Epoch 4/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.93it/s, loss=0.385]



Epoch 4. Train Loss: 0.5130 | Val Accuracy: 0.8360



Epoch 5/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.96it/s, loss=0.422]



Epoch 5. Train Loss: 0.5068 | Val Accuracy: 0.8360



Epoch 6/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.86it/s, loss=0.895]



Epoch 6. Train Loss: 0.5014 | Val Accuracy: 0.8085



Epoch 7/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.84it/s, loss=0.334]



Epoch 7. Train Loss: 0.4972 | Val Accuracy: 0.8337



Epoch 8/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.92it/s, loss=0.396]



Epoch 8. Train Loss: 0.4947 | Val Accuracy: 0.8349



Epoch 9/60: 100%|██████████| 4210/4210 [02:37<00:00, 26.81it/s, loss=0.418]



Epoch 9. Train Loss: 0.4951 | Val Accuracy: 0.8314



Epoch 10/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.90it/s, loss=0.756]



Epoch 10. Train Loss: 0.4919 | Val Accuracy: 0.8475



Epoch 11/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.85it/s, loss=0.149]



Epoch 11. Train Loss: 0.4907 | Val Accuracy: 0.8268



Epoch 12/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.87it/s, loss=0.198]



Epoch 12. Train Loss: 0.4896 | Val Accuracy: 0.7970



Epoch 13/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.85it/s, loss=0.284]



Epoch 13. Train Loss: 0.4907 | Val Accuracy: 0.8326



Epoch 14/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.90it/s, loss=0.13]



Epoch 14. Train Loss: 0.4888 | Val Accuracy: 0.8085



Epoch 15/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.95it/s, loss=0.243]



Epoch 15. Train Loss: 0.4869 | Val Accuracy: 0.8372



Epoch 16/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.83it/s, loss=0.208]



Epoch 16. Train Loss: 0.4894 | Val Accuracy: 0.7947



Epoch 17/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.86it/s, loss=0.23]



Epoch 17. Train Loss: 0.4872 | Val Accuracy: 0.7947



Epoch 18/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.95it/s, loss=0.725]



Epoch 18. Train Loss: 0.4888 | Val Accuracy: 0.8303



Epoch 19/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.04it/s, loss=0.168]



Epoch 19. Train Loss: 0.4859 | Val Accuracy: 0.7993



Epoch 20/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.95it/s, loss=0.311]



Epoch 20. Train Loss: 0.4849 | Val Accuracy: 0.8188



Epoch 21/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.02it/s, loss=0.47]



Epoch 21. Train Loss: 0.4835 | Val Accuracy: 0.8463



Epoch 22/60: 100%|██████████| 4210/4210 [02:35<00:00, 26.99it/s, loss=0.312]



Epoch 22. Train Loss: 0.4857 | Val Accuracy: 0.8360



Epoch 23/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.95it/s, loss=0.301]



Epoch 23. Train Loss: 0.4862 | Val Accuracy: 0.7810



Epoch 24/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.03it/s, loss=0.855]



Epoch 24. Train Loss: 0.4880 | Val Accuracy: 0.8394



Epoch 25/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.02it/s, loss=0.122]



Epoch 25. Train Loss: 0.4842 | Val Accuracy: 0.8303



Epoch 26/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.90it/s, loss=0.476]



Epoch 26. Train Loss: 0.4843 | Val Accuracy: 0.8337



Epoch 27/60: 100%|██████████| 4210/4210 [02:35<00:00, 26.99it/s, loss=0.246]



Epoch 27. Train Loss: 0.4841 | Val Accuracy: 0.8360



Epoch 28/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.97it/s, loss=0.36]



Epoch 28. Train Loss: 0.4828 | Val Accuracy: 0.8291



Epoch 29/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.94it/s, loss=0.407]



Epoch 29. Train Loss: 0.4834 | Val Accuracy: 0.8291



Epoch 30/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.92it/s, loss=0.192]



Epoch 30. Train Loss: 0.4835 | Val Accuracy: 0.8452



Epoch 31/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.96it/s, loss=0.39]



Epoch 31. Train Loss: 0.4797 | Val Accuracy: 0.8394



Epoch 32/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.99it/s, loss=0.407]



Epoch 32. Train Loss: 0.4840 | Val Accuracy: 0.8142



Epoch 33/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.03it/s, loss=1.2]



Epoch 33. Train Loss: 0.4824 | Val Accuracy: 0.8234



Epoch 34/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.93it/s, loss=0.932]



Epoch 34. Train Loss: 0.4814 | Val Accuracy: 0.8417



Epoch 35/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.04it/s, loss=0.348]



Epoch 35. Train Loss: 0.4803 | Val Accuracy: 0.8406



Epoch 36/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.89it/s, loss=0.311]



Epoch 36. Train Loss: 0.4791 | Val Accuracy: 0.8108



Epoch 37/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.96it/s, loss=0.163]



Epoch 37. Train Loss: 0.4787 | Val Accuracy: 0.8486



Epoch 38/60: 100%|██████████| 4210/4210 [02:35<00:00, 26.99it/s, loss=0.425]



Epoch 38. Train Loss: 0.4800 | Val Accuracy: 0.8452



Epoch 39/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.97it/s, loss=0.584]



Epoch 39. Train Loss: 0.4765 | Val Accuracy: 0.8417



Epoch 40/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.93it/s, loss=0.633]



Epoch 40. Train Loss: 0.4798 | Val Accuracy: 0.8372



Epoch 41/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.04it/s, loss=0.406]



Epoch 41. Train Loss: 0.4807 | Val Accuracy: 0.8177



Epoch 42/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.00it/s, loss=0.236]



Epoch 42. Train Loss: 0.4781 | Val Accuracy: 0.8429



Epoch 43/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.96it/s, loss=0.334]



Epoch 43. Train Loss: 0.4782 | Val Accuracy: 0.8142



Epoch 44/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.89it/s, loss=0.918]



Epoch 44. Train Loss: 0.4756 | Val Accuracy: 0.8417



Epoch 45/60: 100%|██████████| 4210/4210 [02:37<00:00, 26.81it/s, loss=0.519]



Epoch 45. Train Loss: 0.4789 | Val Accuracy: 0.8200



Epoch 46/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.86it/s, loss=0.798]



Epoch 46. Train Loss: 0.4753 | Val Accuracy: 0.8360



Epoch 47/60: 100%|██████████| 4210/4210 [02:35<00:00, 27.01it/s, loss=0.314]



Epoch 47. Train Loss: 0.4769 | Val Accuracy: 0.8463



Epoch 48/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.94it/s, loss=0.568]



Epoch 48. Train Loss: 0.4768 | Val Accuracy: 0.8440



Epoch 49/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.84it/s, loss=0.491]



Epoch 49. Train Loss: 0.4761 | Val Accuracy: 0.8452



Epoch 50/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.87it/s, loss=0.264]



Epoch 50. Train Loss: 0.4771 | Val Accuracy: 0.8165



Epoch 51/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.84it/s, loss=0.192]



Epoch 51. Train Loss: 0.4730 | Val Accuracy: 0.8337



Epoch 52/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.88it/s, loss=0.757]



Epoch 52. Train Loss: 0.4772 | Val Accuracy: 0.8177



Epoch 53/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.95it/s, loss=0.525]



Epoch 53. Train Loss: 0.4734 | Val Accuracy: 0.8326



Epoch 54/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.85it/s, loss=0.138]



Epoch 54. Train Loss: 0.4750 | Val Accuracy: 0.8463



Epoch 55/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.88it/s, loss=0.522]



Epoch 55. Train Loss: 0.4746 | Val Accuracy: 0.8268



Epoch 56/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.95it/s, loss=0.345]



Epoch 56. Train Loss: 0.4730 | Val Accuracy: 0.8349



Epoch 57/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.91it/s, loss=0.731]



Epoch 57. Train Loss: 0.4734 | Val Accuracy: 0.8383



Epoch 58/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.87it/s, loss=0.192]



Epoch 58. Train Loss: 0.4735 | Val Accuracy: 0.8291



Epoch 59/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.86it/s, loss=0.951]



Epoch 59. Train Loss: 0.4738 | Val Accuracy: 0.8394



Epoch 60/60: 100%|██████████| 4210/4210 [02:36<00:00, 26.84it/s, loss=0.804]



Epoch 60. Train Loss: 0.4702 | Val Accuracy: 0.8314

Model Saved.
