In [16]:
# Libraries
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, Trainer
import torch




# Hugging Face checkpoint name; the cells below load both the tokenizer and
# the sequence-classification model from it.
MODEL_NAME = "roberta-base"

In [4]:
# Base model: the pretrained encoder with a 2-label sequence-classification
# head, plus the matching tokenizer. The classification head is freshly
# initialised (see the warning in the cell output), so it must be trained
# before the model is useful for prediction.

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print(model)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
         

In [9]:

# Load the IMDB dataset and inspect the first training example.
dataset = load_dataset("imdb")
train_split = dataset["train"]
first_review = train_split[0]
print(train_split.column_names)
print(first_review["text"])
print(first_review["label"])

['text', 'label']
I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few an

In [10]:
# Tokenization
def tokenize_function(example):
    """Tokenize the ``"text"`` field of a batch of examples.

    Every sequence is truncated and padded to exactly 128 tokens so all
    examples end up with the same length.
    """
    encoding_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 128,
    }
    return tokenizer(example["text"], **encoding_options)

# batched=True passes batches of examples to tokenize_function instead of one at a time.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
print(tokenized_datasets["train"][0])

Map: 100%|██████████| 25000/25000 [00:20<00:00, 1223.77 examples/s]
Map: 100%|██████████| 25000/25000 [00:23<00:00, 1066.00 examples/s]
Map: 100%|██████████| 50000/50000 [01:13<00:00, 678.04 examples/s]

{'text': 'I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and ordinary denizens of Stockholm about their opinions on politics, she has sex with her drama teacher, classmates, and married men.<br /><br />What kills me about I AM CURIOUS-YELLOW is that 40 years ago, this was considered pornographic. Really, the sex and nudity scenes are few and far be




In [12]:
# Configure LoRA
lora_config = LoraConfig(
    task_type="SEQ_CLS",                # sequence classification
    target_modules=["query", "value"],  # apply LoRA only to Q and V in self-attention
    r=8,                                # low-rank dimension of the adapter
    lora_alpha=32,                      # scaling factor
    lora_dropout=0.1,                   # dropout for regularization
)


### LoRA Configuration Parameters


| Parameter | Description | How to Choose / Rule of Thumb |
|-----------|-------------|-------------------------------|
| **task_type** | Type of task the model is fine-tuned for (e.g., sequence classification, generation). | `"SEQ_CLS"` for classification, `"SEQ_2_SEQ_LM"` for generation, `"CAUSAL_LM"` for causal decoders. |
| **r** | Low-rank dimension of the LoRA matrices (controls capacity). | Small models: 4–16; Large models: 16–64. Higher → more expressive but more parameters. |
| **lora_alpha** | Scaling factor for the LoRA update $W + \frac{\alpha}{r} BA$ (the low-rank update is multiplied by `lora_alpha / r`). | Usually 1–4 × `r`. Too low → weak updates; too high → unstable training. |
| **lora_dropout** | Dropout applied to the LoRA module for regularization. | Small datasets: 0.1–0.2; Large datasets: 0–0.1. Prevents overfitting. |
| **target_modules** | Specifies which layers are modified with LoRA. | Common: `["query", "value"]` for attention. Can include feed-forward (`"dense"`) or the shorthand `"all-linear"` to target every linear layer except the output layer. |
| **fan_in_fan_out** | Adjusts matrix orientation; set to `True` when the target layer stores its weight as (fan_in, fan_out), as GPT-2's `Conv1D` does. | Usually left as default unless the model needs it. |
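| **merge_weights** | Whether to merge LoRA weights into the base model after training. In recent PEFT versions this is not a `LoraConfig` argument; call `merge_and_unload()` on the trained model instead. | Merge after fine-tuning to drop the separate adapter matrices and their extra inference overhead. |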
| **bias** | Which bias terms are trained alongside the LoRA matrices: `"none"`, `"all"`, or `"lora_only"`. | Usually keep the default `"none"` (biases unchanged). |


In [None]:
# Wrap the base model with the LoRA adapters described by lora_config,
# then report how many parameters will actually be trained.
model_lora = get_peft_model(model=model, peft_config=lora_config)
model_lora.print_trainable_parameters()

trainable params: 887,042 || all params: 125,534,212 || trainable%: 0.7066


In [15]:
# List every submodule whose qualified name mentions "lora" (case-insensitive)
# to see where the adapters were inserted.
lora_submodules = (
    (qualified_name, submodule)
    for qualified_name, submodule in model_lora.named_modules()
    if "lora" in qualified_name.lower()
)
for qualified_name, submodule in lora_submodules:
    print(qualified_name, submodule)


base_model.model.roberta.encoder.layer.0.attention.self.query.lora_dropout ModuleDict(
  (default): Dropout(p=0.1, inplace=False)
)
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_dropout.default Dropout(p=0.1, inplace=False)
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_A ModuleDict(
  (default): Linear(in_features=768, out_features=8, bias=False)
)
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_A.default Linear(in_features=768, out_features=8, bias=False)
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_B ModuleDict(
  (default): Linear(in_features=8, out_features=768, bias=False)
)
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_B.default Linear(in_features=8, out_features=768, bias=False)
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_embedding_A ParameterDict()
base_model.model.roberta.encoder.layer.0.attention.self.query.lora_embedding_B ParameterDict()
base_model.

In [18]:
# Training arguments

training_args = TrainingArguments(
    # Where checkpoints go and how many to keep
    output_dir="./results_lora",
    save_total_limit=2,
    report_to="none",
    # Evaluate, checkpoint and log once per epoch
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    # Optimisation
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Best-checkpoint selection: "f1" must be a key returned by the
    # Trainer's compute_metrics function for this to work.
    load_best_model_at_end=True,
    metric_for_best_model="f1",
)


In [None]:
# Define the columns the model expects and return them as PyTorch tensors.
model_columns = ["input_ids", "attention_mask", "label"]
tokenized_datasets.set_format(type="torch", columns=model_columns)
train_dataset, eval_dataset = tokenized_datasets["train"], tokenized_datasets["test"]


In [20]:
def compute_metrics(eval_pred):
    """Compute accuracy and binary F1 for an evaluation pass.

    The training arguments select the best checkpoint by ``"f1"``, so the
    Trainer needs a metrics function that returns that key; without one the
    metric is never produced and best-model selection fails.

    Args:
        eval_pred: object exposing ``predictions`` (logits, or a tuple whose
            first item is the logits) and ``label_ids`` as arrays.
            Assumes labels are 0/1 with 1 as the positive class — confirm
            if the dataset changes.

    Returns:
        dict with ``"accuracy"`` and ``"f1"`` as Python floats.
    """
    logits, labels = eval_pred.predictions, eval_pred.label_ids
    if isinstance(logits, tuple):
        # Some models return extra outputs; the logits come first.
        logits = logits[0]
    preds = logits.argmax(axis=-1)

    true_pos = int(((preds == 1) & (labels == 1)).sum())
    false_pos = int(((preds == 1) & (labels == 0)).sum())
    false_neg = int(((preds == 0) & (labels == 1)).sum())

    # F1 = 2TP / (2TP + FP + FN); defined as 0 when there are no positives at all.
    denominator = 2 * true_pos + false_pos + false_neg
    f1 = 2 * true_pos / denominator if denominator else 0.0
    accuracy = float((preds == labels).mean())
    return {"accuracy": accuracy, "f1": f1}


# Build the Trainer around the LoRA-wrapped model.
trainer = Trainer(
    model=model_lora,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    # `tokenizer=` is deprecated in Trainer; `processing_class=` is its replacement.
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)


  trainer = Trainer(


In [21]:
# Start fine-tuning with the Trainer built in the previous cell.
# NOTE(review): the recorded output for this cell ends in a KeyboardInterrupt,
# so no completed training run is shown here.
trainer.train()




Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 