# Finetuning using LoRA for sentiment analysis

In [14]:
#importting necessary library
from datasets import load_dataset, DatasetDict, Dataset
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification, DataCollatorWithPadding,TrainingArguments,TrainerState,Trainer
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import evaluate
import torch
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [8]:
#Using the base model distilbert-base-uncased
model_checkpoint = 'distilbert-base-uncased'


In [11]:
#Defining the labels for mapping
id2label = {0:"Negative", 1:"Possitive"}
label2id = {"Negative":0, "Possitive":1}

In [15]:
model = AutoModelForSequenceClassification.from_pretrained(
model_checkpoint,num_labels=2,id2label=id2label,label2id=label2id).

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
#load the dataset 
dataset = load_dataset("shawhin/imdb-truncated")
dataset

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
})

In [18]:
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint,add_prefix_space=True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [19]:
def tokenize_function(inputs):
    text = inputs['text']
    
    tokenizer.truncation_side="left"
    tokenized_inputs = tokenizer(
    text,
    return_tensors="np",
    truncation=True,
    max_length=512)
    
    return tokenized_inputs

In [20]:
#If none exists then add pad token
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token':'[PAD]'})
    model.resize_token_embeddings(len(tokenizer))
    

In [21]:
tokenized_dataset = dataset.map(tokenize_function,batched=True)
tokenized_dataset

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
})

In [22]:
#data collatorto dinamically pad inputs in each batch during training \
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
data_collator

DataCollatorWithPadding(tokenizer=DistilBertTokenizerFast(name_or_path='distilbert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='left', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}, padding=True, max_length=None, pad_to_multiple_of=None, return_tensors='pt')

In [23]:
!nvidia-smi

Sat Jan 18 15:08:51 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 566.14                 Driver Version: 566.14         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce GTX 1650 Ti   WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   52C    P8              2W /   50W |      75MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [37]:
accuracy = evaluate.load('accuracy')

In [38]:
def metrics(p):
    predictions,labels=p
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy":accuracy.compute(predictions=predictions, references=labels)}

In [26]:
#list of inputs 
sentiment_examples = [ "Oh, great! Another meeting. Just what I needed.",
    "The movie was amazing, but the ending was terrible.",
    "I’m not sure if I like it or not.",
    "I just love waiting in long lines for hours!",
    "If this app worked better, I’d love it.",
    "It’s not the best thing I’ve ever seen, but I guess it’s okay.",
    "This is the worst movie ever made! It’s unbearable!",
    "I’m so excited to be stuck in traffic for an hour!",
    "The food was great, but the service was terrible.",
    "This is hands down the best pizza in the world."]

In [27]:
print("Predictions of the untrained models")
print("*----------------------------------*")
for sentiment in sentiment_examples:
    inputs = tokenizer.encode(sentiment,return_tensors="pt")
    logits = model(inputs).logits
    predictions = torch.argmax(logits)
    
    print(sentiment+" = " + id2label[predictions.tolist()])
    

Predictions of the untrained models
*----------------------------------*
Oh, great! Another meeting. Just what I needed. = Negative
The movie was amazing, but the ending was terrible. = Negative
I’m not sure if I like it or not. = Negative
I just love waiting in long lines for hours! = Possitive
If this app worked better, I’d love it. = Negative
It’s not the best thing I’ve ever seen, but I guess it’s okay. = Negative
This is the worst movie ever made! It’s unbearable! = Possitive
I’m so excited to be stuck in traffic for an hour! = Negative
The food was great, but the service was terrible. = Negative
This is hands down the best pizza in the world. = Negative


In [29]:
# now finetuning the base model using peft
peft_config = LoraConfig(task_type="SEQ_CLS",
                        r = 4,
                        lora_alpha=32,
                        lora_dropout = 0.01,
                        target_modules=['q_lin']
                        )

In [30]:
model = get_peft_model(model,peft_config=peft_config)
model.print_trainable_parameters()


trainable params: 628,994 || all params: 67,584,004 || trainable%: 0.9307


In [31]:
#defining hypterparameters
lr = 0.001
batch_size = 4
num_epochs = 10


In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [33]:
#define training arguments
training_args = TrainingArguments(
output_dir = model_checkpoint+"-lora-text-classification",
learning_rate=lr,
per_device_eval_batch_size=batch_size,
per_device_train_batch_size=batch_size,
num_train_epochs=num_epochs,
weight_decay=0.01,
evaluation_strategy="epoch",
save_strategy="epoch",
load_best_model_at_end=True,
)



In [34]:
model.to(device)

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0-5): 6 x TransformerBlock(
              (attention): MultiHeadSelfAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): lora.Linear(
                  (base_layer): Linear(in_features=768, out_features=768, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.01, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=76

In [35]:
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized_dataset['train'],
eval_dataset=tokenized_dataset['validation'],
tokenizer= tokenizer,
data_collator=data_collator,
compute_metrics=metrics
)

In [39]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.440025,{'accuracy': 0.881}
2,0.311600,0.558453,{'accuracy': 0.884}
3,0.311600,0.655431,{'accuracy': 0.886}
4,0.121900,0.83652,{'accuracy': 0.886}
5,0.121900,0.933128,{'accuracy': 0.876}
6,0.032100,1.073383,{'accuracy': 0.88}
7,0.032100,1.066068,{'accuracy': 0.888}
8,0.015900,1.097809,{'accuracy': 0.881}
9,0.015900,1.049472,{'accuracy': 0.887}
10,0.007600,1.051321,{'accuracy': 0.884}


Trainer is attempting to log a value of "{'accuracy': 0.881}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'accuracy': 0.884}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'accuracy': 0.886}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'accuracy': 0.886}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'accuracy': 0.876}" of type <class 'dict'> for key "eval/accuracy" as a scalar. This i

TrainOutput(global_step=2500, training_loss=0.09780876092910766, metrics={'train_runtime': 890.4965, 'train_samples_per_second': 11.23, 'train_steps_per_second': 2.807, 'total_flos': 1221911383617024.0, 'train_loss': 0.09780876092910766, 'epoch': 10.0})

In [41]:
#Now validaiton on the text lists
print(" Prediciton with trained model")

sentiment_example = ["The movie was absolutely amazing, with incredible performances from the cast!",
    "I couldn’t stand the movie, it was so boring and predictable.",
    "The visuals were stunning, but the storyline was a bit weak.",
    "A fun movie for the whole family, I really enjoyed it!",
    "I didn’t enjoy the movie at all, I thought it was a waste of time.",
    "The acting was decent, but the plot didn’t make much sense.",
    "It’s a classic! This movie will always be one of my favorites.",
    "I expected more from this film, it didn’t live up to the hype.",
    "The movie was fine, nothing extraordinary, but I didn’t hate it.",
    "What a disappointment! I was really looking forward to it, but it was terrible."]

for sentiment in sentiment_example:
    inputs = tokenizer.encode(sentiment, return_tensors="pt").to(device)
    logits = model(inputs).logits
    predictions = torch.max(logits,1).indices
    print(sentiment+" = "+id2label[predictions.tolist()[0]])
    

 Prediciton with trained model
Oh, great! Another meeting. Just what I needed. = Possitive
The movie was amazing, but the ending was terrible. = Negative
I’m not sure if I like it or not. = Possitive
I just love waiting in long lines for hours! = Possitive
If this app worked better, I’d love it. = Possitive
It’s not the best thing I’ve ever seen, but I guess it’s okay. = Possitive
This is the worst movie ever made! It’s unbearable! = Negative
I’m so excited to be stuck in traffic for an hour! = Possitive
The food was great, but the service was terrible. = Negative
This is hands down the best pizza in the world. = Possitive
