In [None]:
!pip install peft evaluate datasets transformers BitsandBytes torch torchinfo accelerate

In [None]:
!pip install git+https://github.com/huggingface/accelerate

In [8]:
import os
import pandas as pd
import numpy as np
import warnings
import re
import string
import torch
from transformers import AutoTokenizer, pipeline, DistilBertForSequenceClassification, BitsAndBytesConfig
from warnings import filterwarnings
from transformers import TrainingArguments, Trainer, AutoModelForSequenceClassification
from datasets import Dataset
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig,TaskType
import evaluate
import accelerate
from torchinfo import summary

In [4]:
filterwarnings(action = "ignore")

In [55]:
# Load the pretrained checkpoint: `token` is its tokenizer and `model` is a
# two-class sequence classifier that the rest of the notebook fine-tunes.
LLM = "distilbert/distilbert-base-uncased"

# Label maps shared by the model config and the prediction pipelines.
label2id = {"Negative": 0, "Positive": 1}
id2label = {index: name for name, index in label2id.items()}

token = AutoTokenizer.from_pretrained(LLM)
model = DistilBertForSequenceClassification.from_pretrained(
    LLM,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [56]:
# Inspect the parameter counts of the pretrained model: roughly 67 million, all trainable at this point.
# `summary` is already imported from torchinfo in the notebook's import cell, so it is not re-imported here.
summary(model)

Layer (type:depth-idx)                                  Param #
DistilBertForSequenceClassification                     --
├─DistilBertModel: 1-1                                  --
│    └─Embeddings: 2-1                                  --
│    │    └─Embedding: 3-1                              23,440,896
│    │    └─Embedding: 3-2                              393,216
│    │    └─LayerNorm: 3-3                              1,536
│    │    └─Dropout: 3-4                                --
│    └─Transformer: 2-2                                 --
│    │    └─ModuleList: 3-5                             42,527,232
├─Linear: 1-2                                           590,592
├─Linear: 1-3                                           1,538
├─Dropout: 1-4                                          --
Total params: 66,955,010
Trainable params: 66,955,010
Non-trainable params: 0

In [57]:
#The model has approximately 67 million parameters, but we will not train all of them. Instead we will use PEFT (Parameter-Efficient Fine-Tuning),
#which updates only a small set of weights during fine-tuning. We will use the LoRA and QLoRA techniques here, which reduce the number of trainable parameters.

In [58]:
#For fine-tuning, we first load our final data with the Hugging Face Datasets library, because the data must be a Dataset object for fine-tuning.

In [6]:
# Read only the review text and its sentiment label from the CSV, then rename the
# columns to "text" and "labels", the names used by the tokenizer function and the Trainer.
Final_Data = Dataset.from_csv(
    "/content/drive/MyDrive/Colab Notebooks/LLM Projects/Sentiment_Analysis_Dataset.csv",
    usecols=["Reviews", "Sentiment"],
).rename_columns({"Reviews": "text", "Sentiment": "labels"})

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
# Shuffle with a fixed seed and split 80/20 into "train" and "test".
Final_Data = Final_Data.train_test_split(
    train_size=0.8,
    test_size=0.2,
    shuffle=True,
    seed=10,
)

In [60]:
# Our final dataset is ready as a DatasetDict holding the "train" and "test" splits.
Final_Data

DatasetDict({
    train: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 64723
    })
    test: Dataset({
        features: ['text', 'labels', 'input_ids', 'attention_mask'],
        num_rows: 16181
    })
})

In [9]:
## Tokenization helper applied to the dataset with `map`
def tokenizer(data):
    """Tokenize the "text" field of `data` with the notebook-level `token` tokenizer.

    Every sequence is padded to the tokenizer's maximum length and truncated if it
    is longer, and the encoding is requested as PyTorch tensors.

    Args:
        data: Mapping with a "text" entry holding the text(s) to encode.

    Returns:
        Whatever `token(...)` returns for those texts.
    """
    encoded = token(
        data["text"],
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    return encoded

In [10]:
## Tokenize both splits in batches; the encoded columns are added next to "text" and "labels".
Final_Data = Final_Data.map(
    tokenizer,
    batched=True,
)

Map:   0%|          | 0/64723 [00:00<?, ? examples/s]

Map:   0%|          | 0/16181 [00:00<?, ? examples/s]

In [11]:
  ## Creating evaluation function to analyse the performance of the model.
# Load the accuracy metric once at module level; compute_metrics reuses it on every evaluation.
accuracy_metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score a batch of evaluation predictions with the notebook-level accuracy metric.

    Args:
        eval_pred: Pair `(logits, labels)`; the predicted class for each row is the
            index of the largest logit along the last axis.

    Returns:
        The result of `accuracy_metric.compute` for the predicted classes vs. `labels`.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return accuracy_metric.compute(predictions=predicted_classes, references=references)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [12]:
# Baseline check before fine-tuning: a few short texts whose sentiment we already know,
# and a sentiment-analysis pipeline built on the pretrained model to classify them.
text = [
    "Your support team is useless",
    "I hate watching anime movies",
    "I love being in quiet place",
    "I hate this product don't buy better to go with something else",
]
predict = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=token,
    truncation=True,
)

In [24]:
## LoRA configuration for sequence classification: rank-32 adapters (alpha 32, dropout 0.1)
## are attached to the modules named "q_lin" and "v_lin", and the model is wrapped for PEFT training.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    target_modules=["q_lin", "v_lin"],
    r=32,
    lora_alpha=32,
    lora_dropout=0.1,
)
peft_model = get_peft_model(model, lora_config)

In [25]:
# Training arguments, grouped by concern.
# Each optimizer step covers 50 samples x 2 accumulation steps = 100 samples per device.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    # Checkpointing and evaluation both happen once per epoch.
    output_dir="C:\\Users\\Abhinav Khandelwal\\Desktop\\Machine Learning\\LLM Projects\\Sentiment Analysis\\Fine_Tuning Checkpoints",
    save_strategy="epoch",
    evaluation_strategy="epoch",
    logging_steps=3,
    # Batch sizes and training length.
    per_device_train_batch_size=50,
    per_device_eval_batch_size=50,
    gradient_accumulation_steps=2,
    num_train_epochs=15,
    # Optimizer and learning-rate schedule.
    optim="paged_adamw_32bit",
    learning_rate=1e-4,
    weight_decay=0.001,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    # Memory / speed options.
    fp16=True,
    gradient_checkpointing=True,
)

In [26]:
# Build the Trainer: the LoRA-wrapped model is trained on the "train" split and
# evaluated on the "test" split, with compute_metrics reporting accuracy.
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=Final_Data["train"],
    eval_dataset=Final_Data["test"],
    compute_metrics=compute_metrics,
)

In [27]:
# Run the fine-tuning loop. With the training arguments above, evaluation and
# checkpoint saving both happen at the end of every epoch.
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
0,0.3102,0.254249,0.894259
2,0.2399,0.23019,0.904394
4,0.2358,0.21184,0.911625
6,0.1917,0.211293,0.913911
8,0.1797,0.208809,0.916383
10,0.2186,0.20987,0.917249
12,0.2115,0.210845,0.915704
14,0.1466,0.210557,0.916507


TrainOutput(global_step=9705, training_loss=0.2059569572708399, metrics={'train_runtime': 18957.519, 'train_samples_per_second': 51.212, 'train_steps_per_second': 0.512, 'total_flos': 1.3203201450376397e+17, 'train_loss': 0.2059569572708399, 'epoch': 14.988416988416988})

In [1]:
#From the training log we can see that we achieved about 91.6% accuracy, which is reasonably good. In the last epoch there is a tendency towards overfitting,
#which could be addressed by increasing the weight decay parameter. Now we will save the model and test it on the same texts and on other texts.

In [None]:
# Persist the fine-tuned PEFT model to a local directory.
peft_model.save_pretrained(
    "C:\\Users\\Abhinav Khandelwal\\Desktop\\Machine Learning\\LLM Projects\\Sentiment Analysis\\Trained Model"
)

In [None]:
# Reload the fine-tuned model to check its performance.
# The directory below was written by `peft_model.save_pretrained`, i.e. it holds a PEFT adapter
# rather than a full model checkpoint. So the base classifier is rebuilt first (its name is read
# from the saved adapter config) and the adapter is then attached with `PeftModel.from_pretrained`.
id2label_FT = {0: 'Negative', 1:'Positive'}
label2id_FT = {'Negative':0, 'Positive':1}
adapter_dir_FT = "C:\\Users\\Abhinav Khandelwal\\Desktop\\Machine Learning\\LLM Projects\\Sentiment Analysis\\Trained Model"
adapter_config_FT = PeftConfig.from_pretrained(adapter_dir_FT)
base_model_FT = AutoModelForSequenceClassification.from_pretrained(
    adapter_config_FT.base_model_name_or_path,
    num_labels=2,
    id2label=id2label_FT,
    label2id=label2id_FT,
)
# NOTE(review): loading the base checkpoint newly initialises its classification-head layers
# (see the warning printed when the base model is first loaded), so the trained head must come
# from the adapter — confirm that every trained head layer is restored here.
fine_tune_model = PeftModel.from_pretrained(base_model_FT, adapter_dir_FT)
# Fold the LoRA weights into the base layers so the pipeline receives a plain
# sequence-classification model instead of the PEFT wrapper.
fine_tune_model = fine_tune_model.merge_and_unload()

In [40]:
# Sentiment-analysis pipeline backed by the reloaded fine-tuned model.
pred = pipeline(
    task="sentiment-analysis",
    model=fine_tune_model,
    tokenizer=token,
    truncation=True,
)

In [4]:
# The same sample texts that were used earlier with the pretrained model.
text = [
    "Your support team is useless",
    "I hate watching anime movies",
    "I love being in quiet place",
    "I hate this product don't buy better to go with something else",
]

In [14]:
#Prediction before fine-tuning
# Calling the pipeline on one text returns a list with one result dict, so the label is read
# from the call's result. Indexing the pipeline object itself (`predict[0]`) raises a TypeError.
for sentence in text:
    label = predict(sentence)[0]["label"]
    print(f"{sentence} ------>{label}")

Your support team is useless ------>Negative
I hate watching anime movies ------>Negative
I love being in quiet place ------>Negative
I hate this product don't buy better to go with something else ------>Negative


In [None]:
#Prediction after fine-tuning
# Calling the pipeline on one text returns a list with one result dict, so the label is read
# from the call's result. Indexing the pipeline object itself (`pred[0]`) raises a TypeError.
for sentence in text:
    label = pred(sentence)[0]["label"]
    print(f"{sentence} ------>{label}")

In [2]:
# From the above comparison we can conclude that the performance of the model has improved.