# Task - Fine tuning Llama2 7B instruct model on twitter sentiment classification dataset - custom instruction added to the data 

# Importing libraries

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch
import pandas as pd
from datasets import load_dataset, Dataset
from trl import SFTTrainer

In [2]:
pwd

'/home/pragyan/Desktop/Notebooks/Assignment3/Notebook-Assignment3'

# Setting up the model and path directories

In [3]:
# setting up the model 
base_model = "NousResearch/Llama-2-7b-chat-hf"
fine_tuned_model = "llama2-finetunedSentimentClassificationOneInstruction"
output_dir_forArgs = "/home/pragyan/Desktop/Notebooks/Assignment3/FineTunedModels-Assignment3/TunedOnSentimentDataOnly/FromArgs"
output_dir_forSave = "/home/pragyan/Desktop/Notebooks/Assignment3/FineTunedModels-Assignment3/TunedOnSentimentDataOnly/FromSave"


# loading the dataset
dataset_name = "carblacac/twitter-sentiment-analysis"

In [4]:
training_dataset = load_dataset(dataset_name, split="train[0:7000]")
testing_dataset = load_dataset(dataset_name, split="test[-50:]")

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [5]:
print(len(training_dataset))
print(len(testing_dataset))

7000
50


In [6]:
print(training_dataset[0])
print(testing_dataset[0])

{'text': '@fa6ami86 so happy that salman won.  btw the 14sec clip is truely a teaser', 'feeling': 0}
{'text': '@WULFFBOY going to see The Roots in SF', 'feeling': 1}


# Creating the instruction dataset for the fine tuning process

In [7]:
instructionPrompt = "Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling."

In [8]:
# store all the instruct here 
training_instances = []

# loop to create the instruction training data for fine tuning 
for i in training_dataset:
    training_input = instructionPrompt + " ### Text: " + i['text'] + " ### Sentiment: " + str(i['feeling'])
    training_instances.append(training_input)

In [9]:
print(len(training_instances))
print(training_instances[10])

7000
Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: About to go to bed. Sleeping really late tomorrow!  I am so glad the Tigers won tonight!! ### Sentiment: 1


In [10]:
df = pd.DataFrame()
df['originalData'] = training_dataset
df['instructionInputForFineTuning'] = training_instances
makeDataset = Dataset.from_pandas(df)

In [11]:
df

Unnamed: 0,originalData,instructionInputForFineTuning
0,{'text': '@fa6ami86 so happy that salman won. ...,Your task is to classify the the text into one...
1,{'text': '@phantompoptart .......oops.... I gu...,Your task is to classify the the text into one...
2,{'text': '@bradleyjp decidedly undecided. Depe...,Your task is to classify the the text into one...
3,{'text': '@Mountgrace lol i know! its so frust...,Your task is to classify the the text into one...
4,{'text': '@kathystover Didn't go much of any w...,Your task is to classify the the text into one...
...,...,...
6995,{'text': 'Will someone please enlighten me as ...,Your task is to classify the the text into one...
6996,{'text': 'Ã?Ã¡Â»â¢i phÃÂ°ÃÂ¡ng ÃÂ¡n: mua ...,Your task is to classify the the text into one...
6997,"{'text': 'Not liking Setanta Sports Ah well, ...",Your task is to classify the the text into one...
6998,{'text': '@asafrommaui Awesome! Happy Aloha Fr...,Your task is to classify the the text into one...


# Loading the model and setting up the quantization

In [12]:
#setting up bitsandbites configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

In [13]:
# load the tokenizer 
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [14]:
# load the model 
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



# Setting up the LORA Configuration 

In [15]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense',
        'fc1',
        'fc2',
    ]
)
model = get_peft_model(model, peft_config)
model.get_memory_footprint()

4405092352

# Set the training parameters

In [16]:
training_arguments = TrainingArguments(
    output_dir= output_dir_forArgs,
    num_train_epochs=1,
    per_device_train_batch_size=3,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_strategy="steps",
    save_steps=5000,
    logging_steps=1000,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    disable_tqdm=False,
    report_to="none",
)

In [17]:
trainer = SFTTrainer(
    model=model,
    train_dataset=makeDataset,
    peft_config=peft_config,
    max_seq_length= 2048,
    dataset_text_field="instructionInputForFineTuning",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


# Training Begins

In [18]:
trainer.train()



Step,Training Loss
1000,1.1727
2000,1.1056


TrainOutput(global_step=2334, training_loss=1.1362032461288962, metrics={'train_runtime': 1466.6242, 'train_samples_per_second': 4.773, 'train_steps_per_second': 1.591, 'total_flos': 2.0396907182628864e+16, 'train_loss': 1.1362032461288962, 'epoch': 1.0})

# Saving the fine tuned model 

In [19]:
trainer.save_model(output_dir_forSave)
trainer.save_model(fine_tuned_model)
trainer.tokenizer.save_pretrained(fine_tuned_model)

('llama2-finetunedSentimentClassificationOneInstruction/tokenizer_config.json',
 'llama2-finetunedSentimentClassificationOneInstruction/special_tokens_map.json',
 'llama2-finetunedSentimentClassificationOneInstruction/tokenizer.json')

# Testing the new fine tuned model

In [28]:
# test case
j = testing_dataset[3]
testPropmpt = instructionPrompt + " ### Text: " + j['text'] + " ### Sentiment: "
testPropmptAll = instructionPrompt + " ### Text: " + j['text'] + " ### Sentiment: " + str(j['feeling'])

In [29]:
print(testPropmpt)
print("----------")
print(testPropmptAll)

Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad ### Sentiment: 
----------
Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad ### Sentiment: 0


In [30]:
pipe = pipeline(task="text-generation",
                model=base_model,
                tokenizer=tokenizer,
                max_new_tokens=1)
result = pipe(f"{testPropmpt}")
print(result[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad ### Sentiment: 0


In [31]:
pipe1 = pipeline(task="text-generation",
                model=fine_tuned_model,
                tokenizer=fine_tuned_model,
                max_new_tokens=1)
result1 = pipe1(f"{testPropmpt}")
print(result1[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad ### Sentiment: 0
