# Task - Fine tuning Llama2 7B instruct model on twitter sentiment classification dataset and alpaca dataset - Two instructions mixed - alpaca and custom - custom only used for the twitter dataset

# Import Libraries

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch
import pandas as pd
from datasets import load_dataset, Dataset
from trl import SFTTrainer

In [2]:
# setting up the model 
base_model = "NousResearch/Llama-2-7b-chat-hf"
fine_tuned_model = "llama2-finetunedSentimentClassificationTwoInstruction"
output_dir_forArgs = "/home/pragyan/Desktop/Notebooks/Assignment3/FineTunedModels-Assignment3/TunedOnSentimentAndAlpaca/FromArgs"
output_dir_forSave = "/home/pragyan/Desktop/Notebooks/Assignment3/FineTunedModels-Assignment3/TunedOnSentimentAndAlpaca/FromSave"


# loading the dataset
dataset_name_twitter = "carblacac/twitter-sentiment-analysis"
dataset_name_alpaca = "tatsu-lab/alpaca"

In [3]:
training_dataset_twitter = load_dataset(dataset_name_twitter, split="train[0:4000]")
testing_dataset_twitter = load_dataset(dataset_name_twitter, split="test[-50:]")

training_dataset_alpaca = load_dataset(dataset_name_alpaca, split="train[0:4000]")
testing_dataset_alpaca = load_dataset(dataset_name_alpaca, split="train[-50:]")

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [4]:
print(len(training_dataset_twitter))
print(len(testing_dataset_twitter))

print(len(training_dataset_alpaca))
print(len(testing_dataset_alpaca))

4000
50
4000
50


In [5]:
print(training_dataset_twitter[0])
print(testing_dataset_twitter[0])

print(training_dataset_alpaca[0])
print(testing_dataset_alpaca[0])

{'text': '@fa6ami86 so happy that salman won.  btw the 14sec clip is truely a teaser', 'feeling': 0}
{'text': '@WULFFBOY going to see The Roots in SF', 'feeling': 1}
{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nGive three tips for staying healthy.\n\n### Response:\n1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.'}
{'instruction': 'Create a Scratch program that generates a random number between 0 and 100.', 'input': '', 'output': "The following Scratch program c

# Creating the instruction dataset for the fine tuning process

In [6]:
instructionPrompt = "Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling."

In [7]:
# store all the instruct here 
training_original = []
training_instances = []

# loop to create the instruction training data for fine tuning 
for i in training_dataset_twitter:
    training_input = instructionPrompt + " ### Text: " + i['text'] + " ### Sentiment: " + str(i['feeling'])
    training_instances.append(training_input)
    training_original.append(i)

for j in training_dataset_alpaca:
    training_instances.append(j['text'])
    training_original.append(j)

In [8]:
print(len(training_instances))
print(training_instances[10])

8000
Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: About to go to bed. Sleeping really late tomorrow!  I am so glad the Tigers won tonight!! ### Sentiment: 1


In [9]:
df = pd.DataFrame()
df['originalData'] = training_original
df['instructionInputForFineTuning'] = training_instances
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
makeDataset = Dataset.from_pandas(df)

In [25]:
makeDataset

Dataset({
    features: ['originalData', 'instructionInputForFineTuning'],
    num_rows: 8000
})

In [10]:
df

Unnamed: 0,originalData,instructionInputForFineTuning
0,{'text': 'is playing the Aion beta !!! Man I'...,Your task is to classify the the text into one...
1,{'text': 'just did a search for #wolframalpha ...,Your task is to classify the the text into one...
2,"{'text': 'Cool! Nice result Also, can I just ...",Your task is to classify the the text into one...
3,{'text': '@PandaMayhem i think. did you have 1...,Your task is to classify the the text into one...
4,{'instruction': 'Write a blog post about how A...,Below is an instruction that describes a task....
...,...,...
7995,{'instruction': 'Give five examples of an extr...,Below is an instruction that describes a task....
7996,{'instruction': 'Make a list of books and auth...,Below is an instruction that describes a task....
7997,{'text': '@MiniBlueDragon Has come on a long w...,Your task is to classify the the text into one...
7998,{'instruction': 'Write a review of a restauran...,"Below is an instruction that describes a task,..."


# Loading the model and setting up the quantization

In [12]:
#setting up bitsandbites configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.bfloat16,
    bnb_4bit_use_double_quant= False,
)

In [13]:
# load the tokenizer 
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [14]:
# load the model 
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Setting up the LORA Configuration

In [14]:
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense',
        'fc1',
        'fc2',
    ]
)
model = get_peft_model(model, peft_config)
model.get_memory_footprint()

4405092352

# Set the training parameters

In [15]:
training_arguments = TrainingArguments(
    output_dir= output_dir_forArgs,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_strategy="steps",
    save_steps=5000,
    logging_steps=1000,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    disable_tqdm=False,
    report_to="none",
)

In [16]:
trainer = SFTTrainer(
    model=model,
    train_dataset=makeDataset,
    peft_config=peft_config,
    max_seq_length= 2048,
    dataset_text_field="instructionInputForFineTuning",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


# Training Begins

In [17]:
trainer.train()



Step,Training Loss
1000,1.0392
2000,0.9478


TrainOutput(global_step=2000, training_loss=0.9934906005859375, metrics={'train_runtime': 1937.8869, 'train_samples_per_second': 4.128, 'train_steps_per_second': 1.032, 'total_flos': 3.221637556292813e+16, 'train_loss': 0.9934906005859375, 'epoch': 1.0})

# Saving the fine tuned model

In [18]:
trainer.save_model(output_dir_forSave)
trainer.save_model(fine_tuned_model)
trainer.tokenizer.save_pretrained(fine_tuned_model)

('llama2-finetunedSentimentClassificationTwoInstruction/tokenizer_config.json',
 'llama2-finetunedSentimentClassificationTwoInstruction/special_tokens_map.json',
 'llama2-finetunedSentimentClassificationTwoInstruction/tokenizer.json')

# Testing the new fine tuned model

In [3]:
testPrompt = "Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad. ### Sentiment: "

In [15]:
pipe = pipeline(task="text-generation",
                model=base_model,
                tokenizer=tokenizer,
                max_new_tokens=1)
result = pipe(f"{testPrompt}")
print(result[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad. ### Sentiment: 0


In [11]:
pipe1 = pipeline(task="text-generation",
                model=fine_tuned_model,
                tokenizer=fine_tuned_model,
                max_new_tokens=1)
result1 = pipe1(f"{testPrompt}")
print(result1[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Your task is to classify the the text into one of two feelings. Each feeling has two possible values: 0 indicates the text has a negative sentiment, while 1 indicates a positive feeling. ### Text: I hate the V plotarc on True Blood  that poor chubby vampire guy getting staked was so sad. ### Sentiment: 0
