## Fine-tuning Llama2-7b using QLoRA method

_______________________________________________________________________________________________________________________________

#### Installing Libraries

In [1]:
%%capture

!pip install -q huggingface_hub
!pip install -q -U trl transformers accelerate peft
!pip install -q -U datasets bitsandbytes einops wandb
!pip install git+https://github.com/huggingface/peft.git
!pip install git+https://github.com/huggingface/transformers.git

#### HuggingFace Login

In [2]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

#### Model Loading & Setting Parameters

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import SFTTrainer
import torch

model_name = "meta-llama/Llama-2-7b-chat-hf"

bnb_config = BitsAndBytesConfig(
 load_in_4bit=True,
 bnb_4bit_use_double_quant=True,
 bnb_4bit_quant_type="nf4",
 bnb_4bit_compute_dtype=torch.bfloat16
)

device_map = {"": 0}
foundation_model = AutoModelForCausalLM.from_pretrained(model_name,
 quantization_config=bnb_config,
 device_map=device_map,
 use_cache = False)



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

#### Loading the dataset from HuggingFace

In [5]:
from datasets import load_dataset

dataset_name = "Asad182/train_sentences"
dataset = load_dataset(dataset_name, split="train")

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.97M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [6]:
# Checking shape of the data

dataset.shape

(2000, 4)

#### Setting QLoRA parameters

In [8]:
import peft
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
 r=16, 
 lora_alpha=16, 
 lora_dropout=0.05, 
 bias="none", 
 task_type="CAUSAL_LM"
)

#### Output Directory

In [8]:
#Create a directory to contain the Model
import os
working_dir = './Fine_tuning_urdu'

output_directory = os.path.join(working_dir, "peft_lab_outputs")

#### Training Arguments

In [9]:
#Creating the TrainingArgs
import transformers
from transformers import TrainingArguments

training_args = TrainingArguments(
 output_dir=output_directory,
 auto_find_batch_size=True,
 learning_rate= 2e-4, 
 num_train_epochs=5
)

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

#### Creating Output Function

In [6]:
#this function returns the outputs from the model received, and inputs.

def get_outputs(model, inputs, max_new_tokens=256):
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.5, #Avoid repetition.
        early_stopping=False, #The model can stop before reach the max_length
        eos_token_id=tokenizer.eos_token_id,
 )
    return outputs

#### Model Training

In [10]:
trainer = SFTTrainer(
 model=foundation_model,
 args=training_args,
 train_dataset=dataset,
 peft_config = lora_config,
 dataset_text_field="text",
 tokenizer=tokenizer,
 data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [11]:
trainer.train()

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
500,0.5691
1000,0.4925
1500,0.4799
2000,0.4729
2500,0.4425
3000,0.4352
3500,0.4428
4000,0.4311
4500,0.393
5000,0.3975




TrainOutput(global_step=10000, training_loss=0.4036454681396484, metrics={'train_runtime': 35289.8796, 'train_samples_per_second': 0.283, 'train_steps_per_second': 0.283, 'total_flos': 1.3864437859995648e+17, 'train_loss': 0.4036454681396484, 'epoch': 5.0})

#### Saving the model

In [12]:
#Save the model.

peft_model_path = os.path.join(output_directory, f"qlora_model")

trainer.model.save_pretrained(peft_model_path)

### Model Testing Phase

In [7]:
import peft
from peft import AutoPeftModelForCausalLM, PeftConfig
import os
import torch

device_map = {"": 0}
working_dir = '/kaggle/input/fine-tuning-urdu/Fine_tuning_urdu'

output_directory = os.path.join(working_dir, "peft_lab_outputs")
peft_model_path = os.path.join(output_directory, f"qlora_model")

#Load the Model.
loaded_model = AutoPeftModelForCausalLM.from_pretrained(
 peft_model_path,
 torch_dtype=torch.bfloat16,
 is_trainable=False,
 load_in_4bit=True,
 device_map = 'auto')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

#### Loading sentences for testing

In [8]:
import pandas as pd

df_test = pd.read_csv('/kaggle/input/test-sentences/test_sentences.csv')

In [9]:
df_test.head()

Unnamed: 0,Instructions,Prompt,text,True Value
0,Add appropriate punctuations and sentence boun...,وہ پہلے ایک بہت ہی ہمدرد کردار کی حیثیت سے نہی...,<s>[INST] <<SYS>>Add appropriate punctuations ...,وہ پہلے ایک بہت ہی ہمدرد کردار کی حیثیت سے نہی...
1,Add appropriate punctuations and sentence boun...,میں نے اس سے پہلے کبھی نہیں سنا تھا اور نہ ہی ...,<s>[INST] <<SYS>>Add appropriate punctuations ...,میں نے اس سے پہلے کبھی نہیں سنا تھا اور نہ ہی ...
2,Add appropriate punctuations and sentence boun...,اس فلم کی خوبصورتی کا ثبوت ماں کی محبت کی طاقت...,<s>[INST] <<SYS>>Add appropriate punctuations ...,اس فلم کی خوبصورتی کا ثبوت ماں کی محبت کی طاقت...
3,Add appropriate punctuations and sentence boun...,اس فلم میں ایک ہندوستانی خاتون نندنی کی کہانی ...,<s>[INST] <<SYS>>Add appropriate punctuations ...,اس فلم میں ایک ہندوستانی خاتون، نندنی کی کہانی...
4,Add appropriate punctuations and sentence boun...,مقامات حیرت انگیز ہیں موسیقی حیرت انگیز ہے اور...,<s>[INST] <<SYS>>Add appropriate punctuations ...,مقامات حیرت انگیز ہیں، موسیقی حیرت انگیز ہے، ا...


#### Testing on the sentences in dataframe

In [10]:
# Loop through each row and apply the model

for index, row in df_test.iterrows():
    input_sentence = tokenizer(row['text'], return_tensors="pt").to('cuda')
    foundational_output_sentence = get_outputs(loaded_model, input_sentence)
    decoded_output = tokenizer.batch_decode(foundational_output_sentence, skip_special_tokens=True)
    df_test.at[index, 'output'] = decoded_output[0]

In [22]:
# Print the updated DataFrame
df_test.loc[0, 'output']

"[INST] <<SYS>>Add appropriate punctuations and sentence boundaries to the following Urdu text in input. Don't include any kind of html tags and don't truncate the output. Provide the actual input that has been asked. '،' '۔' These are the punctuations that need to be added<</SYS>>\n وہ پہلے ایک بہت ہی ہمدرد کردار کی حیثیت سے نہیں آسکتی ہے لیکن پوری فلم کو دیکھ کر آپ چاہتے ہیں کہ اس کی کامیابی ہوجائے [/INST] \nFollowing is the correct sentence: وہ پhlen أ earliest 4072 همDRRD کرดار کی حیظти سے نہیں آسکتی ہے، لیکن پوری フلم کو دیکھ کر آپ چاہتے ہیں كہ اس कي کاмаیابی ہوجائے۔ "

In [18]:
import pickle

with open('urdu_test_fine_tuned.pkl', 'wb') as file:
    pickle.dump(df_test, file)