# Fine-tune LLaMA-2 with SQuAD Dataset


## 1. Install and import necessary packages

In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/265.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/265.7 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.3/168.3 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.1/141.1 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m43.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.9/78.9 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90

In [2]:
!pip list |egrep 'accelerate|peft|bitsandbytes|transformers|trl'

accelerate                       0.25.0
bitsandbytes                     0.42.0
fastrlock                        0.8.2
peft                             0.7.1
transformers                     4.35.2
trl                              0.7.9


In [3]:
import os, torch
from trl import SFTTrainer
from peft import LoraConfig, PeftModel
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
    HfArgumentParser,
    pipeline,
)

## 2. Obtain the dataset (SQuAD)

In [4]:
import pandas as pd

# Download SQuAD and keep a handle on its train split; both working subsets
# below are carved out of that same split.
dataset = load_dataset("squad")
squad_train = dataset["train"]

# Rows 0-999 are used for fine-tuning, rows 1000-1349 for evaluation.
train = pd.DataFrame(squad_train.select(range(0, 1000)))
val = pd.DataFrame(squad_train.select(range(1000, 1350)))

# Preview the first five rows, skipping the first two columns.
train.iloc[:, 2:].head(5)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/7.83k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

Unnamed: 0,context,question,answers
0,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,"{'text': ['Saint Bernadette Soubirous'], 'answ..."
1,"Architecturally, the school has a Catholic cha...",What is in front of the Notre Dame Main Building?,"{'text': ['a copper statue of Christ'], 'answe..."
2,"Architecturally, the school has a Catholic cha...",The Basilica of the Sacred heart at Notre Dame...,"{'text': ['the Main Building'], 'answer_start'..."
3,"Architecturally, the school has a Catholic cha...",What is the Grotto at Notre Dame?,{'text': ['a Marian place of prayer and reflec...
4,"Architecturally, the school has a Catholic cha...",What sits on top of the Main Building at Notre...,{'text': ['a golden statue of the Virgin Mary'...


Unnamed: 0,context,question,answers
0,"Architecturally, the school has a Catholic cha...",To whom did the Virgin Mary allegedly appear i...,"{'text': ['Saint Bernadette Soubirous'], 'answ..."
1,"Architecturally, the school has a Catholic cha...",What is in front of the Notre Dame Main Building?,"{'text': ['a copper statue of Christ'], 'answe..."
2,"Architecturally, the school has a Catholic cha...",The Basilica of the Sacred heart at Notre Dame...,"{'text': ['the Main Building'], 'answer_start'..."
3,"Architecturally, the school has a Catholic cha...",What is the Grotto at Notre Dame?,{'text': ['a Marian place of prayer and reflec...
4,"Architecturally, the school has a Catholic cha...",What sits on top of the Main Building at Notre...,{'text': ['a golden statue of the Virgin Mary'...


### Transform the dataset into the LLaMA prompt format





```
<s>[INST]  ... Question ? [/INST] Answer... </s>
```
An example:
```
<s>[INST] How long does an American football match REALLY last, if you subtract all the downtime? [/INST] According to the Wall Street Journal, the ball is only in play for an average of 11 minutes during the typical NFL game, out of an average total game length of 3 hours and 12 minutes. </s>
```



In [7]:
# LLaMA template : <s>[INST]  ... Question ? [/INST] Answer... </s>
def format_squad_prompt(row):
    """Render one SQuAD row as a single LLaMA-2 instruction/answer string.

    Args:
        row: A DataFrame row exposing `context`, `question` and `answers`,
            where `answers` is a dict whose "text" entry is a list of
            answer strings.

    Returns:
        A string of the form
        "<s>[INST] Context: ... Question: ... [/INST] <answer> </s>".
    """
    # Only the first reference answer is used as the training target.
    answer = row.answers["text"][0]
    # Adjacent f-string literals are concatenated as written. The previous
    # backslash line continuations leaked the source indentation into every
    # prompt, and train/val ended up with different amounts of padding.
    return (
        f"<s>[INST] Context: {row.context} "
        f"Question: {row.question} [/INST] "
        f"{answer} </s>"
    )


# Apply the same template to both splits so they are formatted identically.
train["text"] = train.apply(format_squad_prompt, axis=1)
val["text"] = val.apply(format_squad_prompt, axis=1)

from datasets import Dataset

# SFTTrainer only needs the rendered text column.
train_dataset = Dataset.from_pandas(train[["text"]])
eval_dataset = Dataset.from_pandas(val[["text"]])

## 3. LoRA Configuration

In [7]:
# LoRA adapter settings handed to SFTTrainer through `peft_config`.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",  # adapters are attached for causal language modelling
    r=64,                   # rank of the low-rank update matrices
    lora_alpha=16,          # LoRA scaling parameter
    lora_dropout=0.1,       # dropout applied inside the LoRA layers
    bias="none",            # bias terms are left untouched
)

## 4. BitsAndBytes Configuration

In [8]:
# dtype used for computation on top of the 4-bit quantized weights.
compute_dtype = torch.float16

# 4-bit NF4 quantization without nested (double) quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

## 5. Accessing the LLaMA Checkpoint

1) LLaMA is a gated model, so you must first be granted access to it. Request access here: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf

2) Once access is granted, authenticate with the Hugging Face Hub: go to Hugging Face Settings > Access Tokens > New token (https://huggingface.co/settings/tokens) and create a new Read token. Pass this access token to `login` as shown below, and keep it out of shared or committed notebooks:

In [9]:
import os
from getpass import getpass

from huggingface_hub import login

# Never hardcode an access token in the notebook: anyone who can read the file
# can use it. Read it from the HF_TOKEN environment variable and fall back to
# an interactive, non-echoing prompt when the variable is not set.
# NOTE(review): the token that was previously committed in this cell should be
# revoked at https://huggingface.co/settings/tokens.
access_token_read = os.environ.get("HF_TOKEN") or getpass("Hugging Face read token: ")
login(token=access_token_read)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [10]:
# LLaMA model
model_name="meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0}
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Training with SFTTrainer

In [None]:
# Preview a few formatted prompts instead of dumping all 1000 training strings
# into the notebook output.
train_dataset["text"][:3]

### Run the training

In [12]:
# Set training parameters
training_arguments = TrainingArguments(
    output_dir="my_llama",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    evaluation_strategy="epoch",
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=True, # if you have A100 resource, you can set it
    lr_scheduler_type="linear"
)


# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()



Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/350 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 39.56 GiB of which 2.81 MiB is free. Process 65642 has 39.53 GiB memory in use. Of the allocated memory 37.61 GiB is allocated by PyTorch, and 1.41 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

## 6. Run the LLaMA model fine-tuned on the SQuAD dataset

In [None]:
# Sample context/question pair used to try out the fine-tuned model.
prompt='''
Context:Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary.
Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes".
Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection.
It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858.
 At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome),
 is a simple, modern stone statue of Mary.

 Question:To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?
 '''
# Expected answer:  Saint Bernadette Soubirous

# `max_new_tokens` bounds only the generated continuation. The previous
# `max_length=350` also counted the prompt tokens, so the room left for the
# answer shrank as the context grew.
squad_llama = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=128,
)
# Wrap the prompt in the same [INST] ... [/INST] markers used for training.
result = squad_llama(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])