# Llamantino fine-tuning

## Imports

In [1]:
from huggingface_hub import login

# Set token in HF_TOKEN environment variable before executing
login()

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/luca/.cache/huggingface/token
Login successful


In [32]:
import os
import torch
from datasets import load_dataset
from datetime import datetime
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
    TrainingArguments
)
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer

In [3]:
# pre-trained model name
model_name = "swap-uniba/LLaMAntino-2-chat-7b-hf-UltraChat-ITA"

# dataset name
train_set_name = "sample_data/train_isear_it.csv"
eval_set_name = "sample_data/val_isear_it.csv"
test_set_name = "sample_data/test_isear_it.csv"

# 0 gioia
# 1 tristezza
# 2 rabbia
# 3 paura
# 4 vergogna
# 5 disgusto
# 6 colpevolezza
sentiments = ["gioia", "tristezza", "rabbia", "paura", "vergogna", "disgusto", "colpevolezza"]

# fine-tuned model name
new_model = "llama-2-7b-emotions-final"

## Dataset splits loading

In [4]:
# load dataset
train_set = load_dataset("csv", data_files=train_set_name, split="train", delimiter="|")
eval_set = load_dataset("csv", data_files=eval_set_name, split="train", delimiter="|")
test_set = load_dataset("csv", data_files=test_set_name, split="train", delimiter="|")

In [5]:
train_set[0]

{'text': 'Quando gli americani hanno piazzato i loro missili nucleari in Europa.',
 'label': 5}

In [6]:
eval_set[0]

{'text': 'Ho superato il primo semestre di università.', 'label': 0}

In [7]:
test_set[0]

{'text': 'Da bambino - essere mandato al negozio dalla mamma - comprare del cioccolato - essere ripreso dalla mamma.',
 'label': 4}

## Fine-tuning

Llamantino, as Llama 2 does, needs the prompts to be formatted in a specific way:
```
<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information.
<</SYS>>

Hi, how are you? [/INST] Good thanks!</s>
```

Here are the rules:
- A single prompt is composed of one or more sequences
- A sequence is a pair (user's input, model's output) and its start and its end are indicated with the tags `<s>` and `</s>`
- More sequences can be concatenated with `<s>...</s><s>...</s>`
- User's input is represented with the tag `[INST]` and `[/INST]`. Each input includes a system prompt and the user's request
- `<<SYS>>` and `<</SYS>>` are used to delimit the system prompt, i.e. the way to guide the model interpretation and the generation of the answers to user requests
- User's request is appended after the system prompt and after a new line space \n, without a particular tag
- Model's output is appended after the `[/INST]` tag and it ends with `</s>` tag


In [8]:
def format_instruction(example):
    instruction = "Sei un assistente disponibile, rispettoso e onesto. " \
         "Riconosci quale sentimento esprime la frase che ti viene posta"
    example["formatted_instruction"] = """<s>[INST] <<SYS>>
{}
<</SYS>>

{} [/INST] {}</s>""".format(instruction, example["text"], sentiments[example["label"]])
    return example

In [9]:
def format_instruction_test(example):
    instruction = "Sei un assistente disponibile, rispettoso e onesto. " \
         "Riconosci quale sentimento esprime la frase che ti viene posta"
    example["formatted_instruction"] = """<s>[INST] <<SYS>>
{}
<</SYS>>

{} [/INST]</s>""".format(instruction, example["text"])
    return example

In [11]:
train_set = train_set.map(format_instruction)
eval_set = eval_set.map(format_instruction)
test_set = test_set.map(format_instruction_test)

In [12]:
train_set[0]

{'text': 'Quando gli americani hanno piazzato i loro missili nucleari in Europa.',
 'label': 5,
 'formatted_instruction': '<s>[INST] <<SYS>>\nSei un assistente disponibile, rispettoso e onesto. Riconosci quale sentimento esprime la frase che ti viene posta\n<</SYS>>\n\nQuando gli americani hanno piazzato i loro missili nucleari in Europa. [/INST] disgusto</s>'}

In [13]:
eval_set[0]

{'text': 'Ho superato il primo semestre di università.',
 'label': 0,
 'formatted_instruction': '<s>[INST] <<SYS>>\nSei un assistente disponibile, rispettoso e onesto. Riconosci quale sentimento esprime la frase che ti viene posta\n<</SYS>>\n\nHo superato il primo semestre di università. [/INST] gioia</s>'}

In [14]:
test_set[0]

{'text': 'Da bambino - essere mandato al negozio dalla mamma - comprare del cioccolato - essere ripreso dalla mamma.',
 'label': 4,
 'formatted_instruction': '<s>[INST] <<SYS>>\nSei un assistente disponibile, rispettoso e onesto. Riconosci quale sentimento esprime la frase che ti viene posta\n<</SYS>>\n\nDa bambino - essere mandato al negozio dalla mamma - comprare del cioccolato - essere ripreso dalla mamma. [/INST]</s>'}

### Configure settings

Since LLMs have billions of parameters, a technique of parameter efficient tuning (PEFT) is needed, in particular, in order to execute the fine tuning on the available hardware, it is necessary to load the model quantized in 4 bit using QLoRa, that actually represents each parameter using 4 bits only, allowing a huge gain in terms of memory.

In [15]:
# load model
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= torch.float16,
    bnb_4bit_use_double_quant= False,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map={"": 0}
)

model = prepare_model_for_kbit_training(model)
model.config.use_cache = False
model.config.pretraining_tp = 1

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
# load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.add_eos_token = False
tokenizer.add_bos_token = False
tokenizer.add_bos_token, tokenizer.add_eos_token

(False, False)

In [17]:
peft_config = LoraConfig(
    # scale the adapter of lora_alpha/r
    lora_alpha= 8,
    lora_dropout= 0.1,
    r= 16,
    bias="none",
    task_type="CAUSAL_LM"
)

In [18]:
OUTPUT_DIR = "experiments"

In [19]:
training_arguments = TrainingArguments(
    num_train_epochs= 1,
    per_device_train_batch_size= 4,
    gradient_accumulation_steps= 2,
    optim = "paged_adamw_8bit",
    save_steps= 100,
    logging_steps= 20,
    learning_rate= 2e-4,
    weight_decay= 0.001,
    fp16= False,
    bf16= False,
    max_grad_norm= 0.3,
    max_steps= -1,
    warmup_ratio= 0.03,
    group_by_length= True,
    lr_scheduler_type= "cosine",
    report_to="tensorboard",
    output_dir=OUTPUT_DIR,
    evaluation_strategy="steps",
    eval_steps=100
)

In [20]:
# supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_set,
    eval_dataset=eval_set,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="formatted_instruction",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False
)

### Train

In [22]:
# start training
start = datetime.now()
trainer.train()
end = datetime.now()
print(f"Elapsed time for training: {end-start}")

Step,Training Loss,Validation Loss
100,0.4813,0.833545
200,0.4897,0.823417
300,0.4971,0.813513
400,0.5033,0.80892
500,0.4723,0.805948
600,0.4484,0.804265


Elapsed time for training: 2:44:01.724009


In [23]:
trainer.save_model(new_model)