# Training a Model similar to ChatGPT using Alpaca-loca

The notebook from my blogpost: https://armandolivares.tech/

### Install the requires libraries

In [1]:

!pip -qq install datasets
!pip -qq install bitsandbytes==0.37.2
!pip -qq install six==1.16.0
!pip -qq install -q datasets loralib sentencepiece
!pip -qq install -q git+https://github.com/zphang/transformers@c3dc391
!pip -qq install -q git+https://github.com/huggingface/peft.git
!pip -qq install huggingface_hub
!pip -qq install ipywidgets


In [2]:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoTokenizer, AutoConfig, LLaMAForCausalLM, LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict
     


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /home/usergpu/.local/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)


Load the translated data from the json file:

In [3]:
from datasets import load_dataset, Dataset

data = load_dataset("json", data_files="alpaca_data_translated.json")

data = data.shuffle()
     

Found cached dataset json (/home/usergpu/.cache/huggingface/datasets/json/default-3306e0878942785e/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/1 [00:00<?, ?it/s]

Let's check the data

In [4]:
data

DatasetDict({
    train: Dataset({
        features: ['instruction', 'output', 'input'],
        num_rows: 52002
    })
})

And check a sample:

In [5]:
data["train"][0]

{'instruction': '¿Cuál es el precio promedio de un galón de gasolina regular sin plomo en los Estados Unidos?',
 'output': 'A diciembre de 2020, el precio promedio de un galón de gasolina regular sin plomo en los Estados Unidos es de $2,51.',
 'input': ''}

Now let's set some model's parameters 

In [6]:
MICRO_BATCH_SIZE = 12  # use 4-5 
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 3  # 
LEARNING_RATE = 3e-4  # the Karpathy constant
CUTOFF_LEN = 256  # 256 accounts for about 96% of the data
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
VAL_SET_SIZE=2000

Split data set into train and validation data.

In [7]:

train_val = data["train"].train_test_split(
    test_size=VAL_SET_SIZE, shuffle=True, seed=42
)
train_data = train_val["train"]
val_data = train_val["test"]


# The model

In [8]:
model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)
tokenizer = LLaMATokenizer.from_pretrained(
    "decapoda-research/llama-7b-hf", add_eos_token=True
)

model = prepare_model_for_int8_training(model)

config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token




Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [9]:

def generate_prompt(data_point):

    if data_point["input"]:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
                ### Instruction:
                {data_point["instruction"]}
                ### Input:
                {data_point["input"]}
                ### Response:
                {data_point["output"]}"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
                ### Instruction:
                {data_point["instruction"]}
                ### Response:
                {data_point["output"]}"""


def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN + 1,
        padding="max_length",
    )
    return {
        "input_ids": result["input_ids"][:-1],
        "attention_mask": result["attention_mask"][:-1],
    }


train_data = train_data.shuffle().map(lambda x: tokenize(generate_prompt(x)))
val_data = val_data.shuffle().map(lambda x: tokenize(generate_prompt(x)))


Map:   0%|          | 0/50002 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [10]:
world_size = int(os.environ.get('WORLD_SIZE', 1))
ddp = world_size != 1

In [11]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        warmup_steps=100,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        evaluation_strategy="steps",
        save_strategy="steps",
        eval_steps=200,
        save_steps=200,
        output_dir="spa-lora-alpaca",
        save_total_limit=3,
        load_best_model_at_end=True,
        ddp_find_unused_parameters=False if ddp else None,
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)


## Start Training 

In [12]:
model.config.use_cache = False
trainer.train()




Step,Training Loss,Validation Loss
200,0.9694,0.966256
400,0.9385,0.942444
600,0.9339,0.931122
800,0.9228,0.923243
1000,0.8958,0.919768
1200,0.9073,0.916304


TrainOutput(global_step=1248, training_loss=0.9616337785354028, metrics={'train_runtime': 21243.7431, 'train_samples_per_second': 7.061, 'train_steps_per_second': 0.059, 'total_flos': 1.522526532277371e+18, 'train_loss': 0.9616337785354028, 'epoch': 3.0})

Save the model to disk:

In [13]:
model.save_pretrained("spa-lora-alpaca")

# Evaluation

In [14]:

from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig

In [15]:
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model_custom = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)

model_custom = PeftModel.from_pretrained(model_custom, "spa-lora-alpaca")



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [16]:
                  


def generate_instruction_prompt(instruction, input=None):

    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
                ### Instruction:
                {instruction}
                ### Input:
                {input}
                ### Response:"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
                ### Instruction:
                {instruction}
                ### Response:"""

In [17]:
generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    num_beams=4,
)

def evaluate(model_aaa, instruction, input=None):
    prompt = generate_instruction_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model_aaa.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    for s in generation_output.sequences:
        output = tokenizer.decode(s)
        print("Response:", output.split("### Response:")[1].strip())

# Time for text generation

### Write me a little story:

In [18]:
evaluate(model_custom,"Crea una historia sobre un heroe llamado Jones que vivia en las montañas")

Response: Jones vivía en las montañas. Era un hombre fuerte y valiente que siempre estaba dispuesto a ayudar a los demás. Un día, se encontró con un grupo de bandidos que estaban intentando robar un tesoro. Jones se enfrentó a los bandidos y los derrotó, salvando el tesoro y protegiendo a la gente de las montañas.


### Give me some info:

In [19]:
evaluate(model_custom,"Que es la diabetes?")

Response: La diabetes es un trastorno metabólico causado por una deficiencia de insulina o resistencia a la insulina. La insulina es un hormona producida por el páncreas que ayuda a regular los niveles de azúcar en la sangre. La diabetes se caracteriza por altos niveles de azúcar en la sangre debido a la falta de insulina o la resistencia a la insulina.


### Give me some advices:

In [20]:
evaluate(model_custom,"Enumera 3 consejos de vida")

Response: 1. Asegúrese de mantenerse activo físicamente.
2. Asegúrese de tomar descansos regulares.
3. Asegúrese de mantenerse en contacto con amigos y familiares.


### What about writing code?

In [21]:
evaluate(model_custom,"Escribe una function en python para revisar la longitud de un string")

Response: def get_string_length(str):
    volver len(str)


In [22]:
evaluate(model_custom,"Quien fue napoleon?")

Response: Napoleón Bonaparte fue un general y emperador francés.


In [23]:
evaluate(model_custom, "clafifica este tweet en positivo, negativo o neutro: Se ha convertido en el jugador más joven en debutar en LaLiga como azulgrana")

Response: Negativo


### These aren't bad at all!

## Display Model Info

In [24]:
# Get all of the model's parameters as a list of tuples.
params = list(model_custom.named_parameters())

print('The LLM model has {:} different named parameters.\n'.format(len(params)))

print('==== Embedding Layer ====\n')

for p in params[0:2]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print('\n==== First Transformer ====\n')

for p in params[2:14]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

print('\n==== Output Layer ====\n')

for p in params[-2:]:
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

The LLM model has 419 different named parameters.

==== Embedding Layer ====

base_model.model.model.embed_tokens.weight              (32000, 4096)
base_model.model.model.layers.0.self_attn.q_proj.weight (4096, 4096)

==== First Transformer ====

base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight    (8, 4096)
base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight    (4096, 8)
base_model.model.model.layers.0.self_attn.k_proj.weight (4096, 4096)
base_model.model.model.layers.0.self_attn.v_proj.weight (4096, 4096)
base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight    (8, 4096)
base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight    (4096, 8)
base_model.model.model.layers.0.self_attn.o_proj.weight (4096, 4096)
base_model.model.model.layers.0.mlp.gate_proj.weight    (11008, 4096)
base_model.model.model.layers.0.mlp.down_proj.weight    (4096, 11008)
base_model.model.model.layers.0.mlp.up_proj.weight      (11008, 4096)


## Upload to Hugginface

In [51]:
import os
from huggingface_hub import login

login(token="your_token")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid.
Your token has been saved to /home/usergpu/.cache/huggingface/token
Login successful


In [52]:
model_custom.push_to_hub("Armandoliv/spa-alpaca-lora")

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]
adapter_model.bin:   0%|          | 0.00/16.8M [00:00<?, ?B/s][A
adapter_model.bin:   0%|          | 8.19k/16.8M [00:00<09:18, 30.1kB/s][A
adapter_model.bin:   1%|          | 180k/16.8M [00:00<00:33, 500kB/s]  [A
adapter_model.bin:   4%|▍         | 745k/16.8M [00:00<00:09, 1.68MB/s][A
adapter_model.bin:  11%|█▏        | 1.90M/16.8M [00:00<00:03, 3.99MB/s][A
adapter_model.bin:  28%|██▊       | 4.66M/16.8M [00:00<00:01, 7.98MB/s][A
adapter_model.bin:  57%|█████▋    | 9.63M/16.8M [00:01<00:00, 17.4MB/s][A
adapter_model.bin:  74%|███████▍  | 12.4M/16.8M [00:01<00:00, 19.1MB/s][A
adapter_model.bin: 100%|██████████| 16.8M/16.8M [00:01<00:00, 10.6MB/s][A
Upload 1 LFS files: 100%|██████████| 1/1 [00:01<00:00,  1.90s/it]


CommitInfo(commit_url='https://huggingface.co/Armandoliv/spa-alpaca-lora/commit/ff954c99c1d32431ff64f78c40a7a96ee760c204', commit_message='Upload model', commit_description='', oid='ff954c99c1d32431ff64f78c40a7a96ee760c204', pr_url=None, pr_revision=None, pr_num=None)

### Now, you can use the model directly from huggingface:

In [54]:
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model_custom = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)

model_custom = PeftModel.from_pretrained(model_custom, "Armandoliv/spa-alpaca-lora")

Loading checkpoint shards: 100%|██████████| 33/33 [00:15<00:00,  2.16it/s]
Downloading (…)/adapter_config.json: 100%|██████████| 350/350 [00:00<00:00, 33.9kB/s]
Downloading adapter_model.bin: 100%|██████████| 16.8M/16.8M [00:01<00:00, 16.6MB/s]


In [56]:
evaluate(model_custom,"Crea una historia sobre un heroe llamado Jones que vivia en las montañas azules")

Response: Jones vivía en las montañas azules, una hermosa región de montañas y bosques. Era un valiente guerrero que siempre estaba dispuesto a ayudar a quienes lo necesitaban. Un día, Jones se encontró con una criatura misteriosa que le pidió que le ayudara a salir de las montañas azules. Jones aceptó la misión y se embarcó en una aventura para ayudar a la criatura a salir de las montañas azules. Durante su viaje, Jones se encontró con muchos peligros y desafíos, pero finalmente logró ayudar a la criatura a salir de las montañas azules.
