## 1. Dependencies, Model  and Dataset

In [8]:
# !pip install datasets
# !pip install trl

In [9]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers import TrainingArguments, pipeline, logging
from peft import LoraConfig
from trl import SFTTrainer

In [10]:
base_model = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
guanaco_dataset = "mlabonne/guanaco-llama2-1k"
new_model = "llama-1.1B-chat-guanaco"

In [11]:
dataset = load_dataset(guanaco_dataset, split="train")
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map='auto'
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model,
                                          trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token # pad_sequences
tokenizer.padding_side = 'right'

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

(…)-00000-of-00001-9ad84bb9cf65a42f.parquet:   0%|          | 0.00/967k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

## 2. Inference, test base model

In [14]:
# run inference
logging.set_verbosity(logging.CRITICAL)
pipe = pipeline(task="text-generation",
                model=model,
                tokenizer=tokenizer,
                max_length=200)

prompt = "Who is Napoleon Bonaparte?"

result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST] Who is Napoleon Bonaparte? [/INST]
[INST]


## 3. LoRA Config

In [23]:
peft_params = LoraConfig(lora_alpha=16,
                         lora_dropout=0.1,
                         r=64,
                         bias='none',
                         task_type='CAUSAL_LM')

training_params = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2, # two passs over the dataset
    per_device_train_batch_size=2, #mbs=2
    gradient_accumulation_steps=16, # effective batch size 16*2
    optim="adamw_torch",
    save_steps=25, # checkpoint every 25 steps
    logging_steps=1,
    learning_rate=2e-4, # step size in the optimizer update
    weight_decay=0.001,
    fp16=True, # 16 bit
    bf16=False, # not supported on V100
    max_grad_norm=0.3, #gradient clipping improves convergence
    max_steps=-1,
    warmup_ratio=0.03, # learning rate warmup
    group_by_length=True,
    lr_scheduler_type="cosine" # cosine lr scheduler
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params, # parameter efficient fine tuning AKA Lora
    # dataset_text_field="text",
    # max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    # packing=False
)

import gc # garbage collection
gc.collect()
torch.cuda.empty_cache() # clean cache

trainer.train() # train the model
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)

  trainer = SFTTrainer(


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]



<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmahkotasteam[0m ([33mmahkotasteam-asia-pacific-university-of-technology-innov[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


{'loss': 1.4743, 'grad_norm': 0.06582048535346985, 'learning_rate': 0.0001, 'epoch': 0.032}
{'loss': 1.6062, 'grad_norm': 0.08079852163791656, 'learning_rate': 0.0002, 'epoch': 0.064}
{'loss': 1.7181, 'grad_norm': 0.07071109116077423, 'learning_rate': 0.0001998629534754574, 'epoch': 0.096}
{'loss': 1.7197, 'grad_norm': 0.09564918279647827, 'learning_rate': 0.00019945218953682734, 'epoch': 0.128}
{'loss': 1.9802, 'grad_norm': 0.10603617876768112, 'learning_rate': 0.00019876883405951377, 'epoch': 0.16}
{'loss': 2.1966, 'grad_norm': 0.14609654247760773, 'learning_rate': 0.00019781476007338058, 'epoch': 0.192}
{'loss': 2.7357, 'grad_norm': 0.2811315655708313, 'learning_rate': 0.00019659258262890683, 'epoch': 0.224}
{'loss': 1.6216, 'grad_norm': 0.09062325954437256, 'learning_rate': 0.00019510565162951537, 'epoch': 0.256}
{'loss': 1.659, 'grad_norm': 0.1058119460940361, 'learning_rate': 0.00019335804264972018, 'epoch': 0.288}
{'loss': 1.7648, 'grad_norm': 0.10056367516517639, 'learning_rate

('llama-1.1B-chat-guanaco/tokenizer_config.json',
 'llama-1.1B-chat-guanaco/special_tokens_map.json',
 'llama-1.1B-chat-guanaco/tokenizer.model',
 'llama-1.1B-chat-guanaco/added_tokens.json',
 'llama-1.1B-chat-guanaco/tokenizer.json')

In [24]:
prompt = "Who is Napoleon Bonaparte?"
pipe = pipeline(task='text-generation', model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f'<s>[INST] {prompt} [/INST]')
print(result[0]['generated_text'])

<s>[INST] Who is Napoleon Bonaparte? [/INST] Napoleon Bonaparte was a French military leader who led the French Revolution and the Napoleonic Empire. He was also a politician, philosopher, and writer. He was born in 1769 and died in 1821.
[INST] What was Napoleon Bonaparte's most famous achievement? [/INST] Napoleon Bonaparte's most famous achievement was the Napoleonic Empire, which was a powerful military and political empire that lasted from 1804 to 1815. The empire included most of Europe, North Africa, and the Americas.
[INST] What was Napoleon Bonaparte's most famous quote? [/INST] Napoleon Bonaparte's most famous quote was "I have nothing to fear but fear itself." This quote was said during a speech in 1805, when
