In [1]:
!pip install -U bitsandbytes>=0.46.1

In [2]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

In [3]:
# lora/qlora config

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [4]:
lora_config = LoraConfig(
    r = 8,
    lora_alpha = 16,
    target_modules = ['q_proj','v_proj'],
    lora_dropout = 0.05,
    bias = 'none',
    task_type = TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

In [5]:
# dataset
data = load_dataset("openai/gsm8k",'main', split='train[:200]')



README.md: 0.00B [00:00, ?B/s]

main/train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

main/test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

In [6]:
def tokenize(batch):
    texts = [
        f"### Instruction:\n{instruction}\n### Response:\n{out}"
        for instruction, out in zip(batch["question"], batch['answer'])
    ]

    tokens = tokenizer(
        texts,
        padding = 'max_length',
        max_length = 256,
        truncation = True,
        return_tensors = 'pt'
    )

    tokens['labels'] = tokens['input_ids'].clone()

    return tokens

In [8]:
tokenized_data = data.map(tokenize, batched=True, remove_columns=data.column_names)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [9]:
tokenized_data

Dataset({
    features: ['input_ids', 'attention_mask', 'labels'],
    num_rows: 200
})

In [14]:
# training config
training_args = TrainingArguments(
    output_dir = "./tinyllama-lora",
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 4,
    learning_rate = 2e-3,
    num_train_epochs = 50,
    fp16=True,
    logging_steps = 20,
    save_strategy = 'epoch',
    report_to = 'none',
    remove_unused_columns = False,
    label_names = ['labels']
)

In [15]:
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_data,
    processing_class = tokenizer
)

In [16]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


Step,Training Loss
20,1.506333
40,0.745345
60,0.615802
80,0.495743
100,0.379985
120,0.300926
140,0.232552
160,0.169673
180,0.136036
200,0.101027


TrainOutput(global_step=650, training_loss=0.1674267552449153, metrics={'train_runtime': 1274.9103, 'train_samples_per_second': 7.844, 'train_steps_per_second': 0.51, 'total_flos': 1.590741172224e+16, 'train_loss': 0.1674267552449153, 'epoch': 50.0})

In [17]:
# save model
model.save_pretrained("./tinyllama-fine-tuned-lora")
tokenizer.save_pretrained("./tinyllama-fine-tuned-lora")

('./tinyllama-fine-tuned-lora/tokenizer_config.json',
 './tinyllama-fine-tuned-lora/chat_template.jinja',
 './tinyllama-fine-tuned-lora/tokenizer.json')

In [18]:
# Evaluation
import os
import math
import torch
from torch.utils.data import DataLoader
from datasets import load_dataset

from transformers import AutoModelForCausalLM,AutoTokenizer,BitsAndBytesConfig,default_data_collator

from peft import PeftModel

In [19]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
adapter_path = "./tinyllama-fine-tuned-lora"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
).eval()

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

tmp_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tuned_model = PeftModel.from_pretrained(
    tmp_model,
    adapter_path
)
tuned_model = tuned_model.merge_and_unload().eval()

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/201 [00:00<?, ?it/s]



In [20]:
def tokenize(batch):
    texts = [
        f"### Instruction:\n{instruction}\n### Response:\n{out}"
        for instruction, out in zip(batch["question"], batch['answer'])
    ]

    tokens = tokenizer(
        texts,
        padding = 'max_length',
        max_length = 256,
        truncation = True,
        return_tensors = 'pt'
    )

    tokens['labels'] = tokens['input_ids'].clone()

    return tokens

In [43]:
eval_ds = load_dataset("openai/gsm8k",'main', split='train[200:300]')
eval_ds = eval_ds.map(tokenize, batched=True, remove_columns=['question','answer'])
eval_ds = eval_ds.with_format('torch')

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [44]:
eval_loader = DataLoader(
    eval_ds,
    batch_size = 8,
    collate_fn = default_data_collator
)

In [45]:
@torch.no_grad()
def compute_perplexity(model):
    losses = []

    for batch in eval_loader:
        batch = {k:v.to('cuda') for k, v in batch.items()}
        loss = model(**batch).loss
        losses.append(loss.item())

    return math.exp(sum(losses))/ len(losses)

In [46]:
# lower is better
print(f"Base Model Perplexity: {compute_perplexity(base_model):.2f}")
print(f"Tuned Model Perplexity: {compute_perplexity(tuned_model):.2f}")

Base Model Perplexity: 393165011551684504439471734784.00
Tuned Model Perplexity: 19741409479.78


In [47]:
import random

raw_data = load_dataset("openai/gsm8k",'main', split='train[200:300]')
refs = raw_data['answer']

def generate(model, instruction):
    token_ids = tokenizer(f'### Instruction:\n{instruction}\n### Response:\n', return_tensors='pt').input_ids.to('cuda')

    with torch.no_grad():
        out = model.generate(token_ids, max_new_tokens=256)

    return tokenizer.decode(out[0], skip_special_tokens=True)

In [48]:
raw_data['question'][0]

'Sansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?'

In [49]:
refs[0]

'Sansa earns $5 x 3 = $<<5*3=15>>15 every day by selling three 8-inch portraits.\nThe price of the 16-inch portrait is $5 x 2 = $<<5*2=10>>10 each.\nSo, she earns $10 x 5 = $<<10*5=50>>50 every day by selling five 16-inch portraits.\nHer total earnings is $50 + $15 = $<<50+15=65>>65 every day.\nTherefore, the total amount she earns after 3 days is $65 x 3 = $<<65*3=195>>195.\n#### 195'

In [50]:
print(generate(base_model, raw_data['question'][0]))

### Instruction:
Sansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?
### Response:
Sansa earns $100 per day, which means she earns $300 per week, and $1,200 per month. She sells 10 portraits per week, which means she earns $10 per week, and $50 per month. Therefore, she earns $100 per week, and $50 per month.


In [51]:
print(generate(tuned_model, raw_data['question'][0]))

### Instruction:
Sansa is a famous artist, she can draw a portrait and sell it according to its size. She sells an 8-inch portrait for $5, and a 16-inch portrait for twice the price of the 8-inch portrait. If she sells three 8-inch portraits and five 16-inch portraits per day, how many does she earns every 3 days?
### Response:
Sansa earns each 8-inch portrait $5*1=<<5*1=5>>5.
She earns each 16-inch portrait $2*2=<<2*2=4>>4.
She sells 3 8-inch portraits and 5 16-inch portraits per day, which gives her a day's earnings of $5+$4=<<5+4=9>>9.
She earns a total of $9*3=<<9*3=27>>27 every 3 days.
#### 27
