<a href="https://colab.research.google.com/github/G0nkly/pytorch_sandbox/blob/main/gpts/finetuning/TinyLlama_gsm8k_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

In [3]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

In [4]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)

In [5]:
data = load_dataset("openai/gsm8k", "main", split="train[:200]")

Generating train split: 100%|██████████████████████████████████████████████████████████| 7473/7473 [00:00<00:00, 646695.42 examples/s]
Generating test split: 100%|███████████████████████████████████████████████████████████| 1319/1319 [00:00<00:00, 385874.80 examples/s]


In [7]:
def tokenize(batch):
  texts = [
      f"### Instruction:\n{instruction}\n### Response:\n{out}"
      for instruction, out in zip(batch['question'], batch['answer'])
  ]

  tokens = tokenizer(
      texts,
      padding="max_length",
      max_length=256,
      truncation=True,
      return_tensors="pt"
  )

  tokens["labels"] = tokens["input_ids"].clone()

  return tokens

In [8]:
tokenized_data = data.map(tokenize, batched=True, remove_columns=data.column_names)

Map: 100%|█████████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 3031.41 examples/s]


In [9]:
training_args = TrainingArguments(
  output_dir="./tinyllama-math-lora-tutorial",
  per_device_train_batch_size=8,
  gradient_accumulation_steps=8,
  learning_rate=1e-3,
  num_train_epochs=50,
  fp16=True,
  logging_steps=20,
  save_strategy="epoch",
  report_to="none",
  remove_unused_columns=False,
  label_names=["labels"]
)

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data,
    processing_class=tokenizer
)

In [11]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


Step,Training Loss
20,1.6568
40,0.7972
60,0.6652
80,0.5767
100,0.4889
120,0.3959
140,0.3373
160,0.2833
180,0.2437
200,0.2244


TrainOutput(global_step=200, training_loss=0.5669354009628296, metrics={'train_runtime': 896.7386, 'train_samples_per_second': 11.152, 'train_steps_per_second': 0.223, 'total_flos': 1.590741172224e+16, 'train_loss': 0.5669354009628296, 'epoch': 50.0})

In [12]:
model.save_pretrained("./tinyllama-lora-tuned-adapther-math")
tokenizer.save_pretrained("./tinyllama-lora-tuned-adapther-math")

('./tinyllama-lora-tuned-adapther-math/tokenizer_config.json',
 './tinyllama-lora-tuned-adapther-math/special_tokens_map.json',
 './tinyllama-lora-tuned-adapther-math/chat_template.jinja',
 './tinyllama-lora-tuned-adapther-math/tokenizer.model',
 './tinyllama-lora-tuned-adapther-math/added_tokens.json',
 './tinyllama-lora-tuned-adapther-math/tokenizer.json')