MODULE INSTALLATION

In [1]:
!pip -q install transformers peft datasets accelerate bitsandbytes


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25h

imports

In [2]:
import torch
import json
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, PeftModel


load dataset

In [3]:
dataset = load_dataset("json", data_files="train.jsonl")["train"]

print("Samples:", len(dataset))
dataset[0]


Generating train split: 0 examples [00:00, ? examples/s]

Samples: 150


{'instruction': 'What is gradient descent?',
 'response': '• Minimizes loss function\n• Updates weights iteratively\n• Moves opposite gradient direction\n• Stops at convergence'}

format text

In [4]:
def format_example(example):
    return {
        "text": f"Answer briefly using bullet points:\n{example['instruction']}\n{example['response']}"
    }

dataset = dataset.map(format_example)


Map:   0%|          | 0/150 [00:00<?, ? examples/s]

tokenizer

In [5]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Tokenization + labels

In [6]:
def tokenize(example):
    tokens = tokenizer(
        example["text"],
        padding="max_length",
        truncation=True,
        max_length=128
    )

    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

dataset = dataset.map(tokenize, batched=True)
dataset = dataset.remove_columns(["text"])
dataset.set_format("torch")


Map:   0%|          | 0/150 [00:00<?, ? examples/s]

base model

In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

attach lora

In [8]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()


trainable params: 2,252,800 || all params: 1,102,301,184 || trainable%: 0.2044


training config

In [9]:
training_args = TrainingArguments(
    output_dir="lora-output",
    per_device_train_batch_size=4,
    num_train_epochs=3,
    logging_steps=10,
    fp16=True,
    report_to="none",
    save_strategy="epoch"
)


trainer

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)


train

In [11]:
trainer.train()


Step,Training Loss
10,10.379
20,5.8236
30,3.2172
40,1.6866
50,1.2087
60,1.029
70,0.9692
80,0.9141
90,0.8297
100,0.8278


TrainOutput(global_step=114, training_loss=2.453414904443841, metrics={'train_runtime': 51.3411, 'train_samples_per_second': 8.765, 'train_steps_per_second': 2.22, 'total_flos': 358306047590400.0, 'train_loss': 2.453414904443841, 'epoch': 3.0})

export final adapters

In [12]:
checkpoint = "lora-output/checkpoint-114"  # adjust if different

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config
)

model = PeftModel.from_pretrained(base_model, checkpoint)

model.save_pretrained("lora_adapters")
tokenizer.save_pretrained("lora_adapters")

print("Saved adapters")


Saved adapters


refrence test

In [13]:
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = "Answer briefly using bullet points:\nExplain gradient descent."

print(pipe(prompt, max_new_tokens=60, do_sample=False)[0]["generated_text"])


Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Answer briefly using bullet points:
Explain gradient descent.
- Implements backpropagation algorithm
- Uses stochastic gradient descent
- Reduces variance in model
- Improves generalization
