**Step 1: Install required libraries**

In [1]:
!pip install -q -U datasets accelerate peft trl bitsandbytes xformers

In [2]:
!pip install git+https://github.com/huggingface/transformers@v4.38-release

Collecting git+https://github.com/huggingface/transformers@v4.38-release
  Cloning https://github.com/huggingface/transformers (to revision v4.38-release) to /tmp/pip-req-build-y_4jgs3j
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-y_4jgs3j
  Running command git checkout -b v4.38-release --track origin/v4.38-release
  Switched to a new branch 'v4.38-release'
  Branch 'v4.38-release' set up to track remote branch 'v4.38-release' from 'origin'.
  Resolved https://github.com/huggingface/transformers to commit 092f1fdaa4224fdd88c616dc9678e6fcb37bfffd
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


**Step 02: Set the enviornment as Hugging Face Token**

In [3]:
import os
os.environ["HF_TOKEN"] = "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"

**Step 03: Import All the Required Libraries**

In [4]:
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer

In [5]:
import transformers
transformers.__version__

'4.38.2'

In [6]:
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

In [7]:
model_id = "PY007/TinyLlama-1.1B-step-50K-105b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ['HF_TOKEN'])
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [8]:
text = "Quote: Imagination is more"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is more important than knowledge.
The most important thing is to have fun.
The most important thing is


In [9]:
from datasets import load_dataset

data = load_dataset("LongDHo/english_quotes")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

In [10]:
import transformers
from trl import SFTTrainer

def formatting_func(example):
    output_texts = []
    for i in range(len(example)):
        text = f"Quote: {example['quote'][i]}\nAuthor: {example['author'][i]}"
        output_texts.append(text)
    return output_texts

trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
)
trainer.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
1,4.4127
2,2.7306
3,3.7833
4,3.3492
5,2.9641
6,2.7952
7,3.1893
8,3.07
9,3.2993
10,3.0104


TrainOutput(global_step=10, training_loss=3.2604084253311156, metrics={'train_runtime': 23.6328, 'train_samples_per_second': 1.693, 'train_steps_per_second': 0.423, 'total_flos': 20352198660096.0, 'train_loss': 3.2604084253311156, 'epoch': 0.04})

In [11]:
text = "Quote: Imagination is"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is the ability to imagine the future.
The first step in the process of imagining the future is


In [12]:
new_model = "finetuned-tinyllama"
trainer.model.save_pretrained(new_model)

In [13]:
# Empty VRAM
del model
del trainer
import gc
gc.collect()
gc.collect()

35050

In [14]:
# Reload model in FP16 and merge it with LoRA weights
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)
#Reload the Base Model and load the QLoRA adapters
model = PeftModel.from_pretrained(model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

In [15]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [16]:
#!huggingface-cli login
model.push_to_hub("LongDHo/TinyLlama", check_pr=True, use_auth_token="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")
tokenizer.push_to_hub("LongDHo/TinyLlama", check_pr=True, use_auth_token="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")



model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]



README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/LongDHo/TinyLlama/commit/b0b5c42d05acf8bf2f0557f59fb02d21c3607631', commit_message='Upload tokenizer', commit_description='', oid='b0b5c42d05acf8bf2f0557f59fb02d21c3607631', pr_url=None, pr_revision=None, pr_num=None)