In [1]:
!pip install -q peft==0.4.0 bitsandbytes==0.40.2  trl==0.4.7 datasets==2.17.0

In [2]:
!pip install accelerate==0.27.2

Collecting accelerate==0.27.2
  Downloading accelerate-0.27.2-py3-none-any.whl.metadata (18 kB)
Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.21.0
    Uninstalling accelerate-0.21.0:
      Successfully uninstalled accelerate-0.21.0
Successfully installed accelerate-0.27.2


In [3]:
!pip install transformers==4.38.2



In [4]:
import os
import transformers
import torch
from google.colab import userdata
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig

In [5]:
os.environ["HF_TOKEN"]= userdata.get("HF_TOKEN")

In [6]:
model_id = "google/gemma-2b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [7]:
tokenizer = AutoTokenizer.from_pretrained(model_id, token=os.environ["HF_TOKEN"])
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config,
    device_map={"":0},
    token=os.environ["HF_TOKEN"]
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [8]:
text = "Quote: Imagination is more,"
device= "cuda:0"
inputs = tokenizer(text,return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Quote: Imagination is more, than knowledge.

I am a self-taught artist, born in 1985 in the beautiful city of Porto Alegre, Brazil.

I have always been interested in art, but I never thought I would be able to make a living


In [9]:
lora_config = LoraConfig(
    r=8,
    target_modules = [
        "q_proj","o_proj","k_proj","v_proj","gate_proj","up_proj","down_proj"
    ],
    task_type="CAUSAL_LM",
)

In [10]:
from datasets import load_dataset

data = load_dataset("Abirate/english_quotes")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)



In [11]:
data['train']

Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 2508
})

In [12]:
def formating_func(example):
  text = f"Quote: {example['quote'][0]}\nAuthor: {example['author'][0]}"
  return [text]

In [13]:
training_arguments = transformers.TrainingArguments(

    per_device_train_batch_size= 1,
    gradient_accumulation_steps= 4,
    warmup_steps=2,
    max_steps=100,
    learning_rate=2e-4,
    fp16= True,
    logging_steps=1,
    output_dir="outputs",
    optim="paged_adamw_8bit"
)
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=data['train'],
    peft_config=lora_config,
    max_seq_length= None,
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
    formatting_func=formating_func
)



In [14]:
trainer.train()

You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
1,1.6806
2,0.6303
3,1.0203
4,1.0167
5,0.4088
6,1.1761
7,1.0308
8,0.3088
9,0.5217
10,0.4506


TrainOutput(global_step=100, training_loss=0.13250487179029732, metrics={'train_runtime': 99.4017, 'train_samples_per_second': 4.024, 'train_steps_per_second': 1.006, 'total_flos': 54994550906880.0, 'train_loss': 0.13250487179029732, 'epoch': 66.67})

In [16]:
text = "Quote: The opposite of love is not hate"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs= model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))




Quote: The opposite of love is not hate
Author: Aung San Suu Kyi
Source: 19Quote: Be yourself;
