# Finetuning Gemma Model

In [None]:
!pip install -q -U bitsandbytes==0.42.0
!pip install -q -U peft==0.8.2

!pip install -q -U accelerate==0.27.1
!pip install -q -U datasets==2.17.0
!pip install -q -U transformers==4.38.0

In [None]:
!pip install trl

Collecting trl
  Downloading trl-0.12.1-py3-none-any.whl.metadata (10 kB)
Collecting accelerate>=0.34.0 (from trl)
  Downloading accelerate-1.1.1-py3-none-any.whl.metadata (19 kB)
Collecting datasets>=2.21.0 (from trl)
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Downloading trl-0.12.1-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.9/310.9 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.1.1-py3-none-any.whl (333 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m333.2/333.2 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: accelerate, datasets, trl
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.27.1
    Uninstalling accelerate-0.27.1:
 

In [None]:
import os
import transformers
import torch
from google.colab import userdata
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer


In [None]:
# Import hf_token here (keep the token value itself out of the notebook)

In [None]:
# set hf_token here
# The model-loading cell reads os.environ['HF_TOKEN'], so populate it from the
# Colab secret named "HF_TOKEN" instead of pasting the token into the notebook.
os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")


# Prerequisites

In [None]:
model_id = "google/gemma-2b"

# Quantization settings handed to the model loader.
bnb_config = BitsAndBytesConfig(
    # Load the model weights in 4-bit precision.
    load_in_4bit=True,
    # Quantization scheme: 4-bit NormalFloat (NF4).
    bnb_4bit_quant_type="nf4",
    # Compute dtype is bfloat16.
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Both downloads authenticate with the token stored in the HF_TOKEN environment variable.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'],
)

# Load the causal LM with the quantization config defined earlier.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'],
    device_map={"": 0},  # presumably maps the whole model onto device index 0 — confirm
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Baseline check before any fine-tuning: let the loaded model continue a quote prompt.
text = "Quote: Imagination is more,"
device = "cuda:0"

# Tokenize the prompt into PyTorch tensors, then move them to the GPU.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Generate up to 50 new tokens and print the decoded text without special tokens.
outputs = model.generate(max_new_tokens=50, **inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Imagination is more, than knowledge.

I am a self-taught artist, born in 1985 in the beautiful city of Porto, Portugal.

I have always been interested in art, but I never thought I could make a living out of it.


In [None]:
# Print the first generated sequence in its raw, undecoded form (a tensor of token ids).
print(outputs[0])

tensor([     2,  14782, 235292, 122399,    603,    978, 235269,   1178,   5567,
        235265,    109, 235285,   1144,    476,   2011, 235290, 145008,   9161,
        235269,   7565,    575, 235248, 235274, 235315, 235321, 235308,    575,
           573,   4964,   3413,    576,  35639, 235269,  21539, 235265,    109,
        235285,    791,   2593,   1125,   8202,    575,   3096, 235269,    901,
           590,   2447,   3421,    590,   1538,   1501,    476,   5027,    921,
           576,    665, 235265], device='cuda:0')


In [None]:
# Turn off Weights & Biases reporting for the training run. The variable only
# disables W&B when set to a truthy string; the previous value 'false' left it enabled.
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# LoRA adapter settings: rank-8 adapters attached to the listed projection modules.
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj",
                    "gate_proj", "up_proj", "down_proj"],
    # Task type for causal language modelling. The previous spelling "CASUAL_LM"
    # is not a recognised task type.
    task_type="CAUSAL_LM",
)

In [None]:
from datasets import load_dataset

# Download the quotes dataset and tokenize the "quote" column in batches;
# the tokenizer's outputs are stored alongside the original columns.
data = load_dataset("Abirate/english_quotes").map(
    lambda batch: tokenizer(batch["quote"]),
    batched=True,
)

In [None]:
# Display the quote text of the first training example.
data["train"][0]["quote"]

'“Be yourself; everyone else is already taken.”'

In [None]:
def formatting_func(example):
    """Build the training text for every example it is given.

    Args:
        example: Mapping with "quote" and "author" entries. Each entry is
            either a list of strings (a batch) or a single string (one example).

    Returns:
        A list with one "Quote: ...\\nAuthor: ..." string per example, in
        input order.
    """
    quotes, authors = example["quote"], example["author"]
    # A single unbatched example arrives as plain strings; wrap them so the
    # same code path handles both shapes.
    if isinstance(quotes, str):
        quotes, authors = [quotes], [authors]
    # Format every example of the batch, not just the first one, so no
    # training samples are silently dropped.
    return [
        f"Quote: {quote}\nAuthor: {author}"
        for quote, author in zip(quotes, authors)
    ]

In [None]:
# Display the train split's summary (feature names and number of rows).
data['train']

Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 2508
})

In [None]:
# Hyperparameters for a short run capped at 100 optimizer steps.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    max_steps=100,
    warmup_steps=2,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=1,  # log the training loss at every step
)

# NOTE(review): data["train"] already carries input_ids/attention_mask columns from the
# earlier tokenizer map; presumably some trl releases skip formatting_func for such
# pre-tokenized datasets — confirm against the installed trl version.
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=training_args,
    peft_config=lora_config,
    formatting_func=formatting_func,
)

max_steps is given, it will override any value given in num_train_epochs


In [None]:
# Run the fine-tuning loop; the returned TrainOutput summary is displayed as the cell result.
trainer.train()

Step,Training Loss
1,1.7363
2,0.934
3,1.535
4,2.0958
5,1.5963
6,1.994
7,2.4337
8,1.8087
9,2.6834
10,1.6285


TrainOutput(global_step=100, training_loss=1.7882964819669724, metrics={'train_runtime': 94.1289, 'train_samples_per_second': 4.249, 'train_steps_per_second': 1.062, 'total_flos': 189744345784320.0, 'train_loss': 1.7882964819669724, 'epoch': 0.1594896331738437})

In [None]:
# Prompt the model again with a full quote, this time after trainer.train() has run.
text2 = "Quote: Twenty years from now you will be more disappointed by the things that you didn't do than by the ones you did do."
device = "cuda:0"

# Tokenize the prompt into PyTorch tensors, then move them to the GPU.
inputs = tokenizer(text2, return_tensors="pt")
inputs = inputs.to(device)

# Generate up to 40 new tokens and print the decoded text without special tokens.
outputs = model.generate(max_new_tokens=40, **inputs)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Quote: Twenty years from now you will be more disappointed by the things that you didn't do than by the ones you did do. So throw off the bowlines. Sail away from the safe harbor. Catch the trade winds in your sails. Explore. Dream. Discover.

-Mark Twain

I'm not sure if I


In [None]:
import torch

# Report whether a CUDA device is visible and, if so, its name. The device name
# is only queried when CUDA is available, because the lookup raises without a GPU.
print(torch.cuda.is_available())  # Should return True
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # Prints the GPU name
else:
    print("No CUDA device detected")


True
NVIDIA L4
