# Fine-Tuning Google's Gemma Model

In [13]:
import os
import torch
from datasets import load_dataset
import transformers
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,  
    BitsAndBytesConfig
)
from trl import SFTTrainer
from peft import LoraConfig

In [14]:
import os
from dotenv import load_dotenv

load_dotenv()  # load variables from .env file

# Only verify that the token is available. The previous self-assignment
# (os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")) was a no-op when the variable
# existed and failed with "TypeError: str expected, not NoneType" when it did not.
# Later cells read the token with os.getenv("HF_TOKEN").
if not os.getenv("HF_TOKEN"):
    raise RuntimeError(
        "HF_TOKEN is not set. Add HF_TOKEN=<your token> to a .env file or export it "
        "in the environment before running this notebook."
    )

In [15]:
# Hub identifier of the base checkpoint that the following cells load.
model_id = "google/gemma-2b"

# bitsandbytes settings: 4-bit loading, NF4 quantization type, bfloat16 compute dtype.
quantization_settings = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
bnb_config = BitsAndBytesConfig(**quantization_settings)

In [16]:
# Read the Hub token once and reuse it for both downloads.
# NOTE(review): the recorded output of this cell is "ValueError: Tokenizer class
# GemmaTokenizer does not exist or is not currently imported" - presumably the
# installed transformers release predates Gemma support; confirm and upgrade.
hf_token = os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)

# Quantization is currently switched off for this load; add
# quantization_config=bnb_config to the call below to enable it.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    token=hf_token,
)

ValueError: Tokenizer class GemmaTokenizer does not exist or is not currently imported.

In [None]:
# Complete a short prompt with the model as loaded, before any fine-tuning.
text = "Quote: Imagination is more,"

# The model was placed by device_map="auto", so take the device from the model
# itself instead of guessing from CUDA availability; this keeps the input
# tensors on the same device as the weights that consume them.
device = model.device
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
# Turn off Weights & Biases logging for the training run. The variable name must
# be WANDB_DISABLED; the previous spelling ("WAND_DISABLED") was silently ignored.
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Names of the modules that receive rank-8 LoRA adapters, grouped for readability.
attention_projections = ["q_proj", "o_proj", "k_proj", "v_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

lora_config = LoraConfig(
    r=8,
    target_modules=attention_projections + mlp_projections,
    task_type="CAUSAL_LM",
)

In [None]:
from datasets import load_dataset

def tokenize_quotes(samples):
    """Run the notebook's tokenizer over the "quote" column of a batch of rows."""
    return tokenizer(samples["quote"])


# Load the quotes dataset and tokenize it in one pass, so re-running this cell
# always starts again from the freshly loaded dataset.
# NOTE(review): some trl releases appear to ignore `formatting_func` when the
# dataset already contains an `input_ids` column - confirm this pre-tokenization
# is still wanted before training.
data = load_dataset("Abirate/english_quotes").map(tokenize_quotes, batched=True)

In [None]:
# Preview only the first few quotes; displaying the whole column floods the
# cell output and bloats the saved notebook.
data["train"]["quote"][:5]

In [None]:
# Display the summary repr of the training split.
data["train"]

In [None]:
def formatting_function(example):
    """Format dataset rows as "Quote: <quote>" / "Author: <author>" training text.

    Args:
        example: Mapping with "quote" and "author" entries. Either a single row
            (scalar values) or a batch (equal-length lists of values).

    Returns:
        list[str]: One formatted string per row. A single row yields a
        one-element list, matching the previous return shape.
    """
    quotes, authors = example["quote"], example["author"]

    # A batch carries each column as a list. Interpolating that list directly
    # would embed its repr into one string instead of producing one text per
    # row, so wrap a single row into one-element lists and format row by row.
    if not isinstance(quotes, (list, tuple)):
        quotes, authors = [quotes], [authors]

    return [
        f"Quote: {quote}\nAuthor: {author}"
        for quote, author in zip(quotes, authors)
    ]

In [None]:
from transformers import TrainingArguments

# Hyperparameters for a short run, kept in a named object so they can be
# inspected separately from the trainer.
training_args = TrainingArguments(
    output_dir="outputs",
    max_steps=100,
    warmup_steps=2,
    learning_rate=2e-4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,  # gradients are accumulated over 4 steps of batch size 1
    fp16=False,  # fp16 mixed precision is left off
    optim="paged_adamw_8bit",  # optimizer provided through bitsandbytes
    logging_steps=1,
    save_strategy="no",  # no checkpoints are written during this quick test
)

# Supervised fine-tuning trainer: attaches the LoRA config to the loaded model
# and is given the formatting function for building training text.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=data["train"],
    peft_config=lora_config,
    formatting_func=formatting_function,
    args=training_args,
)

In [None]:
# Run the fine-tuning loop; the returned summary is kept under a name and
# shown as the cell output.
train_output = trainer.train()
train_output

In [None]:
# Complete a prompt again after training to compare with the earlier output.
text = "Quote: A woman is like a tea bag;"

# Use the model's own device (it was placed by device_map="auto") rather than
# guessing from CUDA availability, so inputs and weights share a device.
device = model.device
inputs = tokenizer(text, return_tensors="pt").to(device)

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))