# Fine Tuning Google Gemma


In [None]:
!pip3 install -q -U bitsandbytes==0.42.0  # Install bitsandbytes for efficient CUDA operations, often used for deep learning optimization
!pip3 install -q -U peft==0.8.2  # Install PEFT, possibly for task scheduling or performance estimation in parallel computing
!pip3 install -q -U trl==0.7.10  # Install TRL (Transformers Reinforcement Learning) for integrating RL with transformer models
!pip3 install -q -U accelerate==0.27.1  # Install Accelerate for simplified and efficient hardware acceleration in deep learning
!pip3 install -q -U datasets==2.17.0  # Install Datasets for easy access to a vast collection of datasets, useful for ML model training
!pip3 install -q -U transformers==4.38.0  # Install Transformers for state-of-the-art NLP models and utilities provided by Hugging Face

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m280.0/280.0 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.9/150.9 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.7/536.7 kB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━

In [None]:
import os  # OS functions (used here for environment variables)
import transformers  # Hugging Face Transformers (TrainingArguments is referenced through this namespace)
import torch  # Deep learning library

from google.colab import userdata  # Access to Colab user data (stored secrets)
from datasets import load_dataset  # Load datasets from the Hugging Face Hub
from trl import SFTTrainer  # Supervised fine-tuning (SFT) trainer
from peft import LoraConfig  # Configuration for LoRA
from transformers import AutoTokenizer, AutoModelForCausalLM  # Tokenizer and causal language model loaders
from transformers import BitsAndBytesConfig, GemmaTokenizer  # Quantization config (used below for 4-bit loading); NOTE(review): GemmaTokenizer is not referenced in the visible cells


In [None]:
# Export the token saved in Colab's user data under 'HF_TOKEN' as an environment variable.
os.environ.update(HF_TOKEN=userdata.get('HF_TOKEN'))

## Load Model

In [None]:
# Hub identifier of the base checkpoint.
model_id = "google/gemma-2b"

# Quantization settings, passed as `quantization_config` when the model is loaded.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,  # run computation in bfloat16
    bnb_4bit_quant_type="nf4",  # 4-bit normal-float quantization
    load_in_4bit=True,  # load the weights in 4-bit
)

In [None]:
# Tokenizer for the checkpoint; the token comes from the environment variable set earlier.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'],
)

# Quantized causal LM; device_map {"": 0} maps the whole model to device index 0.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=os.environ['HF_TOKEN'],
    device_map={"": 0},
    quantization_config=bnb_config,
)

tokenizer_config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

In [None]:
# Baseline (before fine-tuning): this prompt ends with a trailing comma.
text = "Quote: Imagination is more,"
device = "cuda:0"

# Tokenize to PyTorch tensors, then move them onto the target device.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Generate at most 20 new tokens and print the decoded text without special tokens.
outputs = model.generate(max_new_tokens=20, **inputs)
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)


Quote: Imagination is more, than knowledge.

I am a self-taught artist, born in 1985 in


In [None]:
# Baseline (before fine-tuning): same prompt as the previous cell, without the trailing comma.
text = "Quote: Imagination is more"
device = "cuda:0"

# Tokenize to PyTorch tensors, then move them onto the target device.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Generate at most 20 new tokens and print the decoded text without special tokens.
outputs = model.generate(max_new_tokens=20, **inputs)
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

Quote: Imagination is more important than knowledge. Knowledge is limited. Imagination encircles the world.

- Albert Einstein

The


In [None]:
# Repeat of the previous cell's prompt and generation settings.
text = "Quote: Imagination is more"
device = "cuda:0"

# Encode the prompt as PyTorch tensors and place them on the GPU.
encoded = tokenizer(text, return_tensors="pt")
inputs = encoded.to(device)

# Up to 20 new tokens; special tokens are stripped when decoding.
outputs = model.generate(max_new_tokens=20, **inputs)
first_sequence = outputs[0]
print(tokenizer.decode(first_sequence, skip_special_tokens=True))

Quote: Imagination is more important than knowledge. Knowledge is limited. Imagination encircles the world.

- Albert Einstein

The


## Fine Tuning Gemma

In [None]:
# Set WANDB_DISABLED to the string "false", i.e. Weights & Biases logging is not marked as disabled.
os.environ.update(WANDB_DISABLED="false")

In [None]:
# LoRA adapter settings; passed to the trainer as `peft_config`.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # causal language modeling
    r=8,  # rank of the LoRA decomposition
    # Modules that receive adapters.
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

In [None]:
# `load_dataset` is already imported in the notebook's import cell, so it is not re-imported here.

# Load the 'english_quotes' dataset from the Hugging Face Hub.
data = load_dataset("Abirate/english_quotes")

# Tokenize the 'quote' field in batches; the tokenizer's outputs
# (input_ids, attention_mask) are added as new columns.
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)


Downloading readme:   0%|          | 0.00/5.55k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/647k [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2508 [00:00<?, ? examples/s]

In [None]:
# Preview only the first few quotes; displaying the whole column would dump
# every row of the train split into the cell output.
data["train"][:5]["quote"]

['“Be yourself; everyone else is already taken.”',
 "“I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.”",
 "“Two things are infinite: the universe and human stupidity; and I'm not sure about the universe.”",
 '“So many books, so little time.”',
 '“A room without books is like a body without a soul.”',
 "“Be who you are and say what you feel, because those who mind don't matter, and those who matter don't mind.”",
 "“You've gotta dance like there's nobody watching,Love like you'll never be hurt,Sing like there's nobody listening,And live like it's heaven on earth.”",
 "“You know you're in love when you can't fall asleep because reality is finally better than your dreams.”",
 '“You only live once, but if you do it right, once is enough.”',
 '“Be the change that you wish to see in the world.”',
 "“In three words I can sum up everything I

In [None]:
def formatting_func(example):
    """Format a batch of dataset rows into training strings.

    Args:
        example: A batched mapping with parallel sequences under the keys
            'quote' and 'author' (one entry per row in the batch).

    Returns:
        A list with one "Quote: ...\\nAuthor: ..." string per row of the
        batch. An empty batch yields an empty list.
    """
    # Emit one string for EVERY row in the batch. Indexing only element [0]
    # would keep a single example per batch and silently discard the rest of
    # the dataset when the trainer maps this function over batches.
    return [
        f"Quote: {quote}\nAuthor: {author}"
        for quote, author in zip(example['quote'], example['author'])
    ]

In [None]:
# Display the train split summary (features and row count).
data["train"]

Dataset({
    features: ['quote', 'author', 'tags', 'input_ids', 'attention_mask'],
    num_rows: 2508
})

In [None]:
# Hyperparameters for the fine-tuning run.
training_args = transformers.TrainingArguments(
    output_dir="outputs",  # where outputs are written
    max_steps=100,  # total number of training steps
    per_device_train_batch_size=1,  # batch size per device
    gradient_accumulation_steps=4,  # accumulate gradients over 4 steps before each update
    learning_rate=2e-4,
    warmup_steps=2,  # warmup steps for the learning-rate scheduler
    optim="paged_adamw_8bit",
    fp16=True,  # mixed-precision training
    logging_steps=1,  # log metrics every step
)

# Supervised fine-tuning trainer: the quantized base model plus LoRA adapters,
# trained on the 'train' split rendered through formatting_func.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    formatting_func=formatting_func,
    peft_config=lora_config,
)



Map:   0%|          | 0/2508 [00:00<?, ? examples/s]



In [None]:
# Run the fine-tuning loop; the returned TrainOutput is shown as the cell result.
train_result = trainer.train()
train_result

Step,Training Loss
1,1.6802
2,0.6298
3,1.023
4,1.0297
5,0.4182
6,1.2244
7,1.0872
8,0.333
9,0.5601
10,0.4783


TrainOutput(global_step=100, training_loss=0.1452721032407135, metrics={'train_runtime': 71.1584, 'train_samples_per_second': 5.621, 'train_steps_per_second': 1.405, 'total_flos': 55030401331200.0, 'train_loss': 0.1452721032407135, 'epoch': 66.67})

### Testing The Outputs

In [None]:
# After fine-tuning: prompt with the opening of a quote.
text = "Quote: A woman is like a tea bag;"
device = "cuda:0"

# Tokenize to PyTorch tensors, then move them onto the target device.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Generate at most 22 new tokens and print the decoded text without special tokens.
outputs = model.generate(max_new_tokens=22, **inputs)
completion = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(completion)

Quote: A woman is like a tea bag; you can’t tell how strong she is until you put her in hot water
Author: Eleanor Roosevelt



In [None]:
# After fine-tuning: a second quote opening as the prompt.
text = "Quote: Outside of a dog, a book is man's"
device = "cuda:0"

# Encode the prompt as PyTorch tensors and place them on the GPU.
encoded = tokenizer(text, return_tensors="pt")
inputs = encoded.to(device)

# Up to 20 new tokens; special tokens are stripped when decoding.
outputs = model.generate(max_new_tokens=20, **inputs)
first_sequence = outputs[0]
print(tokenizer.decode(first_sequence, skip_special_tokens=True))

Quote: Outside of a dog, a book is man's best friend.
Author: Antoine de Saint-Exupéry
Quote: The most wasted of
