## Install the libraries
### bitsandbytes -> used for quantization

In [None]:
# Install the libraries this notebook imports: bitsandbytes (quantization),
# peft (LoraConfig), trl (SFTTrainer), plus accelerate, datasets, transformers.
!pip install bitsandbytes peft trl accelerate datasets transformers


## Import the modules

In [None]:
import os
import transformers
import torch
from datasets import load_dataset
from google.colab import userdata
from trl import SFTTrainer # sftTrainer is used for fine tuning called supervised fine tuning
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

In [None]:
# Fetch the Hugging Face access token from the Colab user data and export it
# as an environment variable; later cells read it back and pass it as `token=`
# when downloading the tokenizer and the model.
hf_token = userdata.get('HF_TOKEN')
os.environ['HF_TOKEN'] = hf_token

## Quantization to 4-bit NF4
NF4 = 4-bit NormalFloat

In [None]:
# Checkpoint on the Hugging Face Hub that the rest of the notebook loads.
model_id = "google/gemma-2b"

# Quantization settings: 4-bit loading with the "nf4" quantization type and
# float16 as the compute dtype.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.float16,
}
bnb_config = BitsAndBytesConfig(**quantization_options)

In [None]:
# Read the access token that an earlier cell exported to the environment.
hub_token = os.environ['HF_TOKEN']

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hub_token)

# Load the checkpoint with the quantization settings from bnb_config.
# device_map={"": 0}: presumably the empty-string key addresses the whole model
# and 0 is the device index -- confirm against the accelerate device_map docs.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=hub_token,
    device_map={"": 0},
    quantization_config=bnb_config,
)

## Test the loaded model

In [None]:
# Smoke test of the freshly loaded model: complete a partial quote.
device = "cuda:0"
text = "Quote : Imagination is more,"

# Tokenize the prompt into PyTorch tensors, then move them to the target device.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Generate at most 50 new tokens and print the decoded first sequence
# with special tokens stripped.
output = model.generate(**inputs, max_new_tokens=50)
completion = tokenizer.decode(output[0], skip_special_tokens=True)
print(completion)

In [None]:
# Same check as the previous prompt, but without the trailing comma.
device = "cuda:0"
text = "Quote : Imagination is more"

# Encode the prompt as PyTorch tensors and place them on the target device.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Ask for up to 50 new tokens, then decode the first returned sequence
# while dropping special tokens.
output = model.generate(**inputs, max_new_tokens=50)
completion = tokenizer.decode(output[0], skip_special_tokens=True)
print(completion)

In [None]:
# Prompt with a complete quote to see what the model appends after it.
device = "cuda:0"
text = "Quote : Be yourself; everyone else is already taken."

# Tokenize to PyTorch tensors and move them onto the target device.
inputs = tokenizer(text, return_tensors="pt")
inputs = inputs.to(device)

# Limit generation to 50 new tokens; print the first sequence decoded
# without special tokens.
output = model.generate(**inputs, max_new_tokens=50)
completion = tokenizer.decode(output[0], skip_special_tokens=True)
print(completion)

## Fine tuning

In [None]:
# Switch off Weights & Biases reporting for the training run. WANDB_DISABLED
# is a *disable* flag, so it must hold a truthy value: "true" turns reporting
# off, whereas "false" would leave it enabled (and the trainer could stop to
# ask for a W&B login).
os.environ["WANDB_DISABLED"] = "true"

In [None]:
# Names of the modules that LoRA should target (the q/k/v/o projection names
# plus the gate/up/down projection names).
lora_target_modules = [
    "q_proj",
    "o_proj",
    "k_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# LoRA settings for a causal language-modelling task with r=8.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=8,
)

In [None]:
from datasets import load_dataset
# Fetch the English quotes dataset from the Hugging Face Hub.
data = load_dataset("Abirate/english_quotes")


def _tokenize_quotes(samples):
    """Run the tokenizer over the ``quote`` column of a batch of rows."""
    return tokenizer(samples["quote"])


# batched=True makes map() hand the function a batch of rows per call.
# NOTE(review): after this step the dataset carries the tokenizer's output
# columns; some TRL versions treat such a dataset as already processed and
# ignore `formatting_func` -- confirm against the installed TRL version.
data = data.map(_tokenize_quotes, batched=True)

In [None]:
def formatting_func(example):
    """Render one dataset row as the training text.

    Args:
        example: Mapping that provides at least the ``quote`` and ``author``
            keys.

    Returns:
        A string with a ``Quote:`` line followed by an ``Author:`` line, each
        terminated by a newline.
    """
    return f"Quote: {example['quote']}\nAuthor: {example['author']}\n"

In [None]:
# Supervised fine-tuning setup: the trainer receives the quantized base model,
# the LoRA configuration, the training split, and the function that renders
# each example as text.
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,  # 1 x 4 = 4 examples per optimizer step, per device
        warmup_steps=2,
        max_steps=100,  # stop after 100 optimizer steps
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,  # log every step
        output_dir="outputs",
        optim="paged_adamw_8bit",
        label_names=["labels"],
    ),
    peft_config=lora_config,
    formatting_func=formatting_func,
)

# Run the optimization loop. Constructing the trainer alone does not update
# any weights, so without this call the generation check that follows would
# still be exercising the un-tuned model.
trainer.train()

In [None]:
# Prompt the model again after the fine-tuning cells. The prefix is written as
# "Quote: " (no space before the colon) so it matches the template that
# formatting_func produces for the training examples.
text = "Quote: Two things are infinite: the universe and human stupidity;"
device = "cuda:0"
inputs = tokenizer(text, return_tensors="pt").to(device)

# Generate at most 50 new tokens and print the decoded first sequence with
# special tokens stripped.
output = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output[0], skip_special_tokens=True))

In [None]:
import json

# Patch the notebook's metadata: when a `widgets` section exists but has no
# `state` entry, add an empty one. The result is written to a separate file.

# Load the notebook. The encoding is pinned to UTF-8 so that reading does not
# depend on the platform's locale encoding.
with open('Fine Tuning Gemma-2b.ipynb', 'r', encoding='utf-8') as f:
    notebook = json.load(f)

# Fix the metadata. A missing `metadata` or `widgets` section is left alone.
widgets = notebook.get('metadata', {}).get('widgets')
if widgets is not None:
    widgets.setdefault('state', {})

# Save the notebook. ensure_ascii=False keeps non-ASCII text readable instead
# of \u-escaped, and indent=1 keeps the file line-oriented for diffs.
with open('your_notebook.ipynb', 'w', encoding='utf-8') as f:
    json.dump(notebook, f, ensure_ascii=False, indent=1)
    f.write('\n')