In [None]:
!pip install --upgrade -qq torch
!pip install -qq transformers
!pip install -qq accelerate
!pip install -qq datasets
!pip install -qq bitsandbytes
!pip install -qq peft
!pip install -qq trl==0.8.6
!pip install -qq SentencePiece
!pip install -qq wandb -U
# !pip install -qq ninja packaging
# !pip install -qq -U flash-attn --no-build-isolation

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.1/168.1 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m71.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.3.0+cu121 requires torch==2.3.0, but you have torch 2.3.1 which is incompatible.
torchvision 0.18.0+cu121 requires torch==2.3.0, but you have torch 2.3.1 which is incompatible.[0m[31m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K  

In [None]:
import numpy as np
import random
import torch
import transformers

def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds, in order: NumPy's global generator, Python's ``random`` module,
    PyTorch, and ``transformers``. When CUDA is available, all CUDA devices
    are seeded as well.

    Args:
        seed: Integer seed forwarded unchanged to each library.
    """
    seeders = (np.random.seed, random.seed, torch.manual_seed, transformers.set_seed)
    for seed_fn in seeders:
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

set_seed(2)

In [None]:
import os
from getpass import getpass

import wandb

# Do not paste the API key into the notebook: read it from the WANDB_API_KEY
# environment variable, falling back to an interactive prompt that does not
# echo the key into the cell output.
wandb_api_key = os.environ.get("WANDB_API_KEY") or getpass("Weights & Biases API key: ")
wandb.login(key=wandb_api_key)

# Project name exported through the WANDB_PROJECT environment variable.
wandb_project = "Gemma-1.1-2b-it-Squad-Fine-Tuning"
if wandb_project:
    os.environ["WANDB_PROJECT"] = wandb_project


In [None]:
import os
from getpass import getpass

from huggingface_hub.hf_api import HfFolder

# Do not paste the access token into the notebook: read it from the HF_TOKEN
# environment variable, falling back to an interactive prompt that does not
# echo the token into the cell output.
hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")

# Expose the token to libraries that read HF_TOKEN and persist it locally;
# `hf_token` is also passed to TrainingArguments as `hub_token` further down.
os.environ["HF_TOKEN"] = hf_token
HfFolder.save_token(hf_token)

### Load SQuAD Dataset

In [None]:
from datasets import load_dataset

# Flag selecting which dataset name is requested from the Hub.
squad_v2 = False
dataset_name = "squad_v2" if squad_v2 else "squad"

datasets = load_dataset(dataset_name)
datasets

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})

In [None]:
# Bind the two splits of the loaded DatasetDict to their own names.
train_dataset, valid_dataset = (datasets[split] for split in ("train", "validation"))

In [None]:
train_dataset[0]

{'id': '5733be284776f41900661182',
 'title': 'University_of_Notre_Dame',
 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
 'answers': {'text': ['Saint Bernadette Soubirous'], 'answer_start': [515]}}

In [None]:
def format_instruct_prompt(data_point, no_answer_text=""):
    """Build the instruction-style training text for one SQuAD record.

    Args:
        data_point: Mapping with ``"context"`` and ``"question"`` strings and
            an ``"answers"`` mapping whose ``"text"`` entry is a list of
            answer strings. Only the first answer is used.
        no_answer_text: Text placed in the answer slot when the record has no
            answers (e.g. unanswerable SQuAD v2 questions). Defaults to an
            empty string.

    Returns:
        The same ``data_point`` object, with a ``"prompt"`` key added.
    """
    # The template text is kept verbatim (leading space and spelling included):
    # prompts used at inference time must match what the model was trained on.
    prompt_template = """ Answer the follwing question from the given Context:
{context}

### Question:
{question}

### Answer:
{answer}"""

    # Records without any answer have an empty list here; indexing [0]
    # directly would raise IndexError.
    answer_texts = data_point["answers"]['text']
    answer = answer_texts[0] if answer_texts else no_answer_text

    data_point["prompt"] = prompt_template.format(context=data_point["context"],
                                                  question=data_point["question"],
                                                  answer=answer)
    return data_point

In [None]:
import os

# Map from the untouched splits held in `datasets` rather than from
# `train_dataset` / `valid_dataset` themselves. Those names are reassigned
# below, and after the first run they no longer carry the original columns,
# so mapping from them would make this cell fail when re-run.
raw_train, raw_valid = datasets["train"], datasets["validation"]

# Original columns are dropped so that only the generated "prompt" remains.
column_names = list(raw_train.features)

# Shared arguments for both splits.
map_kwargs = dict(num_proc=os.cpu_count(),
                  remove_columns=column_names,
                  desc="Applying chat template")

train_dataset = raw_train.map(format_instruct_prompt, **map_kwargs)
valid_dataset = raw_valid.map(format_instruct_prompt, **map_kwargs)


In [None]:
valid_dataset[0]

### Set Model Repo ID or Path

In [None]:
# Model identifier passed to `from_pretrained` for both the tokenizer and the model.
model_id = "google/gemma-1.1-2b-it"

### Load Gemma Tokenizer

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Fall back to EOS as the padding token only when the tokenizer ships without
# one. Aliasing pad to EOS unconditionally makes the language-modeling data
# collator treat every EOS label as padding (label -100), so the model is
# never trained to emit EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

In [None]:
# Maximum sequence length; stored on the tokenizer, whose `model_max_length`
# is later handed to the trainer as `max_seq_length`.
max_seq_length = 1024

tokenizer.model_max_length = max_seq_length

### Define Quantization Config

In [None]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit loading with the "nf4" quantization type and double quantization;
# float16 is used as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

### Load Model

In [None]:
import torch

# Load the base model with the 4-bit quantization config defined above.
# `trust_remote_code` is deliberately not enabled: the architecture is
# implemented in the transformers library itself, so there is no reason to
# allow code from the model repository to execute locally.
model = AutoModelForCausalLM.from_pretrained(model_id,
                                             quantization_config=bnb_config,
                                             # attn_implementation="flash_attention_2",
                                             device_map="auto",
                                             torch_dtype=torch.float16)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Define Peft Config

In [None]:
import bitsandbytes as bnb

def find_all_linear_names(model, linear_cls=None):
    """Return the leaf names of all modules in ``model`` of a given layer class.

    Used to build the ``target_modules`` list for the LoRA configuration.

    Args:
        model: Object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.
        linear_cls: Class (or tuple of classes) to match with ``isinstance``.
            Defaults to ``bnb.nn.Linear4bit``, which matches the 4-bit
            quantized model loaded in this notebook.

    Returns:
        A sorted list of unique leaf names (the last component of each dotted
        module name), with ``"lm_head"`` excluded.
    """
    if linear_cls is None:
        # Resolved lazily so callers passing their own class do not need bitsandbytes.
        linear_cls = bnb.nn.Linear4bit

    lora_module_names = {
        name.split('.')[-1]
        for name, module in model.named_modules()
        if isinstance(module, linear_cls)
    }
    # The output head is excluded from the adapter targets; dropped once,
    # after the scan, instead of being re-checked on every iteration.
    lora_module_names.discard("lm_head")

    # Sorted so the result is deterministic across runs.
    return sorted(lora_module_names)


In [None]:
# Collect the module names that the LoRA config will target, then display them.
target_modules = find_all_linear_names(model)
target_modules

['k_proj', 'up_proj', 'o_proj', 'v_proj', 'gate_proj', 'down_proj', 'q_proj']

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA adapter settings applied on top of the quantized base model.
qlora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=target_modules,  # module names collected by find_all_linear_names
    r=16,                           # rank of the low-rank update matrices
    lora_alpha=64,                  # scaling parameter
    lora_dropout=0.1,               # dropout probability on the adapter layers
    bias="none",
)

In [None]:
# Prepare the quantized model for k-bit training, then wrap it with the
# adapters described by `qlora_config`. `model` is rebound on both lines, so
# running this cell a second time operates on the already-wrapped model.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, qlora_config)
# Print the trainable vs. total parameter counts after wrapping.
model.print_trainable_parameters()

trainable params: 19,611,648 || all params: 2,525,784,064 || trainable%: 0.7765


### Define Training Arguments

In [None]:
# When two or more CUDA devices are visible, flag the model as parallelized.
has_multiple_gpus = torch.cuda.device_count() >= 2
if has_multiple_gpus:
    model.is_parallelizable = True
    model.model_parallel = True

In [None]:
# Directory handed to TrainingArguments as `output_dir`.
output_dir = "Gemma-1.1-2b-it-Squad-Fine-Tuning"

# Create it up front; exist_ok=True keeps this cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

In [None]:
from transformers import TrainingArguments


# Note: `eval_steps` is intentionally not passed. It only takes effect with
# eval_strategy="steps"; under eval_strategy="epoch" it is ignored, so passing
# it alongside "epoch" only obscured when evaluation actually runs.
training_arguments = TrainingArguments(output_dir=output_dir,
                                       overwrite_output_dir=True,
                                       warmup_steps=5,
                                       per_device_train_batch_size=5,
                                       per_device_eval_batch_size=5,
                                       gradient_accumulation_steps=2,
                                       gradient_checkpointing=True,
                                       num_train_epochs=1,
                                       learning_rate=2.0e-05,         # Want a small lr for finetuning
                                       lr_scheduler_type="cosine",
                                       fp16=True,                     # Mixed precision in float16 ...
                                       bf16=False,                    # ... not bfloat16
                                       optim="paged_adamw_8bit",
                                       weight_decay = 0.01,
                                       logging_dir="./logs",          # Directory for storing logs
                                       save_strategy="steps",         # Save a checkpoint every `save_steps` steps
                                       eval_strategy="epoch",         # Evaluate at the end of each epoch
                                       save_total_limit=5,            # Keep at most 5 checkpoints on disk
                                       do_eval=True,                  # Run evaluation on the eval dataset

                                       push_to_hub=True,
                                       hub_strategy="checkpoint",
                                       hub_token=hf_token,

                                       seed=2,

                                       logging_steps=100,             # Log every 100 steps
                                       save_steps=100,                # Save checkpoints every 100 steps
                                       )


### Define Trainer

In [None]:
from transformers import DataCollatorForLanguageModeling

# mlm=False selects causal language modeling instead of masked-token prediction.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

In [None]:
from trl import SFTTrainer

# Supervised fine-tuning on the "prompt" text column, without packing.
trainer = SFTTrainer(
    # model, tokenizer and adapter configuration
    model=model,
    tokenizer=tokenizer,
    peft_config=qlora_config,
    # data
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    dataset_text_field="prompt",
    data_collator=data_collator,
    max_seq_length=tokenizer.model_max_length,
    packing=False,
    # training hyper-parameters
    args=training_arguments,
)

# Turn off the generation cache on the model config before training starts.
model.config.use_cache = False

### Start Fine-tuning

In [None]:
# Run fine-tuning; the value returned by `train()` is kept and shown as the cell output.
trainer_stats = trainer.train()
trainer_stats

[34m[1mwandb[0m: Currently logged in as: [33mmohamed-ahmed[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch,Training Loss,Validation Loss




KeyboardInterrupt: 

In [None]:
# trainer.push_to_hub()
# NOTE(review): this message is printed unconditionally; it does not check
# whether the training cell above actually ran to completion.
print("Fine Tuned Successfully")