# Fine tuning Google Gemma 2b

In [1]:
!nvidia-smi

Sat May 18 11:21:32 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.154.05             Driver Version: 535.154.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        On  | 00000000:61:00.0 Off |                  Off |
|  0%   39C    P8              27W / 450W |      1MiB / 24564MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

# 6 Steps to Fine Tuning:
    1. Setting up the environment - Install the required library.
    2. Load the model and chat format.
    3. Load and format the dataset.
    4. LoRA Config.
    5. Fine-Tuning.
    6. Pushing the model to Hugging Face.

# Step 1: Install all the required library

In [2]:
!pip3 install -q -U accelerate==0.27.1
!pip3 install -q -U peft==0.8.2 # Parametere effecient finetuning - LoRA Config
!pip3 install -q -U bitsandbytes==0.42.0 # Load quantized version of model
!pip3 install -q -U transformers==4.38.0
!pip3 install -q -U trl==0.7.10 # Supervised finetuning
!pip3 install -q -U datasets==2.17.0
!pip3 install -q -U scipy
!pip3 install -q -U tensorboard

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To updat

### Import the necessary library

In [3]:
import os
import torch

from datasets import load_dataset

import bitsandbytes as bnb

import transformers

from transformers import(
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

# Log in to hugging face

In [4]:
from huggingface_hub import notebook_login

In [5]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Load the dataset

## Dataset used
[TokenBender/code_instructions_122k_alpaca_style](https://huggingface.co/datasets/TokenBender/code_instructions_122k_alpaca_style)

In [6]:
# from datasets import load_dataset

In [7]:
dataset = load_dataset("TokenBender/code_instructions_122k_alpaca_style", split= "train")

### Vizualize the dataset

In [8]:
dataset

Dataset({
    features: ['input', 'text', 'output', 'instruction'],
    num_rows: 121959
})

In [9]:
df = dataset.to_pandas()
df.head(5)

Unnamed: 0,input,text,output,instruction
0,"[1, 2, 3, 4, 5]",Below is an instruction that describes a task....,# Python code\ndef sum_sequence(sequence):\n ...,Create a function to calculate the sum of a se...
1,"str1 = ""Hello ""\nstr2 = ""world""",Below is an instruction that describes a task....,"def add_strings(str1, str2):\n """"""This func...",Develop a function that will add two strings
2,,Below is an instruction that describes a task....,#include <map>\n#include <string>\n\nclass Gro...,Design a data structure in C++ to store inform...
3,"[3, 1, 4, 5, 9, 0]",Below is an instruction that describes a task....,def bubble_sort(arr):\n n = len(arr)\n \n ...,Implement a sorting algorithm to sort a given ...
4,Not applicable,Below is an instruction that describes a task....,import UIKit\n\nclass ExpenseViewController: U...,Design a Swift application for tracking expens...


In [10]:
dataset

Dataset({
    features: ['input', 'text', 'output', 'instruction'],
    num_rows: 121959
})

In [11]:
dataset[0]

{'input': '[1, 2, 3, 4, 5]',
 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: Create a function to calculate the sum of a sequence of integers. ### Input: [1, 2, 3, 4, 5] ### Output: # Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
 'output': '# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum',
 'instruction': 'Create a function to calculate the sum of a sequence of integers.'}

In [12]:
type(dataset)

datasets.arrow_dataset.Dataset

#### Each dataset will have datapoint that hold this information

```
{
  "instruction": "Create a function to calculate the sum of a sequence of integers.",
  "input": "[1, 2, 3, 4, 5]",
  "output": "# Python code def sum_sequence(sequence): sum = 0 for num in sequence: sum += num return sum"
}
```

In [13]:
for datapoint in dataset:
    print(datapoint)
    break

{'input': '[1, 2, 3, 4, 5]', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: Create a function to calculate the sum of a sequence of integers. ### Input: [1, 2, 3, 4, 5] ### Output: # Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum', 'output': '# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum', 'instruction': 'Create a function to calculate the sum of a sequence of integers.'}


## Getting the right format
Instruction Fintuning - Prepare the dataset under the format of "prompt" so the model can better understand :

We will add a new colum called as prompt, that will store the gemma prompt.

    1. The function generate_prompt : take the instruction and output and generate a prompt.
    2. Shuffle the dataset.
    3. Tokenizer the dataset.

#### Gemma format
[Gemma Prompt Template](https://huggingface.co/google/gemma-7b-it)

```
<start_of_turn>user What is your favorite condiment? <end_of_turn>
<start_of_turn>model Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavor to whatever I'm cooking up in the kitchen!<end_of_turn>
```

In [14]:
def generate_prompt(data_point):
    # text from the dataset - we can specify anything we want
    prefix_text = 'Below is a an instruction that describes a particular task, Write a response that appropriately completes the given request. \n\n'

    if data_point['input']:
        # If there are some input available
        text = f"""<start_of_turn>user {prefix_text} {data_point['instruction']} Here is a input {data_point['input']} <end_of_turn>\n<start_of_turn>model {data_point['output']} <end_of_turn>"""
    else:
        # If there is no input available
        text = f"""<start_of_turn>user {prefix_text} {data_point['instruction']} <end_of_turn>\n<start_of_turn>model {data_point['output']} <end_of_turn>"""

    return text

In [15]:
text_column = []
for data_point in dataset:
    text = generate_prompt(data_point)
    text_column.append(text)

In [16]:
text_column[0]

'<start_of_turn>user Below is a an instruction that describes a particular task, Write a response that appropriately completes the given request. \n\n Create a function to calculate the sum of a sequence of integers. Here is a input [1, 2, 3, 4, 5] <end_of_turn>\n<start_of_turn>model # Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum <end_of_turn>'

In [17]:
# Above code can also be written as list comprehension

In [18]:
text_column = [generate_prompt(data_point) for data_point in dataset]

In [19]:
text_column[0]

'<start_of_turn>user Below is a an instruction that describes a particular task, Write a response that appropriately completes the given request. \n\n Create a function to calculate the sum of a sequence of integers. Here is a input [1, 2, 3, 4, 5] <end_of_turn>\n<start_of_turn>model # Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum <end_of_turn>'

In [20]:
# Add back to the dataset
dataset = dataset.add_column("prompt", text_column)

In [21]:
# Visualize the dataset
dataset

Dataset({
    features: ['input', 'text', 'output', 'instruction', 'prompt'],
    num_rows: 121959
})

In [22]:
df = dataset.to_pandas()
df.head(5)

Unnamed: 0,input,text,output,instruction,prompt
0,"[1, 2, 3, 4, 5]",Below is an instruction that describes a task....,# Python code\ndef sum_sequence(sequence):\n ...,Create a function to calculate the sum of a se...,<start_of_turn>user Below is a an instruction ...
1,"str1 = ""Hello ""\nstr2 = ""world""",Below is an instruction that describes a task....,"def add_strings(str1, str2):\n """"""This func...",Develop a function that will add two strings,<start_of_turn>user Below is a an instruction ...
2,,Below is an instruction that describes a task....,#include <map>\n#include <string>\n\nclass Gro...,Design a data structure in C++ to store inform...,<start_of_turn>user Below is a an instruction ...
3,"[3, 1, 4, 5, 9, 0]",Below is an instruction that describes a task....,def bubble_sort(arr):\n n = len(arr)\n \n ...,Implement a sorting algorithm to sort a given ...,<start_of_turn>user Below is a an instruction ...
4,Not applicable,Below is an instruction that describes a task....,import UIKit\n\nclass ExpenseViewController: U...,Design a Swift application for tracking expens...,<start_of_turn>user Below is a an instruction ...


In [23]:
for i in dataset:
    print(i)
    break

{'input': '[1, 2, 3, 4, 5]', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: Create a function to calculate the sum of a sequence of integers. ### Input: [1, 2, 3, 4, 5] ### Output: # Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum', 'output': '# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum', 'instruction': 'Create a function to calculate the sum of a sequence of integers.', 'prompt': '<start_of_turn>user Below is a an instruction that describes a particular task, Write a response that appropriately completes the given request. \n\n Create a function to calculate the sum of a sequence of integers. Here is a input [1, 2, 3, 4, 5] <end_of_turn>\n<start_of_turn>model # Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum <end_of_turn>'}


# Define Paramters for FineTuning

## 1. Bits and Bytes Parameters

In [24]:
# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base model
bnb_4bit_compute_dtype = 'float16'

# Quantized type (fp4 or nf4)
bnb_4bit_quant_type = 'nf4'

# Activate nested quantization for 4 bit base model (double quantization)
use_nested_quant = False

### 2. QLoRA Config

In [25]:
# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

### 3. Training Arguments Parameter 

In [61]:
# Output directory where the model prediction and checkpoint will be stored
output_dir = './results'

# Number of training epochs
# num_train_epochs = 2

# Enable fp16/bf16 training (set bf16 to true with A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias / LayerNorm weights
weight_decay = 0.001

# optimzer to use
optim = "paged_adamw_8bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (override num_train_epochs)
# max_steps = -1
max_steps = 200

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Save memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

save_strategy = "epoch"

# Log every X updates steps
logging_steps = 25

### 4. SFT Parematers

In [27]:
# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = None

# Load the entire model on the GPU 0
device_map = {"" : 0}

# Load the Model and Tokenizer

```
model_id = "google/gemma-7b-it"
model_id = "google/gemma-7b"
model_id = "google/gemma-2b-it"
model_id = "google/gemma-2b"
```

In [28]:
model_name = 'google/gemma-2b-it'

new_model_name = 'my-code-gemma-finetuned-it'

In [29]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

### bnb config

In [30]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit = use_4bit,
    bnb_4bit_quant_type = bnb_4bit_quant_type,
    bnb_4bit_compute_dtype = compute_dtype,
    bnb_4bit_use_double_quant = use_nested_quant
)

In [31]:
# Check GPU compatiblity with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major , _ = torch.cuda.get_device_capability()

    if major >= 8:
        print("=" * 80)
        print("Our system supports bfloat16: accelerate training with bf16 = True")
        print("=" * 80)
    

Our system supports bfloat16: accelerate training with bf16 = True


### Load the model

In [32]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config = bnb_config,
    device_map = device_map
)

model.config.use_cache = False
model.config.pretraining_tp = 1



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Load the Tokenizer

In [33]:
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code = True
)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_size = "right" # fix the weird overflow issue with fp16 training

### Test the downloaded model

In [34]:
def get_completion(query: str, model, tokenizer) -> str:
    device = 'cuda:0'

    prompt_template = """
        <start_of_turn>user
        Below is an instruction that describes a task. Write a response that appropriately completes the request.
        {query}
        <end_of_turn>\n
        <start_of_turn>model
    """

    prompt = prompt_template.format(query = query)

    encoded = tokenizer(
        prompt,
        return_tensors = "pt",
        add_special_tokens = True
    )

    model_inputs = encoded.to(device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens = 1000,
        do_sample = True,
        pad_token_id = tokenizer.eos_token_id
    )

    decoded = tokenizer.decode(
        generated_ids[0],
        skip_special_tokens = True
    )

    return decoded

In [35]:
query = "Write a python code for fibonacci series in python with recursion"
result = get_completion(query=query,
                       model = model,
                       tokenizer = tokenizer)

In [36]:
result

'\n        user\n        Below is an instruction that describes a task. Write a response that appropriately completes the request.\n        Write a python code for fibonacci series in python with recursion\n        \n\n        model\n    Sure, here is the code for the fibonacci series in Python with recursion:\n\n```python\ndef fibonacci(n):\n    if n == 0:\n        return 0\n    elif n == 1:\n        return 1\n    else:\n        return fibonacci(n-1) + fibonacci(n-2)\n\n\n# Print the first 10 numbers in the Fibonacci sequence\nfor i in range(10):\n    print(fibonacci(i))\n```\n\nThis code calculates the nth number in the Fibonacci sequence using recursion. It uses the base cases 0 and 1 to handle the starting numbers of the sequence. For all other values of n, it recursively calculates the previous two numbers (n-1 and n-2) and returns their sum.'

In [37]:
print(result)


        user
        Below is an instruction that describes a task. Write a response that appropriately completes the request.
        Write a python code for fibonacci series in python with recursion
        

        model
    Sure, here is the code for the fibonacci series in Python with recursion:

```python
def fibonacci(n):
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n-1) + fibonacci(n-2)


# Print the first 10 numbers in the Fibonacci sequence
for i in range(10):
    print(fibonacci(i))
```

This code calculates the nth number in the Fibonacci sequence using recursion. It uses the base cases 0 and 1 to handle the starting numbers of the sequence. For all other values of n, it recursively calculates the previous two numbers (n-1 and n-2) and returns their sum.


In [38]:
print(type(result))

<class 'str'>


# Tokenize the custom dataset as the model is ready

In [39]:
dataset

Dataset({
    features: ['input', 'text', 'output', 'instruction', 'prompt'],
    num_rows: 121959
})

In [40]:
dataset = dataset.shuffle(seed=1234) # Shuffle the dataset

dataset = dataset.map(lambda samples: tokenizer(samples['prompt']), batched = True)

In [41]:
dataset

Dataset({
    features: ['input', 'text', 'output', 'instruction', 'prompt', 'input_ids', 'attention_mask'],
    num_rows: 121959
})

In [42]:
for i in dataset:
    print(i)
    break

{'input': 'Not applicable', 'text': 'Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: Create a MySQL table for a Recipe database. The table should contain fields for id (integer), title (string), and instructions (text). ### Input: Not applicable ### Output: CREATE TABLE recipes (\n id INT NOT NULL AUTO_INCREMENT,\n title VARCHAR (255) NOT NULL,\n instructions TEXT,\n PRIMARY KEY (id)\n);', 'output': 'CREATE TABLE recipes (\n id INT NOT NULL AUTO_INCREMENT,\n title VARCHAR (255) NOT NULL,\n instructions TEXT,\n PRIMARY KEY (id)\n);', 'instruction': 'Create a MySQL table for a Recipe database. The table should contain fields for id (integer), title (string), and instructions (text).', 'prompt': '<start_of_turn>user Below is a an instruction that describes a particular task, Write a response that appropriately completes the given request. \n\n Create a MySQL table for a Recipe database. The table should contain fie

### Split the dataset into training and testing file

In [43]:
dataset = dataset.train_test_split(test_size = 0.2)

In [44]:
dataset

DatasetDict({
    train: Dataset({
        features: ['input', 'text', 'output', 'instruction', 'prompt', 'input_ids', 'attention_mask'],
        num_rows: 97567
    })
    test: Dataset({
        features: ['input', 'text', 'output', 'instruction', 'prompt', 'input_ids', 'attention_mask'],
        num_rows: 24392
    })
})

In [45]:
# for i in dataset['train']:
#     print(i)
#     break

In [46]:
train_data = dataset['train']
test_data = dataset['test']

In [47]:
train_data

Dataset({
    features: ['input', 'text', 'output', 'instruction', 'prompt', 'input_ids', 'attention_mask'],
    num_rows: 97567
})

# Load the LoRA config

Here comes the magic with peft! 
Let's load a PeftModel and specify that we are going to use low-rank adapters (LoRA) using get_peft_model utility function and then prepare_model_for_kbit_training method from PEFT.

In [48]:
model.gradient_checkpointing_enable()

model = prepare_model_for_kbit_training(model)

In [49]:
print(model)

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=16384, out_features=2048, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
 

### Get all the linear projection name

In [50]:
def find_all_liner_names(model):
    cls = bnb.nn.Linear4bit # if args.bits == 4 else (bnb.nn.Linear8bitLt if args.bits == 8 else torch.nn.Linear)
    
    lora_module_names = set()

    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')

            lora_module_names.add(names[0] if len(names) == 1 else names[-1])

        if 'lm_head' in lora_module_names:
            # Needed for 16 bit
            lora_module_names.remove('lm_head')

    return lora_module_names
    

In [51]:
proj_modules = find_all_liner_names(model)

print(proj_modules)

{'down_proj', 'v_proj', 'up_proj', 'k_proj', 'gate_proj', 'q_proj', 'o_proj'}


In [52]:
# Load the LoRA config

In [53]:
peft_config = LoraConfig(
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    r = lora_r,
    target_modules = proj_modules,
    bias = 'none',
    task_type = 'CAUSAL_LM'
)

In [54]:
# from peft import get_peft_model
model = get_peft_model(model, peft_config)

In [55]:
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 2048, padding_idx=0)
        (layers): ModuleList(
          (0-17): 18 x GemmaDecoderLayer(
            (self_attn): GemmaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_laye

In [56]:
trainable, total = model.get_nb_trainable_parameters()
print(f"Trainable: {trainable} | total: {total} | Percentage: {trainable/total*100:.4f}%")

Trainable: 78446592 | total: 2584619008 | Percentage: 3.0351%


# Set training parameter

In [62]:
training_arguments = transformers.TrainingArguments(
    output_dir = output_dir,
    # num_train_epochs = num_train_epochs,
    per_device_train_batch_size = per_device_train_batch_size,
    gradient_accumulation_steps = gradient_accumulation_steps,
    optim = optim,
    save_steps = save_steps,
    save_strategy= save_strategy,
    logging_steps = logging_steps,
    learning_rate = learning_rate,
    weight_decay = weight_decay,
    fp16 = fp16,
    bf16 = bf16,
    max_grad_norm = max_grad_norm,
    max_steps = max_steps,
    warmup_ratio = warmup_ratio,
    group_by_length = group_by_length,
    lr_scheduler_type = lr_scheduler_type,
    report_to = "tensorboard",

)

# Set SFT

In [63]:
trainer = SFTTrainer(
    model = model,
    train_dataset = train_data,
    eval_dataset = test_data,
    peft_config = peft_config,
    dataset_text_field = 'prompt',
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    args = training_arguments,
    data_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm = False),
    packing = packing,
)



Map:   0%|          | 0/97567 [00:00<?, ? examples/s]

Map:   0%|          | 0/24392 [00:00<?, ? examples/s]



# Train the model

In [64]:
model.config.use_cache = False

In [65]:
trainer.train()

Step,Training Loss
25,0.6049
50,0.6062
75,0.5937
100,0.606
125,0.6196
150,0.6095
175,0.5836
200,0.596




TrainOutput(global_step=200, training_loss=0.6024392223358155, metrics={'train_runtime': 374.3816, 'train_samples_per_second': 2.137, 'train_steps_per_second': 0.534, 'total_flos': 2011937953284096.0, 'train_loss': 0.6024392223358155, 'epoch': 0.01})

# Save the model

In [67]:
# Save the model
trainer.model.save_pretrained(new_model_name)



In [68]:
# Save the tokenizer
trainer.tokenizer.save_pretrained(new_model_name)

('my-code-gemma-finetuned-it/tokenizer_config.json',
 'my-code-gemma-finetuned-it/special_tokens_map.json',
 'my-code-gemma-finetuned-it/tokenizer.json')

# Check Tensorboard

In [69]:
%load_ext tensorboard
# %reload_ext tensorboard
%tensorboard --logdir results/runs

In [70]:
!kill 2054

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


/bin/bash: line 1: kill: (2054) - No such process


In [74]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 2048, padding_idx=0)
        (layers): ModuleList(
          (0-17): 18 x GemmaDecoderLayer(
            (self_attn): GemmaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_laye

# Merge the model

In [79]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage = True,
    return_dict = True,
    torch_dtype = torch.float16,
    device_map = device_map,
)


merged_model = PeftModel.from_pretrained(base_model, new_model_name)
merged_model = merged_model.merge_and_unload()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Save the Megred Model

In [80]:
merged_model.save_pretrained("merged_model", safe_serialization = True)


tokenizer.save_pretrained("merged_model")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# Push the model and tokenizer to the hugging face model hub
# !huggingface-cli login

merged_model.push_to_hub(new_model_name, use_temp_dir = False, check_pr=True)

tokenizer.push_to_hub(new_model_name,use_temp_dir = False, check_pr=True)

# Test the result

In [82]:
merged_model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear(in_features=16384, out_features=2048, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
      )
    )
    (norm): GemmaRMSNor

In [88]:
query = "Write a python code for fibonacci series with recursion and explain the code"

result = get_completion(query=query,
                       model = merged_model,
                       tokenizer = tokenizer)

In [89]:
print(result)


        user
        Below is an instruction that describes a task. Write a response that appropriately completes the request.
        Write a python code for fibonacci series with recursion and explain the code
        

        model
    def fibonacci(n) : 
        if n == 0 : 
            return 0
        elif n == 1 : 
            return 1
        else : 
            return(fibonacci(n-1) + fibonacci(n-2))


if __name__ == '__main__':
    n = 10
    print("Fibonacci Series (Recursive) with {}".format(n))
    print("Fibonacci for the given number is : {}".format(fibonacci(n))) 
model Fibonacci is a sequence of numbers where each number is the sum of two previous numbers. The first two numbers in the sequence are 0 and 1 and in subsequent numbers, the first two add to whatever the rest of the numbers are.

The recursion code is as follows:

base case of an empty list. return 0

When n = 1, return 1 (base case of a list of one).

Return (fibonacci(n-1) + fibonacci(n-2)) if n > 1.

Th