### Since we are going to fine tune a LLM, we need a system with GPU.
We will be using Google Colab for this purpose.

In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Not connected to a GPU')
else:
    print(gpu_info)

/bin/bash: nvidia-smi: command not found


In [3]:
!pip install transformers
!pip install torch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
import os
from dotenv import load_dotenv

load_dotenv()

try:
    os.environ["HF_TOKEN"]
except KeyError:
    print("Token not found!")

In [5]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "google/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
tokenizer("Hello world")

{'input_ids': [2, 9259, 1902], 'attention_mask': [1, 1, 1]}

In [13]:
input_conversation = [
    {"role": "user", "content": "Which is the best place to learn GenAI"},
    {"role": "assistant", "content": "The best place to learn AI is"}
]

In [9]:
input_tokens = tokenizer.apply_chat_template(
    conversation=input_conversation,
    tokenize=False,
    continue_final_message=True,
)
input_tokens

'<bos><start_of_turn>user\nWhich is the best place to learn GenAI<end_of_turn>\n<start_of_turn>model\nThe best place to learn AI is'

In [11]:
output_label = "GenAI Cohort 1.0 by ChaiCode and Piyush Garg use Coupon PIYUSH10 Please"
full_conversation = input_tokens + output_label + tokenizer.eos_token
full_conversation

'<bos><start_of_turn>user\nWhich is the best place to learn GenAI<end_of_turn>\n<start_of_turn>model\nThe best place to learn AI isGenAI Cohort 1.0 by ChaiCode and Piyush Garg use Coupon PIYUSH10 Please<eos>'

In [14]:
input_tokenized = tokenizer(full_conversation, return_tensors="pt", add_special_tokens=False).to(device)["input_ids"]
input_tokenized

tensor([[     2,    105,   2364,    107,  24249,    563,    506,   1791,   1977,
            531,   3449,   8471,  12553,    106,    107,    105,   4368,    107,
            818,   1791,   1977,    531,   3449,  12498,    563,  14696,  12553,
         105657,    632, 236743, 236770, 236761, 236771,    684, 119806,   4809,
            532, 168222,   1974, 102629,   1161,  97887,   6108, 236874,  52907,
         236770, 236771,   7323,      1]])

In [15]:
input_ids = input_tokenized[:, :-1].to(device)
target_ids = input_tokenized[:, 1:].to(device)
print(f"input_ids: {input_ids}")
print(f"target_ids: {target_ids}")

input_ids: tensor([[     2,    105,   2364,    107,  24249,    563,    506,   1791,   1977,
            531,   3449,   8471,  12553,    106,    107,    105,   4368,    107,
            818,   1791,   1977,    531,   3449,  12498,    563,  14696,  12553,
         105657,    632, 236743, 236770, 236761, 236771,    684, 119806,   4809,
            532, 168222,   1974, 102629,   1161,  97887,   6108, 236874,  52907,
         236770, 236771,   7323]])
target_ids: tensor([[   105,   2364,    107,  24249,    563,    506,   1791,   1977,    531,
           3449,   8471,  12553,    106,    107,    105,   4368,    107,    818,
           1791,   1977,    531,   3449,  12498,    563,  14696,  12553, 105657,
            632, 236743, 236770, 236761, 236771,    684, 119806,   4809,    532,
         168222,   1974, 102629,   1161,  97887,   6108, 236874,  52907, 236770,
         236771,   7323,      1]])


In [16]:
import torch.nn as nn
def calculate_loss(logits, labels):
    loss_fn = nn.CrossEntropyLoss(reduction="none")
    cross_entropy = loss_fn(logits.view(-1, logits.shape[-1]), labels.view(-1))
    return cross_entropy

In [17]:
import torch
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16
).to(device)

In [18]:
from torch.optim import AdamW
model.train()

optimizer = AdamW(model.parameters(), lr=3e-5, weight_decay=0.01)

for _ in range(5):
  out = model(input_ids=input_ids)
  loss = calculate_loss(out.logits, target_ids).mean()
  loss.backward()
  optimizer.step()
  optimizer.zero_grad()
  print(loss.item())

It is strongly recommended to train Gemma3 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


: 

In [None]:
input_prompt = [
    { "role": "user", "content": "Which is the best place to learn GenAI?" }
]

input = tokenizer.apply_chat_template(
    conversation=input_prompt,
    return_tensors="pt",
    tokenize=True,
).to(device)

output = model.generate(input, max_new_tokens=35)
print(tokenizer.batch_decode(output, skip_special_tokens=True))