In [10]:
!pip install transformers transformers[torch] accelerate -U datasets



In [11]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [12]:
from huggingface_hub import login
from google.colab import userdata
api_key = userdata.get('HF_TOKEN')
login(api_key)

In [15]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# Loading model and tokenizer

In [13]:
model_id = "google/gemma-3-270m"

model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id, torch_dtype="float32")

# Ensure pad_token exists
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

print(f"Trainable parameters: {sum(p.numel() for p in model.parameters())}")

Trainable parameters: 268098176


# Load Dataset

In [16]:
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

# Just use train split for fine-tuning
train_data = dataset["train"]

README.md: 0.00B [00:00, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/733k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/6.36M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/657k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/4358 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/36718 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3760 [00:00<?, ? examples/s]

# Tokenization

In [17]:
def tokenize_function(examples):
    outputs = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )
    outputs["labels"] = outputs["input_ids"].copy()  # shift labels = input_ids
    return outputs

tokenized_train = train_data.map(tokenize_function, batched=True, remove_columns=["text"])

Map:   0%|          | 0/36718 [00:00<?, ? examples/s]

# Training setup

In [19]:
training_args = TrainingArguments(
    output_dir="./finetuned_gemma",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,  # helps with memory
    # evaluation_strategy="no",       # turn to "epoch" if you add validation
    save_strategy="epoch",
    learning_rate=5e-5,
    num_train_epochs=1,  # for demo, increase if needed
    logging_steps=50,
    fp16=torch.cuda.is_available(), # use FP16 if GPU supports it
    report_to="none",               # disable WandB logging
)

In [20]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
)

In [21]:
trainer.train()

It is strongly recommended to train Gemma3 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


Step,Training Loss
50,3.3051
100,1.2283
150,1.2696
200,1.2873
250,1.1915
300,1.2438
350,1.3615
400,1.2375
450,1.2194
500,1.1727


TrainOutput(global_step=2295, training_loss=1.2310049202447364, metrics={'train_runtime': 1386.9463, 'train_samples_per_second': 26.474, 'train_steps_per_second': 1.655, 'total_flos': 2829135863414784.0, 'train_loss': 1.2310049202447364, 'epoch': 1.0})

# Saving the model

In [22]:
output_dir = "./finetuned_gemma"

In [23]:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('./finetuned_gemma/tokenizer_config.json',
 './finetuned_gemma/special_tokens_map.json',
 './finetuned_gemma/tokenizer.model',
 './finetuned_gemma/added_tokens.json',
 './finetuned_gemma/tokenizer.json')

# Push to HuggingFace Hub

In [None]:
from huggingface_hub import create_repo, HfApi, upload_folder

In [28]:
# Create repo (only first time)
repo_id = "Sharath1036/gemma3-270m-finetuned"
create_repo(repo_id, repo_type="model", exist_ok=True)

In [30]:
api = HfApi()

# Create the repository if it doesn't exist
create_repo(repo_id, repo_type="model", exist_ok=True)

# Now upload the folder
upload_folder(
    repo_id=repo_id,
    folder_path=output_dir,
    commit_message="Upload fine-tuned Gemma 3 270M model"
)

Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...tent/finetuned_gemma/tokenizer.json:  25%|##4       | 8.28MB / 33.4MB            

  ...ent/finetuned_gemma/tokenizer.model: 100%|##########| 4.69MB / 4.69MB            

  ...gemma/checkpoint-2295/rng_state.pth:  77%|#######7  | 11.3kB / 14.6kB            

  ..._gemma/checkpoint-2295/optimizer.pt:   0%|          |  529kB / 2.14GB            

  ...a/checkpoint-2295/model.safetensors:   0%|          |  553kB / 1.07GB            

  ...t/finetuned_gemma/model.safetensors:   0%|          |  553kB / 1.07GB            

  ...ned_gemma/checkpoint-2295/scaler.pt:   6%|5         |  79.0B / 1.38kB            

  ..._gemma/checkpoint-2295/scheduler.pt:   6%|5         |  84.0B / 1.47kB            

  ...a/checkpoint-2295/training_args.bin:   6%|5         |   331B / 5.78kB            

CommitInfo(commit_url='https://huggingface.co/Sharath1036/gemma3-270m-finetuned/commit/2498b0c6090766c983165006c578a1ab5915d259', commit_message='Upload fine-tuned Gemma 3 270M model', commit_description='', oid='2498b0c6090766c983165006c578a1ab5915d259', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Sharath1036/gemma3-270m-finetuned', endpoint='https://huggingface.co', repo_type='model', repo_id='Sharath1036/gemma3-270m-finetuned'), pr_revision=None, pr_num=None)

# Test the pushed model

In [31]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("Sharath1036/gemma3-270m-finetuned")
tokenizer = AutoTokenizer.from_pretrained("Sharath1036/gemma3-270m-finetuned")

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.07G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/128 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

In [45]:
prompt = "Narendra Modi is an Indian politician"

In [46]:
inputs = tokenizer(prompt, return_tensors="pt")

In [53]:
outputs = model.generate(
    **inputs,
    max_new_tokens=500,   # length of generation
    do_sample=True,      # sampling makes responses more natural
    top_p=0.9,           # nucleus sampling
    temperature=0.2      # creativity
)

In [56]:
print("ðŸ”¹ Prompt:", prompt)
print("ðŸ”¹ Output:", tokenizer.decode(outputs[0], skip_special_tokens=True))

ðŸ”¹ Prompt: Narendra Modi is an Indian politician
ðŸ”¹ Output: Narendra Modi is an Indian politician and former Prime Minister of India . He is the President of India and the Prime Minister of India . He is the first Indian to be elected to the Indian Parliament . He is the first Indian to be elected to the Indian Parliament . 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

