In [2]:
import os
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "offline"

## Install Dependencies

In [3]:
!pip install -q bitsandbytes
!pip install -q transformers accelerate datasets peft trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m465.5/465.5 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25h

## Import Libraries

In [4]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import SFTTrainer

⚙️  Running in WANDB offline mode


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


## Sign In to HuggingFace Hub

In [5]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Load Tokenizer + Model (4-bit) — QLoRA Style

In [6]:
base_model = "google/gemma-2b-it"

tokenizer = AutoTokenizer.from_pretrained(base_model)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_4bit=True,              # QLoRA quantization
    torch_dtype=torch.float16,
    device_map="auto"
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/34.2k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

## Define LoRA configuration

In [7]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

## Load Dataset

In [9]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="data.jsonl")

Generating train split: 0 examples [00:00, ? examples/s]

## Define the Prompt Template

In [10]:
def format_example(example):
    instruction = example["instruction"]
    input_text = example["input"]
    output_text = example["output"]

    if input_text.strip() == "":
        prompt = f"""Below is a customer query. Write a helpful, polite, and accurate response.

### Instruction:
{instruction}

### Response:
{output_text}
"""
    else:
        prompt = f"""Below is an instruction and input. Write a helpful response.

### Instruction:
{instruction}

### Input:
{input_text}

### Response:
{output_text}
"""
    return {"text": prompt}

dataset = dataset.map(format_example)


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

## Training Arguments + SFTTrainer

In [11]:
import trl
print(trl.__version__)

0.25.1


In [12]:
from trl import SFTConfig
from peft import LoraConfig

# 1. Define LoRA config
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# 2. Define SFT config (trainer args)
sft_config = SFTConfig(
    output_dir="gemma2b-support-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=50,
    save_steps=500,
    warmup_ratio=0.03,
    max_grad_norm=1.0,
    bf16=False,
    fp16=True,
    packing=False,
    dataset_text_field="text",
    report_to="none"   # <— explicitly no logging to wandb
    # Note: we do NOT include tokenizer here
    # Also do NOT include peft_config here
)

# 3. Initialize trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    args=sft_config,
    peft_config=lora_config,
    processing_class=tokenizer  # If latest version *still allows* tokenizer param
)

# If tokenizer param is rejected in your version, remove it:
# trainer = SFTTrainer(
#     model=model,
#     train_dataset=dataset["train"],
#     args=sft_config,
#     peft_config=lora_config
# )

Adding EOS to train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [13]:
# Ensure pad token exists
if tokenizer.pad_token_id is None:
    tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})

model.resize_token_embeddings(len(tokenizer))

# Align model config
model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id
if hasattr(model, "generation_config"):
    model.generation_config.pad_token_id = tokenizer.pad_token_id
    model.generation_config.eos_token_id = tokenizer.eos_token_id
else:
    model.generation_config = model.config  # fallback

In [14]:
trainer.train()


  return fn(*args, **kwargs)


Step,Training Loss
50,2.1097
100,1.2008
150,1.0756
200,0.9959
250,0.9558
300,0.9227
350,0.8841
400,0.8516
450,0.8415
500,0.8279


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  return fn(*args, **kwargs)
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


TrainOutput(global_step=939, training_loss=0.9308581265905541, metrics={'train_runtime': 4946.8121, 'train_samples_per_second': 3.032, 'train_steps_per_second': 0.19, 'total_flos': 3.626529202714214e+16, 'train_loss': 0.9308581265905541, 'entropy': 0.7845424632360409, 'num_tokens': 2513484.0, 'mean_token_accuracy': 0.7865063135887121, 'epoch': 3.0})

In [15]:
!zip -r gemma2b-support-lora-939.zip gemma2b-support-lora/checkpoint-939

  adding: gemma2b-support-lora/checkpoint-939/ (stored 0%)
  adding: gemma2b-support-lora/checkpoint-939/adapter_config.json (deflated 57%)
  adding: gemma2b-support-lora/checkpoint-939/chat_template.jinja (deflated 52%)
  adding: gemma2b-support-lora/checkpoint-939/rng_state.pth (deflated 26%)
  adding: gemma2b-support-lora/checkpoint-939/adapter_model.safetensors (deflated 8%)
  adding: gemma2b-support-lora/checkpoint-939/tokenizer_config.json (deflated 96%)
  adding: gemma2b-support-lora/checkpoint-939/scaler.pt (deflated 64%)
  adding: gemma2b-support-lora/checkpoint-939/tokenizer.model (deflated 51%)
  adding: gemma2b-support-lora/checkpoint-939/special_tokens_map.json (deflated 70%)
  adding: gemma2b-support-lora/checkpoint-939/trainer_state.json (deflated 73%)
  adding: gemma2b-support-lora/checkpoint-939/training_args.bin (deflated 53%)
  adding: gemma2b-support-lora/checkpoint-939/tokenizer.json (deflated 84%)
  adding: gemma2b-support-lora/checkpoint-939/scheduler.pt (deflate

In [16]:
!zip -r gema2b-support-lora.zip gemma2b-support-lora

  adding: gemma2b-support-lora/ (stored 0%)
  adding: gemma2b-support-lora/checkpoint-939/ (stored 0%)
  adding: gemma2b-support-lora/checkpoint-939/adapter_config.json (deflated 57%)
  adding: gemma2b-support-lora/checkpoint-939/chat_template.jinja (deflated 52%)
  adding: gemma2b-support-lora/checkpoint-939/rng_state.pth (deflated 26%)
  adding: gemma2b-support-lora/checkpoint-939/adapter_model.safetensors (deflated 8%)
  adding: gemma2b-support-lora/checkpoint-939/tokenizer_config.json (deflated 96%)
  adding: gemma2b-support-lora/checkpoint-939/scaler.pt (deflated 64%)
  adding: gemma2b-support-lora/checkpoint-939/tokenizer.model (deflated 51%)
  adding: gemma2b-support-lora/checkpoint-939/special_tokens_map.json (deflated 70%)
  adding: gemma2b-support-lora/checkpoint-939/trainer_state.json (deflated 73%)
  adding: gemma2b-support-lora/checkpoint-939/training_args.bin (deflated 53%)
  adding: gemma2b-support-lora/checkpoint-939/tokenizer.json (deflated 84%)
  adding: gemma2b-suppo