In [1]:
# =======================================================================
# 1. SETUP AND INSTALLATIONS
# This section installs all the necessary libraries.
# =======================================================================
print("Step 1/9: Installing necessary libraries...")
import os

# The single command that installs Unsloth and all its correct dependencies.
# This handles bitsandbytes, triton, transformers, peft, etc. automatically.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

print("Libraries installed successfully. Please restart the runtime.")

Step 1/8: Installing necessary libraries...
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-joi1l3f7/unsloth_9fb2e7574bca4183973abb53346d684d
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-joi1l3f7/unsloth_9fb2e7574bca4183973abb53346d684d
  Resolved https://github.com/unslothai/unsloth.git to commit 37e577a91386cb5b4a7b818a1418b66beef17296
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.5.11 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.6.1-py3-none-any.whl.metadata (8.1 kB)
Collecting tyro (from unsloth@

In [2]:
# =======================================================================
# 2. LOGINS AND ENVIRONMENT SETUP
# Handles Hugging Face and WandB logins for model saving and tracking.
# =======================================================================
print("\nStep 2/9: Logging into services...")

from huggingface_hub import login

try:
    # Set your Hugging Face token directly
    login(token=hf_token)
    
    # If you don't want to use wandb, comment out these lines
    import wandb

    wandb.login(key=wb_token)
    
    # Initialize a WandB run
    run = wandb.init(
        project='Gemma-3-Emotion-Sensitive-Tutor',
        job_type="training",
        anonymous="allow"
    )
    
    print("Logged into Hugging Face and WandB successfully.")
except Exception as e:
    print(f"Could not log in to services. Will proceed without them. Error: {e}")
    print("WARNING: Model saving to Hub and WandB tracking will be disabled.")


Step 2/8: Logging into services...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mubaidiftikhar33[0m ([33mubaidiftikhar33-hitec-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Logged into Hugging Face and WandB successfully.


In [3]:
# =======================================================================
# 3. LOAD THE BASE MODEL AND TOKENIZER
# We use Unsloth's FastModel for memory efficiency and speed.
# =======================================================================
print("\nStep 3/9: Loading the base Gemma-3 model and tokenizer...")
from unsloth import FastModel
import torch

model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-4b-it",
    max_seq_length=2048,  # Suitable for conversational context
    load_in_4bit=True,
    token=hf_token, # Use token for gated models
)
print("Base model and tokenizer loaded.")





Step 3/8: Loading the base Gemma-3 model and tokenizer...
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-06-19 11:47:48.356672: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750333668.585875      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750333668.651039      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.6.2: Fast Gemma3 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/4.56G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/70.0 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

Base model and tokenizer loaded.


In [4]:
# =======================================================================
# 4. DEFINE PROMPT FORMAT AND PREPARE THE DATASET
# This is a critical step where we format our custom data.
# =======================================================================
print("\nStep 4/9: Defining prompt format and processing the dataset...")

# We define a prompt template that guides the model to act as an empathetic tutor.
# The `emotion` is explicitly included as input context.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. Your role is to be an empathetic and encouraging tutor for a child.

### Instruction:
{}

### Input:
The child seems to be feeling: {}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token  # End-of-sentence token is crucial for training

# The function that will transform our dataset examples into the prompt format.
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    emotions = examples["emotion"]
    responses = examples["response"]
    texts = []
    for instruction, emotion, response in zip(instructions, emotions, responses):
        text = train_prompt_style.format(instruction, emotion, response) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

# Load our custom dataset from the JSONL file.
from datasets import load_dataset
dataset = load_dataset("json", data_files="/kaggle/input/tutor-dataset/tutor_dataset.jsonl", split="train")

# Apply the formatting function
dataset = dataset.map(formatting_prompts_func, batched=True)
print("Dataset successfully loaded and formatted.")
print("Example of a formatted training sample:\n", dataset[0]['text'])




Step 4/8: Defining prompt format and processing the dataset...


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Dataset successfully loaded and formatted.
Example of a formatted training sample:
 Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. Your role is to be an empathetic and encouraging tutor for a child.

### Instruction:
I'll never get this fraction homework done! It's too hard. I hate it.

### Input:
The child seems to be feeling: Frustration

### Response:
It's totally okay to feel frustrated when something is tricky. Fractions can be like that at first. Let's take a deep breath. How about we look at just one problem together? Sometimes just starting with one small piece makes the whole thing feel less impossible.<end_of_turn>


In [5]:
# =======================================================================
# 5. CONFIGURE THE MODEL FOR FINE-TUNING (PEFT with LoRA)
# We don't train the whole model, just small "adapter" layers.
# =======================================================================
print("\nStep 5/9: Configuring the model for PEFT/LoRA fine-tuning...")

model = FastModel.get_peft_model(
    model,
    r=16,                # LoRA rank. Higher can be more accurate but slower. 16 is a good balance.
    lora_alpha=32,       # LoRA alpha. A good rule of thumb is 2 * r.
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
print("PEFT configuration applied.")




Step 5/8: Configuring the model for PEFT/LoRA fine-tuning...
Unsloth: Making `base_model.model.vision_tower.vision_model` require gradients
PEFT configuration applied.


In [6]:
# =======================================================================
# 6. SETUP DATA COLLATOR AND TRAINING
# Prepare collator that masks out prompts and only computes loss on completions.
# =======================================================================

print("\nStep 6/9: Preparing collator and training configuration...")

from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import TrainingArguments
from datasets import load_dataset
import torch

# Fix tokenizer if it's wrapped in a processor (e.g., Gemma3Processor)
try:
    from transformers import PreTrainedTokenizerBase
    if not isinstance(tokenizer, PreTrainedTokenizerBase):
        tokenizer = tokenizer.tokenizer
except:
    tokenizer = tokenizer.tokenizer

# Load your dataset (example: alpaca)
dataset = load_dataset("yahma/alpaca-cleaned", split="train[:500]")  # sample size for testing

# Format prompt+completion
def formatting_prompts(example):
    return {
        "text": f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n{example['output']}"
    }

dataset = dataset.map(formatting_prompts)

# Tokenize dataset
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding=False, max_length=2048)

tokenized_dataset = dataset.map(tokenize)

# Define collator (important: this tells model to ignore loss before "### Response:")
response_template = "### Response:\n"
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

# Training arguments
args = TrainingArguments(
    output_dir="gemma-3b-finetuned",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=50,  # adjust based on your goal
    learning_rate=2e-5,
    fp16=True,
    logging_steps=5,
    save_strategy="no",
    report_to=None,
)

# Create trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset,
    args=args,
    data_collator=collator,
    packing=False,
    dataset_text_field="text",
)

print("Trainer ready.")


Step 6/8: Preparing collator and training configuration...


README.md:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

alpaca_data_cleaned.json:   0%|          | 0.00/44.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/51760 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Unsloth: Switching to float32 training since model cannot work with float16
Trainer ready.


In [7]:
# =======================================================================
# 7. SET UP AND RUN THE SFTTRAINER
# This is where the actual training happens.
# =======================================================================
print("\nStep 6/9: Setting up and starting the training process...")
# We need to import the correct data collator
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from transformers import TrainingArguments

# Define the response template. This MUST match the start of the response section
# in your prompt format from Step 4.
response_template = "### Response:"

# Instantiate the data collator
# This tells the trainer to only calculate loss on the tokens that come AFTER the response_template
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
    mlm=False, # We are doing causal language modeling, not masked language modeling
)

# NOW, we define the trainer, passing in our custom collator
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # We still need this to identify the text column
    data_collator=collator,     # <-- THE KEY FIX: Pass in the custom collator
    max_seq_length=2048,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=10,
        # REMEMBER THE CHANGE FROM BEFORE! Use more epochs for a tiny dataset.
        num_train_epochs=50,
        learning_rate=2e-4,
        logging_steps=5,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="wandb",
    ),
)

# Clear CUDA cache before training
import torch
torch.cuda.empty_cache()

# Start training!
trainer_stats = trainer.train()
print("Training finished.")


Step 6/8: Setting up and starting the training process...
Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"]:   0%|          | 0/500 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 500 | Num Epochs = 50 | Total steps = 1,550
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 38,497,792/4,000,000,000 (0.96% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,1.6036
10,1.1761
15,1.0517
20,1.0223
25,1.006
30,1.0467
35,0.921
40,0.8257
45,0.8791
50,0.7553


KeyboardInterrupt: 

In [8]:
# =======================================================================
# 8. TEST THE FINE-TUNED MODEL
# Let's see if the model learned to be an empathetic tutor.
# =======================================================================
print("\nStep 7/8: Testing the fine-tuned model with sample prompts...")

# We create a slightly different prompt for inference (without the response part).
inference_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. Your role is to be an empathetic and encouraging tutor for a child.

### Instruction:
{}

### Input:
The child seems to be feeling: {}

### Response:
"""

FastModel.for_inference(model)  # Prepare the model for fast inference

# Test case 1: Frustration with math
instruction = "I can't do this, I'm too dumb for division!"
emotion = "Frustration / low self-esteem"
inputs = tokenizer([inference_prompt_style.format(instruction, emotion)], return_tensors="pt").to("cuda")

outputs = model.generate(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=512, use_cache=True)
response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print("--- Test Case 1: Frustration ---")
print(response_text)

# Test case 2: Boredom with history
instruction = "Why do I have to learn about old boring stuff?"
emotion = "Boredom"
inputs = tokenizer([inference_prompt_style.format(instruction, emotion)], return_tensors="pt").to("cuda")

outputs = model.generate(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask, max_new_tokens=512, use_cache=True)
response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print("\n--- Test Case 2: Boredom ---")
print(response_text)



Step 7/8: Testing the fine-tuned model with sample prompts...
--- Test Case 1: Frustration ---
Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. Your role is to be an empathetic and encouraging tutor for a child.

### Instruction:
I can't do this, I'm too dumb for division!

### Input:
The child seems to be feeling: Frustration / low self-esteem

### Response:
Oh, honey, I understand that you're feeling frustrated and like you're not good at division. But please don't say that - you're not dumb, and it's okay to struggle with some things. Division can be tricky, and it's not something that everyone learns at the same pace. The important thing is that you keep trying, and that you don't give up on yourself. Remember, I'm here to help you, and we can work through this together, step by step, until you understand it. Just like how we learn to walk, we also learn through practice a

In [11]:
# =======================================================================
# 9. SAVE THE FINAL MODEL
# Save the trained model adapters locally and push to Hugging Face Hub.
# =======================================================================
print("\nStep 9/9: Saving the model...")

# Choose a name for your model on the Hub
new_model_name_online = "Gemma-3-Emotion-Sensitive-Tutor-v1" 
# And a local directory name
new_model_name_local = "Gemma-3-Tutor-local"

# Save LoRA adapters locally
model.save_pretrained(new_model_name_local)
tokenizer.save_pretrained(new_model_name_local)
print(f"Model saved locally to '{new_model_name_local}'")

# Push to Hugging Face Hub (if logged in)
try:
    model.push_to_hub(new_model_name_online, token=hf_token)
    tokenizer.push_to_hub(new_model_name_online, token=hf_token)
    print(f"Model pushed to Hugging Face Hub as '{new_model_name_online}'")
except Exception as e:
    print(f"Could not push to Hub. Please ensure you are logged in and the repo doesn't exist. Error: {e}")

if 'run' in locals() and run is not None:
    wandb.finish()

print("\n🎉 All steps completed! 🎉")


Step 9/9: Saving the model...
Model saved locally to 'Gemma-3-Tutor-local'


No files have been modified since last commit. Skipping to prevent empty commit.


Saved model to https://huggingface.co/Gemma-3-Emotion-Sensitive-Tutor-v1


No files have been modified since last commit. Skipping to prevent empty commit.


Model pushed to Hugging Face Hub as 'Gemma-3-Emotion-Sensitive-Tutor-v1'

🎉 All steps completed! 🎉
