# Multi-Script Emotion Classification using Gemma-3

## Importing Modules

In [1]:
!pip install -U trl
!pip install -U bitsandbytes
!pip install -U wandb

Collecting trl
  Downloading trl-0.19.0-py3-none-any.whl.metadata (10 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets>=3.0.0->trl)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate>=1.4.0->trl)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate>=1.4.0->trl)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate>=1.4.0->trl)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=2.0.0->accelerate>=1.4.0->trl)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from 

In [119]:
from datasets import Dataset
import pandas as pd
import numpy as np
import kagglehub
import torch
import os

## Loading the Gemma Model

In [120]:
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("CUDA cache emptied.")
else:
    print("CUDA is not available, no cache to empty.")

CUDA cache emptied.


In [130]:
from transformers import AutoTokenizer
from transformers.models.gemma3 import Gemma3ForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"

GEMMA_PATH = kagglehub.model_download("google/gemma-3/transformers/gemma-3-1b-it")

tokenizer = AutoTokenizer.from_pretrained(
    GEMMA_PATH,
    max_seq_length=1024, #Ensure matches with trainer
)
model = Gemma3ForCausalLM.from_pretrained(
    GEMMA_PATH,
    attn_implementation="eager",
    low_cpu_mem_usage=True,
    device_map="auto"
)
print(model)

Gemma3ForCausalLM(
  (model): Gemma3TextModel(
    (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 1152, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x Gemma3DecoderLayer(
        (self_attn): Gemma3Attention(
          (q_proj): Linear(in_features=1152, out_features=1024, bias=False)
          (k_proj): Linear(in_features=1152, out_features=256, bias=False)
          (v_proj): Linear(in_features=1152, out_features=256, bias=False)
          (o_proj): Linear(in_features=1024, out_features=1152, bias=False)
          (q_norm): Gemma3RMSNorm((256,), eps=1e-06)
          (k_norm): Gemma3RMSNorm((256,), eps=1e-06)
        )
        (mlp): Gemma3MLP(
          (gate_proj): Linear(in_features=1152, out_features=6912, bias=False)
          (up_proj): Linear(in_features=1152, out_features=6912, bias=False)
          (down_proj): Linear(in_features=6912, out_features=1152, bias=False)
          (act_fn): PytorchGELUTanh()
        )
        (input_layernorm): Gemma3RMSNorm((11

In [122]:
def count_trainable_parameters(model):
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    return params

count_trainable_parameters(model)

999885952

## Inference using Gemma

In [123]:
prompt = """<start_of_turn>user
Classify sentence to one (and only one) of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer. Even if you find multiple emotions, choose the best one! 
Sentence: "I'm very scared for my life, I don't know what to do next!" <end_of_turn>
<start_of_turn>model"""

input_ids = tokenizer(text=prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**input_ids, max_new_tokens=4)
text = tokenizer.batch_decode(
    outputs,
    skip_special_tokens=False,
    clean_up_tokenization_spaces=False
)
print(text[0])

<bos><start_of_turn>user
Classify sentence to one (and only one) of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer. Even if you find multiple emotions, choose the best one! 
Sentence: "I'm very scared for my life, I don't know what to do next!" <end_of_turn>
<start_of_turn>model
Fear<end_of_turn>



## Preprocessing Dataset

In [124]:
train_data = pd.read_csv('/kaggle/input/emoti-code-multi-script-emotion-classification-rel/competition_train.csv')
val_data = pd.read_csv('/kaggle/input/emoti-code-multi-script-emotion-classification-rel/competition_val.csv')

y_true_val = val_data['emotion']

In [126]:
examples = {
    "Kashmiri": train_data[train_data["language"]=="Kashmiri"].sample(3),
    "Manipuri": train_data[train_data["language"]=="Manipuri"].sample(3),
    "Santali": train_data[train_data["language"]=="Santali"].sample(3)
}

kashmiri_prompt = ""
for i in range(3):
    x = examples["Kashmiri"].iloc[i]
    kashmiri_prompt += f"""<start_of_turn>user
    Classify sentence to exactly one of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer. 
    Sentence: "{x["Sentence"]}" 
    <end_of_turn>
    <start_of_turn>model
    {x["emotion"]}
    <end_of_turn>\n"""

manipuri_prompt = ""
for i in range(3):
    x = examples["Manipuri"].iloc[i]
    manipuri_prompt += f"""<start_of_turn>user
    Classify sentence to exactly one of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer. 
    Sentence: "{x["Sentence"]}" 
    <end_of_turn>
    <start_of_turn>model
    {x["emotion"]}
    <end_of_turn>\n"""

santali_prompt = ""
for i in range(3):
    x = examples["Santali"].iloc[i]
    santali_prompt += f"""<start_of_turn>user
    Classify sentence to exactly one of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer. 
    Sentence: "{x["Sentence"]}" 
    <end_of_turn>
    <start_of_turn>model
    {x["emotion"]}
    <end_of_turn>\n"""

In [127]:
def generate_train_prompt(x):
    return f"""<start_of_turn>user
    Classify sentence to exactly one of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer. 
    Sentence: "{x["Sentence"]}" 
    <end_of_turn>
    <start_of_turn>model
    {x["emotion"]}
    <end_of_turn>"""

def generate_val_prompt(x):
    if x["language"] == "Kashmiri":
        prompt = kashmiri_prompt
    if x["language"] == "Santali":
        prompt = santali_prompt
    if x["language"] == "Manipuri":
        prompt = manipuri_prompt
    return prompt+f"""<start_of_turn>user
    You are an expert in {x["language"]} language. Based on your understanding of the "meaning" of the given sentence, classify this sentence to exactly one of these emotions: ["disgust","anger","sad","happy","fear","surprise"]. Just give one word answer, focus on the actual intention of the sentence and classify it properly. 
    Sentence: "{x["Sentence"]}" 
    <end_of_turn>
    <start_of_turn>model
    """

In [128]:
train_data = pd.DataFrame(train_data.apply(generate_train_prompt,axis=1),columns=["text"])
val_data_for_scoring = pd.DataFrame(val_data.apply(generate_val_prompt,axis=1),columns=["text"])
val_data = pd.DataFrame(val_data.apply(generate_train_prompt,axis=1),columns=["text"])[:400]

X_train = Dataset.from_pandas(train_data)
X_eval = Dataset.from_pandas(val_data)

In [129]:
tokenizer(val_data_for_scoring.iloc[4]['text'],return_tensors="pt")['input_ids'].shape

torch.Size([1, 492])

## Processing and Evaluating Model Output

In [131]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def evaluate(y_true,y_pred):
    label_mapping = {"disgust":0,"anger":1,"sad":2,"happy":3,"fear":4,"surprise":5}
    
    y_true = np.array([label_mapping[label] for label in y_true])
    y_pred = np.array([label_mapping[label] for label in y_pred])
    
    accuracy = accuracy_score(y_true, y_pred)
    print("Overall Accuracy: ", accuracy)

    # Label wise accuracy
    unique_labels = np.unique(y_true)
    for label in unique_labels:
        label_mask = y_true == label
        label_accuracy = accuracy_score(y_true[label_mask], y_pred[label_mask])
        print("Accuracy for label ", label, ": ", label_accuracy)
    
    class_report = classification_report(y_true, y_pred, target_names=label_mapping.keys())
    print('\nClassification Report:\n', class_report)
    
    conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1, 2, 3, 4, 5])
    print('\nConfusion Matrix:\n', conf_matrix)

In [132]:
from tqdm import tqdm

max_new_tokens = 4
temperature = 0.2

def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test)), desc="Predicting Sentiments"):
        prompt = X_test.iloc[i]["text"]
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **input_ids, 
            max_new_tokens=max_new_tokens, 
            do_sample=True,
            temperature=temperature
        )
        result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
        result = result.split("model")[4]
        if "disgust" in result:
            y_pred.append("disgust")
        elif "anger" in result:
            y_pred.append("anger")
        elif "sad" in result:
            y_pred.append("sad")
        elif "happy" in result:
            y_pred.append("happy")
        elif "fear" in result:
            y_pred.append("fear")
        elif "surprise" in result:
            y_pred.append("surprise")
        else:
            y_pred.append("none")

    return y_pred

def predict_for_submission(X_test, model, tokenizer):
    predictions = []
    for i in tqdm(range(len(X_test)), desc="Predicting Sentiments for Submission"):
        entry_id = X_test.iloc[i]["id"]
        prompt = X_test.iloc[i]["text"]
        input_ids = tokenizer(prompt, return_tensors="pt").to(device)
        model.eval()
        with torch.no_grad():  
            outputs = model.generate(
                **input_ids,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature
            )
        result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
        result = result.split("model")[4]

        predicted_emotion = "happy"
        if "disgust" in result:
            predicted_emotion = "disgust"
        elif "anger" in result:
            predicted_emotion = "anger"
        elif "sad" in result:
            predicted_emotion = "sad"
        elif "happy" in result:
            predicted_emotion = "happy"
        elif "fear" in result:
            predicted_emotion = "fear"
        elif "surprise" in result:
            predicted_emotion = "surprise"

        predictions.append({"id": entry_id, "emotion": predicted_emotion})

    submission_df = pd.DataFrame(predictions)
    return submission_df

In [151]:
y_pred_val = predict(val_data_for_scoring[:100], model, tokenizer)
evaluate(y_true_val[:100],y_pred_val)

Predicting Sentiments: 100%|██████████| 100/100 [01:12<00:00,  1.38it/s]

Overall Accuracy:  0.36
Accuracy for label  0 :  0.3
Accuracy for label  1 :  0.2631578947368421
Accuracy for label  2 :  0.15789473684210525
Accuracy for label  3 :  0.5294117647058824
Accuracy for label  4 :  0.7
Accuracy for label  5 :  0.13333333333333333

Classification Report:
               precision    recall  f1-score   support

     disgust       0.10      0.30      0.15        10
       anger       0.38      0.26      0.31        19
         sad       0.43      0.16      0.23        19
       happy       0.56      0.53      0.55        17
        fear       0.48      0.70      0.57        20
    surprise       0.50      0.13      0.21        15

    accuracy                           0.36       100
   macro avg       0.41      0.35      0.34       100
weighted avg       0.43      0.36      0.36       100


Confusion Matrix:
 [[ 3  3  2  0  1  1]
 [ 7  5  2  1  4  0]
 [ 7  3  3  2  4  0]
 [ 5  0  0  9  2  1]
 [ 4  0  0  2 14  0]
 [ 5  2  0  2  4  2]]





## Finetuning the model

In [145]:
from peft import LoraConfig, PeftConfig, PeftModel
from kaggle_secrets import UserSecretsClient
from trl import SFTTrainer, SFTConfig
import bitsandbytes as bnb
import wandb

user_secrets = UserSecretsClient()
api_key = user_secrets.get_secret("WANDB_API_KEY")

peft_config = LoraConfig(
    lora_alpha=32,
    lora_dropout=0.05,
    r=2,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj", "down_proj"],
)

wandb.login(key=api_key)

training_arguments = SFTConfig(
    output_dir="checkpoints_run_5",
    num_train_epochs=1,
    gradient_checkpointing=True, 
    gradient_checkpointing_kwargs={"use_reentrant": False},
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit", 
    save_steps=15,
    logging_steps=15,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    max_grad_norm=0.3, 
    warmup_ratio=0.03, 
    lr_scheduler_type="constant",
    report_to="wandb",
    max_seq_length=1024,
    dataset_kwargs={
        "add_special_tokens": False,
        "append_concat_token": True, 
    },
    label_names=["labels"],
    average_tokens_across_devices=False,
    load_best_model_at_end=False,
    eval_strategy="steps",
    per_device_eval_batch_size=2,
    eval_steps=15,
    eval_accumulation_steps=1,
)

wandb.init(
    project="dlp_nppe1_gemma_3",
    entity="architkohli-msit",
    name="gemma-1b-lora-run-8",
    config=training_arguments.to_dict()
)
wandb.config.update(peft_config.to_dict())

model.config.use_cache = False
model.config.pretraining_tp = 1

trainer = SFTTrainer(
    model=model,
    train_dataset=X_train,
    eval_dataset=X_eval,
    peft_config=peft_config,
    processing_class=tokenizer,
    args=training_arguments
)



0,1
eval/loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁
eval/mean_token_accuracy,▁▅▆▆▇▇▇▇▇███████
eval/num_tokens,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
eval/runtime,▁▅▆█▆▅▆▄▄▆▆▄▅▆█▇
eval/samples_per_second,█▄▃▁▂▄▃▅▅▃▃▅▄▃▁▂
eval/steps_per_second,█▄▃▁▂▄▃▅▅▃▃▅▄▃▁▂
train/epoch,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇████
train/grad_norm,█▄▃▃▂▁▂▁▁▂▁▂▁▁▁▂
train/learning_rate,▁████████▇▇▇▇▇▆▆

0,1
eval/loss,1.15109
eval/mean_token_accuracy,0.71123
eval/num_tokens,337420.0
eval/runtime,32.2232
eval/samples_per_second,12.413
eval/steps_per_second,6.207
train/epoch,0.26756
train/global_step,240.0
train/grad_norm,1.30033
train/learning_rate,0.00017


Adding EOS to train dataset:   0%|          | 0/7176 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/7176 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/7176 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/400 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/400 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/400 [00:00<?, ? examples/s]

In [146]:
count_trainable_parameters(model)

5843456

In [149]:
import shutil
for i in range(1):
    shutil.rmtree(f'/kaggle/working/checkpoints_run_4/')

In [150]:
from transformers.trainer_utils import get_last_checkpoint

last_checkpoint = None
if os.path.isdir(training_arguments.output_dir):
    last_checkpoint = get_last_checkpoint(training_arguments.output_dir)
if last_checkpoint is not None:
    print(f"Resuming training from checkpoint: {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)
else:
    print("No checkpoint found, starting training from scratch.")
    trainer.train()

No checkpoint found, starting training from scratch.


Step,Training Loss,Validation Loss
15,3.0207,1.828471
30,1.5133,1.506462
45,1.3699,1.371118
60,1.2567,1.315706
75,1.2001,1.26799
90,1.2129,1.234923
105,1.2106,1.217272
120,1.1321,1.19278
135,1.095,1.178166
150,1.1217,1.165662


## Submission

In [None]:
test_data = pd.read_csv('/kaggle/input/emoti-code-multi-script-emotion-classification-rel/competition_test.csv')
test_data["text"] = test_data.apply(generate_val_prompt,axis=1) 
y_pred_test = predict_for_submission(test_data,model,tokenizer)

Predicting Sentiments for Submission:  24%|██▍       | 571/2392 [06:21<20:03,  1.51it/s]

In [None]:
os.makedirs("submissions", exist_ok=True)
y_pred_test.to_csv("submissions/submission_1.csv", index=False)

In [None]:
y_pred_test

In [172]:
shutil.rmtree('/kaggle/working/submissions')