# Install Packages

In [1]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth

# Import Libraries

In [2]:
import re
import os
import gc
import time
import torch
import random
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from datasets import Dataset, DatasetDict, ClassLabel
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from unsloth import FastLanguageModel

2025-04-30 01:28:34.719228: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745976514.909449      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745976514.964151      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# Set seed for Reproducibility

In [3]:
def set_random_seed(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

set_random_seed()

In [4]:
# Function to clear memory when needed

def clean_memory():
    gc.collect()
    torch.cuda.empty_cache()
    time.sleep(5)

# Load Data

In [5]:
test_df = pd.read_csv("/kaggle/input/classification-of-math-problems-by-kasut-academy/test.csv")
test_df

Unnamed: 0,id,Question
0,0,b'Solve 0 = -i - 91*i - 1598*i - 64220 for i.\n'
1,1,Galperin G.A.\n\nA natural number $N$ is 999.....
2,2,Example 7 Calculate $\frac{1}{2 \sqrt{1}+\sqrt...
3,3,"If $A$, $B$, and $C$ represent three distinct ..."
4,4,2. Calculate $1+12+123+1234+12345+123456+12345...
...,...,...
3039,3039,"Find the greatest possible value of $pq + r$, ..."
3040,3040,"4. Given that $a, b, c$ are the lengths of the..."
3041,3041,"3.18. Find the eccentricity, coordinates of th..."
3042,3042,Find the least positive integer $k$ for which ...


In [6]:
id2label = {
    0: "Algebra",
    1: "Geometry and Trigonometry",
    2: "Calculus and Analysis",
    3: "Probability and Statistics",
    4: "Number Theory",
    5: "Combinatorics and Discrete Math",
    6: "Linear Algebra",
    7: "Abstract Algebra and Topology"
}
label2id = {v: k for k, v in id2label.items()}

# Fine-tuned LLAMA 1B Model

## Load Model and Tokenizer

In [7]:
max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "/kaggle/input/saved-models/ensemble/llama_1b_model",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit
)



==((====))==  Unsloth 2025.4.3: Fast Llama patching. Transformers: 4.51.1.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.10G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

Unsloth 2025.4.3 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


## Make predictions on Test Set

In [8]:
FastLanguageModel.for_inference(model)

test_set = test_df.copy()
test_set["instruction"] = "Classify this math problem into one of these eight topics: Algebra, Geometry and Trigonometry, Calculus and Analysis, Probability and Statistics, Number Theory, Combinatorics and Discrete Math, Linear Algebra, Abstract Algebra and Topology."
test_set.rename(columns = {"Question": "input"}, inplace=True)


prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


raw_outputs = []
for i in tqdm(range(len(test_set))):
  inputs = tokenizer(
  [
      prompt.format(
          test_set.iloc[0]["instruction"], # instruction
          test_set.iloc[i]["input"], # input
          "", # output - leave this blank for generation!
      )
  ], return_tensors = "pt").to("cuda")

  outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
  raw_outputs.append(tokenizer.batch_decode(outputs))

  0%|          | 0/3044 [00:00<?, ?it/s]

Unsloth: Input IDs of length 2135 > the model's max sequence length of 2048.
We shall truncate it ourselves. It's imperative if you correct this issue first.


In [9]:
test_set["raw_outputs"] = [raw_output[0] for raw_output in raw_outputs]

def parse_output(output):
    re_match = re.search(r'### Response:\n(.*?)<\|end_of_text\|>', output, re.DOTALL)
    if re_match:
        response = re_match.group(1).strip()
        return response
    else:
        return ''

test_set["parsed_outputs"] = test_set["raw_outputs"].apply(parse_output)

llama_labels = test_set["parsed_outputs"].map(label2id).fillna(0).astype(int).tolist()
llama_labels[:10]

[0, 4, 2, 0, 5, 1, 4, 1, 2, 2]

## Delete Model, Tokenizer and Clear Memory

In [10]:
del model, tokenizer
clean_memory()

# T5

## Config

In [11]:
BATCH_SIZE_PER_DEVICE = 32
MAX_TARGET_LENGTH = 32
prefix = "Classify this math problem: "
t5_model_dir = "/kaggle/input/saved-models/ensemble/t5-model"

## Load Model, Tokenizer, and Setup Pipeline

In [12]:
print(f"\nLoading fine-tuned T5 model and tokenizer from {t5_model_dir}...")
tokenizer = AutoTokenizer.from_pretrained(t5_model_dir)

device = 0
model = AutoModelForSeq2SeqLM.from_pretrained(t5_model_dir).to(f"cuda:{device}")
model.eval()

print("Model and tokenizer reloaded successfully.")

classifier_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device
)


Loading fine-tuned T5 model and tokenizer from /kaggle/input/saved-models/ensemble/t5-model...


Device set to use cuda:0


Model and tokenizer reloaded successfully.


# Make Predictions on Test Set

In [13]:
print("\nPredicting on the test set using pipeline...")

test_questions = test_df['Question'].tolist()
prefixed_test_questions = [prefix + q for q in test_questions]

pipeline_batch_size = BATCH_SIZE_PER_DEVICE * 8
raw_predictions = []
for i in tqdm(range(0, len(prefixed_test_questions), pipeline_batch_size)):
    batch = prefixed_test_questions[i:i + pipeline_batch_size]
    raw_predictions.extend(classifier_pipeline(batch, max_length=MAX_TARGET_LENGTH, clean_up_tokenization_spaces=True))

predicted_label_names = [pred['generated_text'].strip() for pred in raw_predictions]

print(f"\nNumber of predictions: {len(predicted_label_names)}")
print(predicted_label_names[:10])


Predicting on the test set using pipeline...


  0%|          | 0/12 [00:00<?, ?it/s]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



Number of predictions: 3044
['Algebra', 'Number Theory', 'Calculus and Analysis', 'Number Theory', 'Number Theory', 'Geometry and Trigonometry', 'Number Theory', 'Geometry and Trigonometry', 'Algebra', 'Algebra']


In [14]:
cleaned_preds = predicted_label_names[:]

predicted_labels = []
unknown_count = 0
for pred_name in cleaned_preds:
    if pred_name in label2id:
        predicted_labels.append(label2id[pred_name])
    else:
        predicted_labels.append(0)
        unknown_count += 1
        print(f"Warning: Generated unknown label name '{pred_name}'. Assigned default 0.")

if unknown_count > 0:
     print(f"Total unknown labels generated: {unknown_count}")

t5_labels = predicted_labels[:]
t5_labels[:10]

[0, 4, 2, 4, 4, 1, 4, 1, 0, 0]

## Delete Model Objects

In [15]:
del model, tokenizer
clean_memory()

# Deberta-V3-base

## Config

In [16]:
MAX_LENGTH = 512
deberta_labels = []
EVAL_BATCH_SIZE = 32
deberta_model_dir = "/kaggle/input/saved-models/ensemble/deberta-model"

## Preprocessing function

In [17]:
def clean_math_text_final(text):
    
    text = str(text)
    text = re.sub(r'^\s*\d+\.\s*', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', ' ', text)
    text = re.sub(r'#\w+', ' ', text)
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"
                           u"\U0001F300-\U0001F5FF"
                           u"\U0001F680-\U0001F6FF"
                           u"\U0001F1E0-\U0001F1FF"
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    text = emoji_pattern.sub(r' ', text)
    text = re.sub(r'\s+', ' ', text).strip().lower()

    return text

## Load Model and Tokenizer

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device} to load deberta model")


tokenizer = AutoTokenizer.from_pretrained(deberta_model_dir)
print("Tokenizer loaded.")

model = AutoModelForSequenceClassification.from_pretrained(deberta_model_dir)
print("Model loaded.")
model.to(device)
print(f"Model moved to {device}.")

model.eval()

Using device: cuda to load deberta model
Tokenizer loaded.
Model loaded.
Model moved to cuda.


DebertaV2ForSequenceClassification(
  (deberta): DebertaV2Model(
    (embeddings): DebertaV2Embeddings(
      (word_embeddings): Embedding(128100, 768, padding_idx=0)
      (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): DebertaV2Encoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaV2Layer(
          (attention): DebertaV2Attention(
            (self): DisentangledSelfAttention(
              (query_proj): Linear(in_features=768, out_features=768, bias=True)
              (key_proj): Linear(in_features=768, out_features=768, bias=True)
              (value_proj): Linear(in_features=768, out_features=768, bias=True)
              (pos_dropout): Dropout(p=0.1, inplace=False)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): DebertaV2SelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): Layer

## Setup Trainer

In [19]:
training_args = TrainingArguments(
    output_dir="./",
    push_to_hub=False,
    per_device_eval_batch_size=EVAL_BATCH_SIZE,
    report_to="none",
    fp16=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer
)

  trainer = Trainer(


## Make Predictions on Test Set

In [20]:
comp_test_df = test_df.copy()

print("Cleaning test data...")
comp_test_df['cleaned_question'] = comp_test_df['Question'].apply(clean_math_text_final)
print("Cleaning complete.")

predict_dataset = Dataset.from_pandas(comp_test_df[['cleaned_question']])
print("Test data converted to Dataset format.")
print(predict_dataset)

def tokenize_for_predict(examples):
    return tokenizer(examples["cleaned_question"],
                     padding="max_length",
                     truncation=True,
                     max_length=MAX_LENGTH)

print("\n--- Tokenizing Competition Test Set ---")
tokenized_predict_dataset = predict_dataset.map(tokenize_for_predict, batched=True)

tokenized_predict_dataset = tokenized_predict_dataset.remove_columns(["cleaned_question"])
tokenized_predict_dataset.set_format("torch")
print("Tokenization complete.")

print("\n--- Making Predictions ---")
predictions_output = trainer.predict(tokenized_predict_dataset)

logits = predictions_output.predictions

predicted_labels = np.argmax(logits, axis=-1)
print("Predictions generated.")

deberta_labels = [i for i in predicted_labels]
deberta_labels[:10]

Cleaning test data...
Cleaning complete.
Test data converted to Dataset format.
Dataset({
    features: ['cleaned_question'],
    num_rows: 3044
})

--- Tokenizing Competition Test Set ---


Map:   0%|          | 0/3044 [00:00<?, ? examples/s]

Tokenization complete.

--- Making Predictions ---


Predictions generated.


[0, 4, 2, 0, 4, 1, 4, 1, 0, 2]

# Ensemble Predictions from Three Models

In [21]:
ensemble_preds = []

# Hard Voting (use deberta label if all the labels are different)
for p1, p2, p3 in zip(llama_labels, t5_labels, deberta_labels):
    if p1 == p2 or p1 == p3:
        ensemble_preds.append(p1)
    elif p2 == p3:
        ensemble_preds.append(p2)
    else:
        ensemble_preds.append(p3)

result = pd.DataFrame({'id': test_df["id"], 'label': ensemble_preds})
result.to_csv('submission.csv', index=False)

In [22]:
result.head()

Unnamed: 0,id,label
0,0,0
1,1,4
2,2,2
3,3,0
4,4,4
