In [None]:
# STEP 1 — Environment Setup
# Quietly (-q) install the Hugging Face libraries imported further down
# (transformers, peft, datasets) together with accelerate and bitsandbytes.
# NOTE(review): no versions are pinned, so a fresh run may pull different
# releases than the ones that produced the saved outputs — consider pinning.
!pip install -q transformers accelerate peft bitsandbytes datasets

import torch

# Verify GPU: query availability once and reuse the answer for both report lines
has_gpu = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if has_gpu else "No GPU detected"
print("GPU available:", has_gpu)
print("GPU name:", gpu_name)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hGPU available: True
GPU name: Tesla T4


In [None]:
# STEP 2 — Load the Dataset
from datasets import load_dataset

# Download (or reuse the cached copy of) the competition dataset from the Hub
dataset = load_dataset("ad6398/nyu-dl-teach-maths-comp")

# Keep a handle on each split; later cells refer to these two names
train_dataset, test_dataset = dataset["train"], dataset["test"]

# Show one raw training record so the available fields are visible
first_record = train_dataset[0]
print(first_record)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/195M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/3.65M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]

{'question': 'What is the radius of the circle inscribed in triangle $ABC$ if $AB = 22, AC=12,$ and $BC=14$? Express your answer in simplest radical form.', 'is_correct': True, 'answer': '3.16227766016838', 'solution': "The circle is inscribed in a triangle, and we know the sides of the triangle.\nTo use the inradius formula, we need to know the area of the triangle.\nWe can use Heron's formula to calculate the area.\n<llm-code>\nimport math\nfrom sympy import *\n\nAB, AC, BC = 22, 12, 14\n\n# Calculate the semiperimeter and area using Heron's formula\ns = (AB + AC + BC) / 2\nK = sqrt(s * (s - AB) * (s - AC) * (s - BC))\n\nprint(K)\n</llm-code>\n<llm-code-output>\n75.8946638440411\n</llm-code-output>\nLet's now use the formula for the radius of the inscribed circle.\n<llm-code>\nr = K / s\nprint(r)\n</llm-code>\n<llm-code-output>\n3.16227766016838\n</llm-code-output>\nThe answer is \\boxed{3.16227766016838}"}


In [None]:
# STEP 3 — Prepare Prompts

def make_prompt(example):
    """Render one dataset record as the text prompt shown to the classifier.

    Args:
        example: Mapping with "question", "answer" and "solution" entries.

    Returns:
        A single string with the question, the proposed answer, the solution
        and a closing instruction, each separated by one blank line.
    """
    sections = (
        f"Question: {example['question']}",
        f"Proposed Answer: {example['answer']}",
        f"Solution: {example['solution']}",
        "Decide whether the proposed answer is correct. Respond only with True or False.",
    )
    # A blank line between sections == two newline characters
    return "\n\n".join(sections)

def _add_prompt(row):
    """Return the new "prompt" column value for a single dataset row."""
    return {"prompt": make_prompt(row)}

# Add the "prompt" column to the train split, then to the test split
train_dataset = train_dataset.map(_add_prompt)
test_dataset = test_dataset.map(_add_prompt)

# Show the rendered prompt of the first training example
print(train_dataset[0]["prompt"])


Map:   0%|          | 0/1000000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Question: What is the radius of the circle inscribed in triangle $ABC$ if $AB = 22, AC=12,$ and $BC=14$? Express your answer in simplest radical form.

Proposed Answer: 3.16227766016838

Solution: The circle is inscribed in a triangle, and we know the sides of the triangle.
To use the inradius formula, we need to know the area of the triangle.
We can use Heron's formula to calculate the area.
<llm-code>
import math
from sympy import *

AB, AC, BC = 22, 12, 14

# Calculate the semiperimeter and area using Heron's formula
s = (AB + AC + BC) / 2
K = sqrt(s * (s - AB) * (s - AC) * (s - BC))

print(K)
</llm-code>
<llm-code-output>
75.8946638440411
</llm-code-output>
Let's now use the formula for the radius of the inscribed circle.
<llm-code>
r = K / s
print(r)
</llm-code>
<llm-code-output>
3.16227766016838
</llm-code-output>
The answer is \boxed{3.16227766016838}

Decide whether the proposed answer is correct. Respond only with True or False.


In [None]:
# STEP 4 — Tokenization

from transformers import AutoTokenizer

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Use the end-of-sequence token as the padding token so padded batches can be built
tokenizer.pad_token = tokenizer.eos_token

# Smoke test on the first training prompt: pad/truncate to exactly 512 positions
sample_prompt = train_dataset[0]["prompt"]
encoding = tokenizer(
    sample_prompt,
    max_length=512,
    padding="max_length",
    truncation=True,
)
sample_ids = encoding["input_ids"]
print(f"Number of tokens: {len(sample_ids)}")
# Decode only the first 200 ids to keep the preview short
print("Decoded text preview:\n", tokenizer.decode(sample_ids[:200]))


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Number of tokens: 512
Decoded text preview:
 <s> Question: What is the radius of the circle inscribed in triangle $ABC$ if $AB = 22, AC=12,$ and $BC=14$? Express your answer in simplest radical form.

Proposed Answer: 3.16227766016838

Solution: The circle is inscribed in a triangle, and we know the sides of the triangle.
To use the inradius formula, we need to know the area of the triangle.
We can use Heron's formula to calculate the area.
<llm-code>
import math
from sympy import *

AB, AC, BC = 22, 12, 14

# Calculate the semiperimeter and area using Heron's formula
s = (AB + AC + BC) / 2
K = sqrt(s * (s - AB


In [None]:
# STEP 5 — Load the Base Model

from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# 8-bit quantization to save GPU memory. The bare `load_in_8bit=True` keyword on
# from_pretrained is deprecated (the library warns it will be removed), so the
# setting is passed through a BitsAndBytesConfig object instead.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,                             # binary classification: True/False
    quantization_config=quantization_config,  # 8-bit weights via bitsandbytes
    device_map="auto"                         # automatically use the GPU
)

print("Model loaded successfully ✅")


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully ✅


In [None]:
# STEP 6 — Apply LoRA Fine-Tuning Setup
from peft import LoraConfig, get_peft_model

# LoRA adapter settings: rank-8 updates (alpha 32, dropout 0.1) attached to the
# q_proj and v_proj modules, no bias training, sequence-classification task type.
# NOTE(review): the base model is loaded in 8-bit; PEFT's quantization guide
# suggests calling prepare_model_for_kbit_training first — confirm whether it
# is wanted here.
config = LoraConfig(
    task_type="SEQ_CLS",
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Wrap the base model with the adapters, then report the trainable-parameter count
model = get_peft_model(model, config)
model.print_trainable_parameters()


trainable params: 1,130,496 || all params: 1,035,646,976 || trainable%: 0.1092


In [None]:
# STEP 7.1 — Faster Training with Subset
from transformers import TrainingArguments, Trainer

# Reduce dataset size for quick fine-tuning: keep the first 10,000 training rows,
# then hold out 10% of them for evaluation.
# train_test_split shuffles by default, so the seed is fixed to make the
# train/eval partition (and therefore the reported losses) reproducible.
small_train = train_dataset.select(range(10000))
small_train = small_train.train_test_split(test_size=0.1, seed=42)

def preprocess(example):
    """Tokenize one example's prompt and attach its integer class label.

    Args:
        example: Record with a "prompt" string and an "is_correct" flag.

    Returns:
        The tokenizer output (padded/truncated to 512 positions) with an extra
        "labels" entry holding int(example["is_correct"]).
    """
    encoded = tokenizer(
        example["prompt"],
        max_length=512,
        padding="max_length",
        truncation=True,
    )
    label = int(example["is_correct"])
    encoded["labels"] = label
    return encoded

# Tokenize both splits; the original columns are dropped so that only what
# preprocess() returns is kept.
original_columns = small_train["train"].column_names
tokenized = small_train.map(preprocess, remove_columns=original_columns)

# Short run: one epoch, batch size 4 for train and eval, evaluation at the end
# of the epoch, no checkpoints saved and no external experiment reporting.
args = TrainingArguments(
    output_dir="outputs",
    report_to="none",
    save_strategy="no",
    eval_strategy="epoch",
    logging_steps=100,
    learning_rate=2e-4,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
)

# Fall back to the tokenizer's pad token id when the model config defines none
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id
    print(" Padding token ID set to:", model.config.pad_token_id)

trainer = Trainer(
    args=args,
    model=model,
    eval_dataset=tokenized["test"],
    train_dataset=tokenized["train"],
)

# Kept as the last expression so the notebook displays the training summary
trainer.train()



Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,0.4966,0.554199


TrainOutput(global_step=2250, training_loss=0.5668978441026475, metrics={'train_runtime': 1831.6837, 'train_samples_per_second': 4.914, 'train_steps_per_second': 1.228, 'total_flos': 2.6821628264448e+16, 'train_loss': 0.5668978441026475, 'epoch': 1.0})

In [None]:
# STEP 8 — Generate Predictions for Kaggle Submission
import torch
import pandas as pd
from tqdm import tqdm

# Make sure we use the same prompt format for the test set
def make_prompt(example):
    """Render one record as the classifier prompt (same layout as for training).

    Args:
        example: Mapping with "question", "answer" and "solution" entries.

    Returns:
        The question, proposed answer, solution and closing instruction joined
        into one string, with a blank line between consecutive sections.
    """
    sections = (
        f"Question: {example['question']}",
        f"Proposed Answer: {example['answer']}",
        f"Solution: {example['solution']}",
        "Decide whether the proposed answer is correct. Respond only with True or False.",
    )
    # A blank line between sections == two newline characters
    return "\n\n".join(sections)

# The test split normally already carries a "prompt" column from the prompt
# preparation step; only build it when missing. This avoids a redundant pass
# over the whole split and keeps the prompts identical to the training format.
if "prompt" not in test_dataset.column_names:
    test_dataset = test_dataset.map(lambda x: {"prompt": make_prompt(x)})

# Tokenize a batch of test prompts and classify them with the fine-tuned model
def predict(batch):
    """Return the predicted class index for every prompt in ``batch``.

    Args:
        batch: Mapping whose "prompt" entry holds the prompt strings to score
            (for example a slice of the test dataset).

    Returns:
        NumPy array with one class index per prompt, taken as the argmax over
        the model's logits. Index 1 corresponds to the label
        int(is_correct) == 1 used during training.
    """
    # Dropout (the LoRA adapters use lora_dropout=0.1) must be disabled for
    # inference, otherwise predictions are stochastic. Do not rely on the
    # Trainer having left the model in evaluation mode.
    if model.training:
        model.eval()

    # Same padding/truncation settings as the training preprocessing
    inputs = tokenizer(
        batch["prompt"],
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"
    ).to(model.device)

    # No gradients are needed for inference; skip building the autograd graph
    with torch.no_grad():
        logits = model(**inputs).logits
        preds = torch.argmax(logits, dim=-1).cpu().numpy()

    return preds

# Run inference on all test samples, one row per forward pass.
# Class index 1 is written out as "True", anything else as "False".
num_test = len(test_dataset)
preds = [
    "True" if predict(test_dataset[row:row + 1])[0] == 1 else "False"
    for row in tqdm(range(num_test))
]

# Build submission DataFrame: sequential IDs paired with the predicted labels
submission = pd.DataFrame({"ID": range(num_test), "is_correct": preds})

# Save for Kaggle upload (no index column), then preview the first rows
submission.to_csv("submission.csv", index=False)
print("✅ submission.csv created successfully!")
submission.head()


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

100%|██████████| 10000/10000 [27:39<00:00,  6.03it/s]


✅ submission.csv created successfully!


Unnamed: 0,ID,is_correct
0,0,True
1,1,False
2,2,True
3,3,True
4,4,False
