In [2]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26" "trl<0.9.0" "peft<0.12.0" "accelerate<0.32.0" "bitsandbytes<0.44.0" "transformers<4.43.0"

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-ojocfe21/unsloth_5adb9ba3bef040e4a04355130a48f5db
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-ojocfe21/unsloth_5adb9ba3bef040e4a04355130a48f5db
  Resolved https://github.com/unslothai/unsloth.git to commit d707bd43b4e883b521761d525be2fae428fe5980
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.10.13 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.10.13-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.

In [3]:
from unsloth import FastLanguageModel
import torch, re, os, random
from datasets import load_dataset
import pandas as pd

SEED = 42
random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)

max_seq_length = 1024
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

EOS_TOKEN = tokenizer.eos_token
print("EOS:", EOS_TOKEN)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.




🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.10.12: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

EOS: <|end_of_text|>


In [37]:
ds = load_dataset("ad6398/nyu-dl-teach-maths-comp")
train_all = ds["train"].shuffle(seed=SEED)

TRAIN_EXAMPLES = 100_000
VAL_EXAMPLES   = 5_000

train_ds = train_all.select(range(min(TRAIN_EXAMPLES, len(train_all))))
val_ds   = train_all.select(range(TRAIN_EXAMPLES, min(TRAIN_EXAMPLES + VAL_EXAMPLES, len(train_all))))
test_ds  = ds["test"]

def clean(text: str) -> str:
    if text is None: return ""
    text = re.sub(r"```.*?```", "", text, flags=re.S)
    text = re.sub(r"[ \t]{2,}", " ", text)
    return text.strip()


train_tmpl = """You are a great mathematician and you are tasked with verifying if a provided solution to a maths question is correct.
Question:
{q}

Given Answer:
{a}

Solution:
{s}

Output:
{y}"""


infer_tmpl = """You are a great mathematician and you are tasked with verifying if a provided solution to a maths question is correct.
Question:
{q}

Given Answer:
{a}

Solution:
{s}

Output:
"""

def format_train(batch):
    qs, ans, sols, ys = batch["question"], batch["answer"], batch["solution"], batch["is_correct"]
    texts = []
    for q, a, s, y in zip(qs, ans, sols, ys):
        text = train_tmpl.format(q=clean(q), a=clean(str(a)), s=clean(s), y="True" if bool(y) else "False") + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

def format_infer(batch):
    qs, ans, sols = batch["question"], batch["answer"], batch["solution"]
    texts = []
    for q, a, s in zip(qs, ans, sols):
        texts.append(infer_tmpl.format(q=clean(q), a=clean(str(a)), s=clean(s)))
    return {"text": texts}

train_formatted = train_ds.map(format_train, batched=True, remove_columns=train_ds.column_names)
val_prompts     = val_ds.map(format_infer, batched=True, remove_columns=val_ds.column_names)["text"]
test_prompts    = test_ds.map(format_infer, batched=True, remove_columns=test_ds.column_names)["text"]

len(train_formatted), len(val_prompts), len(test_prompts)

(100000, 5000, 10000)

In [38]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = SEED,
)

from trl import SFTTrainer
from transformers import TrainingArguments

args = TrainingArguments(
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 8,
    learning_rate = 2e-4,
    weight_decay = 0.01,
    warmup_ratio = 0.03,
    max_steps = 400,
    logging_steps = 20,
    lr_scheduler_type = "linear",
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    optim = "adamw_8bit",
    output_dir = "outputs",
    seed = SEED,
    report_to = "none",
    save_strategy = "no",
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_formatted,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = args,
    packing = True,
)

Unsloth: Already have LoRA adapters! We shall skip this step.


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/100000 [00:00<?, ? examples/s]

In [13]:
trainer.train()
trainer.save_model("outputs")
tokenizer.save_pretrained("outputs")

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1 | Total steps = 400
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040 of 8,072,204,288 (0.52% trained)


Step,Training Loss
20,0.9135
40,0.7936
60,0.7706
80,0.7393
100,0.7686
120,0.7474
140,0.7556
160,0.731
180,0.7466
200,0.7499


('outputs/tokenizer_config.json',
 'outputs/special_tokens_map.json',
 'outputs/tokenizer.json')

In [14]:
trainer.save_model("outputs")
tokenizer.save_pretrained("outputs")

('outputs/tokenizer_config.json',
 'outputs/special_tokens_map.json',
 'outputs/tokenizer.json')

In [35]:
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="outputs", max_seq_length=max_seq_length, dtype=None, load_in_4bit=True
)

==((====))==  Unsloth 2025.10.12: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [41]:
FastLanguageModel.for_inference(model)

idx = 237
example   = val_ds[idx]
question  = example["question"]
answer    = str(example["answer"])
solution  = example["solution"]

prompt = infer_tmpl.format(
    q = clean(question),
    a = clean(answer),
    s = clean(solution),
)

inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens = 6,
    do_sample      = False,
    temperature    = 0.0,
    use_cache      = True,
    eos_token_id   = tokenizer.eos_token_id,
    pad_token_id   = tokenizer.pad_token_id,
)

response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
pred_bool     = parse_bool(response_text)

print("#### QUESTION ####")
print(question)
print("\n#### GIVEN ANSWER ####")
print(answer)
print("\n#### SOLUTION ####")
print(solution)
print("\n#### MODEL'S PREDICTION ####")
print("Output:", "True" if pred_bool else "False")
print("\n#### CORRECT ANSWER ####")
print(example["is_correct"])


#### QUESTION ####
Two circles are drawn in a 12-inch by 14-inch rectangle. Each circle has a diameter of 6 inches. If the circles do not extend beyond the rectangular region, what is the greatest possible distance (in inches) between the centers of the two circles?

#### GIVEN ANSWER ####
\sqrt{61}

#### SOLUTION ####
First, let's draw a diagram to illustrate the problem:

<figure>
<img width="450" src="https://file.eduboard.com/users/TempUser/files/diagram.png" alt="Diagram of the problem"/>
<figcaption>
Diagram of the problem.
</figcaption>
</figure>

The shaded rectangle has dimensions $N$ and $O$. We can calculate them using sympy:
<llm-code>
from sympy import Symbol, sqrt

# Define the dimensions of the rectangle
l = 12
w = 14

# Calculate the diagonal of the shaded rectangle
a = l - 6
b = w - 6
d = sqrt(a**2 + b**2)

# Calculate N and O
N = d / 2
O = sqrt(N**2 + a**2)

# Print the values
print(f"N = {N}")
print(f"O = {O}")
</llm-code>
<llm-code-output>
N = 5
O = sqrt(61)
</llm-c

In [42]:
import pandas as pd
from tqdm import tqdm
from unsloth import FastLanguageModel
import torch

FastLanguageModel.for_inference(model)

tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

predictions = []

@torch.inference_mode()
def generate_one(p: str, max_new_tokens: int = 6) -> bool:
    inputs = tokenizer([p], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        temperature=0.0,
        use_cache=True,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return parse_bool(text)


if "test_prompts" in globals() and isinstance(test_prompts, list) and len(test_prompts) == len(test_ds):
    iterator = zip(test_prompts, range(len(test_prompts)))
else:
    def clean_text(x):
        return clean(str(x)) if 'clean' in globals() else str(x)
    built_prompts = []
    for ex in test_ds:
        built_prompts.append(
            infer_tmpl.format(
                q = clean_text(ex["question"]),
                a = clean_text(ex["answer"]),
                s = clean_text(ex["solution"]),
            )
        )
    iterator = zip(built_prompts, range(len(built_prompts)))

for prompt, _ in tqdm(iterator, total=len(test_ds)):
    pred = generate_one(prompt, max_new_tokens=6)
    predictions.append(pred)

submission = pd.DataFrame({
    "ID": range(len(predictions)),
    "is_correct": predictions,
})
submission.to_csv("submission.csv", index=False)
print("Saved: submission.csv  | rows =", len(submission))

 13%|█▎        | 1274/10000 [11:38<1:24:25,  1.72it/s]Unsloth: Input IDs of shape torch.Size([1, 1176]) with length 1176 > the model's max sequence length of 1024.
We shall truncate it ourselves. It's imperative if you correct this issue first.
 23%|██▎       | 2329/10000 [21:31<1:17:49,  1.64it/s]Unsloth: Input IDs of shape torch.Size([1, 1191]) with length 1191 > the model's max sequence length of 1024.
We shall truncate it ourselves. It's imperative if you correct this issue first.
 30%|███       | 3050/10000 [28:14<1:00:45,  1.91it/s]Unsloth: Input IDs of shape torch.Size([1, 1233]) with length 1233 > the model's max sequence length of 1024.
We shall truncate it ourselves. It's imperative if you correct this issue first.
 33%|███▎      | 3251/10000 [30:08<1:00:50,  1.85it/s]Unsloth: Input IDs of shape torch.Size([1, 1029]) with length 1029 > the model's max sequence length of 1024.
We shall truncate it ourselves. It's imperative if you correct this issue first.
 33%|███▎      | 332

Saved: submission.csv  | rows = 10000



