In [None]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)
# !cd /content/drive/MyDrive/slm

Mounted at /content/drive


In [None]:
# lora.py
import math
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, orig_linear: nn.Linear, r=8, alpha=16, merge_weights=False):
        super().__init__()
        self.in_features = orig_linear.in_features
        self.out_features = orig_linear.out_features
        self.bias = orig_linear.bias is not None

        self.linear = orig_linear
        for p in self.linear.parameters():
            p.requires_grad = False

        self.r = r
        self.alpha = alpha
        self.scaling = alpha / max(1, r)
        self.merge_weights = merge_weights

        # LoRA params
        if r > 0:

            self.lora_A = nn.Parameter(torch.zeros(r, self.in_features))
            self.lora_B = nn.Parameter(torch.zeros(self.out_features, r))
            nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))
            nn.init.zeros_(self.lora_B)
        else:
            self.register_parameter("lora_A", None)
            self.register_parameter("lora_B", None)

    def forward(self, x):
        base = self.linear(x)
        if self.r > 0:
            # x: (..., in)
            # x @ A.T -> (..., r)
            # (..., r) @ B.T -> (..., out)
            lora_out = (x @ self.lora_A.t()) @ self.lora_B.t()
            return base + lora_out * self.scaling
        else:
            return base

    def lora_state_dict(self):
        if self.r > 0:
            return {"lora_A": self.lora_A.data.cpu(), "lora_B": self.lora_B.data.cpu()}
        else:
            return {}

def apply_lora_to_module(module, target_names=("qkv", "proj", "fc", "proj"), r=8, alpha=16, inject_in_mlp=False):
    
    for name, child in module.named_children():
        #If child itself contains children, recurse
        apply_lora_to_module(child, target_names, r, alpha, inject_in_mlp)

        if isinstance(child, nn.Linear):
            lname = name.lower()
            should_wrap = False
            if any(k in lname for k in ["qkv", "proj", "attn", "to_", "toq", "tok"]):
                should_wrap = True
            if inject_in_mlp and any(k in lname for k in ["fc", "mlp", "proj"]):
                should_wrap = True

            if should_wrap:
                wrapped = LoRALinear(child, r=r, alpha=alpha)
                setattr(module, name, wrapped)

def print_trainable_stats(model):
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total params: {total/1e6:.3f}M, Trainable params: {trainable/1e6:.3f}M")


In [4]:
import os
import numpy as np
import tiktoken
from datasets import load_dataset
from tqdm import tqdm

OUT_DIR = "data-FT"
os.makedirs(OUT_DIR, exist_ok=True)

VAL_FRACTION = 0.05
MAX_SAMPLES = None
enc = tiktoken.get_encoding("gpt2")

def format_gsm8k(example):
    q = example["question"].strip()
    a = example["answer"].strip()

    text = (
        "### Question:\n"
        f"{q}\n\n"
        "### Answer:\n"
        f"{a}\n\n"
        "### Final Answer:\n"
        f"{a.split('####')[-1].strip()}"
    )
    return text


def tokenize_and_write(texts, filename):
    all_ids = []
    for t in tqdm(texts):
        ids = enc.encode(t)
        ids.append(enc.eot_token)
        all_ids.extend(ids)

    arr = np.array(all_ids, dtype=np.uint16)
    memmap = np.memmap(filename, dtype=np.uint16, mode="w+", shape=arr.shape)
    memmap[:] = arr[:]
    memmap.flush()


print("Loading GSM8K...")
dataset = load_dataset("gsm8k", "main")

train_texts = []
val_texts = []

for ex in dataset["train"]:
    train_texts.append(format_gsm8k(ex))

for ex in dataset["test"]:
    val_texts.append(format_gsm8k(ex))

if MAX_SAMPLES:
    train_texts = train_texts[:MAX_SAMPLES]

print("Writing GSM8K bins...")
tokenize_and_write(train_texts, os.path.join(OUT_DIR, "gsm8k_train.bin"))
tokenize_and_write(val_texts,   os.path.join(OUT_DIR, "gsm8k_val.bin"))

print("GSM8K done.")


Loading GSM8K...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

main/train-00000-of-00001.parquet:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

main/test-00000-of-00001.parquet:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

Writing GSM8K bins...


100%|██████████| 7473/7473 [00:00<00:00, 10667.13it/s]
100%|██████████| 1319/1319 [00:00<00:00, 7191.68it/s]


GSM8K done.


In [10]:
import math
from dataclasses import dataclass
import torch
import torch.nn as nn
import torch.nn.functional as F

class LayerNorm(nn.Module):
    def __init__(self, ndim, bias=True):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(ndim)) #element wise scaling
        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None

    def forward(self, x):
        return F.layer_norm(x, self.weight.shape, self.weight, self.bias, 1e-5)

# RoPE
def rotate_half(x):
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)

def apply_rope(q, k, cos, sin):
    q = (q * cos) + (rotate_half(q) * sin)
    k = (k * cos) + (rotate_half(k) * sin)
    return q, k

class RotaryEmbedding:
    def __init__(self, dim, max_seq_len):
        inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2).float() / dim))
        self.register(inv_freq, max_seq_len)

    def register(self, inv_freq, max_seq_len):
        t = torch.arange(max_seq_len)
        freqs = torch.einsum("i,j->ij", t, inv_freq)
        emb = torch.cat((freqs, freqs), dim=-1)
        self.cos = emb.cos()[None, None, :, :]
        self.sin = emb.sin()[None, None, :, :]

    def get(self, seq_len, device):
        return (
            self.cos[:, :, :seq_len, :].to(device),
            self.sin[:, :, :seq_len, :].to(device),
        )


class MultiHeadAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        assert config.n_embed % config.n_head == 0

        self.n_head = config.n_head
        self.head_dim = config.n_embed // config.n_head
        self.dropout = config.dropout

        self.qkv = nn.Linear(config.n_embed, 3 * config.n_embed, bias=config.bias)
        self.proj = nn.Linear(config.n_embed, config.n_embed, bias=config.bias)

        self.rope = RotaryEmbedding(self.head_dim, config.block_size)

    def forward(self, x):
        B, T, C = x.size()

        qkv = self.qkv(x)
        q, k, v = qkv.chunk(3, dim=-1)

        q = q.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        k = k.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        v = v.view(B, T, self.n_head, self.head_dim).transpose(1, 2)

        cos, sin = self.rope.get(T, x.device)
        q, k = apply_rope(q, k, cos, sin)

        # FlashAttention (automatic if supported)
        out = F.scaled_dot_product_attention(
            q, k, v,
            dropout_p=self.dropout if self.training else 0.0,
            is_causal=True
        )

        out = out.transpose(1, 2).contiguous().view(B, T, C)
        return self.proj(out)


class MLP(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.fc = nn.Linear(config.n_embed, 4 * config.n_embed, bias=config.bias)
        self.proj = nn.Linear(4 * config.n_embed, config.n_embed, bias=config.bias)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        x = self.fc(x)
        x = F.gelu(x)
        x = self.proj(x)
        return self.dropout(x)

class Block(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.ln1 = LayerNorm(config.n_embed, config.bias)
        self.attn = MultiHeadAttention(config)
        self.ln2 = LayerNorm(config.n_embed, config.bias)
        self.mlp = MLP(config)

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        x = x + self.mlp(self.ln2(x))
        return x

@dataclass
class SLMconfig:
    vocab_size: int = 50257
    block_size: int = 64
    n_layer: int = 4
    n_head: int = 6
    n_embed: int = 384
    dropout: float = 0.1
    bias: bool = False

class SLM(nn.Module):
    def __init__(self, config: SLMconfig):
        super().__init__()
        self.config = config

        self.token_emb = nn.Embedding(config.vocab_size, config.n_embed)
        self.drop = nn.Dropout(config.dropout)

        self.blocks = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
        self.ln_f = LayerNorm(config.n_embed, config.bias)

        self.lm_head = nn.Linear(config.n_embed, config.vocab_size, bias=False)
        self.token_emb.weight = self.lm_head.weight  # weight tying

        self.apply(self._init_weights)

        print(f"Number of parameters: {self.num_params()/1e6:.2f}M")

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def num_params(self):
        return sum(p.numel() for p in self.parameters())

    def forward(self, idx, targets=None):
        B, T = idx.shape
        assert T <= self.config.block_size

        x = self.token_emb(idx)
        x = self.drop(x)

        for block in self.blocks:
            x = block(x)

        x = self.ln_f(x)
        logits = self.lm_head(x)

        loss = None
        if targets is not None:
            loss = F.cross_entropy(
                logits.view(-1, logits.size(-1)),
                targets.view(-1),
                ignore_index=-1
            )

        return logits, loss

    @torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
        for _ in range(max_new_tokens):
            idx_cond = idx[:, -self.config.block_size :]
            logits, _ = self(idx_cond)
            logits = logits[:, -1, :] / temperature

            if top_k is not None:
                v, _ = torch.topk(logits, top_k)
                logits[logits < v[:, [-1]]] = -float("Inf")

            probs = F.softmax(logits, dim=-1)
            next_idx = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx, next_idx), dim=1)

        return idx


In [None]:
# finetune_lora.py
import os, math, time
import numpy as np
from tqdm import tqdm
from pathlib import Path

import torch
import torch.nn as nn
from torch.optim import AdamW


# =========================
# DEVICE
# =========================
device = "cuda" if torch.cuda.is_available() else "cpu"

# =========================
# PATHS
# =========================
from google.colab import drive
drive.mount("/content/drive")

PRETRAIN_DIR = "/content/drive/MyDrive/slm_checkpoints/step_18000"
OUT_DIR = "/content/drive/MyDrive/slm_lora_ckpts"
os.makedirs(OUT_DIR, exist_ok=True)

# =========================
# DATA
# =========================
data_dir = "data-FT"
train_bin = os.path.join(data_dir, "gsm8k_train.bin")
val_bin   = os.path.join(data_dir, "gsm8k_val.bin")

# =========================
# TRAIN PARAMS
# =========================
block_size = 256
batch_size = 4
grad_accum = 8
max_iters = 10000
eval_interval = 250

# LoRA
lora_r = 8
lora_alpha = 16
inject_mlp = False

lr = 1e-4
weight_decay = 0.0
warmup_frac = 0.05

# =========================
# MODEL
# =========================
config = SLMconfig(
    vocab_size=50257,
    block_size=block_size,
    n_layer=4,
    n_head=6,
    n_embed=384,
    dropout=0.1
)

model = SLM(config)

base_weights = torch.load(
    os.path.join(PRETRAIN_DIR, "model.pt"),
    map_location="cpu"
)
model.load_state_dict(base_weights)

for p in model.parameters():
    p.requires_grad = False

apply_lora_to_module(
    model,
    r=lora_r,
    alpha=lora_alpha,
    inject_in_mlp=inject_mlp
)

model.to(device)
print_trainable_stats(model)

def load_bin(path):
    return np.memmap(path, dtype=np.uint16, mode="r")

train_data = load_bin(train_bin)
val_data   = load_bin(val_bin)

def get_batch(data, batch_size, block_size):
    ix = np.random.randint(0, len(data) - block_size - 1, size=(batch_size,))
    x = np.stack([data[i:i+block_size].astype(np.int64) for i in ix])
    y = np.stack([data[i+1:i+1+block_size].astype(np.int64) for i in ix])
    return torch.from_numpy(x), torch.from_numpy(y)

optimizer = AdamW(
    [p for p in model.parameters() if p.requires_grad],
    lr=lr,
    weight_decay=weight_decay
)

warmup_steps = int(max_iters * warmup_frac)

def get_lr(step):
    if step < warmup_steps:
        return lr * step / max(1, warmup_steps)
    progress = (step - warmup_steps) / max(1, max_iters - warmup_steps)
    cosine = 0.5 * (1 + math.cos(math.pi * progress))
    return lr * 0.1 + cosine * (lr - lr * 0.1)


def atomic_save(obj, path):
    tmp = path.with_suffix(".tmp")
    torch.save(obj, tmp, _use_new_zipfile_serialization=False)
    os.replace(tmp, path)

model.train()
for step in range(max_iters):

    optimizer.zero_grad(set_to_none=True)
    cur_lr = get_lr(step)
    for g in optimizer.param_groups:
        g["lr"] = cur_lr

    for _ in range(grad_accum):
        xb, yb = get_batch(train_data, batch_size, block_size)
        xb = xb.to(device)
        yb = yb.to(device)
        _, loss = model(xb, yb)
        (loss / grad_accum).backward()

    torch.nn.utils.clip_grad_norm_(
        [p for p in model.parameters() if p.requires_grad], 1.0
    )
    optimizer.step()

    if step % eval_interval == 0:
        model.eval()
        with torch.no_grad():
            losses = []
            for _ in range(100):
                xb, yb = get_batch(val_data, batch_size, block_size)
                xb = xb.to(device); yb = yb.to(device)
                _, vloss = model(xb, yb)
                losses.append(vloss.item())
        model.train()

        print(f"[step {step}] lr={cur_lr:.2e} val_loss={sum(losses)/len(losses):.4f}")

        # ---- save LoRA only ----
        lora_state = {
            k: v.cpu()
            for k, v in model.state_dict().items()
            if "lora_" in k
        }

        atomic_save(
            {
                "lora_state": lora_state,
                "config": config,
                "base_ckpt": PRETRAIN_DIR
            },
            Path(OUT_DIR) / f"lora_step_{step}.pt"
        )

print("LoRA fine-tuning complete.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of parameters: 26.38M
Total params: 26.515M, Trainable params: 0.135M
[step 0] lr=0.00e+00 val_loss=4.8192
[step 250] lr=5.00e-05 val_loss=3.7307
[step 500] lr=1.00e-04 val_loss=2.9325
[step 750] lr=9.98e-05 val_loss=2.7255
[step 1000] lr=9.94e-05 val_loss=2.6121
[step 1250] lr=9.86e-05 val_loss=2.5546
[step 1500] lr=9.76e-05 val_loss=2.4796
[step 1750] lr=9.62e-05 val_loss=2.4491
[step 2000] lr=9.46e-05 val_loss=2.4068
[step 2250] lr=9.27e-05 val_loss=2.4256
[step 2500] lr=9.05e-05 val_loss=2.4127
[step 2750] lr=8.81e-05 val_loss=2.4007
[step 3000] lr=8.55e-05 val_loss=2.4044
[step 3250] lr=8.26e-05 val_loss=2.3669
[step 3500] lr=7.96e-05 val_loss=2.3871
[step 3750] lr=7.64e-05 val_loss=2.3717
[step 4000] lr=7.31e-05 val_loss=2.3471
[step 4250] lr=6.96e-05 val_loss=2.3232


In [None]:
import torch
import tiktoken

device = "cuda" if torch.cuda.is_available() else "cpu"

config = SLMconfig(
    vocab_size=50257,
    block_size=256,
    n_layer=4,
    n_head=6,
    n_embed=384,
    dropout=0.0  # IMPORTANT: no dropout at inference
)

model = SLM(config)

base_ckpt = torch.load(
    "/content/drive/MyDrive/slm_checkpoints/step_18000/model.pt",
    map_location="cpu"
)
model.load_state_dict(base_ckpt)

apply_lora_to_module(
    model,
    r=8,
    alpha=16,
    inject_in_mlp=False
)


lora_ckpt = torch.load(
    "/content/drive/MyDrive/slm_lora_ckpts/lora_step_4250.pt",
    map_location="cpu",
    weights_only=False
)

model.load_state_dict(lora_ckpt["lora_state"], strict=False)

model.to(device)
model.eval()
enc = tiktoken.get_encoding("gpt2")
@torch.no_grad()
def generate(
    model,
    prompt,
    max_new_tokens=200,
    temperature=0.8,
    top_k=50
):
    model.eval()

    # encode
    idx = torch.tensor(
        enc.encode(prompt),
        dtype=torch.long,
        device=device
    )[None, :]  # (1, T)

    for _ in range(max_new_tokens):
        # crop context if needed
        idx_cond = idx[:, -config.block_size:]

        logits, _ = model(idx_cond)
        logits = logits[:, -1, :] / temperature

        if top_k is not None:
            v, _ = torch.topk(logits, top_k)
            logits[logits < v[:, [-1]]] = -float("inf")

        probs = torch.softmax(logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)

        idx = torch.cat([idx, next_token], dim=1)

    return enc.decode(idx[0].tolist())
prompt = """### Question:
If a car travels 60 km in 2 hours, what is its average speed?

### Answer:
"""

print(generate(model, prompt, max_new_tokens=150))


Number of parameters: 26.38M
### Question:
If a car travels 60 km in 2 hours, what is its average speed?

### Answer:
For the total speed of the car you drive a car on the highway, the distance of a car is 25 * 2 = <<25*2=10>>10 years old.
For the total speed of the car, the speed of the car is 10 * 2 = <<10*2=10>>10 years old.
For the total speed of the car, the distance of the car is 10 + 10 = <<10+10=15>>15 years old.
#### 10

### Final Answer:
15<|endoftext|>### Question:
After the release of a pencil, the second pencil was introduced a year later, and when it was published, the first pencil was introduced. The first pencil was introduced the second pencil as a


In [12]:
import torch
@torch.no_grad()
def generate(
    model,
    tokenizer,
    prompt,
    max_new_tokens=128,
    temperature=1.0,
):
    model.eval()

    device = next(model.parameters()).device
    tokens = tokenizer.encode(prompt)
    x = torch.tensor(tokens, dtype=torch.long, device=device)[None, :]

    for _ in range(max_new_tokens):
        logits, _ = model(x)

        logits = logits[:, -1, :] / temperature
        probs = torch.softmax(logits, dim=-1)

        next_token = torch.multinomial(probs, num_samples=1)
        x = torch.cat([x, next_token], dim=1)

    return tokenizer.decode(x[0].tolist())


In [13]:

import re


class SLMAgent:
    """
    Simple agentic loop:
    1. Generate reasoning
    2. Critique output
    3. Retry with feedback
    """

    def __init__(
        self,
        model,
        tokenizer,
        max_steps=3,
        temperature=0.8,
    ):
        self.model = model
        self.tokenizer = tokenizer
        self.max_steps = max_steps
        self.temperature = temperature

    def run(self, question):
        history = []

        prompt = self._base_prompt(question)

        for step in range(self.max_steps):
            output = generate(
                self.model,
                self.tokenizer,
                prompt,
                temperature=self.temperature,
            )

            history.append(output)

            if self._is_good_answer(output):
                return {
                    "final": output,
                    "steps": history,
                }

            critique = self._critique(output)

            prompt = (
                f"{prompt}\n\n"
                f"Previous attempt:\n{output}\n\n"
                f"Critique:\n{critique}\n\n"
                f"Fix the mistakes and answer again carefully."
            )

        return {
            "final": history[-1],
            "steps": history,
        }

    def _base_prompt(self, question):
        return (
            "Solve the following problem step by step.\n"
            "Explain your reasoning clearly.\n\n"
            f"Question: {question}\n"
            "Answer:"
        )

    def _critique(self, text):
        """
        Lightweight self-critique (no external model).
        """
        if not self._contains_number(text):
            return "No final numeric answer was provided."

        if "?" in text:
            return "The answer appears uncertain."

        return "The reasoning may contain arithmetic or logical errors."

    def _is_good_answer(self, text):
        return self._contains_number(text) and "therefore" in text.lower()

    def _contains_number(self, text):
        return bool(re.search(r"-?\d+", text))


In [14]:


class MathAgent(SLMAgent):
    def _critique(self, text):
        numbers = re.findall(r"-?\d+", text)

        if len(numbers) == 0:
            return "No numerical reasoning found."

        if "=" not in text:
            return "The solution does not show explicit calculations."

        return "Check the arithmetic carefully and recompute."

    def _is_good_answer(self, text):
        return "=" in text and text.strip().endswith(numbers_last(text))


def numbers_last(text):
    nums = re.findall(r"-?\d+", text)
    return nums[-1] if nums else ""


In [18]:
import tiktoken


class GPT2Tokenizer:
    def __init__(self):
        self.enc = tiktoken.get_encoding("gpt2")
        self.vocab_size = self.enc.n_vocab  # 50257

    def encode(self, text):
        # Allow GPT-2 special tokens like <|endoftext|>
        return self.enc.encode(
            text,
            allowed_special={"<|endoftext|>"}
        )

    def decode(self, tokens):
        return self.enc.decode(tokens)



In [None]:
import torch


@torch.no_grad()
def generate(
    model,
    tokenizer,
    prompt,
    max_new_tokens=128,
    temperature=1.0,
):
    model.eval()
    device = next(model.parameters()).device

    tokens = tokenizer.encode(prompt)

    block_size = model.block_size if hasattr(model, "block_size") else model.config.block_size
    tokens = tokens[-block_size:]

    x = torch.tensor(tokens, dtype=torch.long, device=device)[None, :]

    for _ in range(max_new_tokens):
        x = x[:, -block_size:]

        logits, _ = model(x)

        logits = logits[:, -1, :] / temperature
        probs = torch.softmax(logits, dim=-1)

        next_token = torch.multinomial(probs, num_samples=1)
        x = torch.cat([x, next_token], dim=1)

    return tokenizer.decode(x[0].tolist())


In [21]:
# from math_agent import MathAgent

tokenizer = GPT2Tokenizer()
agent = MathAgent(model, tokenizer)

q = "If a train travels 60 km in 1.5 hours, what is its average speed?"
result = agent.run(q)

print(result["final"])


 so he will have to be able to get a total of 4**25=<<4**25=125>>125 tons
####125

### Final Answer:
125<|endoftext|>### Question:
Roberto had a fifth portion of the corn. He cut a quarter of the corn, and a half of the corn that drilled the corn back in the first half of the corn back in the second half of the corn. How many corn did he just cut all the corn bill?

### Answer:
He

Critique:
Check the arithmetic carefully and recompute.

Fix the mistakes and answer again carefully.
See Neveld:
The same fraction of the corn is all the other corn, so he cut 10*7 = <<10*7=60>>60 corn.
Therefore, he cut the deficit by 60/6 = <<60/6=40>>40 o.
#### 45

### Final Answer:
40<|endoftext|>### Question:
John has to take his cake.  He sold 5 cakes that he should have saved in total.  He sold $1 each and $1 each.  He cost his cocoa $2 each.  He bought 4 cakes that they wanted to lose.  He bought
