### Step 1: Install necesscary packages

In [None]:
%pip install matplotlib
%pip install torch numpy transformers datasets tiktoken wandb tqdm



### Specify the path to your `model` module
Please replace the placeholder path below with the actual path to the directory containing your `model.py` file.

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=true)

Mounted at /content/drive


In [2]:

MODEL_DIR = "/content/drive/MyDrive/SC3000" #@param {type:"string"}

import sys
import os
if MODEL_DIR not in sys.path:
    sys.path.append(MODEL_DIR)

print(f"Added {MODEL_DIR} to sys.path")

Added /content/drive/MyDrive/SC3000 to sys.path


### Step 2: Package imports and configuration

In [3]:
import sys
import os
# The following line is commented out as it might be redundant if MODEL_DIR is set correctly
# sys.path.append(os.path.abspath(".."))
# Setting CUDA_VISIBLE_DEVICES is typically not needed in Colab
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
import torch.nn as nn
import torch.nn.functional as F
import random
import pickle
from model import GPT, GPTConfig
import random
from tqdm import tqdm
import time
import json
import matplotlib.pyplot as plt
# Configuration
beta = 0.5
# Use 'cuda' if available, otherwise 'mps' if available, otherwise 'cpu'
device = 'cuda' if torch.cuda.is_available() else ('mps' if torch.backends.mps.is_available() else 'cpu')
base_lr = 1e-4
epochs = 5
batch_size = 64
max_length = 64
num_samples = 1
max_new_tokens = 200
temperature = 0.8
top_k = 200
print(device)
# tokenizer
tokenizer_path = os.path.join("/content/drive/MyDrive/SC3000/sft", "meta.pkl")
with open(tokenizer_path, "rb") as f:
    meta = pickle.load(f)
stoi, itos = meta["stoi"], meta["itos"]
def encode(s): return [stoi[c] for c in s]
def decode(l): return ''.join([itos[i] for i in l])

cuda


### Step 3: Define helper functions

In [4]:
def compute_logprob(input_ids):
    inputs = input_ids[:, :-1]
    targets = input_ids[:, 1:]
    logits, _ = gpt(inputs, full_seq=True)
    B, T, V = logits.size()
    logits_flat = logits.reshape(-1, V)
    targets_flat = targets.reshape(-1)
    loss = F.cross_entropy(logits_flat, targets_flat, ignore_index=0, reduction='none')
    loss = loss.reshape(B, T)
    attention_mask = (targets != 0).float()
    loss = (loss * attention_mask).sum(dim=1) / attention_mask.sum(dim=1)
    return -loss

def pad_or_truncate(seq, max_length):
    return seq[-max_length:] if len(seq) > max_length else seq + [0] * (max_length - len(seq))

def get_batches(lines, batch_size):
    random.shuffle(lines)
    #for l in lines:
    #    print(l[1])
    for i in range(0, len(lines), batch_size):
        batch = lines[i:i+batch_size]
        if len(batch) < batch_size:
            continue
        neg_inputs = [pad_or_truncate(encode(p['negative'] + '\n\n\n\n'), max_length) for p in batch]
        pos_inputs = [pad_or_truncate(encode(p['positive'] + '\n\n\n\n'), max_length) for p in batch]
        neg_tensor = torch.tensor(neg_inputs, dtype=torch.long, device=device)
        pos_tensor = torch.tensor(pos_inputs, dtype=torch.long, device=device)
        yield neg_tensor, pos_tensor

### Step 4: Load the pretrained NanoGPT model

In [5]:
ckpt = torch.load("/content/drive/MyDrive/SC3000/sft/gpt.pt", map_location=device)
gptconf = GPTConfig(**ckpt['model_args'])
gpt = GPT(gptconf)
state_dict = ckpt['model']
unwanted_prefix = '_orig_mod.'
for k in list(state_dict.keys()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)
gpt.to(device).train()

GPT(
  (transformer): ModuleDict(
    (wte): Embedding(74, 348)
    (wpe): Embedding(256, 348)
    (drop): Dropout(p=0.2, inplace=False)
    (h): ModuleList(
      (0-5): 6 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=348, out_features=1044, bias=False)
          (c_proj): Linear(in_features=348, out_features=348, bias=False)
          (attn_dropout): Dropout(p=0.2, inplace=False)
          (resid_dropout): Dropout(p=0.2, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=348, out_features=1392, bias=False)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=1392, out_features=348, bias=False)
          (dropout): Dropout(p=0.2, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=348, out_features=74, bias=False)
)

### Step 5: Load Data (**students are required to complete this part!**)

In [6]:
import json
from pathlib import Path

def load_jsonl_as_list(path: str | Path):
    path = Path(path)
    records = []
    with path.open("r", encoding="utf-8") as f:
        for i, line in enumerate(f, 1):
            s = line.strip()
            if not s:
                continue
            try:
                obj = json.loads(s)
            except json.JSONDecodeError as e:
                raise RuntimeError(f"Bad JSON on line {i}: {e}") from e
            if not (isinstance(obj, dict) and "positive" in obj and "negative" in obj):
                raise ValueError(f"Bad schema on line {i}: {obj}")
            records.append(obj)
    if not records:
        raise RuntimeError(f"No records found in {path}")
    return records

# --- use it just like your old code ---
lines = load_jsonl_as_list("/content/drive/MyDrive/SC3000/data/pos_neg_pairs.jsonl")  # <-- JSONL file

print(f"Loaded {len(lines)} samples")
print("Example first entry:", lines[0])

def clean_sample(sample, stoi):
    sample['negative'] = ''.join([c for c in sample['negative'] if c in stoi])
    sample['positive'] = ''.join([c for c in sample['positive'] if c in stoi])
    return sample

# keep your cleaning step unchanged
lines = [clean_sample(p, stoi) for p in lines]

print("Example after cleaning:", lines[0])

Loaded 100000 samples
Example first entry: {'positive': '16+37=? The answer is 53 because 16+37 equals 53.', 'negative': "16+37=? Sorry,I don't know!"}
Example after cleaning: {'positive': '16+37=? The answer is 53 because 16+37 equals 53.', 'negative': "16+37=? Sorry,I don't know"}


### Step 6: Build the optimizer and scheduler (**students are required to complete this part!**)

In [7]:
# ===== AdamW + Smooth Cosine (cosine warmup + cosine decay) =====
import math
from torch.optim import AdamW
from torch.optim.lr_scheduler import LambdaLR

def build_param_groups(model, weight_decay=0.1):
    decay, no_decay = [], []
    for n, p in model.named_parameters():
        if not p.requires_grad:
            continue
        (decay if p.dim() >= 2 else no_decay).append(p)
    return [
        {"params": decay, "weight_decay": weight_decay},
        {"params": no_decay, "weight_decay": 0.0},
    ]

def build_adamw(model, lr, weight_decay=0.1, betas=(0.9, 0.95), eps=1e-8):
    fused_ok = (
        device == "cuda"
        and hasattr(torch.optim, "AdamW")
        and "fused" in AdamW.__init__.__code__.co_varnames
    )
    return AdamW(build_param_groups(model, weight_decay=weight_decay),
                 lr=lr, betas=betas, eps=eps, fused=fused_ok)

def build_smooth_cosine_scheduler(optimizer, warmup_steps, total_steps, min_lr_ratio=0.1):
    """
    Smooth cosine warmup (cosine from 0->1) followed by cosine decay (1->0),
    scaled to finish at min_lr_ratio * base_lr. No clamping/kinks.
    """
    assert total_steps > 0
    min_ratio = float(min_lr_ratio)

    def lr_lambda(step):
        # Normalize progress to [0, 1]
        s = float(step)
        T = float(total_steps)
        if s < warmup_steps and warmup_steps > 0:
            # cosine warmup: 0 -> 1 smoothly
            w = (s + 1.0) / warmup_steps
            warm = 0.5 * (1.0 - math.cos(math.pi * min(w, 1.0)))
            return warm  # scales from 0 -> 1
        # cosine decay from 1 -> min_ratio
        # progress p goes from 0 at end of warmup to 1 at final step
        denom = max(1.0, T - max(1.0, float(warmup_steps)))
        p = (s - warmup_steps) / denom
        p = max(0.0, min(1.0, p))
        cosine = 0.5 * (1.0 + math.cos(math.pi * p))  # 1 -> 0
        # Affine shift: land exactly at min_ratio when p=1
        return min_ratio + (1.0 - min_ratio) * cosine

    return LambdaLR(optimizer, lr_lambda)

# ---- size your schedule ----
steps_per_epoch = max(1, len(lines) // batch_size)
total_steps = epochs * steps_per_epoch
warmup_steps = max(1, int(0.03 * total_steps))  # 3% warmup is usually enough

optimizer = build_adamw(gpt, lr=base_lr, weight_decay=0.1)
scheduler = build_smooth_cosine_scheduler(
    optimizer,
    warmup_steps=warmup_steps,
    total_steps=total_steps,
    min_lr_ratio=0.10,      # floor at 10% of base_lr (e.g., 1e-5 if base=1e-4)
)

print(f"AdamW ready | fused={getattr(optimizer, 'fused', False)}")
print(f"steps/epoch={steps_per_epoch}, total_steps={total_steps}, warmup_steps={warmup_steps}")



AdamW ready | fused=False
steps/epoch=1562, total_steps=7810, warmup_steps=234


### Step 7: Begin training (**students are required to complete this part!**)

In [8]:
total_steps = len(lines) // batch_size
for epoch in range(epochs):
    pbar = tqdm(get_batches(lines, batch_size))
    for step, (neg_tensor,pos_tensor) in enumerate(pbar):
        ###########################################################
        neg_logprob = compute_logprob(neg_tensor)  # shape: (batch_size,)
        pos_logprob = compute_logprob(pos_tensor)  # shape: (batch_size,)
        loss = -F.logsigmoid((pos_logprob - neg_logprob) / beta).mean() - pos_logprob.mean() * 0.1
        # Backprop
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(gpt.parameters(), 1.0)  # gradient clipping
        optimizer.step()
        scheduler.step()

        # Update progress bar
        pbar.set_description(f"Epoch {epoch+1}, Step {step+1}, Loss: {loss.item():.4f}, LR: {scheduler.get_last_lr()[0]:.6f}")
        ###########################################################
    ckpt_path = f"./dpo.pt"
    torch.save({
        "model_state_dict": gpt.state_dict(),
        "model_args": ckpt['model_args'],
    }, ckpt_path)
    print(f"Saved checkpoint to {ckpt_path}")

Epoch 1, Step 1562, Loss: 0.0242, LR: 0.000093: : 1562it [04:17,  6.07it/s]


Saved checkpoint to ./dpo.pt


Epoch 2, Step 1562, Loss: 0.0215, LR: 0.000071: : 1562it [04:19,  6.03it/s]


Saved checkpoint to ./dpo.pt


Epoch 3, Step 1562, Loss: 0.0189, LR: 0.000043: : 1562it [04:19,  6.03it/s]


Saved checkpoint to ./dpo.pt


Epoch 4, Step 1562, Loss: 0.0191, LR: 0.000019: : 1562it [04:19,  6.02it/s]


Saved checkpoint to ./dpo.pt


Epoch 5, Step 1562, Loss: 0.0185, LR: 0.000010: : 1562it [04:18,  6.04it/s]


Saved checkpoint to ./dpo.pt


### Step 8: Begin testing (**students are required to complete this part!**)

In [9]:
# Load the fine-tuned model
ckpt_path = "/content/dpo.pt"
checkpoint = torch.load(ckpt_path, map_location=device)
gptconf = GPTConfig(**checkpoint['model_args'])
gpt = GPT(gptconf).cuda()
try:
    state_dict = checkpoint['model']
except:
    state_dict = checkpoint['model_state_dict']
unwanted_prefix = '_orig_mod.'
for k,v in list(state_dict.items()):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
gpt.load_state_dict(state_dict)
# Test
gpt.eval()
test_set = ["17+19=?", "3*17=?", "72/4=?", "72-x=34,x=?", "x*11=44,x=?", "3*17=?", "72/4=?", "72-x=34,x=?"]
with torch.no_grad():
    for prompt in test_set:
        prompt_ids = encode(prompt)
        ###########################################################
        x = torch.tensor(prompt_ids, dtype=torch.long, device=device).unsqueeze(0)

        y = gpt.generate(x, max_new_tokens=max_new_tokens,
                         temperature=temperature, top_k=top_k)

        # flatten tokens and decode
        tokens = y[0].view(-1).tolist()
        print(f"Prompt: {prompt}\nAnswer: {decode(tokens)}\n")

Prompt: 17+19=?
Answer: 17+19=? The answer is 36 because 17+19 equals 36.

Prompt: 3*17=?
Answer: 3*17=? The answer is 51 because 3*17 equals 51.

Prompt: 72/4=?
Answer: 72/4=? The answer is 18 because 72/4 equals 18.

Prompt: 72-x=34,x=?
Answer: 72-x=34,x=? The answer is 38 because 72-34=388.

Prompt: x*11=44,x=?
Answer: x*11=44,x=? The answer is 4 because 44/11=4444.

Prompt: 3*17=?
Answer: 3*17=? The answer is 51 because 3*17 equals 51.

Prompt: 72/4=?
Answer: 72/4=? The answer is 18 because 72/4 equals 18.

Prompt: 72-x=34,x=?
Answer: 72-x=34,x=? The answer is 48 because 72-34=488.

