In [1]:
import json
from tqdm.autonotebook import tqdm

import torch
from torch.optim import SGD
from torch.utils.data import Dataset, DataLoader

from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler

from cs336_alignment.sft import (
    tokenize_prompt_and_output,
    get_response_log_probs,
    sft_microbatch_train_step,
)

  from tqdm.autonotebook import tqdm


In [2]:
class QuestionAnswerDataset(Dataset):
    """Map-style dataset of (question, answer) pairs backed by a JSONL file.

    Every non-blank line of the file is decoded as one JSON value. Items are
    expected to expose "question" and "answer" keys, which are looked up when
    an item is accessed (not when the file is read).
    """

    def __init__(self, jsonl_path):
        """Read ``jsonl_path`` eagerly and keep all decoded records in memory."""
        self.samples = self._read_jsonl(jsonl_path)

    def _read_jsonl(self, file_path):
        """Return the decoded JSON value of each non-blank line of ``file_path``."""
        with open(file_path, 'r', encoding='utf-8') as handle:
            # Whitespace-only lines are skipped rather than passed to json.loads.
            return [json.loads(raw_line) for raw_line in handle if raw_line.strip()]

    def __len__(self):
        """Number of records that were read from the file."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the ``(question, answer)`` tuple of the record at ``idx``."""
        record = self.samples[idx]
        return record["question"], record["answer"]


In [3]:
# Read the GSM8K training split into memory as (question, answer) pairs.
train_jsonl_path = "/assignment5-alignment/data/gsm8k/train.jsonl"
train_dataset = QuestionAnswerDataset(jsonl_path=train_jsonl_path)

In [4]:
# Sanity check: number of records read from the training file.
len(train_dataset)

7473

In [5]:
# Spot-check a single (question, answer) pair to confirm the expected format.
train_dataset[37]

('Five friends eat at a fast-food chain and order the following: 5 pieces of hamburger that cost $3 each; 4 sets of French fries that cost $1.20; 5 cups of soda that cost $0.5 each; and 1 platter of spaghetti that cost $2.7. How much will each of them pay if they will split the bill equally?',
 'The cost of 5 pieces of hamburger is $3 x 5 = $<<3*5=15>>15.\nThe cost of 4 sets of French fries is $1.20 x 4 = $<<1.20*4=4.80>>4.80.\nThe cost of 5 cups of soda is $0.5 x 5 = $<<0.5*5=2.50>>2.50.\nSo their total bill is $15 + $4.80 + $2.50 +$2.7 = $<<15+4.8+2.5+2.7=25>>25.\nHence, each of the five friends will contribute $25/5 = $<<25/5=5>>5.\n#### 5')

In [6]:
# Batch the (question, answer) pairs for training.
batch_size = 8  # examples per microbatch

# Seed the shuffle explicitly: without a generator the sampling order depends on
# the global RNG state, so Restart & Run All would visit different examples on
# every run and the logged losses would not be comparable between runs.
shuffle_generator = torch.Generator().manual_seed(0)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    generator=shuffle_generator,
)

In [7]:
# Load the causal LM and its tokenizer from the same local checkpoint directory.
model_dir = "/assignment5-alignment/models/local-qwen-2_5-math-1_5B/"
model = AutoModelForCausalLM.from_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [8]:
# Move the model to the GPU; the training cell places its input tensors on
# "cuda" as well, so model and batches must live on the same device.
model = model.to("cuda")

In [9]:
# SFT training loop.
#
# Each dataloader batch is one microbatch; gradients from
# `gradient_accumulation_steps` microbatches are accumulated before a single
# optimizer/scheduler update. A checkpoint is written at the end of every epoch.

# Hyperparameters a reader is likely to tune.
gradient_accumulation_steps = 4  # microbatches per optimizer update
normalize_constant = 1  # forwarded unchanged to sft_microbatch_train_step
num_epochs = 1
max_epoch_steps = 300  # microbatch cap per epoch; a full pass is len(train_dataset) // batch_size

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
# The scheduler advances once per optimizer update, not once per microbatch.
num_training_steps = max_epoch_steps * num_epochs // gradient_accumulation_steps
scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=num_training_steps // 10,
    num_training_steps=num_training_steps,
)
global_step = 0  # microbatches processed so far, across all epochs
running_loss = 0.0  # sum of microbatch losses since the last average was logged
logging_steps = gradient_accumulation_steps * 5

# from_pretrained() hands the model back in evaluation mode, so switch to
# training mode explicitly before optimizing.
model.train()
# Re-running this cell after an interrupted run rebuilds the optimizer but not
# the model, so parameters may still hold stale .grad values; clear them.
optimizer.zero_grad()

for epoch in range(num_epochs):

    progress = tqdm(enumerate(train_dataloader), total=max_epoch_steps)
    for steps, batch in progress:
        if steps == max_epoch_steps:
            break

        # `batch` unpacks into the batched questions and the batched answers.
        inputs = tokenize_prompt_and_output(
            *batch, tokenizer=tokenizer
        )

        input_ids = inputs["input_ids"].to("cuda")
        labels = inputs["labels"].to("cuda")
        response_mask = inputs["response_mask"].to("cuda")

        # Only "log_probs" is consumed below, so the token entropy is not
        # requested. NOTE(review): presumably this skips the entropy
        # computation inside the helper - confirm against cs336_alignment.sft.
        outputs = get_response_log_probs(
            model=model,
            input_ids=input_ids,
            labels=labels,
            return_token_entropy=False
        )
        # NOTE(review): this cell never calls loss.backward(), so the helper is
        # presumably responsible for the backward pass - confirm.
        loss, stats = sft_microbatch_train_step(
            policy_log_probs=outputs["log_probs"],
            response_mask=response_mask,
            gradient_accumulation_steps=gradient_accumulation_steps,
            normalize_constant=normalize_constant
        )

        # Read the scalar once; every .item() call forces a device-to-host sync.
        loss_value = loss.item()
        running_loss += loss_value
        global_step += 1

        # Key the accumulation boundary on the global microbatch count so the
        # number of optimizer updates matches `num_training_steps` even when an
        # epoch's step count is not a multiple of the accumulation factor.
        if global_step % gradient_accumulation_steps == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        # Show per-microbatch metrics on the progress bar instead of printing
        # one line per step, which would flood the cell output.
        progress.set_postfix(
            loss=f"{loss_value:.4f}",
            lr=f"{scheduler.get_last_lr()[0]:.2e}",
        )
        if global_step % logging_steps == 0:
            avg_loss = running_loss / logging_steps
            # tqdm.write keeps the message from corrupting the progress bar.
            tqdm.write(f"Step {global_step}: Avg. Loss = {avg_loss:.4f}")
            running_loss = 0.0

    # Per-epoch checkpoint: model weights and tokenizer files side by side.
    output_dir = f"/assignment5-alignment/models/local-qwen-2_5-math-1_5B_sft_train_{epoch}"
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)



  0%|          | 0/300 [00:00<?, ?it/s]

In [None]:
# Save the current model weights and tokenizer files into a single directory.
# Note: the training cell already writes a per-epoch copy to a directory with
# an "_{epoch}" suffix; this one uses the unsuffixed name.
output_dir = "/assignment5-alignment/models/local-qwen-2_5-math-1_5B_sft_train"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

In [None]:
# Print PyTorch's CUDA memory allocator report for the current device.
print(torch.cuda.memory_summary())