In [None]:
!python -m pip install --upgrade pip
!pip install -U bitsandbytes transformers peft datasets hf_transfer trl evaluate sacrebleu
!pip install flash-attn --no-build-isolation
!pip install wandb

Collecting wandb
  Downloading wandb-0.24.0-py3-none-manylinux_2_28_x86_64.whl.metadata (12 kB)
Collecting click>=8.0.1 (from wandb)
  Downloading click-8.3.1-py3-none-any.whl.metadata (2.6 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading gitpython-3.1.46-py3-none-any.whl.metadata (13 kB)
Collecting protobuf!=4.21.0,!=5.28.0,<7,>=3.19.0 (from wandb)
  Downloading protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Collecting pydantic<3 (from wandb)
  Downloading pydantic-2.12.5-py3-none-any.whl.metadata (90 kB)
Collecting sentry-sdk>=2.0.0 (from wandb)
  Downloading sentry_sdk-2.49.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting annotated-types>=0.6.0 (from pydantic<3->wandb)
  Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.41.5 (from pydantic<3->wandb)
  Downloading pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting typing-extensions<5,>=

In [1]:
import os
import json
import torch
import wandb
from transformers import AutoTokenizer, pipeline
from trl import SFTTrainer, SFTConfig
from datasets import load_dataset, Dataset, load_from_disk

from make_prompts import generate_prompts
from qlora import load_qlora_model, load_trained_model

with open("config.json", "r") as f:
    cfg = json.load(f)

#### Load Dataset and Model

In [2]:
train_data = load_from_disk("data/train_data_with_reasoning")
test_data = load_from_disk("data/test_data_with_reasoning")

In [3]:
MODEL_NAME = cfg["model_name"]
model = load_qlora_model(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [4]:
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<|think_start|>", "<|think_end|>"]}
)
model.resize_token_embeddings(len(tokenizer))

Embedding(151667, 3584)

In [5]:
train_ds = generate_prompts(train_data, tokenizer, is_test=False)
dev_ds = generate_prompts(test_data, tokenizer, is_test=False)

#### SFT

In [6]:
# wandb initialization
wandb.login(os.getenv("WANDB_API_KEY"))

project = "sft-hpo"
display_name = "qwen2.5-python-coder"
wandb.init(project=project, name=display_name)

[34m[1mwandb[0m: [wandb.login()] Using explicit session credentials for https://api.wandb.ai.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshultra2[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
sft_config = SFTConfig(
    output_dir="checkpoints",
    num_train_epochs=cfg["sft"]["num_epochs"],
    per_device_train_batch_size=cfg["sft"]["batch_size"],
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    learning_rate=cfg["sft"]["lr"],
    bf16=True,
    dataset_text_field="text",
    lr_scheduler_type="cosine",
    warmup_ratio=cfg["sft"]["warmup"],
    gradient_accumulation_steps=cfg["sft"]["grad_accum_steps"],
    do_eval=True,
    eval_strategy="epoch",
    group_by_length=True,
    report_to="wandb"
    )

trainer = SFTTrainer(
    model=model,
    train_dataset=train_ds,
    eval_dataset=dev_ds,
    args=sft_config,
)

Adding EOS to train dataset:   0%|          | 0/2641 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2641 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2641 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/119 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/119 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/119 [00:00<?, ? examples/s]

In [9]:
trainer.train()

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
Casting fp32 inputs back to torch.bfloat16 for flash-attn compatibility.
  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss
1,1.0072,1.178523
2,0.7966,1.044336
3,0.7367,1.024756


  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]
  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]


TrainOutput(global_step=126, training_loss=0.915087207915291, metrics={'train_runtime': 2316.8398, 'train_samples_per_second': 3.42, 'train_steps_per_second': 0.054, 'total_flos': 2.953136411386184e+17, 'train_loss': 0.915087207915291})

In [29]:
# save SFT model
adapter_path = os.path.join("lora_checkpoints", cfg["ckpt_name"])
model.save_pretrained(adapter_path)

In [None]:
sft_model = load_trained_model(MODEL_NAME, adapter_path)
sft_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [31]:
test_data = load_dataset(cfg["dataset"], split="test")

In [None]:
test_ds = generate_prompts(test_data, tokenizer, is_eval=True)

test_pipeline = pipeline(
    "text-generation",
    model=sft_model,
    tokenizer=sft_tokenizer,
    max_new_tokens=cfg["generation"]["max_new_tokens"],
)

# 생성
outputs = test_pipeline(
    test_ds[0]["text"],
    do_sample=True,
    temperature=cfg["generation"]["temperature"],
    top_p=cfg["generation"]["top_p"],
    add_special_tokens=True,
)
print(outputs)

Device set to use cuda:0


[{'generated_text': "<|im_start|>system\nYou are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests.\n\nBefore solving a problem, think step by step and explain your reasoning. Start with your approach and explanation, then write the final code.\n\n<|im_end|>\n<|im_start|>user\n### Question:\nYou are given an integer n and a 2D integer array queries.\nThere are n cities numbered from 0 to n - 1. Initially, there is a unidirectional road from city i to city i + 1 for all 0 <= i < n - 1.\nqueries[i] = [ui, vi] represents the addition of a new unidirectional road from city ui to city vi. After each query, you need to find the length of the shortest path from city 0 to city n - 1.\nReturn an array answer where for each i in the range [0, queries.length - 1], answer[i] is the length of the shortest path from city 0 to city n - 1 after processing the first i + 1 queri

In [42]:
print(outputs[0]["generated_text"])

<|im_start|>system
You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests.

Before solving a problem, think step by step and explain your reasoning. Start with your approach and explanation, then write the final code.

<|im_end|>
<|im_start|>user
### Question:
You are given an integer n and a 2D integer array queries.
There are n cities numbered from 0 to n - 1. Initially, there is a unidirectional road from city i to city i + 1 for all 0 <= i < n - 1.
queries[i] = [ui, vi] represents the addition of a new unidirectional road from city ui to city vi. After each query, you need to find the length of the shortest path from city 0 to city n - 1.
Return an array answer where for each i in the range [0, queries.length - 1], answer[i] is the length of the shortest path from city 0 to city n - 1 after processing the first i + 1 queries.
 
Example 1:

Input: n = 5, 

In [None]:
print("question:", test_ds.iloc[0]["query"])
print("response:", test_ds.iloc[0]["response"])

question: You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests.

### Question:
You are given an integer array nums of length n and a 2D array queries where queries[i] = [li, ri, vali].
Each queries[i] represents the following action on nums:

Decrement the value at each index in the range [li, ri] in nums by at most vali.
The amount by which each value is decremented can be chosen independently for each index.

A Zero Array is an array with all its elements equal to 0.
Return the minimum possible non-negative value of k, such that after processing the first k queries in sequence, nums becomes a Zero Array. If no such k exists, return -1.
 
Example 1:

Input: nums = [2,0,2], queries = [[0,2,1],[0,2,1],[1,1,3]]
Output: 2
Explanation:

For i = 0 (l = 0, r = 2, val = 1):

Decrement values at indices [0, 1, 2] by [1, 0, 1] respectively.
The array will become [1