In [None]:
# !pip install --upgrade pip
# !pip install unsloth peft trl wandb
# !pip install -q human-eval datasets transformers accelerate einops tiktoken tqdm

In [None]:
import os, sys, json, math, time, random, tempfile, shutil, subprocess, signal, resource, textwrap, re
from dataclasses import dataclass
from pathlib import Path
from typing import List, Dict, Any, Optional
from kaggle_secrets import UserSecretsClient
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
import trl, peft
from peft import LoraConfig
from transformers import TrainerCallback
# W&B (tùy chọn)
import wandb

In [None]:
# Prefer the GPU whenever PyTorch can see one.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)


def set_seed(seed: int = 42):
    """Seed the RNGs this notebook relies on.

    Seeds Python's `random` module and PyTorch's default generator; when CUDA is
    available, every CUDA device is seeded as well.

    Args:
        seed: value handed to each generator.
    """
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


set_seed(42)

# ---- Helper: pull the Python source out of a model reply (drops ```python ... ``` fences)
# The opening fence may carry a python / py / python3 tag, trailing blanks and a CRLF
# line ending. The closing fence is optional so a reply that was cut off before the
# closing fence still yields its code.
_CODE_FENCE_RE = re.compile(
    r"```[ \t]*(?:python3?|py)?[ \t]*\r?\n(.*?)(?:```|\Z)",
    re.DOTALL | re.IGNORECASE,
)
# Matches the beginning of a line that looks like Python code rather than prose.
_CODE_START_RE = re.compile(
    r"(?:from\s+\S+\s+import\s"
    r"|import\s+[A-Za-z_][\w.]*\s*(?:$|,|as\s|#)"
    r"|class\s+\w"
    r"|(?:async\s+)?def\s+\w"
    r"|@\w)"
)


def extract_code(text: str) -> str:
    """Return the code portion of a generated reply.

    Resolution order:
      1. The body of the first fenced block (closing fence optional).
      2. Otherwise, everything from the first line that starts a Python construct
         (`import`, `from ... import`, `class`, `def`, `async def`, decorator).
         Starting at `class`/`import` rather than only `def` keeps an enclosing
         class header and its imports instead of cutting into the class body.
      3. Otherwise, the stripped text unchanged.

    Args:
        text: raw decoded model output; `None` or empty yields "".

    Returns:
        The extracted source with common leading indentation removed and
        surrounding whitespace stripped.
    """
    if not text:
        return ""
    fenced = _CODE_FENCE_RE.search(text)
    if fenced:
        return textwrap.dedent(fenced.group(1)).strip()
    # Fallback: skip leading explanations and keep the code that follows.
    lines = text.splitlines()
    for idx, line in enumerate(lines):
        if _CODE_START_RE.match(line.lstrip()):
            return textwrap.dedent("\n".join(lines[idx:])).strip()
    return text.strip()

# ---- Sandbox executor (POSIX): run the tests in a child process with time & resource limits
def _write(path: Path, content: str):
    """Write `content` to `path` as UTF-8 text, replacing any existing file."""
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)

def _limit_resources(timeout_sec: int = 5, max_mem_gb: int = 2):
    """Apply best-effort rlimits to the current process.

    Sets the soft and hard limit to the same value for, in order:
      * RLIMIT_CPU   -> `timeout_sec`
      * RLIMIT_AS    -> `max_mem_gb` GiB
      * RLIMIT_FSIZE -> 10 MiB

    Each limit is attempted independently; any exception raised while computing
    or applying one is ignored so the remaining limits are still attempted.
    """
    plan = (
        ("RLIMIT_CPU", lambda: timeout_sec),
        ("RLIMIT_AS", lambda: max_mem_gb * 1024**3),
        ("RLIMIT_FSIZE", lambda: 10 * 1024**2),  # 10MB
    )
    for limit_name, compute_cap in plan:
        try:
            cap = compute_cap()
            resource.setrlimit(getattr(resource, limit_name), (cap, cap))
        except Exception:
            continue

# Source of the runner script executed inside the sandbox. Filled in with str.format():
#   {test_code}      -> test source pasted verbatim; it must define check(candidate)
#   {entry_point!r}  -> expression evaluated in the candidate module's namespace to get
#                       the callable under test. A bare name ("solve") and a call chain
#                       ("Solution().solve") both work; getattr() handled only the former.
# Notes for maintainers:
#   * This is a normal (non-raw) string, so a backslash meant for the generated file must
#     be doubled. A single "\n" here used to split the stderr.write(...) literal across two
#     lines, making every generated runner a SyntaxError (=> every sample scored as failed).
#   * The runner's own imports/locals are underscore-prefixed so that copying the
#     candidate's public names into globals() cannot shadow them.
#   * Literal braces must be written as {{ }} because of str.format().
EVAL_TEMPLATE = """
import importlib.util as _ilu, sys as _sys, json as _json

# Nạp module ứng viên (chứa lời giải)
_spec = _ilu.spec_from_file_location("candidate", "candidate.py")
_cand = _ilu.module_from_spec(_spec)
_spec.loader.exec_module(_cand)  # có thể raise lỗi syntax/exception

# Make the candidate's public names (imports, helper classes/functions) visible to the
# pasted test code, which may refer to them directly.
for _name, _value in list(vars(_cand).items()):
    if not _name.startswith("_"):
        globals()[_name] = _value

# Dán test code từ dataset (định nghĩa check(...))
{test_code}

passed = None
try:
    candidate = eval({entry_point!r}, vars(_cand))
    # Một số test dùng assert và không trả về; nếu không exception thì coi là passed
    r = check(candidate)
    passed = (r is None) or bool(r)
except Exception as e:
    # Nếu test ném lỗi -> fail
    _sys.stderr.write("TEST_ERROR: " + str(e) + "\\n")
    passed = False

print(_json.dumps({{"passed": bool(passed)}}))
"""

def run_program_with_tests(program: str, test_code: str, entry_point: str, timeout_sec: int = 6) -> bool:
    """Run `program` against `test_code` in a resource-limited child process.

    A scratch directory receives `candidate.py` (the program) and `run_eval.py`
    (EVAL_TEMPLATE filled with `test_code` / `entry_point`). The runner is executed
    with the current interpreter, and its last stdout line is parsed as JSON.

    Args:
        program: full source of the candidate solution.
        test_code: test source substituted into EVAL_TEMPLATE.
        entry_point: identifies the callable under test inside the candidate module.
        timeout_sec: CPU-time limit for the child; the wall-clock limit is one
            second longer.

    Returns:
        True only when the child exits with status 0 and reports `"passed": true`.
        Timeouts, crashes, non-zero exit codes and unparsable output all yield False.
    """
    work = Path(tempfile.mkdtemp(prefix="lcd_eval_"))
    try:
        _write(work / "candidate.py", program)
        runner = work / "run_eval.py"
        _write(runner, EVAL_TEMPLATE.format(test_code=test_code, entry_point=entry_point))

        # Run in a child process with limits applied.
        # NOTE: preexec_fn runs between fork and exec; the subprocess documentation
        # warns it is not safe when the parent has other threads. It is kept
        # because it is how the rlimits get applied.
        with subprocess.Popen(
            [sys.executable, str(runner)],
            cwd=str(work),
            stdin=subprocess.DEVNULL,   # input() in the candidate fails fast instead of blocking
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",           # undecodable output must not raise in the parent
            start_new_session=True,     # own process group, so the whole tree can be killed
            preexec_fn=lambda: _limit_resources(timeout_sec=timeout_sec, max_mem_gb=2),
        ) as proc:
            try:
                out, _ = proc.communicate(timeout=timeout_sec + 1)
            except subprocess.TimeoutExpired:
                # Kill the child and anything it spawned, then reap it so that no
                # zombie process or open pipe is left behind.
                try:
                    os.killpg(proc.pid, signal.SIGKILL)
                except (ProcessLookupError, PermissionError):
                    proc.kill()
                try:
                    proc.communicate(timeout=1)
                except subprocess.TimeoutExpired:
                    pass
                return False

        if proc.returncode != 0:
            return False

        try:
            verdict = json.loads(out.strip().splitlines()[-1])
            return bool(verdict.get("passed", False))
        except Exception:
            # Empty or non-JSON output counts as a failure.
            return False
    finally:
        shutil.rmtree(work, ignore_errors=True)

In [None]:
# ==== Model & data configuration ====
MODEL_NAME = "Qwen/Qwen2.5-Coder-3B-Instruct"
MAX_SEQ_LEN = 3072

# Locations of the two JSONL files (adjust for your machine)
DATA_DIR = "/kaggle/input/datasetleetcode"
TRAIN_JSONL = f"{DATA_DIR}/LeetCodeDataset-train.jsonl"
TEST_JSONL = f"{DATA_DIR}/LeetCodeDataset-test.jsonl"

# Keyword arguments forwarded to model.generate() when sampling code
GEN_KW = {
    "max_new_tokens": 640,
    "temperature": 0.2,
    "top_p": 0.95,
    "do_sample": True,
}

# Re-seed with the notebook-wide seed
SEED = 42
set_seed(SEED)

In [None]:
# ==== Load model & tokenizer (QLoRA 4-bit) ====
# dtype=None leaves the dtype choice to Unsloth; load_in_4bit requests 4-bit weights.
load_kwargs = {
    "max_seq_length": MAX_SEQ_LEN,
    "dtype": None,
    "load_in_4bit": True,
}
model, tokenizer = FastLanguageModel.from_pretrained(MODEL_NAME, **load_kwargs)
print("Loaded:", MODEL_NAME)

In [None]:
# Both files are single-split JSONL, so `datasets` exposes each of them as "train".
# (`load_dataset` is already imported in the notebook's import cell.)
train_ds = load_dataset("json", data_files=TRAIN_JSONL, split="train")
test_ds  = load_dataset("json", data_files=TEST_JSONL,  split="train")

# Columns the rest of the notebook reads; a missing one is reported, not fatal.
REQUIRED_FIELDS = ["task_id", "problem_description", "completion", "starter_code", "test", "entry_point"]
for name, ds in [("train", train_ds), ("test", test_ds)]:
    missing = [c for c in REQUIRED_FIELDS if c not in ds.column_names]
    if missing:
        print(f"[CẢNH BÁO] {name} thiếu các cột: {missing}")

SYSTEM_INST = (
    "You are an expert competitive programming assistant. "
    "Given a problem description and optional starter code, write a complete and correct Python solution. "
    "Only output Python code, no explanations or markdown."
)


def _has_sft_fields(example) -> bool:
    """True when the row has a non-blank problem description and a non-blank solution."""
    return bool((example.get("problem_description") or "").strip()) and bool(
        (example.get("completion") or "").strip()
    )


def map_to_sft(example):
    """Render one dataset row as a chat-formatted training string.

    The user turn is the problem description, followed by a "Starter Code:"
    section when starter code is present; the assistant turn is the reference
    solution.

    Returns:
        {"text": <rendered conversation>} or {"text": None} when the row lacks a
        description or a solution.
    """
    desc = (example.get("problem_description") or "").strip()
    starter = (example.get("starter_code") or "").strip()
    sol = (example.get("completion") or "").strip()
    if not desc or not sol:
        return {"text": None}
    user_prompt = desc
    if starter:
        user_prompt += f"\n\nStarter Code:\n{starter}"
    messages = [
        {"role": "system", "content": SYSTEM_INST},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": sol},
    ]
    txt = tokenizer.apply_chat_template(messages, tokenize=False)
    # Chat templates may already terminate the assistant turn with the EOS token;
    # append it only when absent so the target never ends in a doubled EOS.
    # This also tolerates a tokenizer without an EOS token.
    eos = tokenizer.eos_token
    if eos and not txt.rstrip().endswith(eos):
        txt += eos
    return {"text": txt}


# Drop unusable rows before mapping so the "text" column only ever holds strings.
train_sft = train_ds.filter(_has_sft_fields).map(
    map_to_sft,
    num_proc=4,
    remove_columns=[c for c in train_ds.column_names if c != "task_id"],
)

print("Train samples:", len(train_sft))
print("Test samples:", len(test_ds))

In [None]:
# ==== LoRA configuration ====
# The hyper-parameters live in one LoraConfig and are read back from it when the
# adapter is attached below.
lora_cfg = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.0,  # 0.0 usually works well for QLoRA
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

# Note: this rebinds `model`, so the cell is meant to run once per loaded model.
peft_kwargs = {
    "r": lora_cfg.r,
    "target_modules": lora_cfg.target_modules,
    "lora_alpha": lora_cfg.lora_alpha,
    "lora_dropout": lora_cfg.lora_dropout,
    "bias": lora_cfg.bias,
    "use_gradient_checkpointing": "unsloth",
    "random_state": SEED,
    "use_rslora": True,
    "loftq_config": None,
}
model = FastLanguageModel.get_peft_model(model, **peft_kwargs)

In [None]:
# W&B setup: read the API key from the Kaggle secret named "wandb", log in, start the run
WANDB_PROJECT = "UET deeplearning"
WANDB_RUN_NAME = "qwen-leetcode"

user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb")
wandb.login(key=secret_value_0)
wandb.init(project=WANDB_PROJECT, name=WANDB_RUN_NAME)

In [None]:
# ==== Self-check callback: sample problems at each epoch end and report pass@1 ====
class SelfCheckCallback(TrainerCallback):
    """Trainer callback that spot-checks generated solutions after every epoch.

    At the end of each epoch it draws random problems from the train and test
    sets, generates one solution per problem, runs it through
    `run_program_with_tests`, and prints the pass rates. The rates are also logged
    to Weights & Biases when a run is active.

    Args:
        tokenizer: tokenizer used to build prompts and decode generations.
        test_ds: held-out dataset to sample from.
        sample_train: number of training problems checked per epoch.
        sample_test: number of test problems checked per epoch.
        train_ds: training dataset to sample from; when omitted, the module-level
            `train_ds` is used (the previous behaviour).
    """

    def __init__(self, tokenizer, test_ds, sample_train=16, sample_test=32, train_ds=None):
        self.tok = tokenizer
        self.test_ds = test_ds
        self.train_ds = train_ds
        self.sample_train = sample_train
        self.sample_test = sample_test

    def _gen_completion(self, model, problem_description: str, starter_code: str = "") -> str:
        """Generate one solution for a problem and return the extracted code.

        The prompt mirrors the training format: the problem description followed
        by an optional "Starter Code:" section. `None` fields are treated as empty.
        Leaves the model in Unsloth inference mode; `_check_many` restores training
        mode afterwards.
        """
        user_prompt = (problem_description or "").strip()
        starter = (starter_code or "").strip()
        if starter:
            user_prompt += f"\n\nStarter Code:\n{starter}"
        msgs = [
            {"role": "system", "content": SYSTEM_INST},
            {"role": "user", "content": user_prompt},
        ]
        prompt_text = self.tok.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        enc = self.tok(prompt_text, return_tensors="pt").to(model.device)
        FastLanguageModel.for_inference(model)
        with torch.no_grad():
            out = model.generate(**enc, **GEN_KW)
        # Decode only the newly generated tokens (everything after the prompt).
        gen_text = self.tok.decode(out[0][enc["input_ids"].shape[1]:], skip_special_tokens=True)
        return extract_code(gen_text)

    def _check_many(self, model, dataset, n=16):
        """Return the fraction of `n` randomly sampled problems whose generated solution passes.

        A sample whose generation or execution raises is reported and counted as
        failed. If the model was in training mode on entry, it is switched back
        with `FastLanguageModel.for_training` before returning, so a mid-training
        check does not leave the model in inference mode for the next epoch.
        """
        if model is None or dataset is None:
            return 0.0
        n = min(n, len(dataset))
        if n <= 0:
            return 0.0
        was_training = bool(getattr(model, "training", False))
        ok = 0
        try:
            for i in random.sample(range(len(dataset)), n):
                ex = dataset[i]
                try:
                    completion = self._gen_completion(
                        model,
                        ex.get("problem_description") or "",
                        ex.get("starter_code") or "",
                    )
                    # Build the program the same way predict_and_score does: the
                    # row's `prompt` prelude (if any) plus the generated solution.
                    # The starter code is NOT prepended: the model is asked for a
                    # complete solution, so putting a starter stub in front of it
                    # duplicates the definition.
                    # NOTE(review): assumes `prompt` holds the import/helper prelude
                    # and `starter_code` is only a signature stub - confirm against the dataset.
                    program = (ex.get("prompt") or "") + "\n" + completion
                    passed = run_program_with_tests(
                        program,
                        ex.get("test") or "",
                        ex.get("entry_point") or "",
                    )
                    ok += int(bool(passed))
                except Exception as e:
                    # Best effort: one bad sample must not abort training, but it
                    # should be visible rather than silently counted as a failure.
                    print(f"[Self-Check] sample {ex.get('task_id', i)} skipped: {e!r}")
        finally:
            if was_training:
                FastLanguageModel.for_training(model)
        return ok / n

    def on_epoch_end(self, args, state, control, **kwargs):
        """Run the self-check on train and test samples, print and log the pass rates."""
        model = kwargs.get("model")
        try:
            train_source = self.train_ds if self.train_ds is not None else train_ds
            train_rate = self._check_many(model, train_source, n=self.sample_train)
            test_rate  = self._check_many(model, self.test_ds, n=self.sample_test)
            print(f"[Self-Check] epoch={state.epoch:.2f}  train_pass@1={train_rate:.3f}  test_pass@1={test_rate:.3f}")
            # Log only when a W&B run is active. The previous `USE_WANDB` flag was
            # never defined, so its NameError was swallowed and nothing was logged.
            if wandb.run is not None:
                wandb.log({
                    "selfcheck/train_pass@1": train_rate,
                    "selfcheck/test_pass@1": test_rate,
                    "epoch": state.epoch
                })
        except Exception as e:
            print("[Self-Check] Error:", e)
        return control

# Create the callback instance
selfcheck_cb = SelfCheckCallback(tokenizer, test_ds, sample_train=8, sample_test=16, train_ds=train_ds)


In [None]:
# ==== Trainer ====
OUTPUT_DIR = "/kaggle/working/qwen2_5_leetcode_lora"

# Query bf16 support once; fp16 is the fallback when bf16 is unavailable.
use_bf16 = is_bfloat16_supported()

# Effective batch size per device = 2 * 16 = 32 sequences per optimizer step.
sft_args = trl.SFTConfig(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    num_train_epochs=3,
    warmup_ratio=0.06,
    learning_rate=4e-5,
    lr_scheduler_type="cosine",
    weight_decay=0.05,
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    output_dir=OUTPUT_DIR,
    gradient_checkpointing=True,
    bf16=use_bf16,
    fp16=not use_bf16,
    optim="adamw_torch",
    report_to="wandb",
    seed=SEED,  # use the notebook-wide seed, as the LoRA setup already does
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_sft,
    eval_dataset=None,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LEN,
    args=sft_args,
)

# Attach the self-check callback
trainer.add_callback(selfcheck_cb)

print("Bắt đầu train ...")

In [None]:
# Run the fine-tuning loop; the value returned by trainer.train() is kept in `train_result`.
train_result = trainer.train()
print("Hoàn tất train!")

In [None]:
# ==== Save adapter & tokenizer ====
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)

# (Optional) Merge the LoRA weights into the base model (needs enough VRAM)
# Warning: a 16-bit merge can use a lot of memory!
FastLanguageModel.for_inference(model)
merged_dir = f"{OUTPUT_DIR}_merged"
model.save_pretrained_merged(merged_dir, tokenizer, save_method="merged_16bit")

In [None]:
# ==== Evaluate pass@1 on the test set ====
from tqdm.auto import tqdm


def _build_eval_prompt(ex) -> str:
    """Build the user turn for one row in the same format used for training.

    That format is the problem description plus an optional "Starter Code:"
    section. Falls back to the row's `query` field when no description is present.
    """
    desc = (ex.get("problem_description") or "").strip()
    if not desc:
        return ex.get("query") or ""
    starter = (ex.get("starter_code") or "").strip()
    if starter:
        desc += f"\n\nStarter Code:\n{starter}"
    return desc


def predict_and_score(model, tok, dataset, out_path="/kaggle/working/predictions.jsonl", max_samples=None):
    """Generate one solution per problem, save the predictions and compute pass@1.

    Args:
        model: model to evaluate; switched to Unsloth inference mode.
        tok: tokenizer providing the chat template.
        dataset: indexable collection of rows; each row is read with `.get`.
        out_path: JSONL file that receives one {"task_id", "completion"} per row.
        max_samples: evaluate only the first `max_samples` rows; None means all.

    Returns:
        {"total": number of rows evaluated, "pass@1": fraction that passed}.
    """
    FastLanguageModel.for_inference(model)
    total = len(dataset) if max_samples is None else max(0, min(max_samples, len(dataset)))
    ok = 0
    with open(out_path, "w", encoding="utf-8") as f:
        for i in tqdm(range(total), desc="pass@1"):
            ex = dataset[i]
            # Generate a solution. The prompt matches the fine-tuning format so the
            # score reflects what the model was trained on and is comparable with
            # the per-epoch self-check.
            msgs = [
                {"role": "system", "content": SYSTEM_INST},
                {"role": "user", "content": _build_eval_prompt(ex)},
            ]
            prompt_text = tok.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
            enc = tok(prompt_text, return_tensors="pt").to(model.device)
            with torch.no_grad():
                out = model.generate(**enc, **GEN_KW)
            # Decode only the tokens produced after the prompt.
            gen_text = tok.decode(out[0][enc["input_ids"].shape[1]:], skip_special_tokens=True)
            completion = extract_code(gen_text)

            # Stored as {task_id, completion} - compatible with eval_lcd
            f.write(json.dumps({"task_id": ex.get("task_id"), "completion": completion}) + "\n")

            # Score locally with the row's test code
            program = (ex.get("prompt") or "") + "\n" + completion
            if run_program_with_tests(program, ex.get("test") or "", ex.get("entry_point") or ""):
                ok += 1

    return {"total": total, "pass@1": ok/total if total else 0.0}


# predict_and_score switches the model to inference mode itself.
score = predict_and_score(model, tokenizer, test_ds, out_path="/kaggle/working/lcd_predictions.jsonl", max_samples=None)
print("Test pass@1:", score)
# Prediction file for the eval_lcd CLI:
print("Predictions saved to lcd_predictions.jsonl")