
# Qwen3 30B A3B - mini SWE-bench thinking-to-nonthinking distillation loop

This Colab notebook does one full cycle:

1. Run **mini-swe-agent** on `MariusHobbhahn/swe-bench-verified-mini` with the **thinking** model to produce patches.
2. Build a distilled SFT dataset by pairing each input with the final patch and removing any thinking content.
3. Fine-tune the **non-thinking** model with **rank-1 LoRA** using **Unsloth FastModel** and **merge after each step**.
4. Create a **delta-interpolated** model by applying the weight difference between thinking and non-thinking bases onto the newly fine-tuned non-thinking model.
5. Run the agent again with the delta model on the same mini set.
6. Upload all artifacts to Hugging Face.
7. Submit predictions to SWE-bench via `sb-cli` for the 50 mini IDs only.

The run is resumable across Colab disconnects: after each stage, `state.json` and all intermediate artifacts are pushed to the Hub.


In [None]:

#@title Setup
# If running in Colab, uncomment the next line to ensure a clean environment
# !pip -q install -U pip

# Core deps
!pip -q install -U "transformers>=4.43.0" accelerate bitsandbytes datasets peft trl einops safetensors fugashi ipadic
# Unsloth with MoE FastModel support
!pip -q install -U unsloth
# Agentic harness + model router
!pip -q install -U mini-swe-agent litellm pyyaml
# vLLM OpenAI-compatible server
!pip -q install -U "vllm>=0.6.1" uvicorn
# Model merging
!pip -q install -U mergekit
# Hugging Face Hub utils
!pip -q install -U huggingface_hub
# SWE-bench remote evaluation CLI
!pip -q install -U sb-cli

import os, json, time, shutil, textwrap, subprocess, signal, sys, uuid, re, glob, math
from pathlib import Path

from huggingface_hub import HfApi, HfFolder, create_repo, login, upload_folder, snapshot_download
from datasets import load_dataset


In [None]:

#@title Config - tokens, models, paths
# Never print secrets
# NOTE(review): tokens are entered as literals in this cell; consider reading them from
# Colab secrets or the environment so they are not saved with the notebook.
HF_TOKEN = "<REDACTED_HF_TOKEN>"  #@param {type:"string"}
HF_USERNAME = "oof-baroomf"                          #@param {type:"string"}
SWEBENCH_API_KEY = "<REDACTED_SWEBENCH_KEY>"  #@param {type:"string"}

# Base models
THINKING_MODEL_ID = "Qwen/Qwen3-30B-A3B-Thinking-2507"   # thinking-only
NONTHINK_MODEL_ID = "Qwen/Qwen3-30B-A3B-Instruct-2507"   # non-thinking

# Dataset
SWEBENCH_MINI_ID = "MariusHobbhahn/swe-bench-verified-mini"
SWEBENCH_SPLIT = "test"  # dataset is small; dev/test both acceptable, 'test' used when available

# Agent sampling best practices for Qwen
# These dicts are merged verbatim into the agent's model kwargs by write_miniswea_config().
THINKING_GEN = dict(temperature=0.6, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=1.0, max_tokens=32768)
NONTHINK_GEN = dict(temperature=0.7, top_p=0.8,  top_k=20, min_p=0.0, presence_penalty=1.0, max_tokens=32768)

# Training
# MAX_SEQ_LEN caps tokenized SFT examples; longer prompt+patch texts are truncated.
MAX_SEQ_LEN = 4096
NUM_TRAIN_EPOCHS = 1.0
GRAD_ACCUM = 2
BATCH_SIZE = 1
LEARNING_RATE = 1e-5
LORA_RANK = 1
LORA_ALPHA = 16
LORA_DROPOUT = 0.0
# When True, the training callback exports merged weights after every optimizer step.
MERGE_EVERY_STEP = True
WARMUP_RATIO = 0.0
WEIGHT_DECAY = 0.0

# Delta interpolation strength
DELTA_ALPHA = 1.0  # new = FT_nonthink + alpha * (think_base - nonthink_base)

# vLLM serve settings
VLLM_PORT = 8000
VLLM_GPU_UTIL = 0.9
# NOTE(review): 393216 appears to exceed the 262,144-token native context advertised for the
# Qwen3-2507 models, and the KV cache for such a length may not fit next to the weights on a
# single 80GB GPU; vLLM may refuse to start with this value - confirm and lower if needed.
VLLM_MAX_MODEL_LEN = 393216  # be generous to avoid truncation

# Persistent workdir
# Uses /content when it exists (Colab), otherwise the current working directory.
ROOT = Path("/content") if Path("/content").exists() else Path.cwd()
RUN_ROOT = ROOT / "a3b_mini_runs"
RUN_ROOT.mkdir(parents=True, exist_ok=True)

# Remote persistence
STATE_DATASET_REPO = f"{HF_USERNAME}/a3b-mini-state"
# PUSH_PUBLIC=True creates every Hub repo as public (private=not PUSH_PUBLIC).
PUSH_PUBLIC = True

# Keep the Hugging Face cache inside the run root and expose the secrets to child processes.
os.environ["HF_HOME"] = str(RUN_ROOT / "hf_home")
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["HF_TOKEN"] = HF_TOKEN
os.environ["SWEBENCH_API_KEY"] = SWEBENCH_API_KEY

# Login to HF non-interactively.
# Failures are reported by exception type only, so nothing token-related is ever echoed.
try:
    login(token=HF_TOKEN, add_to_git_credential=True)
except Exception as e:
    print("Hugging Face login note:", type(e).__name__)

api = HfApi()
# Make sure the dataset repo that mirrors run state exists. A failure here is not fatal for
# defining the helpers below, but it must be visible: every later state push targets this repo.
try:
    api.create_repo(STATE_DATASET_REPO, repo_type="dataset", private=not PUSH_PUBLIC, exist_ok=True)
except Exception as e:
    print("State repo creation note:", type(e).__name__)

def _now():
    """Return the current local time as a ``YYYY-MM-DD_HH-MM-SS`` stamp (safe for file names)."""
    stamp_format = "%Y-%m-%d_%H-%M-%S"
    return time.strftime(stamp_format, time.localtime())

def _sh(cmd, env=None, check=True):
    """Run *cmd* through the shell and return its combined stdout/stderr as text.

    Args:
        cmd: Shell command line (a single string; the caller is responsible for quoting).
        env: Environment mapping for the child; a falsy value falls back to ``os.environ``.
        check: When true, a non-zero exit status raises instead of returning.

    Returns:
        Everything the command wrote to stdout and stderr, interleaved.

    Raises:
        RuntimeError: If ``check`` is true and the command exits non-zero. The message names
            the exit code and the command before the captured output, so a command that
            fails without printing anything still produces a usable error.
    """
    proc = subprocess.run(
        cmd,
        shell=True,
        text=True,
        env=env or os.environ,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into stdout so ordering is preserved
    )
    if check and proc.returncode != 0:
        raise RuntimeError(
            f"Command failed with exit code {proc.returncode}: {cmd}\n{proc.stdout}"
        )
    return proc.stdout

def _write_json(path, obj):
    """Serialize *obj* as indented JSON to *path*, creating parent directories as needed.

    Args:
        path: Destination file as a ``Path`` or a plain string (coerced with ``Path`` so the
            function accepts the same inputs as ``_read_json``).
        obj: Any JSON-serializable value.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(obj, indent=2))

def _read_json(path, default=None):
    """Load JSON from *path*; return *default* if the file cannot be read or parsed.

    Any exception (missing file, invalid JSON, unusable path value) yields *default*, so
    callers can treat "no state yet" and "unreadable state" the same way.
    """
    try:
        with Path(path).open() as handle:
            return json.load(handle)
    except Exception:
        return default

def _push_state(run_dir):
    """Mirror the whole run directory to the state dataset repo under ``state/<run name>``.

    Everything inside *run_dir* is uploaded, not only ``state.json``.
    NOTE(review): once model checkpoints live under the run directory this presumably makes
    every state save a very large upload - confirm that is intended.
    """
    destination = f"state/{run_dir.name}"
    upload_folder(
        repo_id=STATE_DATASET_REPO,
        repo_type="dataset",
        folder_path=str(run_dir),
        path_in_repo=destination,
        token=HF_TOKEN,
    )


In [None]:

#@title Utilities: vLLM server, mini config, agent runner, sb-cli submit
import yaml, tempfile, atexit

# Handle of the vLLM server started by start_vllm(); None when no server is running.
_vllm_proc = None


def start_vllm(model_id: str, port: int = VLLM_PORT, gpu_util: float = VLLM_GPU_UTIL, max_len: int = VLLM_MAX_MODEL_LEN, startup_timeout: float = 1800.0):
    """Start a vLLM OpenAI-compatible server for *model_id* and block until it is healthy.

    Any server previously started by this function is stopped first.

    Args:
        model_id: Hub model id or local directory to serve (exposed as ``qwen-local``).
        port: TCP port on 127.0.0.1 to listen on.
        gpu_util: Value passed to ``--gpu-memory-utilization``.
        max_len: Value passed to ``--max-model-len``.
        startup_timeout: Seconds to wait for the server to answer its health check.
            Loading a 30B checkpoint (and possibly downloading it) takes far longer than
            a few minutes, hence the generous default.

    Raises:
        RuntimeError: If the server process exits during startup, or does not become
            healthy within ``startup_timeout`` seconds (the process is stopped first).
    """
    global _vllm_proc
    import http.client
    import urllib.request

    stop_vllm()
    cmd = [
        sys.executable, "-m", "vllm.entrypoints.openai.api_server",
        "--model", model_id,
        "--dtype", "auto",
        "--port", str(port),
        "--max-model-len", str(max_len),
        "--gpu-memory-utilization", str(gpu_util),
        "--disable-log-requests",
        "--served-model-name", "qwen-local",
    ]
    print("Starting vLLM:", " ".join(cmd))

    # Send server output to a log file rather than a PIPE: nobody reads the pipe once the
    # server is up, so a PIPE would eventually fill and block the server on its own logging.
    log_path = RUN_ROOT / "vllm_server.log"
    with open(log_path, "w", encoding="utf-8") as log_file:
        # The child keeps its own copy of the descriptor after this handle is closed.
        _vllm_proc = subprocess.Popen(cmd, stdout=log_file, stderr=subprocess.STDOUT)

    health_url = f"http://127.0.0.1:{port}/health"
    deadline = time.monotonic() + startup_timeout
    while time.monotonic() < deadline:
        exit_code = _vllm_proc.poll()
        if exit_code is not None:
            # Fail fast instead of waiting out the timeout on a dead process.
            _vllm_proc = None
            raise RuntimeError(f"vLLM exited during startup with code {exit_code}; see {log_path}")
        try:
            with urllib.request.urlopen(health_url, timeout=5) as response:
                if response.status == 200:
                    return
        except (OSError, http.client.HTTPException):
            # Connection refused/reset while the server is still loading (URLError is an OSError).
            pass
        time.sleep(2.0)

    # Do not leave a half-started server holding the GPU.
    stop_vllm()
    raise RuntimeError("vLLM did not start in time")

def stop_vllm(timeout: float = 20.0):
    """Stop the vLLM server started by ``start_vllm`` (no-op when none is running).

    The process is asked to terminate and given *timeout* seconds to exit. If that fails it
    is killed and then waited on, so the process is reaped and has released the GPU before
    the next server is started. All errors are swallowed on purpose: this runs from
    ``finally`` blocks and at interpreter exit, where it must not mask the original error.

    Args:
        timeout: Seconds to wait after ``terminate()`` and again after ``kill()``.
    """
    global _vllm_proc
    proc, _vllm_proc = _vllm_proc, None
    if proc is None:
        return
    try:
        proc.terminate()
        proc.wait(timeout=timeout)
    except Exception:
        try:
            proc.kill()
            # kill() only sends the signal; wait so the process is actually gone on return.
            proc.wait(timeout=timeout)
        except Exception:
            pass


# Make sure a server left running is shut down when the interpreter exits.
atexit.register(stop_vllm)

def write_miniswea_config(path: Path, thinking: bool):
    """Write the mini-swe-agent YAML config that points the agent at the local vLLM server.

    Args:
        path: Destination YAML file; parent directories are created.
        thinking: Selects ``THINKING_GEN`` (True) or ``NONTHINK_GEN`` (False) as the sampling
            parameters merged into the model kwargs.

    Returns:
        *path*, for convenient chaining.
    """
    sampling = THINKING_GEN if thinking else NONTHINK_GEN

    # litellm is told to treat the local vLLM endpoint as an OpenAI-style provider.
    litellm_kwargs = {
        "custom_llm_provider": "openai",
        "api_base": f"http://127.0.0.1:{VLLM_PORT}/v1",
        "api_key": "sk-local",  # placeholder value
        "model": "qwen-local",
        "drop_params": True,  # ignore unsupported params
        **sampling,
    }

    # NOTE(review): "mode"/"confirm_exit" look like interactive-agent settings - confirm the
    # batch SWE-bench runner accepts them and does not wait for user confirmation.
    agent_section = {
        "step_limit": 48,
        "cost_limit": 1000.0,
        "mode": "confirm",
        "confirm_exit": True,
    }

    # Environment variables that keep pagers and progress bars out of command output.
    quiet_env = {
        "PAGER": "cat",
        "MANPAGER": "cat",
        "LESS": "-R",
        "PIP_PROGRESS_BAR": "off",
        "TQDM_DISABLE": "1",
    }

    cfg = {
        "agent": agent_section,
        "environment": {"environment_class": "local", "env": quiet_env},
        "model": {
            "model_class": "litellm",
            "model_name": "qwen-local",
            "model_kwargs": litellm_kwargs,
        },
        "run": {},  # left empty: no run-level overrides
    }

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(yaml.safe_dump(cfg))
    return path

def run_mini_swebench(dataset_id: str, split: str, out_dir: Path, config_path: Path):
    """Run the mini-swe-agent SWE-bench batch runner and return its predictions.

    Args:
        dataset_id: SWE-bench subset name or dataset path handed to the runner.
        split: Dataset split to run.
        out_dir: Directory the runner writes into (created if missing).
        config_path: Agent YAML config, e.g. from ``write_miniswea_config``.

    Returns:
        The parsed ``preds.json`` from *out_dir*. If the runner produced none, an empty
        ``preds.json`` is written, the tail of the runner output is printed, and ``{}`` is
        returned.
    """
    import shlex

    out_dir.mkdir(parents=True, exist_ok=True)
    # Options follow the batch runner's documented CLI: the dataset is passed with --subset
    # and the output directory with --output (see the mini-swe-agent SWE-bench usage docs).
    cmd = [
        sys.executable, "-m", "minisweagent.run.extra.swebench",
        "--subset", dataset_id,
        "--split", split,
        "--config", str(config_path),
        "--output", str(out_dir),
    ]
    # shlex.join quotes every argument, so paths containing spaces survive the shell.
    command_line = shlex.join(cmd)
    print("Running mini-swe-agent:", command_line)
    out = _sh(command_line, check=False)

    # preds.json should exist if any instance finished
    preds = out_dir / "preds.json"
    if preds.exists():
        return json.loads(preds.read_text())

    # Nothing was produced: show why instead of silently continuing with no predictions.
    print("mini-swe-agent produced no preds.json; last output:\n" + (out or "")[-4000:])
    preds.write_text(json.dumps({}, indent=2))
    return {}

def collect_instance_ids(dataset_id: str, split: str):
    """Return every instance id of the dataset split, as strings, in dataset order.

    The id column is ``instance_id`` when the dataset declares it, otherwise ``id``. When
    neither column is declared, each row is probed for those keys and falls back to a
    positional ``idx_<n>`` label.
    """
    ds = load_dataset(dataset_id, split=split)
    declared = ds.features
    if "instance_id" in declared:
        id_column = "instance_id"
    elif "id" in declared:
        id_column = "id"
    else:
        id_column = None

    collected = []
    for position, row in enumerate(ds):
        if id_column is None:
            value = row.get("instance_id", row.get("id", f"idx_{position}"))
        else:
            value = row[id_column]
        collected.append(str(value))
    return collected

def sbcli_submit(predictions_path: Path, subset: str, split: str, run_id: str, instance_ids: list[str]):
    """Submit a predictions file to SWE-bench through ``sb-cli`` and return the CLI output.

    Args:
        predictions_path: Path to the predictions JSON file.
        subset: sb-cli subset name.
        split: sb-cli split name.
        run_id: Identifier for this submission.
        instance_ids: Instance ids to restrict the submission to (at most the first 5000
            are passed). When empty, the ``--instance_ids`` option is omitted.

    Returns:
        The combined output of the command, or an ``"sb-cli error: ..."`` string if running
        it raised. A non-zero exit status is not treated as an error here.
    """
    import shlex

    env = os.environ.copy()
    env["SWEBENCH_API_KEY"] = SWEBENCH_API_KEY

    cmd = [
        "sb-cli", "submit", subset, split,
        "--predictions_path", str(predictions_path),
        "--run_id", run_id,
        "--gen_report", "1",
    ]
    ids_arg = ",".join((instance_ids or [])[:5000])
    if ids_arg:
        # Only add the option when there is a value; an empty value would leave a dangling flag.
        cmd += ["--instance_ids", ids_arg]

    # Quote each argument so run ids or paths with spaces/special characters stay intact.
    command_line = shlex.join(cmd)
    print("sb-cli:", command_line)
    try:
        return _sh(command_line, env=env, check=False)
    except Exception as e:
        return f"sb-cli error: {e}"


In [None]:

#@title Data prep: from agent preds to SFT pairs
def preds_to_sft_pairs(preds: dict, dataset_id: str, split: str):
    """Turn agent predictions into ``{"instance_id", "prompt", "target"}`` SFT records.

    Args:
        preds: Mapping of instance id to a payload dict holding ``model_patch``.
        dataset_id: Dataset used to look up each instance's problem statement.
        split: Dataset split to load.

    Returns:
        One record per prediction with a non-empty patch. The prompt is the problem
        statement (empty string when the id is not in the dataset); the target is the patch
        with ``<think>`` blocks and anything up to a ``FINAL PATCH`` marker removed.
    """
    ds = load_dataset(dataset_id, split=split)

    # instance id -> problem statement (later rows win on duplicate ids)
    statements = {
        str(row.get("instance_id", row.get("id"))): str(row.get("problem_statement", row.get("problem", "")))
        for row in ds
    }

    think_block = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    preamble = re.compile(r"(?is)^.*?FINAL\s*PATCH[:\-]*")

    pairs = []
    for iid, payload in preds.items():
        raw_patch = payload.get("model_patch", "")
        if raw_patch:
            without_thinking = think_block.sub("", raw_patch)
            cleaned = preamble.sub("", without_thinking).strip()
            pairs.append({
                "instance_id": iid,
                "prompt": statements.get(str(iid), ""),
                "target": cleaned,
            })
    return pairs

def save_sft_jsonl(pairs: list[dict], out_path: Path):
    """Write *pairs* to *out_path* as UTF-8 JSON Lines (one object per line) and return the path.

    Parent directories are created; an existing file is overwritten. Non-ASCII characters
    are written as-is rather than escaped.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(record, ensure_ascii=False) + "\n" for record in pairs)
    return out_path


In [None]:

#@title Rank-1 LoRA SFT with Unsloth.FastModel, merge every step
from unsloth import FastModel
from datasets import Dataset
import torch

def train_rank1_lora_merge_each_step(base_model_id: str, sft_pairs_path: Path, out_dir: Path):
    """Fine-tune *base_model_id* with LoRA on the SFT pairs and export merged weights.

    The base model is loaded in 4-bit through Unsloth ``FastModel``, wrapped with a LoRA
    adapter (rank/alpha/dropout from the module-level config) and trained with TRL's
    ``SFTTrainer``. Merged 16-bit weights are exported with Unsloth's
    ``save_pretrained_merged``: after every optimizer step when ``MERGE_EVERY_STEP`` is set
    (``merged_step_<n>``) and once at the end (``merged_final``).

    The export writes a merged copy; the adapter on the live model is left in place so that
    training can continue. Calling ``merge_and_unload()`` on the live model instead would
    remove the adapter layers that are being trained.
    NOTE(review): each per-step export is a full copy of the model - confirm there is enough
    disk for one directory per step, or disable ``MERGE_EVERY_STEP``.

    Args:
        base_model_id: Hub id or local directory of the model to fine-tune.
        sft_pairs_path: JSON Lines file with ``prompt``/``target`` records.
        out_dir: Directory for trainer output and merged exports (created if missing).

    Returns:
        Path to ``out_dir / "merged_final"``.

    Raises:
        RuntimeError: If the pairs file has no records, or no merged weights were written.
    """
    # TrainerCallback supplies no-op versions of every hook the Trainer invokes; a plain
    # class that defines only two hooks fails as soon as another event fires.
    from transformers import TrainerCallback, TrainingArguments
    from trl import SFTTrainer

    out_dir.mkdir(parents=True, exist_ok=True)

    # The pairs file is written as UTF-8 with non-ASCII characters unescaped, so read it the same way.
    lines = Path(sft_pairs_path).read_text(encoding="utf-8").splitlines()
    records = [json.loads(x) for x in lines if x.strip()]
    if not records:
        raise RuntimeError("No SFT pairs found - cannot train")
    # NOTE(review): this hand-written chat layout puts a newline before each <|im_end|>;
    # confirm it matches the tokenizer's own chat template.
    ds = Dataset.from_list([{
        "text": f"<|im_start|>system\nYou are a helpful coding assistant.\n<|im_end|>\n"
                f"<|im_start|>user\n{r['prompt']}\n<|im_end|>\n"
                f"<|im_start|>assistant\n{r['target']}\n<|im_end|>\n"
    } for r in records])

    # Load base with 4-bit for memory - no GGUF anywhere
    model, tokenizer = FastModel.from_pretrained(
        base_model_id,
        load_in_4bit=True,
        use_gradient_checkpointing="unsloth",
    )
    model = FastModel.get_peft_model(
        model,
        r=LORA_RANK,
        lora_alpha=LORA_ALPHA,
        lora_dropout=LORA_DROPOUT,
        target_modules="all-linear",  # include MoE FFNs
        use_dora=False,
        bias="none",
        modules_to_save=None,
        task_type="CAUSAL_LM",
    )

    def tokenize(examples):
        # Truncate to MAX_SEQ_LEN; no padding here, batching is left to the trainer.
        return tokenizer(
            examples["text"],
            max_length=MAX_SEQ_LEN,
            truncation=True,
            padding=False,
        )
    tokenized = ds.map(tokenize, batched=True, remove_columns=["text"])

    args = TrainingArguments(
        output_dir=str(out_dir),
        num_train_epochs=NUM_TRAIN_EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=GRAD_ACCUM,
        learning_rate=LEARNING_RATE,
        bf16=True,
        logging_steps=1,
        save_steps=0,
        report_to=[],
        warmup_ratio=WARMUP_RATIO,
        weight_decay=WEIGHT_DECAY,
        dataloader_num_workers=2,
        optim="paged_adamw_32bit",
        lr_scheduler_type="cosine",
        max_grad_norm=1.0,
    )

    class MergeEveryStepCallback(TrainerCallback):
        """Exports merged weights after each optimizer step and once more when training ends."""

        def __init__(self, model, save_dir: Path):
            self.model = model
            self.save_dir = save_dir
            self.global_step = 0

        def _export_merged(self, target_dir: Path):
            # Errors are deliberately not swallowed: a missing export must stop the run,
            # because later stages load these directories as complete models.
            target_dir.mkdir(parents=True, exist_ok=True)
            self.model.save_pretrained_merged(str(target_dir), tokenizer, save_method="merged_16bit")

        def on_step_end(self, args, state, control, **kwargs):
            self.global_step = state.global_step
            if MERGE_EVERY_STEP:
                self._export_merged(self.save_dir / f"merged_step_{self.global_step:06d}")
            return control

        def on_train_end(self, args, state, control, **kwargs):
            self._export_merged(self.save_dir / "merged_final")
            return control

    # NOTE(review): tokenizer=/max_seq_length=/packing= are passed to SFTTrainer directly;
    # confirm the installed TRL (as patched by Unsloth) still accepts them here.
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=tokenized,
        args=args,
        dataset_text_field=None,
        packing=False,
        max_seq_length=MAX_SEQ_LEN,
    )

    merge_cb = MergeEveryStepCallback(model, out_dir)
    trainer.add_callback(merge_cb)
    trainer.train()

    final_path = out_dir / "merged_final"
    if not any(final_path.glob("*.safetensors")):
        raise RuntimeError(f"Training finished but no merged weights were written to {final_path}")
    return final_path


In [None]:

#@title Delta interpolation with MergeKit
def merge_delta_with_mergekit(nonthink_ft_dir: Path, think_base_id: str, nonthink_base_id: str, out_dir: Path, alpha: float = 1.0):
    """Build ``FT_nonthink + alpha * (think_base - nonthink_base)`` with mergekit.

    The result is expressed as a non-normalized ``linear`` merge over whole models with
    weights ``1``, ``+alpha`` and ``-alpha``. The config uses a ``models`` list, so every
    layer takes part; a slice with ``layer_range: [0, 1]`` would cover only the first layer.

    Args:
        nonthink_ft_dir: Directory of the fine-tuned non-thinking model.
        think_base_id: Thinking base model (Hub id or path).
        nonthink_base_id: Non-thinking base model (Hub id or path).
        out_dir: Output directory; ``merge.yml`` is written there as well.
        alpha: Strength of the thinking-minus-non-thinking delta.

    Returns:
        *out_dir*.

    Raises:
        RuntimeError: If no ``.safetensors`` file exists under *out_dir* after the merge.
            The message includes the tail of mergekit's output.
    """
    import shlex

    out_dir.mkdir(parents=True, exist_ok=True)
    strength = float(alpha)
    cfg = {
        "merge_method": "linear",
        "dtype": "bfloat16",
        # Per-model weights belong under "parameters" in mergekit's config schema.
        "models": [
            {"model": str(nonthink_ft_dir), "parameters": {"weight": 1.0}},
            {"model": think_base_id, "parameters": {"weight": strength}},
            {"model": nonthink_base_id, "parameters": {"weight": -strength}},
        ],
        "parameters": {
            # Weights sum to 1 only by coincidence; normalizing would rescale the delta.
            "normalize": False
        },
    }
    cfg_path = out_dir / "merge.yml"
    cfg_path.write_text(yaml.safe_dump(cfg))

    # run mergekit (paths quoted so directories with spaces work)
    cmd = f"mergekit-yaml {shlex.quote(str(cfg_path))} {shlex.quote(str(out_dir))}"
    print(cmd)
    out = _sh(cmd, check=False)

    # Judge success by the presence of weight files rather than by exit status.
    safes = list(out_dir.glob("**/*.safetensors"))
    if not safes:
        raise RuntimeError("MergeKit did not produce weights\n" + (out or "")[-4000:])
    return out_dir


In [None]:

#@title Orchestrator: run one cycle or resume
def load_or_init_state(run_name: str):
    """Load the state of run *run_name*, restoring it from the Hub if it is not on disk.

    Resolution order:
      1. ``RUN_ROOT/<run_name>/state.json`` on local disk.
      2. The mirror in the state dataset repo under ``state/<run_name>/``. It is downloaded
         and copied into the run directory, because the snapshot keeps the repo layout and
         therefore lands in ``RUN_ROOT/state/<run_name>/``, not in the run directory itself.
      3. A fresh default state.

    The resulting state is written locally and pushed to the Hub before returning.

    Args:
        run_name: Name of the run; also the name of its directory under ``RUN_ROOT``.

    Returns:
        Tuple ``(state, run_dir, state_path)``.
    """
    run_dir = RUN_ROOT / run_name
    run_dir.mkdir(parents=True, exist_ok=True)
    state_path = run_dir / "state.json"

    # Local state wins when present; only fall back to the Hub copy when it is missing.
    if not state_path.exists():
        try:
            snapshot_download(repo_id=STATE_DATASET_REPO, repo_type="dataset", local_dir=run_dir.parent, allow_patterns=f"state/{run_name}/*", token=HF_TOKEN)
        except Exception as e:
            # Visible on purpose: after a failed restore a brand-new state is created below.
            print("State restore note:", type(e).__name__)
        else:
            mirror = run_dir.parent / "state" / run_name
            if mirror.is_dir():
                shutil.copytree(mirror, run_dir, dirs_exist_ok=True)
                shutil.rmtree(mirror, ignore_errors=True)  # avoid keeping a second copy

    st = _read_json(state_path, default={
        "run_name": run_name,
        "created_at": _now(),
        "cycle": 0,
        "steps": {
            "thinking_done": False,
            "distill_done": False,
            "delta_done": False,
            "eval2_done": False,
            "uploads_done": False,
        },
        "models": {
            "thinking_base": THINKING_MODEL_ID,
            "nonthinking_base": NONTHINK_MODEL_ID,
            "thinking_for_cycle": THINKING_MODEL_ID,
            "nonthinking_for_cycle": NONTHINK_MODEL_ID,
            "nonthinking_ft": None,
            "delta_model": None,
        },
        "paths": {},
        "reports": {},
        "instance_ids": [],
    })
    _write_json(state_path, st)
    _push_state(run_dir)
    return st, run_dir, state_path

def save_state(st, state_path, run_dir):
    """Persist *st* to *state_path*, push the run directory to the Hub, and return *st*."""
    # Write locally first so the push uploads the state that was just saved.
    _write_json(path=state_path, obj=st)
    _push_state(run_dir=run_dir)
    return st

def run_one_cycle_or_resume(run_name: str = None):
    """Run one distillation cycle, or continue the unfinished cycle of run *run_name*.

    Each stage is guarded by a flag in ``state["steps"]`` and the state is saved after every
    stage, so calling this again with the same *run_name* skips what is already done:

      1. Thinking pass: serve the cycle's thinking model and run the agent.
      2. Distill: build SFT pairs from the predictions and fine-tune the non-thinking model.
      3. Delta: merge the thinking/non-thinking base difference onto the fine-tuned model.
      4. Evaluate the delta model with the agent.
      5. Upload the SFT pairs, both models and both prediction files to the Hub.
      6. Submit both prediction files with sb-cli.

    After stage 6 the cycle counter is advanced, the fine-tuned and delta models become the
    starting models of the next cycle, and all stage flags are reset.

    Args:
        run_name: Run to create or resume. ``None`` generates a new unique name, which
            always starts a new run.

    Returns:
        Tuple ``(run_dir, state)``.
    """
    run_name = run_name or f"mini_a3b_{_now()}_{uuid.uuid4().hex[:6]}"
    st, run_dir, state_path = load_or_init_state(run_name)
    cycle = int(st["cycle"])
    cycle_dir = run_dir / f"cycle_{cycle:03d}"

    # 0) instance ids
    if not st["instance_ids"]:
        st["instance_ids"] = collect_instance_ids(SWEBENCH_MINI_ID, SWEBENCH_SPLIT)
        save_state(st, state_path, run_dir)

    # 1) Thinking pass with mini-swe-agent + vLLM
    think_out_dir = cycle_dir / "thinking_pass"
    preds_json = think_out_dir / "preds.json"
    if not st["steps"]["thinking_done"]:
        try:
            cfg_path = write_miniswea_config(run_dir / "miniswea_thinking.yaml", thinking=True)
            start_vllm(st["models"]["thinking_for_cycle"])
            preds = run_mini_swebench(SWEBENCH_MINI_ID, SWEBENCH_SPLIT, think_out_dir, cfg_path)
            preds_json.write_text(json.dumps(preds, indent=2))
            st["paths"]["thinking_preds"] = str(preds_json)
            st["steps"]["thinking_done"] = True
            save_state(st, state_path, run_dir)
        finally:
            # Always free the GPU, whether the pass succeeded or not.
            stop_vllm()

    # 2) Build SFT pairs and train rank-1 LoRA on non-thinking model
    sft_path = cycle_dir / "sft_pairs.jsonl"
    ft_dir = cycle_dir / "nonthink_ft"
    if not st["steps"]["distill_done"]:
        preds = _read_json(preds_json, default={})
        pairs = preds_to_sft_pairs(preds, SWEBENCH_MINI_ID, SWEBENCH_SPLIT)
        save_sft_jsonl(pairs, sft_path)
        ft_merged_dir = train_rank1_lora_merge_each_step(st["models"]["nonthinking_for_cycle"], sft_path, ft_dir)
        st["models"]["nonthinking_ft"] = str(ft_merged_dir)
        st["models"]["nonthinking_for_cycle"] = str(ft_merged_dir)  # use FT as next cycle's base
        st["steps"]["distill_done"] = True
        save_state(st, state_path, run_dir)

    # 3) Apply delta between base think and base nonthink onto new FT
    delta_dir = cycle_dir / "delta_model"
    if not st["steps"]["delta_done"]:
        delta_model_dir = merge_delta_with_mergekit(
            nonthink_ft_dir=Path(st["models"]["nonthinking_ft"]),
            think_base_id=st["models"]["thinking_base"],
            nonthink_base_id=st["models"]["nonthinking_base"],
            out_dir=delta_dir,
            alpha=DELTA_ALPHA,
        )
        st["models"]["delta_model"] = str(delta_model_dir)
        # the new "thinking" for next cycle becomes the delta model
        st["models"]["thinking_for_cycle"] = str(delta_model_dir)
        st["steps"]["delta_done"] = True
        save_state(st, state_path, run_dir)

    # 4) Evaluate delta model with agent again
    eval2_dir = cycle_dir / "delta_eval"
    preds2_json = eval2_dir / "preds.json"
    if not st["steps"]["eval2_done"]:
        try:
            cfg_path = write_miniswea_config(run_dir / "miniswea_nonthinking.yaml", thinking=False)
            start_vllm(st["models"]["delta_model"])
            preds2 = run_mini_swebench(SWEBENCH_MINI_ID, SWEBENCH_SPLIT, eval2_dir, cfg_path)
            preds2_json.write_text(json.dumps(preds2, indent=2))
            st["paths"]["delta_preds"] = str(preds2_json)
            st["steps"]["eval2_done"] = True
            save_state(st, state_path, run_dir)
        finally:
            stop_vllm()

    # 5) Upload artifacts to Hub
    if not st["steps"]["uploads_done"]:
        pairs_repo = f"{HF_USERNAME}/{run_name}-cycle{cycle:03d}-sft"
        ft_repo = f"{HF_USERNAME}/{run_name}-cycle{cycle:03d}-nonthink-ft"
        delta_repo = f"{HF_USERNAME}/{run_name}-cycle{cycle:03d}-delta"
        preds_repo = f"{HF_USERNAME}/{run_name}-cycle{cycle:03d}-preds"
        for repo_id, repo_type in (
            (pairs_repo, "dataset"),
            (ft_repo, "model"),
            (delta_repo, "model"),
            (preds_repo, "dataset"),
        ):
            try:
                api.create_repo(repo_id, repo_type=repo_type, private=not PUSH_PUBLIC, exist_ok=True)
            except Exception as e:
                # Not fatal by itself (the upload below fails loudly if the repo is missing).
                print("Repo creation note:", repo_id, type(e).__name__)

        # sft_path.parent is the whole cycle directory, which also holds the model
        # checkpoints; restrict this upload to the pairs file itself.
        upload_folder(folder_path=str(sft_path.parent), repo_id=pairs_repo, repo_type="dataset", allow_patterns=[sft_path.name], token=HF_TOKEN)
        upload_folder(folder_path=st["models"]["nonthinking_ft"], repo_id=ft_repo, repo_type="model", token=HF_TOKEN)
        upload_folder(folder_path=st["models"]["delta_model"], repo_id=delta_repo, repo_type="model", token=HF_TOKEN)

        # also upload preds
        up_dir = cycle_dir / "preds_bundle"
        up_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy2(preds_json, up_dir / "thinking_preds.json")
        shutil.copy2(preds2_json, up_dir / "delta_preds.json")
        upload_folder(folder_path=str(up_dir), repo_id=preds_repo, repo_type="dataset", token=HF_TOKEN)

        st["paths"]["repos"] = {
            "sft": pairs_repo,
            "nonthink_ft": ft_repo,
            "delta": delta_repo,
            "preds": preds_repo,
        }
        st["steps"]["uploads_done"] = True
        save_state(st, state_path, run_dir)

    # 6) Optional: submit both to sb-cli on the verified subset using only mini IDs.
    # Guarded like the other stages so a resumed run does not submit twice. The split is
    # "test": that is the split sb-cli lists for swe-bench_verified.
    if not st["steps"].get("submit_done", False):
        try:
            thinking_submit = sbcli_submit(predictions_path=preds_json, subset="swe-bench_verified", split="test", run_id=f"{run_name}-cycle{cycle:03d}-think", instance_ids=st["instance_ids"])
            delta_submit = sbcli_submit(predictions_path=preds2_json, subset="swe-bench_verified", split="test", run_id=f"{run_name}-cycle{cycle:03d}-delta", instance_ids=st["instance_ids"])
            st["reports"]["sbcli_thinking"] = str(thinking_submit)[:4000]
            st["reports"]["sbcli_delta"] = str(delta_submit)[:4000]
            st["steps"]["submit_done"] = True
        except Exception as e:
            st["reports"]["sbcli_error"] = f"{type(e).__name__}: {e}"
        save_state(st, state_path, run_dir)

    # Prepare for next cycle if you rerun. The base models stay fixed; only the
    # per-cycle pointers advance to the newest weights.
    st["cycle"] = cycle + 1
    st["models"]["nonthinking_for_cycle"] = st["models"]["nonthinking_ft"]
    st["models"]["thinking_for_cycle"] = st["models"]["delta_model"]
    # Reset step flags for the next call
    st["steps"] = {k: False for k in st["steps"]}
    save_state(st, state_path, run_dir)

    return run_dir, st



## How to run

1. Runtime type A100 80GB.
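2. Run the Setup cell, then the Config, Utilities, Data prep, training, Delta interpolation, and Orchestrator cells in order so that every helper is defined.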
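3. Run `run_one_cycle_or_resume(run_name=...)` once. If Colab disconnects, call it again with the same `run_name` - it resumes from the saved state; calling it without a `run_name` generates a new name and starts a fresh run. If a cycle finishes fully, running it again with the same `run_name` starts the next cycle and uses the latest weights for both models.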
4. Artifacts are in your Hub, state is mirrored to `dataset://oof-baroomf/a3b-mini-state/state/<run_id>/`.

Notes

- Agentic harness is **mini-swe-agent** LocalEnvironment, not direct prompting.
- vLLM serves one model at a time. The notebook sequentially serves thinking, then the new model.
- LoRA training uses Unsloth `FastModel` for MoE, rank-1, merge-every-step by callback.
- No GGUF is used in training or merging.
- Qwen best practices are enforced in the agent config.


In [None]:

#@title Run one cycle or resume the current one
# The run name must stay the same between executions of this cell: the orchestrator keys all
# saved state on it. Passing None generates a new name on every execution, which starts a
# new run each time and never resumes. Leave the field empty to force a fresh run.
RUN_NAME = "mini_a3b_main"  #@param {type:"string"}
run_dir, state = run_one_cycle_or_resume(run_name=RUN_NAME or None)
print("Run dir:", run_dir)
print("Latest state keys:", list(state.keys()))
print("Next cycle:", state["cycle"])
