In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules before
# each cell executes, so edits to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
%%html
<style>
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
# unsloth must be imported first: importing it after peft produces the warning
# "Please restructure your imports with 'import unsloth' at the top of your file."
import unsloth

import asyncio
import json
import random
import re
from typing import TypedDict

import art
import openai
import peft
from dotenv import load_dotenv

load_dotenv()


class TemporalCluePuzzle(TypedDict):
    """One record of the puzzles JSON file, as consumed by `rollout`."""

    # Not read anywhere in this notebook; presumably the number of clues in
    # the puzzle (inferred from the field name) — confirm against the dataset.
    num_clues: int
    # Puzzle text; sent verbatim as the single user message of a rollout.
    prompt: str
    # Expected answers, keyed by answer label. `rollout` searches the model's
    # reply for "<label>. <answer>" and compares the answer case-insensitively.
    solution: dict[str, str]


# Load the full puzzle set. The context manager closes the file handle
# deterministically, and the explicit encoding avoids depending on the
# platform's locale default (JSON text is UTF-8).
with open("./data/temporal-clue/puzzles.json", encoding="utf-8") as puzzles_file:
    puzzles: list[TemporalCluePuzzle] = json.load(puzzles_file)

# Positional split: first 64 records for validation, next 64 for test,
# everything after that for training.
val_puzzles = puzzles[:64]
test_puzzles = puzzles[64:128]
train_puzzles = puzzles[128:]

# Shuffle only the training split. The fixed seed makes the order
# reproducible; note that this reseeds the global `random` generator.
random.seed(42)
random.shuffle(train_puzzles)


# Create the ART Unsloth backend (in-process, with the given wandb project name).
api = art.UnslothAPI(in_process=True, wandb_project="agent-reinforcement-training")
# Fetch the named model, or create it if it does not exist yet (per the method name).
# The recorded cell output shows the weights actually loaded by vLLM are
# "unsloth/qwen2.5-14b-instruct-unsloth-bnb-4bit".
# NOTE(review): `_get_or_create_model` and `_config` are underscore-prefixed
# (private by convention) and may change without notice.
# Top-level `await` is valid here only because this runs in an IPython cell.
model = await api._get_or_create_model(
    name="temporal-clue-unsloth-002",
    base_model="Qwen/Qwen2.5-14B-Instruct",
    _config={
        "init_args": {
            # presumably forwarded to the inference engine's constructor — TODO confirm
            "enable_sleep_mode": True,
        }
    },
)


def _score_solution(content: str, solution: dict[str, str]) -> float:
    """Return the fraction of `solution` entries answered correctly in `content`.

    For each label the reply is searched for "<label>. <answer>"; when the
    label occurs several times, only the last occurrence counts. Answers are
    compared case-insensitively after stripping surrounding whitespace from
    the extracted text. An empty `solution` scores 0.0 instead of dividing
    by zero.
    """
    if not solution:
        return 0.0
    num_correct = 0
    for key, value in solution.items():
        # re.escape keeps the label literal even if it contains regex
        # metacharacters such as "(" or "+".
        matches = re.findall(rf"{re.escape(key)}\. ([A-Za-z \.:-]+)", content)
        if matches and matches[-1].strip().lower() == value.lower():
            num_correct += 1
    return num_correct / len(solution)


async def rollout(
    client: openai.AsyncOpenAI, puzzle: TemporalCluePuzzle
) -> art.Trajectory:
    """Run one attempt at `puzzle` and score it.

    Sends the puzzle prompt as a single user message to the module-level
    `model` (via `model.name`), then scores the first returned choice.

    Args:
        client: Async OpenAI-compatible client used for the chat completion.
        puzzle: Puzzle whose `prompt` is sent and whose `solution` is scored.

    Returns:
        A trajectory holding the prompt message and the returned choice, with
        `reward` and the `acc` metric both set to the fraction of solution
        entries answered correctly.

    Raises:
        AssertionError: If the completion's message content is not a string.
    """
    messages: art.Messages = [{"role": "user", "content": puzzle["prompt"]}]
    chat_completion = await client.chat.completions.create(
        messages=messages, model=model.name
    )
    choice = chat_completion.choices[0]
    content = choice.message.content
    assert isinstance(content, str)
    reward = acc = _score_solution(content, puzzle["solution"])
    return art.Trajectory(
        messages_and_choices=[*messages, choice], reward=reward, metrics={"acc": acc}
    )


# Number of training puzzles taken per batch (the slice width used below).
stride = 16
# Client passed to `rollout` for chat completions against `model`.
openai_client = await model.openai_client()

# Schedule trajectory collection as a background task: 50 rollouts for each of
# the first `stride` training puzzles (16 x 50 = 800, the total shown by the
# "train" progress bar in the recorded output). The task is only created here,
# not awaited; `await gather_task` to obtain the result.
gather_task = asyncio.create_task(
    art.gather_trajectories(
        (
            # Nested generators: one inner generator of rollout coroutines per
            # puzzle, created lazily as the outer generator is consumed.
            (rollout(openai_client, puzzle) for _ in range(50))
            for puzzle in train_puzzles[:stride]
        ),
        pbar_desc="train",
        return_exceptions=False,
    )
)

# for i in range(await model.get_iteration(), 1_000):
#     val_groups, train_groups = await asyncio.gather(
#         art.gather_trajectories(
#             (
#                 (rollout(openai_client, puzzle) for _ in range(2))
#                 for puzzle in val_puzzles
#             ),
#             pbar_desc="val",
#             stream_chat_completions=8,
#             return_exceptions=False,
#         ),
#         art.gather_trajectories(
#             (
#                 (rollout(openai_client, puzzle) for _ in range(50))
#                 for puzzle in train_puzzles[i * stride : (i + 1) * stride]
#             ),
#             pbar_desc="train",
#             return_exceptions=False,
#         ),
#     )
#     await model.log(val_groups)
#     await model.clear_iterations()
#     await model.tune(
#         train_groups,
#         config=art.TuneConfig(
#             lr=5e-5, sequence_length=8192, plot_tensors=True, verbosity=2
#         ),
#     )


Please restructure your imports with 'import unsloth' at the top of your file.
  import unsloth


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  left = re.match("[\s\n]{4,}", leftover).span()[1]
  .replace("*", "\*").replace("^", "\^")\
  .replace("*", "\*").replace("^", "\^")\
  .replace("-", "\-").replace("_", "\_")\
  .replace("-", "\-").replace("_", "\_")\
  .replace(":", "\:").replace("+", "\+")\
  .replace(":", "\:").replace("+", "\+")\
  .replace(".", "\.").replace(",", "\,")\
  .replace(".", "\.").replace(",", "\,")\
  .replace("(", "\(").replace(")", "\)")\
  .replace("(", "\(").replace(")", "\)")\
  .replace("[", "\[").replace("]", "\]")\
  .replace("[", "\[").replace("]", "\]")\
  if "loss\_function" in cross_entropy_find and "loss_function" not in forward:
  elif "loss\_function" not in cross_entropy_find and "loss_function" in forward:
  if "logits = outputs\.logits" in cross_entropy_find:
  r"for ([^\s]{1,}) in " + modulelist_item + "\:[\n]" + \
  regex_find = f"{call_class}\(([^\)]{{1,}})\)"
  regex_find = f"{call_class}\(([^\)]{{1,}})\)"
  regex_find = "def forward\(([^\)]{1,})\)"
  inherited_modules = re.find

🦥 Unsloth Zoo will now patch everything to make training faster!
INFO 04-04 22:13:55 __init__.py:207] Automatically detected platform cuda.
==((====))==  Unsloth 2025.3.19: Fast Qwen2 patching. Transformers: 4.49.0. vLLM: 0.7.3.
   \\   /|    NVIDIA H100 80GB HBM3. Num GPUs = 1. Max memory: 79.109 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/qwen2.5-14b-instruct-unsloth-bnb-4bit with actual GPU utilization = 79.39%
Unsloth: Your GPU has CUDA compute capability 9.0 with VRAM = 79.11 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 8192. Num Sequences = 368.
Unsloth: vLLM's KV Cache can use up to 52.15 GB. Also swap space = 6 GB.
INFO 04-04 22:14:17 config.py:549]

tokenizer_config.json:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]

INFO 04-04 22:14:20 cuda.py:229] Using Flash Attention backend.
INFO 04-04 22:14:20 model_runner.py:1110] Starting to load model unsloth/qwen2.5-14b-instruct-unsloth-bnb-4bit...
INFO 04-04 22:14:20 loader.py:1089] Loading weights with BitsAndBytes quantization.  May take a while ...
INFO 04-04 22:14:21 weight_utils.py:254] Using model weights format ['*.safetensors']


model-00002-of-00003.safetensors:   0%|          | 0.00/4.73G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/1.56G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

INFO 04-04 22:14:48 weight_utils.py:270] Time spent downloading weights for unsloth/qwen2.5-14b-instruct-unsloth-bnb-4bit: 27.014179 seconds


Loading safetensors checkpoint shards:   0% Completed | 0/3 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/3 [00:00<?, ?it/s]


INFO 04-04 22:14:52 model_runner.py:1115] Loading model weights took 10.6011 GB
INFO 04-04 22:14:52 punica_selector.py:18] Using PunicaWrapperGPU.
INFO 04-04 22:14:59 worker.py:267] Memory profiling takes 6.95 seconds
INFO 04-04 22:14:59 worker.py:267] the current vLLM instance can use total_gpu_memory (79.11GiB) x gpu_memory_utilization (0.79) = 62.81GiB
INFO 04-04 22:14:59 worker.py:267] model weights take 10.60GiB; non_torch_memory takes 0.15GiB; PyTorch activation peak memory takes 2.09GiB; the rest of the memory reserved for KV Cache is 49.96GiB.
INFO 04-04 22:15:00 executor_base.py:111] # cuda blocks: 17054, # CPU blocks: 2048
INFO 04-04 22:15:00 executor_base.py:116] Maximum concurrency for 8192 tokens per request: 33.31x
INFO 04-04 22:15:02 model_runner.py:1434] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory erro

Capturing CUDA graph shapes: 100%|██████████| 49/49 [00:45<00:00,  1.08it/s]

INFO 04-04 22:15:47 model_runner.py:1562] Graph capturing finished in 46 secs, took 9.48 GiB
INFO 04-04 22:15:47 llm_engine.py:436] init engine (profile, create kv cache, warmup model) took 55.23 seconds



Unsloth 2025.3.19 patched 48 layers with 48 QKV layers, 48 O layers and 48 MLP layers.


train:   0%|          | 0/800 [00:00<?, ?it/s]

In [9]:
# Enter the vLLM `train_mode()` async context manager for this model's service
# and print a marker once inside; the body does nothing else.
# NOTE(review): this reaches into private state (`api._services`) and repeats
# the model-name literal used when the model was created — presumably
# `model.name` is the same key; confirm before replacing the literal.
async with api._services["temporal-clue-unsloth-002"].state.vllm.train_mode():
    print("train mode")

train mode
