In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [2]:
%%html
<style>
/* Force the ipywidget output background to be transparent. */
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
/* Point the Jupyter widget color and font-size variables at the
   corresponding VS Code editor variables. */
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [3]:
import art
from dotenv import load_dotenv
import openai

# Load variables from a .env file into the process environment before the API
# object is constructed.
# NOTE(review): presumably this supplies credentials (e.g. W&B) — confirm.
load_dotenv()


# API handle configured to run in-process, with a W&B project name.
api = art.UnslothAPI(in_process=True, wandb_project="agent-reinforcement-training")
# Top-level `await` is valid here because this runs as a notebook cell.
# NOTE(review): by its name this presumably returns an existing model called
# "yes-or-no-unsloth-001" if there is one, otherwise creates it — confirm.
model = await api.get_or_create_model(
    name="yes-or-no-unsloth-001",
    base_model="unsloth/Qwen2.5-14B-Instruct",
)


async def rollout(client: openai.AsyncOpenAI, prompt: str) -> art.Trajectory:
    """Sample one completion for ``prompt`` and wrap it as a scored trajectory.

    The prompt is sent as a single user message to the module-level ``model``
    (addressed by ``model.name``) with a 100-token cap. The reply is rewarded
    only on an exact string match: ``"yes"`` -> 0.5, ``"no"`` -> 0.75,
    ``"maybe"`` -> 1.0; any other text scores 0.0.

    Args:
        client: Async OpenAI client used to request the chat completion.
        prompt: Text of the user message.

    Returns:
        An ``art.Trajectory`` whose ``messages_and_choices`` is the request
        message followed by the first returned choice, plus the reward.

    Raises:
        AssertionError: If the returned message content is not a ``str``.
    """
    # Exact-match reward table; replies not listed here score 0.0.
    reward_by_reply = {"yes": 0.5, "no": 0.75, "maybe": 1.0}

    messages: art.Messages = [{"role": "user", "content": prompt}]
    response = await client.chat.completions.create(
        messages=messages, model=model.name, max_tokens=100
    )

    choice = response.choices[0]
    reply = choice.message.content
    assert isinstance(reply, str)

    return art.Trajectory(
        messages_and_choices=[*messages, choice],
        reward=reward_by_reply.get(reply, 0.0),
    )


async with model.openai_client(
    estimated_completion_tokens=16, verbosity=2
) as openai_client:
    for i in range(await model.get_iteration(), 1_000):
        train_groups = await art.gather_trajectories(
            (
                (rollout(openai_client, prompt) for _ in range(32))
                for prompt in [
                    f"{prefix} with {', '.join([f"'{w}'" if use_quotes else w for w in words]) if len(words) == 3 else f'{words[0]}' + (f' or {words[1]}' if len(words) > 1 else '')}"
                    for prefix in ["respond", "just respond"]
                    for use_quotes in [True, False]
                    for words in [
                        ["yes", "no", "maybe"],
                        ["maybe", "yes", "no"],
                        ["no", "yes", "maybe"],
                        ["yes", "maybe", "no"],
                        ["yes", "no"],
                        ["maybe", "no"],
                        ["no", "maybe"],
                        ["no", "yes"],
                        ["yes", "no"],
                    ]
                ]
            ),
            pbar_desc="train",
            stream_chat_completions=8,
        )
        await model.tune(
            train_groups,
            config=art.TuneConfig(
                lr=5e-5, sequence_length=8192, plot_tensors=False, verbosity=2
            ),
        )


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth.models import FastLanguageModel  # type: ignore


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.18: Fast Qwen2 patching. Transformers: 4.49.0. vLLM: 0.7.3.
   \\   /|    NVIDIA H100 80GB HBM3. Num GPUs = 1. Max memory: 79.109 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 9.0. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: vLLM loading unsloth/qwen2.5-14b-instruct-unsloth-bnb-4bit with actual GPU utilization = 61.53%
Unsloth: Your GPU has CUDA compute capability 9.0 with VRAM = 79.11 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 8192. Num Sequences = 320.
Unsloth: vLLM's KV Cache can use up to 37.83 GB. Also swap space = 6 GB.
Unsloth: vLLM Bitsandbytes config using

Loading safetensors checkpoint shards:   0% Completed | 0/3 [00:00<?, ?it/s]


Loading safetensors checkpoint shards:   0% Completed | 0/3 [00:00<?, ?it/s]


Capturing CUDA graph shapes: 100%|██████████| 43/43 [00:40<00:00,  1.07it/s]
Unsloth 2025.3.18 patched 48 layers with 48 QKV layers, 48 O layers and 48 MLP layers.
usage: ipykernel_launcher.py [-h] [--host HOST] [--port PORT]
                             [--allow-credentials]
                             [--allowed-origins ALLOWED_ORIGINS]
                             [--allowed-methods ALLOWED_METHODS]
                             [--allowed-headers ALLOWED_HEADERS]
                             [--api-key API_KEY]
                             [--lora-modules LORA_MODULES [LORA_MODULES ...]]
                             [--prompt-adapters PROMPT_ADAPTERS [PROMPT_ADAPTERS ...]]
                             [--chat-template CHAT_TEMPLATE]
                             [--chat-template-content-format {auto,string,openai}]
                             [--response-role RESPONSE_ROLE]
                             [--ssl-keyfile SSL_KEYFILE]
                             [--ssl-certfile SSL_CER

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
