In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Installation

In [6]:
%%capture
# Install the packages this notebook imports; %%capture hides the installer output.
# NOTE(review): versions are not pinned and --prerelease allow is set, so a
# fresh install may resolve to different releases over time — consider pinning.
!uv pip install openpipe-art openpipe --prerelease allow --no-cache-dir
# patchright supplies the `async_playwright` browser API used by the rollout.
!uv pip install patchright

In [None]:
%%html
<!-- Cosmetic only: make ipywidget output backgrounds transparent and take the
     widget text colour and font size from the VS Code editor theme variables. -->
<style>
.cell-output-ipywidget-background {
    background-color: transparent !important;
}
:root {
    --jp-widgets-color: var(--vscode-editor-foreground);
    --jp-widgets-font-size: var(--vscode-editor-font-size);
}  
</style>

In [None]:
import asyncio
import art
import random
from dotenv import load_dotenv
from art.utils.get_repo_root_path import get_repo_root_path
from art.skypilot import SkyPilotBackend

# Load environment variables from a .env file into the process environment.
load_dotenv()

# Repository root; reused below as the ART source location and to find the .env file.
root_path = get_repo_root_path()

# Create the SkyPilot-backed training/inference backend.
# NOTE(review): presumably this launches the named cluster or attaches to it
# if it already exists — confirm against the SkyPilotBackend documentation.
backend = await SkyPilotBackend.initialize_cluster(
    cluster_name="bo-art-wikilinks-single-turn",
    art_version=root_path,
    env_path=f"{root_path}/.env",
    gpu="H100-SXM"
)

# The trainable model; `name` and `project` identify this run.
model = art.TrainableModel(
    name="001-wiki-game", project="wiki-game", base_model="Qwen/Qwen2.5-14B-Instruct"
)

# NOTE(review): presumably restores previously pushed checkpoints for this
# model so training can resume — experimental API, confirm semantics.
await backend._experimental_pull_from_s3(
    model,
    verbose=True,
)

# Attach the model to the backend; the training cell below calls
# model.get_step() / model.train() on this registered model.
await model.register(backend)


In [9]:
import openai
import requests
from dotenv import load_dotenv
from patchright.async_api import async_playwright

import art

async def get_random_wikipedia_url():
    """Return the URL of a randomly chosen English Wikipedia page.

    Page titles are read from ``pages.txt`` in the current working directory,
    one title per line. Blank or whitespace-only lines are ignored so that a
    trailing newline or empty line can never produce a bare ``/wiki/`` URL.

    Returns:
        str: ``https://en.wikipedia.org/wiki/<title>`` for the sampled title.

    Raises:
        FileNotFoundError: if ``pages.txt`` does not exist.
        IndexError: if ``pages.txt`` contains no page titles.
    """
    # Read as UTF-8 explicitly so non-ASCII titles do not depend on the
    # platform's default encoding.
    with open("pages.txt", encoding="utf-8") as f:
        pages = [title for title in (line.strip() for line in f) if title]
    if not pages:
        raise IndexError("pages.txt contains no page titles")
    page = random.choice(pages)
    return f"https://en.wikipedia.org/wiki/{page}"

async def extract_links(page):
    """Collect article links from the body of the Wikipedia page loaded in ``page``.

    Runs a script in the browser that walks the direct children of the
    article's parser output, gathers ``/wiki/`` links (skipping any href that
    contains ``:``) from ``<p>`` and ``<ul>`` elements, and stops at the first
    References / Notes / External links / See also section heading.

    Args:
        page: browser page object exposing an awaitable ``evaluate(script)``.

    Returns:
        De-duplicated list of link targets with the ``/wiki/`` prefix removed,
        in document order. Empty when the page has no article content.
    """
    links = await page.evaluate("""
         () => {
             const content = document.querySelector('#mw-content-text .mw-parser-output');
             // Pages without parser output would otherwise throw on `content.children`.
             if (!content) {
                 return [];
             }
             const stopPattern = /References|Notes|External links|See also/i;
             const links = [];
             for (const node of content.children) {
                 // Current MediaWiki markup wraps headings in <div class="mw-heading">,
                 // so accept an <h2> that is either the node itself or its direct child.
                 const heading = node.tagName === 'H2' ? node : node.querySelector(':scope > h2');
                 // Stop if we reach the references or similar sections
                 if (heading && stopPattern.test(heading.innerText)) {
                     break;
                 }
                 // Collect links only from <p> and <ul> elements
                 if (node.tagName === 'P' || node.tagName === 'UL') {
                     for (const a of node.querySelectorAll('a[href^="/wiki/"]:not([href*=":"])')) {
                         links.push(a.getAttribute('href'));
                     }
                 }
             }
             // Remove duplicates and trim '/wiki/'
             return [...new Set(links)].map(link => link.replace('/wiki/', ''));
         }
     """)
    return links


@art.retry(exceptions=(openai.LengthFinishReasonError, requests.ReadTimeout))
async def rollout(model: art.Model, step: int, start_url, end_url,
                  is_validation: bool) -> list[art.Trajectory]:
    """Play one wikilinks episode in a browser and return one trajectory per hop.

    Starting at ``start_url``, each hop loads the current page, lists its
    article links, asks the model to pick one, and follows it. The episode
    ends when the target is reached, the model names a link that is not on
    the page, navigation fails, the page has no links, or ``MAX_HOPS`` hops
    have been made.

    Args:
        model: model whose OpenAI-compatible client produces the choices.
        step: training step, used only in log output.
        start_url: full Wikipedia URL the episode starts from.
        end_url: full Wikipedia URL of the target page.
        is_validation: accepted for interface compatibility; not used here.

    Returns:
        The per-hop trajectories of the episode. Every trajectory starts with
        reward 0; when the target is reached, all of them receive
        ``5 / number_of_links_followed`` and the success metrics.

    Raises:
        Exception: any error from the chat-completion call is logged and
            re-raised. NOTE(review): presumably ``art.retry`` re-runs the
            rollout for the exception types listed in the decorator — confirm.
    """
    MAX_HOPS = 20
    HEADLESS = True
    NAV_TIMEOUT_MS = 1000000  # same (very generous) limit for goto and selector wait
    WIKI_PREFIX = "https://en.wikipedia.org/wiki/"
    # Kept verbatim (including spelling) because this text is model input.
    SYSTEM_PROMPT = (
        "You are playing the wikilinks game. You are given a starting page and a target page. "
        "You need to find a shortest path from the starting page to the target page. "
        "You can only visit pages that are accesible from the current page. "
        "You will be given a list of pages you can navigate from the current page, and you need to choose the next page to visit from that list. "
        "Available pages will be presented in the follofing format: <available_pages><o>{page}</o><o>{page}</o><o>{page}</o></available_pages>. "
        "Return ONLY the page name without tags choosing ONLY from the list of available pages."
    )

    async with async_playwright() as p:

        episode_trajectories = []

        browser = await p.chromium.launch(headless=HEADLESS)
        # try/finally so the browser is closed even when an exception
        # propagates out of the episode (e.g. to trigger a retry).
        try:
            page = await browser.new_page()

            url = start_url
            hop = 0
            # Ordered, de-duplicated history so the "a -> b -> c" path shown to
            # the model reflects the real visiting order (a set does not).
            visited_pages = []

            while hop < MAX_HOPS:
                current_trajectory = art.Trajectory(
                    messages_and_choices=[{
                        "role": "system",
                        "content": SYSTEM_PROMPT,
                    }],
                    reward=0,
                )
                # Metric keys are kept as-is (spelling included) so they stay
                # consistent with values already logged under these names.
                current_trajectory.metrics["reached_targed"] = 0
                current_trajectory.metrics["halucinated"] = 0

                # NOTE(review): the trajectory is recorded before any model
                # choice exists; if navigation fails below it is returned with
                # only the system message — confirm the trainer tolerates that.
                episode_trajectories.append(current_trajectory)

                try:
                    await page.goto(url, timeout=NAV_TIMEOUT_MS)
                    await page.wait_for_selector("#mw-content-text",
                                                 timeout=NAV_TIMEOUT_MS)
                except Exception as e:
                    print(f"Error navigating to page {url}: {e}")
                    break

                available_pages = await extract_links(page)
                if not available_pages:
                    print("No valid links found. Stopping.")
                    break

                if url not in visited_pages:
                    visited_pages.append(url)

                user_prompt = (
                    f"Current page: {url.split('/wiki/')[-1]}\nTarget page: {end_url.split('/wiki/')[-1]}\n\nAvailable links on the current page:\n"
                    + "\n".join(f"{link}" for link in available_pages)
                    + ("\nVisited pages: " +
                       " -> ".join(visited_pages) if visited_pages else "")
                    + "\n\nWhich link should I open next to reach the target page in the fewest steps? Return only the link (page name) choosing ONLY from the list of links available from the current page."
                )
                current_trajectory.messages_and_choices.append({
                    "role": "user",
                    "content": user_prompt,
                })
                messages = current_trajectory.messages()

                try:
                    model_client = model.openai_client()
                    chat_completion = await model_client.chat.completions.create(
                        max_completion_tokens=35,
                        messages=messages,
                        model=model.inference_model_name,
                    )

                    choice = chat_completion.choices[0]
                    next_link = choice.message.content

                    assert isinstance(next_link, str)
                    current_trajectory.messages_and_choices.append(choice)

                except Exception as e:
                    print("caught exception generating chat completion", e)
                    # Bare raise keeps the original traceback intact.
                    raise

                # The answer must match one of the offered links exactly.
                if next_link not in available_pages:
                    current_trajectory.metrics["halucinated"] = 1
                    print(f"Invalid link: {next_link}. Step: {step}. Hop: {hop}")
                    break

                url = WIKI_PREFIX + next_link

                if url == end_url:
                    # `hop` is zero-based, so hop + 1 links were followed.
                    path_length = hop + 1
                    for t in episode_trajectories:
                        t.reward = 5 / path_length
                        t.metrics["reached_targed"] = 1
                        t.metrics["path_to_target"] = path_length

                    print(f"Reached target page in {path_length} steps")
                    break

                hop += 1

            if hop == MAX_HOPS:
                print(f"Reached max hops: {MAX_HOPS}")
        finally:
            await browser.close()

        return episode_trajectories


In [None]:
for i in range(await model.get_step(), 400):

    start_url = await get_random_wikipedia_url()
    end_url = await get_random_wikipedia_url()

    train_rollout_results = await asyncio.gather(
        *(rollout(model, i, start_url, end_url, is_validation=False) for _ in range(10))
    )
    
    # Flatten training trajectories
    train_trajectories = [
        trajectory 
        for rollout_result in train_rollout_results 
        for trajectory in rollout_result
    ]
    
    await model.delete_checkpoints()

    await model.train(
        [art.TrajectoryGroup(train_trajectories)],
        config=art.TrainConfig(learning_rate=5e-6)
    )

    await backend._experimental_push_to_s3(
        model,
    )

In [9]:
# Set to True to call backend.down() once the run is finished; left False so
# re-running the notebook does not shut the backend down by accident.
DESTROY_AFTER_RUN = False

if DESTROY_AFTER_RUN:
    await backend.down()