# Navigation Explorer

Step-by-step navigation extraction with DFS exploration.

In [None]:
# 1. SETUP
import sys
from pathlib import Path

# The backend root is taken to be two directories above the notebook's
# working directory; it is put first on sys.path so project modules import.
backend_path = Path.cwd().parent.parent
if str(backend_path) not in sys.path:
    sys.path.insert(0, str(backend_path))

from dotenv import load_dotenv

# Environment file lives in <backend parent>/config/.env.
# A missing file would otherwise go unnoticed until something later fails on
# an unset variable, so report it up front (the cell still continues).
env_file = backend_path.parent / 'config' / '.env'
if not env_file.is_file():
    print(f"Warning: env file not found: {env_file}")
load_dotenv(env_file)

print(f"Backend: {backend_path}")
print("Run next cell to start browser.")

In [None]:
# 2. BROWSER
from playwright.async_api import async_playwright

# Cleanup existing
async def cleanup():
    """Best-effort shutdown of a previous `browser` / `playwright` session.

    Looks up the names ``browser`` and ``playwright`` in this module's
    globals (i.e. objects left behind by an earlier run of the cell) and
    awaits ``browser.close()`` followed by ``playwright.stop()``.

    Ordinary errors raised while closing are reported and ignored so a stale
    or already-closed handle never blocks starting a new session.
    ``BaseException`` subclasses that are not ``Exception`` (for example
    ``KeyboardInterrupt``) are deliberately NOT swallowed.
    """
    # Order matters: the browser is closed before the driver is stopped.
    for name in ('browser', 'playwright'):
        if name not in globals():
            continue
        try:
            obj = globals()[name]
            await (obj.close() if name == 'browser' else obj.stop())
        except Exception as exc:
            # Best-effort: the handle may already be closed or invalid.
            print(f"cleanup: ignoring error while shutting down {name}: {exc!r}")

await cleanup()

playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless=False)
page = await browser.new_page(viewport={'width': 768, 'height': 900})

print("Browser ready.")

In [None]:
# 3. RELOAD MODULES (run after code changes)
import sys

# Project modules to evict from the import cache, so that the next import of
# any of them re-executes its source instead of reusing the cached module.
modules_to_clear = [
    'scraper.navigation.dynamic_explorer',
    'scraper.navigation.llm_popup_dismiss',
    'scraper.navigation.step_explorer',
    'scraper.navigation.extraction.nav_elements',
    'scraper.navigation.llm.classification',
    'scraper.navigation.llm.client',
    'step_explorer',
]
for mod in modules_to_clear:
    # pop() with a default is a no-op for modules that were never imported.
    sys.modules.pop(mod, None)

from step_explorer import NavExplorer, run_exploration

# Test brands: short key -> start URL handed to the explorer.
BRANDS = dict(
    axel_arigato="https://www.axelarigato.com",
    macys="https://www.macys.com",
    uniqlo="https://www.uniqlo.com",
    ounass="https://www.ounass.ae/women",
    entire_studios="https://www.entirestudios.com",
    eckhaus_latta="https://www.eckhauslatta.com",
    alexander_mcqueen="https://www.alexandermcqueen.com",
    balenciaga="https://www.balenciaga.com",
    zalando_kids="https://www.zalando.de/kinder-home/",
    aelfric_eden="https://www.aelfriceden.com",
    named_collective="https://namedcollective.com",
)

# Confirm the cell ran and list the keys that the later cells accept.
print(f"Modules reloaded. {len(BRANDS)} brands available.")
print("Brands:", list(BRANDS))

In [None]:
# 4. EXPLORE SINGLE BRAND
BRAND = "axel_arigato"  # <-- Change this

url = BRANDS[BRAND]
print(f"\n{'='*60}")
print(f"Exploring: {BRAND}")
print(f"URL: {url}")
print(f"{'='*60}\n")

result = await run_exploration(page, url, max_steps=100)

print(f"\n{'='*60}")
print(f"RESULTS: {BRAND}")
print(f"{'='*60}")
print(f"Success: {result['success']}")
print(f"Total links: {result['stats']['total_links']}")
print(f"Steps: {result['stats']['total_steps']}")
print(f"Errors: {result['stats']['errors']}")
if result['error']:
    print(f"Error: {result['error']}")

In [None]:
# 5. EXPLORE ALL BRANDS (batch)
results = {}

for brand, url in BRANDS.items():
    print(f"\n{'='*60}")
    print(f"[{brand}] {url}")
    print(f"{'='*60}")
    
    try:
        result = await run_exploration(page, url, max_steps=100, max_errors=3)
        results[brand] = result
        print(f"\n✓ {brand}: {result['stats']['total_links']} links")
    except Exception as e:
        results[brand] = {'success': False, 'error': str(e), 'stats': {'total_links': 0}}
        print(f"\n✗ {brand}: {e}")

# Summary
print(f"\n\n{'='*60}")
print("SUMMARY")
print(f"{'='*60}")
for brand, r in results.items():
    status = "✓" if r['success'] else "✗"
    links = r['stats']['total_links']
    err = f" ({r['error'][:30]}...)" if r.get('error') else ""
    print(f"{status} {brand}: {links} links{err}")

In [None]:
# 6. SHOW TREE (after single brand exploration)
# `result` is a shared notebook global: it may be undefined, or may have been
# rebound to something that is not a dict-like exploration result. Only values
# that offer .get() are inspected; anything else is treated as "no results"
# instead of raising AttributeError.
_latest = globals().get('result')
_found = _latest.get('categories') if hasattr(_latest, 'get') else None

if _found:
    categories = _found
    print(f"\nCategories ({len(categories)} total):")
    print("-" * 40)
    # Sorted by path. The loop variable is named `link` so that the global
    # `url` is not overwritten.
    for path, link in sorted(categories.items()):
        print(f"{path}")
        print(f"  → {link}")
else:
    print("No results. Run exploration first.")

In [None]:
# 7. MANUAL STEP-BY-STEP (for debugging)
# Create explorer manually
explorer = NavExplorer(page)

# Setup
URL = BRANDS["macys"]
setup_result = await explorer.setup(URL)
print(setup_result)

In [None]:
# 8. RUN SINGLE STEP (re-run to continue)
if not explorer.done():
    result = await explorer.step()
    print(f"\nItem: {result.item_name}")
    print(f"Success: {result.success}")
    print(f"Links: {list(result.links_found.keys()) if result.links_found else []}")
    print(f"Children: {result.children_added}")
    if result.error:
        print(f"Error: {result.error}")
else:
    print("Exploration complete!")
    explorer.print_tree()

In [None]:
# 9. SHOW EXPLORER STATE
# Calls the explorer's own reporting helpers on the `explorer` created in the
# manual-setup cell. What they print is defined by NavExplorer (not shown
# here) -- presumably the current exploration state followed by the tree.
explorer.show_state()
explorer.print_tree()