In [3]:
import nest_asyncio
from browser_use import Agent
# NOTE(review): presumably applied so that asyncio-based calls made by the
# agent can run inside the notebook kernel's already-running event loop —
# confirm against the nest_asyncio documentation.
nest_asyncio.apply()

In [8]:
from langchain_google_vertexai.model_garden import ChatAnthropicVertex
from langchain_google_vertexai import ChatVertexAI

In [11]:
# Google Cloud project and region used for the Anthropic-on-Vertex client.
project = "ai-call-bot-440714"
location = "us-east5"

# Build each candidate LLM under its own name. Previously both were assigned
# to `model` back to back, so the Claude client was constructed and then
# silently discarded, which hid which backend the agent actually used.
claude_model = ChatAnthropicVertex(
    model_name="claude-3-5-haiku@20241022",
    project=project,
    location=location,
)

# NOTE(review): no project/location is passed here, so this client presumably
# falls back to the environment's default credentials/region — confirm.
gemini_model = ChatVertexAI(
    model="gemini-1.5-flash-002",
    temperature=0,
    max_tokens=None,
    max_retries=6,
    stop=None,
)

# `model` is the name consumed by the Agent cell. Gemini is selected because
# it was the effective value after the original double assignment; switch to
# `claude_model` here to try the other backend.
model = gemini_model

In [12]:
agent = Agent(
    task="Go to https://www.tbank.ru/career/ and extract the list of all paths of the vacancies by href, if needed click next page button",
    llm=model,
)
result = await agent.run()

INFO     [agent] 🚀 Starting task: Go to https://www.tbank.ru/career/ and extract the list of all paths of the vacancies by href, if needed click next page button
INFO     [agent] 
📍 Step 1
ERROR    [agent] ❌ Result failed 1/3 times:
 Only text, image_url, and media types are supported!
INFO     [agent] 
📍 Step 1
ERROR    [agent] ❌ Result failed 2/3 times:
 Only text, image_url, and media types are supported!
INFO     [agent] 
📍 Step 1
ERROR    [agent] ❌ Result failed 3/3 times:
 Only text, image_url, and media types are supported!
ERROR    [agent] ❌ Stopping due to 3 consecutive failures
INFO     [agent] Created GIF at agent_history.gif


In [54]:
from playwright.async_api import async_playwright
import asyncio

# Defaults target the Booking.com careers board; pass keyword arguments to
# extract_vacancy_paths() to scrape a different listing.
_DEFAULT_JOBS_URL = "https://jobs.booking.com/booking/jobs"
_DEFAULT_LINK_SELECTOR = 'a[href*="/booking/jobs/"]'
_DEFAULT_NEXT_BUTTON_SELECTOR = (
    'button.mat-focus-indicator.mat-tooltip-trigger'
    '.mat-paginator-navigation-next.mat-icon-button.mat-button-base'
)


async def extract_vacancy_paths(
    url: str = _DEFAULT_JOBS_URL,
    link_selector: str = _DEFAULT_LINK_SELECTOR,
    next_button_selector: str = _DEFAULT_NEXT_BUTTON_SELECTOR,
    timeout_ms: int = 60000,
    max_pages=None,
) -> list:
    """Collect the href of every vacancy link across a paginated job listing.

    Opens ``url`` in headless Chromium, gathers the ``href`` attribute of every
    element matching ``link_selector``, then clicks the element matching
    ``next_button_selector`` and repeats until that button is missing or
    disabled, or until ``max_pages`` pages have been read.

    Args:
        url: Listing page to open first.
        link_selector: CSS selector matching the vacancy anchors.
        next_button_selector: CSS selector matching the "next page" button.
        timeout_ms: Timeout, in milliseconds, for the initial navigation and
            for waiting on the first vacancy link.
        max_pages: Optional upper bound on the number of pages read. ``None``
            (the default) means no limit, as before.

    Returns:
        A list of unique href strings in first-seen order. Navigation and
        scraping errors are printed rather than raised, so the list may be
        partial or empty.
    """
    async with async_playwright() as p:
        # NOTE(review): --disable-web-security and the site-isolation flags
        # relax browser protections; confirm they are required for this site.
        browser = await p.chromium.launch(
            headless=True,
            args=[
                '--no-sandbox',
                '--disable-blink-features=AutomationControlled',
                '--disable-infobars',
                '--disable-background-timer-throttling',
                '--disable-popup-blocking',
                '--disable-backgrounding-occluded-windows',
                '--disable-renderer-backgrounding',
                '--disable-window-activation',
                '--disable-focus-on-load',
                '--no-first-run',
                '--no-default-browser-check',
                '--no-startup-window',
                '--window-position=0,0',
                '--window-size=1280,1000',
                '--disable-web-security',
                '--disable-site-isolation-trials',
                '--disable-features=IsolateOrigins,site-per-process',
            ],
        )
        page = await browser.new_page()

        vacancy_paths = []

        try:
            # Generous timeout: the listing is rendered client-side.
            await page.goto(url, timeout=timeout_ms)
            await page.wait_for_load_state('networkidle')

            # Block until at least one vacancy link has been rendered.
            await page.wait_for_selector(link_selector, timeout=timeout_ms)

            pages_read = 0
            while True:
                # Harvest every vacancy href present on the current page.
                links = await page.eval_on_selector_all(
                    link_selector,
                    "elements => elements.map(el => el.getAttribute('href'))"
                )

                if links:
                    vacancy_paths.extend(links)

                # Optional safety valve against a paginator that never disables.
                pages_read += 1
                if max_pages is not None and pages_read >= max_pages:
                    break

                # Advance via the paginator's "next" button (matched through
                # its mat-paginator-navigation-next class).
                try:
                    next_button = await page.query_selector(next_button_selector)

                    # A missing or disabled button means the last page was reached.
                    if not next_button or await next_button.is_disabled():
                        break

                    await next_button.click()
                    await page.wait_for_load_state('networkidle')
                    # Extra settle time so the next page's links are in the DOM.
                    await page.wait_for_timeout(2000)
                except Exception as e:
                    print("No more pages available or error:", e)
                    break

        except Exception as e:
            # Best-effort scrape: report the failure and return what was found.
            print(f"Error occurred: {e}")
        finally:
            await browser.close()

        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # repeated runs yield a stable ordering (a plain set does not).
        return list(dict.fromkeys(vacancy_paths))

In [55]:
paths = await extract_vacancy_paths()
print(f"Found {len(paths)} unique vacancy paths:")
for path in paths:
    print(path)

Found 56 unique vacancy paths:
/booking/jobs/14582?lang=en-us
/booking/jobs/14592?lang=en-us
/booking/jobs/14668?lang=en-us
/booking/jobs/14193?lang=en-us
/booking/jobs/14085?lang=en-us
/booking/jobs/14723?lang=en-us
/booking/jobs/14688?lang=en-us
/booking/jobs/14824?lang=en-us
/booking/jobs/12795?lang=en-us
/booking/jobs/12870?lang=en-us
/booking/jobs/14179?lang=en-us
/booking/jobs/14608?lang=en-us
/booking/jobs/14148?lang=en-us
/booking/jobs/13382?lang=en-us
/booking/jobs/12757?lang=en-us
/booking/jobs/13406?lang=en-us
/booking/jobs/14660?lang=en-us
/booking/jobs/14591?lang=en-us
/booking/jobs/12913?lang=en-us
/booking/jobs/12699?lang=en-us
/booking/jobs/14699?lang=en-us
/booking/jobs/14600?lang=en-us
/booking/jobs/14571?lang=en-us
/booking/jobs/14037?lang=en-us
/booking/jobs/14597?lang=en-us
/booking/jobs/14104?lang=en-us
/booking/jobs/14024?lang=en-us
/booking/jobs/13154?lang=en-us
/booking/jobs/13533?lang=en-us
/booking/jobs/14701?lang=en-us
/booking/jobs/13405?lang=en-us
/booking