In [5]:
import asyncio
from playwright.async_api import async_playwright
import pandas as pd
import nest_asyncio

# Patch asyncio so an already-running event loop can be re-entered —
# presumably needed because the notebook kernel runs its own loop
# (TODO confirm this is still required in the target environment).
nest_asyncio.apply()

# scrape_bina() walks index pages 1..NUM_PAGES (inclusive).
NUM_PAGES = 250
# Path of the CSV file the scraped listings are written to.
OUTPUT_CSV = "bina_listings.csv"
CONCURRENT_REQUESTS = 10  # Max tabs (detail pages) opened at once

async def scrape_listing(browser, link):
    """Scrape one listing detail page and return its fields as a flat dict.

    Opens a new tab on ``browser``, navigates to ``link`` and collects the
    heading (split into title and location), the price and the key/value
    property table. The tab is always closed again, whether or not the
    scrape succeeded, so failed listings do not accumulate open tabs.

    Args:
        browser: Object exposing an awaitable ``new_page()``; in this file a
            Playwright browser.
        link: URL of the listing detail page.

    Returns:
        A dict mapping each property name found on the page to its value,
        plus the fixed keys ``"Title"``, ``"Price"``, ``"Location"`` and
        ``"URL"``. ``None`` if the page could not be scraped; the error is
        printed rather than raised.
    """
    page = None
    try:
        page = await browser.new_page()
        await page.goto(link, timeout=60000)

        await page.wait_for_selector("div.product-properties__column", timeout=10000)
        title_full = await page.locator("h1.product-title").text_content() or ''
        # Split on the LAST comma only: everything before it is the title,
        # the remainder (if any) is treated as the location.
        title_parts = title_full.strip().rsplit(",", 1)
        title = title_parts[0].strip()
        location = title_parts[1].strip() if len(title_parts) > 1 else ''

        # The price is best-effort: a missing/late price element (or a None
        # text_content) yields an empty price instead of failing the listing.
        try:
            price_val = await page.locator("div.product-price__i.product-price__i--bold span.price-val").text_content(timeout=5000)
            price_cur = await page.locator("div.product-price__i.product-price__i--bold span.price-cur").text_content(timeout=5000)
            price = f"{price_val.strip()} {price_cur.strip()}"
        except Exception:
            price = ''

        features = await page.eval_on_selector_all(
            "div.product-properties__i",
            """nodes => nodes.map(n => {
                const key = n.querySelector('.product-properties__i-name')?.innerText.trim();
                const value = n.querySelector('.product-properties__i-value')?.innerText.trim();
                return { key, value };
            })"""
        )

        # Drop rows where either the name or the value is missing/empty.
        feature_dict = {f['key']: f['value'] for f in features if f['key'] and f['value']}
        feature_dict.update({
            "Title": title,
            "Price": price,
            "Location": location,
            "URL": link
        })
        return feature_dict

    except Exception as e:
        print(f" Failed to scrape {link}: {e}")
        return None

    finally:
        # Close the tab on every path. Previously it was only closed after a
        # successful scrape, so each timeout left a tab open until the
        # browser itself was shut down.
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                # A failed close must not discard data already collected.
                print(f" Failed to close page for {link}: {e}")

async def scrape_bina():
    """Walk the bina.az index pages, scrape every listing and write a CSV.

    For each index page 1..NUM_PAGES the listing links are collected and
    their detail pages scraped concurrently through ``scrape_listing``, with
    at most CONCURRENT_REQUESTS detail tabs open at a time. Successfully
    scraped listings are written to OUTPUT_CSV once all pages are processed.

    An index page that fails to load is reported and skipped, so a single
    timeout does not abort the run and discard the listings already scraped.

    Returns:
        None. The results are written to OUTPUT_CSV and a summary is printed.
    """
    listings_data = []
    # One semaphore for the whole run caps the number of detail tabs open at
    # once; it does not need to be rebuilt for every index page.
    sem = asyncio.Semaphore(CONCURRENT_REQUESTS)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)

        async def bound_scrape(link):
            async with sem:
                return await scrape_listing(browser, link)

        try:
            page = await browser.new_page()

            for page_num in range(1, NUM_PAGES + 1):
                print(f"Scraping page {page_num}...")
                url = f"https://bina.az/items?page={page_num}"
                try:
                    await page.goto(url, timeout=60000)
                    await page.wait_for_selector("div.items_list", timeout=10000)
                    hrefs = await page.eval_on_selector_all(
                        "div.items_list div.items-i a",
                        "elements => elements.map(el => el.href)"
                    )
                except Exception as e:
                    # Skip this index page but keep everything scraped so far.
                    print(f" Failed to load page {page_num}: {e}")
                    continue

                # Drop empty hrefs and repeated links (order preserved) so a
                # listing is not scraped and stored more than once per page.
                listing_links = list(dict.fromkeys(href for href in hrefs if href))

                results = await asyncio.gather(*(bound_scrape(link) for link in listing_links))
                # scrape_listing returns None for listings it could not scrape.
                listings_data.extend(res for res in results if res)
        finally:
            # Shut the browser down even if the loop exits with an error.
            await browser.close()

    df = pd.DataFrame(listings_data)
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"\nScraped {len(df)} listings and saved to {OUTPUT_CSV}")

# Top-level await: this only works where the surrounding environment accepts
# it (e.g. a notebook/IPython cell); a plain script would need
# asyncio.run(scrape_bina()) instead.
await scrape_bina()


Scraping page 1...


Future exception was never retrieved
future: <Future finished exception=TargetClosedError('Target page, context or browser has been closed')>
playwright._impl._errors.TargetClosedError: Target page, context or browser has been closed


Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
 Failed to scrape https://bina.az/items/5253187: Page.goto: Timeout 60000ms exceeded.
Call log:
  - navigating to "https://bina.az/items/5253187", waiting until "load"

Scraping page 42...
Scraping page 43...