In [7]:
import pandas as pd
import asyncio, json
from aiohttp import ClientSession, TCPConnector, ClientTimeout
from selectolax.parser import HTMLParser

# ========= 1) Load Excel =========
# Read the price sheet, normalise every header to lower-case without
# surrounding whitespace, and force the SKU column to text so it can be sent
# as a search string later on.
df = (
    pd.read_excel("prices3.xlsx")
    .rename(columns=lambda header: header.lower().strip())
    .astype({"sku": str})
)

# ========= 2) API settings =========
# Search-suggest endpoint (POSTed to with a JSON payload) and the product page
# URL template; "{slug}" is filled in with the slug returned by the search.
API_URL = "https://ksa-api.boutiqaat.com/searchplus/rest/V2/global/suggest"
PAGE_URL = "https://www.boutiqaat.com/en-kw/men/{slug}"

# Headers sent with every request (both the API call and the page fetch).
# "param" carries a JSON document encoded as a string.
HEADERS = {
    "accept": "application/json, text/plain, */*",
    "content-type": "application/json",
    "origin": "https://www.boutiqaat.com",
    "referer": "https://www.boutiqaat.com/",
    "user-agent": "Mozilla/5.0",
    "param": '{"gender":"4194","app_version":"8.0.0","platform":"web","device_type":"desktop"}',
}

# Template for the search request body. Callers copy it and fill in
# "searchString"; every other field keeps the value listed here.
BASE_PAYLOAD = {
    "productId": "",
    "tvId": "",
    "slug": "",
    "celebrityId": "",
    "categoryId": "",
    "brandId": "",
    "celebrityIds": "",
    "categoryIds": "",
    "brandIds": "",
    "optionId": "",
    "attributeId": "",
    "countryCodeAndLanguage": "kw_en",
    "numberOfRecords": 20,
    "newOrAll": "",
    "featuredOnly": None,
    "sortKey": "",
    "sortDirection": "",
    "searchString": "",
}

# ========= 3) Functions =========
async def fetch_slug(session, sku):
    """Look up the product slug for *sku* through the search-suggest API.

    POSTs a copy of ``BASE_PAYLOAD`` with ``searchString`` set to *sku* and
    reads the slug of the first entry of the first result group.

    Args:
        session: Open HTTP client session whose ``post()`` is usable as an
            async context manager (an ``aiohttp.ClientSession`` in ``main``).
        sku: SKU text to search for.

    Returns:
        The slug, or ``None`` when the response body is not valid JSON or does
        not contain ``data[0].data[0].slug``.

    Note:
        Connection and timeout errors raised by the session are not caught
        here; they propagate to the caller.
    """
    payload = {**BASE_PAYLOAD, "searchString": sku}
    async with session.post(API_URL, headers=HEADERS, json=payload) as r:
        try:
            # content_type=None: decode the body as JSON regardless of the
            # Content-Type header the server reports.
            body = await r.json(content_type=None)
        except ValueError:
            # json.JSONDecodeError is a ValueError: the server answered with
            # something that is not JSON, so treat it as "no slug found".
            return None
    try:
        return body["data"][0]["data"][0]["slug"]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty result list, or a null/non-container node.
        return None

async def fetch_price(session, slug):
    """Fetch the product page for *slug* and extract its tax-inclusive price.

    The page is expected to embed its data as JSON inside a
    ``<script id="__NEXT_DATA__">`` element; the price is read from
    ``props.pageProps.response[0].final_price_with_tax``.

    Args:
        session: Open HTTP client session whose ``get()`` is usable as an
            async context manager (an ``aiohttp.ClientSession`` in ``main``).
        slug: Product slug substituted into ``PAGE_URL``.

    Returns:
        The price as a ``float``, or ``None`` when the script element is
        absent, its content is not valid JSON, the expected path is missing,
        or the value cannot be converted to a float.

    Note:
        Connection and timeout errors raised by the session are not caught
        here; they propagate to the caller.
    """
    url = PAGE_URL.format(slug=slug)
    async with session.get(url, headers=HEADERS) as r:
        html = await r.text()

    script = HTMLParser(html).css_first("script#__NEXT_DATA__")
    if script is None:
        # css_first returns None when nothing matches (e.g. an error page);
        # calling .text() on it would raise AttributeError.
        return None

    try:
        data = json.loads(script.text())
        return float(data["props"]["pageProps"]["response"][0]["final_price_with_tax"])
    except (KeyError, IndexError, TypeError, ValueError):
        # ValueError covers both invalid JSON (json.JSONDecodeError) and a
        # price string that float() cannot parse.
        return None

async def process_row(session, sku, expected_price, sem, *, tolerance=1e-6):
    """Resolve one SKU to its web price and compare it with the expected price.

    Args:
        session: Open HTTP client session passed through to ``fetch_slug`` and
            ``fetch_price``.
        sku: SKU text to look up.
        expected_price: Price from the spreadsheet, as a float.
        sem: Semaphore (or any async context manager) bounding how many rows
            are being fetched at the same time.
        tolerance: Largest absolute difference at which the two prices still
            count as equal. The default only absorbs floating-point
            representation noise; pass ``0`` for exact equality.

    Returns:
        A dict with keys ``sku``, ``excel_price``, ``slug``, ``web_price`` and
        ``match``. ``slug`` and ``web_price`` are ``None`` when they could not
        be determined; ``match`` is ``None`` when there is no web price to
        compare against, otherwise a bool.
    """
    async with sem:  # limit concurrent tasks
        slug = await fetch_slug(session, sku)
        # A falsy slug (None or empty) means there is no page to fetch.
        web_price = await fetch_price(session, slug) if slug else None

    if web_price is None:
        match = None
    else:
        # Exact float equality can report a mismatch for prices that differ
        # only by rounding error, so compare within a tolerance instead.
        match = abs(web_price - expected_price) <= tolerance

    return {
        "sku": sku,
        "excel_price": expected_price,
        "slug": slug,
        "web_price": web_price,
        "match": match,
    }

# ========= 4) Run with concurrency limit =========
async def main(df, limit=50):
    """Check every row of *df* concurrently and return the per-row results.

    Args:
        df: DataFrame with a ``sku`` column and a ``special price`` column
            whose values are convertible with ``float()``.
        limit: Upper bound applied both to the number of rows processed at
            once (semaphore) and to the connector's connection pool.

    Returns:
        A list with one ``process_row`` result per row, in row order.
    """
    gate = asyncio.Semaphore(limit)
    # NOTE(review): ssl=False turns off TLS certificate verification in
    # aiohttp -- confirm this is intentional before using on untrusted networks.
    pool = TCPConnector(limit=limit, ssl=False)
    # A single 30-second budget for each whole request.
    budget = ClientTimeout(total=30)

    async with ClientSession(connector=pool, timeout=budget) as session:
        pending = []
        for _, record in df.iterrows():
            price = float(record["special price"])
            pending.append(process_row(session, record["sku"], price, gate))
        # gather() keeps the results in the same order as the rows.
        return await asyncio.gather(*pending)

# Run with a concurrency of 50 (tune this number!)
out = await main(df, limit=50)
results = pd.DataFrame(out)

# ========= 5) Save =========
results.to_excel("comparison_results.xlsx", index=False)
results.to_csv("comparison_results.csv", index=False)

print("Done! Total rows:", len(results))


Done! Total rows: 52
