In [353]:
import requests
import json
import re
import pandas as pd
import nest_asyncio
import time

from tqdm import tqdm

from playwright.sync_api import sync_playwright
from selenium import webdriver
from selenium.webdriver.common.by import By

from selectolax.parser import HTMLParser

In [224]:
# NOTE(review): presumably needed so the Playwright sync API imported above can
# run inside the notebook's already-running event loop; nothing in the visible
# cells uses Playwright, so confirm this call is still required.
nest_asyncio.apply()

In [21]:
def parse_header(raw_header: str):
    """Turn a pasted block of ``name: value`` header lines into a dict.

    Each non-blank line is split on its first colon. Lines that begin with a
    colon (HTTP/2 pseudo-headers such as ``:authority``) keep that leading
    colon as part of the name. Names and values are stripped of surrounding
    whitespace; if a name occurs more than once the last occurrence wins.

    Parameters
    ----------
    raw_header : str
        Newline-separated header lines, e.g. copied from browser dev tools.

    Returns
    -------
    dict
        Mapping of header name to header value.

    Raises
    ------
    ValueError
        If a non-blank line contains no ``:`` separator.
    """
    header = dict()

    for line_no, raw_line in enumerate(raw_header.split("\n"), start=1):
        line = raw_line.strip()

        # Blank lines (including the one produced by a trailing newline) carry
        # no header and would otherwise fail the split below.
        if not line:
            continue

        is_pseudo = line.startswith(":")
        # Skip the leading colon of a pseudo-header so the split lands on the
        # separator rather than on the prefix.
        name, sep, value = (line[1:] if is_pseudo else line).partition(":")

        if not sep:
            # Only a short prefix is echoed: header values may hold secrets.
            raise ValueError(
                f"line {line_no} is not a 'name: value' header: {line[:40]!r}"
            )

        if is_pseudo:
            name = f":{name}"

        header[name.strip()] = value.strip()

    return header

<h3>Retrieve categories</h3>

In [None]:
category_html = """<div class="CatalogMenu_parents__Krpe1" bis_skin_checked="1"><a class="CatalogMenuLink_parentLink__5IG3T CatalogMenuLink_isActive__acRjg" href="/catalog/3547/skidki"><img class="CatalogMenuLink_icon__Fbn09" src="https://media.vprok.ru/content/orig/as/az/i67wlou2usg4b27omzqrixpd2uedazas.svg" alt="Скидки">Скидки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/6736/novinki">Новинки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1301/ovoschi-frukty-griby">Овощи, фрукты, ягоды</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1303/moloko-syr-yaytsa">Молоко, сыр, яйца</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2726/23-fevralya">23 февраля</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1307/myaso-ptitsa-delikatesy">Мясо, птица, колбасы</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1304/ryba-i-moreprodukty">Рыба, икра </a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/5175/gotovaya-eda">Готовая еда</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1312/soki-vody-napitki">Воды, соки, напитки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1305/tovary-dlya-mam-i-detey">Товары для мам и детей</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1306/krasota-gigiena-bytovaya-himiya">Красота и здоровье</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4371/chipsy-sneki-suhariki">Чипсы, снеки, орехи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4019/sladosti-i-sneki">Сладости</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1300/makarony-krupy-spetsii">Макароны, крупы, специи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1310/konservy-orehi-sousy">Соусы и консервация</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1309/hleb-sladosti-sneki">Хлеб и выпечка</a><a class="CatalogMenuLink_parentLink__5IG3T" 
href="/catalog/1302/kofe-chay-sahar">Чай, кофе, сахар</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1311/zamorojennye-produkty">Замороженные продукты</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/3453/zdorovoe-pitanie">Здоровое питание</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2348/bytovaya-himiya-i-hoztovary">Бытовая химия и гигиена</a><a class="CatalogMenuLink_parentLink__5IG3T CatalogMenuLink_isBold__SVw8F" href="https://zoo.vprok.ru/">Зоотовары</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/promo/tovary-dlya-doma">Товары для дома и дачи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4450/aptechka">Аптечка</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2561/bytovaya-tehnika">Бытовая техника</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1997/alkogol">Алкоголь</a></div>"""

In [250]:
# Parse the saved category-menu markup and collect the link target of every
# top-level menu entry, in document order.
dom = HTMLParser(category_html)
hrefs = list(
    map(
        lambda el: el.attributes["href"],
        dom.css("a.CatalogMenuLink_parentLink__5IG3T"),
    )
)

In [253]:
hrefs

['/catalog/3547/skidki',
 '/catalog/6736/novinki',
 '/catalog/1301/ovoschi-frukty-griby',
 '/catalog/1303/moloko-syr-yaytsa',
 '/catalog/2726/23-fevralya',
 '/catalog/1307/myaso-ptitsa-delikatesy',
 '/catalog/1304/ryba-i-moreprodukty',
 '/catalog/5175/gotovaya-eda',
 '/catalog/1312/soki-vody-napitki',
 '/catalog/1305/tovary-dlya-mam-i-detey',
 '/catalog/1306/krasota-gigiena-bytovaya-himiya',
 '/catalog/4371/chipsy-sneki-suhariki',
 '/catalog/4019/sladosti-i-sneki',
 '/catalog/1300/makarony-krupy-spetsii',
 '/catalog/1310/konservy-orehi-sousy',
 '/catalog/1309/hleb-sladosti-sneki',
 '/catalog/1302/kofe-chay-sahar',
 '/catalog/1311/zamorojennye-produkty',
 '/catalog/3453/zdorovoe-pitanie',
 '/catalog/2348/bytovaya-himiya-i-hoztovary',
 'https://zoo.vprok.ru/',
 '/promo/tovary-dlya-doma',
 '/catalog/4450/aptechka',
 '/catalog/2561/bytovaya-tehnika',
 '/catalog/1997/alkogol']

<h3>Promo pages</h3>

In [457]:
headers = """accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
cookie: luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; is_pickup=0; addressChange=1; pickupZone=null; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; _gid=GA1.2.373746217.1676908872; _ym_isad=2; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; 
access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDZlNjE4MTAxZGY4Y2QyODM1YmM3MTg4MGRmZjNiMzM5MDNlMGViZTIiLCJpYXQiOjE2NzY5MjM1MTAuMDY2NDAxLCJuYmYiOjE2NzY5MjM1MTAuMDY2NDAzLCJleHAiOjE2NzY5Mzc5MTAuMDUxMzU0LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.nVFAsOdeRecsDj-Y7All8KZ7oNGG94QqGwm24qiFG7Lb4_nTCG_eKjR4gG4Ir3BKjdKG-jfeLlkV5MGGuJXNsbdMqIZ-EE8Ec5Iiem0H0N4b7NtuHGmo-V6p-s_ymSXV_LHOfCCYAuajmj3vpnd_hr-_Pbv2J_UEvU6WU3yhdgisU-fUQS9L2imtQuwypphommHmGm8pVbR5HfjJS7h9hRVMUDbCIbQMb2mVSQbtV8iqXBGqzp6i3ZDEU41sGLGBqLnFBZjf-bPxeYZFLJUVMmUK2gsV9f2TITyp1NXfcJ2OofTKlOm_yFS4HyTWwAsPhB_B2l5_YKMOMx9q_ZEpU1EBcmI9L0PNtAOL58e3hEGU6iJLQGh4UoNIaCKHuV6Tw8wM39mPpkdD_-OBtjHcPJBHbddGWM6cwxY_yfF-doa0Ppk2hwlwkS6VAQVmi28Pl-z0wADqOM7chXg4s3438dQIudR0oGT3BbBbbiuIgrqTWDSd84db3BVE78lzu1CNEF8axWqU5MFeQtk86dvQAr0aooq7Bt_gy7Eyu6wCKdc7OF8CfHTbiN4ik5ZiLrmplmnXGtu30cOPkxSyLJVEXrOfuYbTCdEjwhZWzprfIs9sTwV4diAG0oHm11ejM2yYmtDOGQU2J0ZBUAo2LpTJLHh21OQFh1UxNBDIbvukVWo; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; x-next-route-destination=%2Fcatalog%2F1307%2Fmyaso-ptitsa-delikatesy%3Fsort%3Dpopularity_desc%26page%3D4; regionChange=1; _slsession=66873AAE-28CF-46B8-8D99-E5FC835EAD57; _ym_visorc=b; region=2; deliveryTypeId=1; standardShopId=2246; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1676932738858; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; short_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; flat=12; house=14; latLng=60.038537%2C30.344641; addressZone=16120; 
preview_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; XSRF-TOKEN=eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ%3D%3D; address_id=503576999; isHouse=eyJpdiI6ImtnMCtDaWhvVkthZXdkaWliYjhSQnc9PSIsInZhbHVlIjoiUXVOOG1rWkdIQUZCaFhHcUtGU1IyQT09IiwibWFjIjoiYWU1YjM2ZDczODkyMWY4YTU0NGRlNmUxYmZlYjYxMzliMzQ3ZmFjODM2MzhkZTJkMTFiNDIzNDlhNTFjNTM5NSJ9; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; shop=2246; aid=eyJpdiI6IklHbzNIZFwvXC8rUThMVUxnS21jMWdHUT09IiwidmFsdWUiOiJNMVJXZUpsUjM0aWxDaE1HenBWOGRmSytNdE03Y29abHppcElBdDFLampERkNobFVWenZKQ1wvVzgwcDRkMHdreU5mdGZaRFo0aHF3ZlwvNERrUUJkK2dRPT0iLCJtYWMiOiIzZTc5M2UyMWEyZjY3MjZmNDhlMGM4YzQ1NWQwMDEzNDhhMjgyN2ZjMDcxNWEyOTVjYTBjMzY5MjRjZGIyZDc3In0%3D; _ga_B122VKXXJE=GS1.1.1676932447.20.1.1676932793.2.0.0
referer: https://www.vprok.ru/promo/napitki?page=4&sort=rate_desc
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "macOS"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
x-api-context-address-id: 503576999
x-api-context-delivery-type-id: 1
x-api-context-region-id: 2
x-api-context-shop-id: 2246
x-xsrf-token: eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ=="""

In [456]:
# Persist the current value of `headers` as JSON.
# NOTE(review): read top to bottom, `headers` is still the raw pasted text at
# this point (it is parsed in a later cell), so the file holds one JSON string
# that includes the session cookies and tokens - confirm this is intended and
# keep headers.json out of version control.
with open("headers.json", "w") as file:
    file.write(json.dumps(headers))

In [452]:
hrefs

['/catalog/3547/skidki',
 '/catalog/6736/novinki',
 '/catalog/1301/ovoschi-frukty-griby',
 '/catalog/1303/moloko-syr-yaytsa',
 '/catalog/2726/23-fevralya',
 '/catalog/1307/myaso-ptitsa-delikatesy',
 '/catalog/1304/ryba-i-moreprodukty',
 '/catalog/5175/gotovaya-eda',
 '/catalog/1312/soki-vody-napitki',
 '/catalog/1305/tovary-dlya-mam-i-detey',
 '/catalog/1306/krasota-gigiena-bytovaya-himiya',
 '/catalog/4371/chipsy-sneki-suhariki',
 '/catalog/4019/sladosti-i-sneki',
 '/catalog/1300/makarony-krupy-spetsii',
 '/catalog/1310/konservy-orehi-sousy',
 '/catalog/1309/hleb-sladosti-sneki',
 '/catalog/1302/kofe-chay-sahar',
 '/catalog/1311/zamorojennye-produkty',
 '/catalog/3453/zdorovoe-pitanie',
 '/catalog/2348/bytovaya-himiya-i-hoztovary',
 '/catalog/4450/aptechka',
 '/catalog/2561/bytovaya-tehnika',
 '/catalog/1997/alkogol']

In [458]:
# Convert the pasted header text into a dict. The type guard makes the cell
# safe to re-run: feeding an already-parsed dict back into parse_header fails
# because a dict has no .split().
if isinstance(headers, str):
    headers = parse_header(headers)

In [464]:
# Query string for the first page of category 3547, 30 items per page.
params = {
    "use_brand_zone": "1",
    "limit": "30",
    "category": "3547",
    "page": "1",
    "sort": "rate_desc",
    "no_html": "false"
}

# A timeout keeps the cell from hanging forever on a stalled connection, and
# the status check surfaces HTTP errors before any attempt to decode JSON.
response = requests.get(
    "https://www.vprok.ru/webapi/v1/category-search/3547",
    headers=headers,
    params=params,
    timeout=30,
)
response.raise_for_status()

# `resp` is the decoded JSON body (a dict), as the following cells expect.
resp = response.json()

In [465]:
resp.keys()

dict_keys(['count', 'html', 'filters', 'categories', 'tags', 'metadata', 'banners', 'psb_banners'])

In [466]:
# Parse the markup carried in the response's "html" field. This rebinds `dom`,
# which earlier in the notebook held the parsed category menu.
dom = HTMLParser(resp["html"])

In [467]:
# One <li> per product card in the listing.
products = dom.css("ul#catalogItems > li")

# Column-oriented accumulator; the three lists stay aligned by position.
data = {
    "title": [],
    "prev_price": [],
    "cur_price": []
}

for product in products:
    # Each lookup may find nothing; in that case the falsy result is stored
    # as-is so the row still lines up with the other columns.
    title = product.css_first("div.xf-product-title > a")
    if title:
        title = title.text().strip()

    # Prices keep only digits, dots and commas. The patterns are raw strings:
    # "\d" inside a normal string literal is an invalid escape sequence.
    prev_price = product.css_first("div.xf-product-cost__prev")
    if prev_price:
        prev_price = re.sub(r"[^\d.,]", "", prev_price.text())

    # NOTE(review): the recorded output contains a value like "94,9100", which
    # suggests this element's text can hold more than one number - verify the
    # markup and narrow the selector if so.
    cur_price = product.css_first("div.xf-product-cost__current")
    if cur_price:
        cur_price = re.sub(r"[^\d.,]", "", cur_price.text())

    data["title"].append(title)
    data["prev_price"].append(prev_price)
    data["cur_price"].append(cur_price)

In [470]:
# Build the product table and drop fully identical rows in one expression.
df = pd.DataFrame(data).drop_duplicates()

In [472]:
df["cur_price"]

0         419
1       129,9
2       109,9
3         169
4        1849
5         539
6       469,9
7       369,9
8       179,9
9         219
10        199
11        179
12       54,9
13       34,9
14        319
15       79,9
16        199
17    94,9100
18        109
19        249
20        349
21       79,9
22       99,9
23        381
24        349
25       99,9
26        529
27        179
28        249
29       79,9
Name: cur_price, dtype: object

<h3>Category pages</h3>

In [334]:
headers = """accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
content-length: 65
content-type: application/json
cookie: region=1; deliveryTypeId=1; luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; is_pickup=0; address=%D0%9C%D0%BE%D1%81%D0%BA%D0%B2%D0%B0%2C%20%D0%A0%D0%BE%D0%BC%D0%B0%D0%BD%D0%BE%D0%B2%20%D0%BF%D0%B5%D1%80%D0%B5%D1%83%D0%BB%D0%BE%D0%BA%2C%202%2F6%D1%813; addressChange=1; short_address=%D0%A0%D0%BE%D0%BC%D0%B0%D0%BD%D0%BE%D0%B2%20%D0%BF%D0%B5%D1%80%D0%B5%D1%83%D0%BB%D0%BE%D0%BA%2C%202%2F6%D1%813; house=2%2F6%D1%813; latLng=55.753524%2C37.608787; addressZone=16070; pickupZone=null; pickupAvailable=0; preview_address=%D0%A0%D0%BE%D0%BC%D0%B0%D0%BD%D0%BE%D0%B2%20%D0%BF%D0%B5%D1%80%D0%B5%D1%83%D0%BB%D0%BE%D0%BA%2C%202%2F6%D1%813; address_id=463010399; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%9C%D0%A1%D0%9A%20%D0%A6%D0%90%D0%9E%20%20%D0%97%D0%B0%D0%BF%D0%B0%D0%B4; shop=2527; standardShopId=7707; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; _gid=GA1.2.373746217.1676908872; _ym_isad=2; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; 
_slsession=66873AAE-28CF-46B8-8D99-E5FC835EAD57; _ym_visorc=b; ngenix_jscv_a68b51100641=cookie_expires=1676926450&cookie_signature=G4umfb3exGQWspVjM5drs4S5Gyg%3D; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; x-next-route-destination=%2Fcatalog%2F1307%2Fmyaso-ptitsa-delikatesy%3Fsort%3Dpopularity_desc%26page%3D3; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1676922877096; _ga_B122VKXXJE=GS1.1.1676921913.18.1.1676923507.60.0.0; XSRF-TOKEN=eyJpdiI6InFRWG4wM0Vlc3RNOE93WHdocDNseUE9PSIsInZhbHVlIjoiVHZVYU1cL2x0Tlc3emtBaGdYXC9HS1ZQY1VLSTR0UWhPSU15NGluTXdBWTQxVHgrY3pPT01lMFRPVUVja0NRYlc4bzgzWThzNytLRlJxT0FHMStZdkZWdz09IiwibWFjIjoiNzEzMTA5ZWZhNGM4NWY5NzVkNTA0YTY0NWJhMzA5MDhkN2FkNzJmM2VmNDJhNmQzNzlkMzFlNzM3ODNmYmEyYSJ9; isHouse=eyJpdiI6IlpscFBlSWFtRWM1MkliV2R5RURONXc9PSIsInZhbHVlIjoiQThDTEpUdDlnN2IwY1lsbDVMb041Zz09IiwibWFjIjoiYjU3Njk4NTUzOWZjNzNiMjU5NmI0NmFhNjk4NWE5YzY4ZjVkYzhhMWExYmZjMzQzN2IxNWNmNGNmMDcwY2Q5NSJ9; 
access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDZlNjE4MTAxZGY4Y2QyODM1YmM3MTg4MGRmZjNiMzM5MDNlMGViZTIiLCJpYXQiOjE2NzY5MjM1MTAuMDY2NDAxLCJuYmYiOjE2NzY5MjM1MTAuMDY2NDAzLCJleHAiOjE2NzY5Mzc5MTAuMDUxMzU0LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.nVFAsOdeRecsDj-Y7All8KZ7oNGG94QqGwm24qiFG7Lb4_nTCG_eKjR4gG4Ir3BKjdKG-jfeLlkV5MGGuJXNsbdMqIZ-EE8Ec5Iiem0H0N4b7NtuHGmo-V6p-s_ymSXV_LHOfCCYAuajmj3vpnd_hr-_Pbv2J_UEvU6WU3yhdgisU-fUQS9L2imtQuwypphommHmGm8pVbR5HfjJS7h9hRVMUDbCIbQMb2mVSQbtV8iqXBGqzp6i3ZDEU41sGLGBqLnFBZjf-bPxeYZFLJUVMmUK2gsV9f2TITyp1NXfcJ2OofTKlOm_yFS4HyTWwAsPhB_B2l5_YKMOMx9q_ZEpU1EBcmI9L0PNtAOL58e3hEGU6iJLQGh4UoNIaCKHuV6Tw8wM39mPpkdD_-OBtjHcPJBHbddGWM6cwxY_yfF-doa0Ppk2hwlwkS6VAQVmi28Pl-z0wADqOM7chXg4s3438dQIudR0oGT3BbBbbiuIgrqTWDSd84db3BVE78lzu1CNEF8axWqU5MFeQtk86dvQAr0aooq7Bt_gy7Eyu6wCKdc7OF8CfHTbiN4ik5ZiLrmplmnXGtu30cOPkxSyLJVEXrOfuYbTCdEjwhZWzprfIs9sTwV4diAG0oHm11ejM2yYmtDOGQU2J0ZBUAo2LpTJLHh21OQFh1UxNBDIbvukVWo; aid=eyJpdiI6IkNvTkN3aUptdzJHZ1JGYmU5XC91N2xnPT0iLCJ2YWx1ZSI6InMzbFwvRDBtMkoxdFNRVVBuYjFJZjhBNWFjZTdZZUtPOXNXZmNwRFA5NkM5U0Y1Ukc1KzgxamVpWmFWU1FkdEtUZGZwSzdwYnN5Q1hnSkM1ZklTZW81QT09IiwibWFjIjoiZTRhOTM0ZDA3MWQ3N2I1ZGJkZGEzN2JiYmJlNzE1OTg3ZTdiNGU4MTA1MDVhMWE3YTlhZjEyMTEyNGRlYjg4NSJ9; _POBP_s=rum=1&id=7bb9d351-0b35-4c26-92d4-5917f2582ff7&created=1676919067650&expire=1676924410065; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd
origin: https://www.vprok.ru
referer: https://www.vprok.ru/catalog/1307/myaso-ptitsa-delikatesy?sort=popularity_desc&page=3
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "macOS"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
x-gib-fgsscgib-w-vprok: Z6P112f585c00138b833091d58327ea2c23c6cdd
x-gib-gsscgib-w-vprok: cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==
x-xsrf-token: eyJpdiI6InFRWG4wM0Vlc3RNOE93WHdocDNseUE9PSIsInZhbHVlIjoiVHZVYU1cL2x0Tlc3emtBaGdYXC9HS1ZQY1VLSTR0UWhPSU15NGluTXdBWTQxVHgrY3pPT01lMFRPVUVja0NRYlc4bzgzWThzNytLRlJxT0FHMStZdkZWdz09IiwibWFjIjoiNzEzMTA5ZWZhNGM4NWY5NzVkNTA0YTY0NWJhMzA5MDhkN2FkNzJmM2VmNDJhNmQzNzlkMzFlNzM3ODNmYmEyYSJ9"""

In [335]:
# Convert the pasted header text into a dict. The type guard makes the cell
# safe to re-run: feeding an already-parsed dict back into parse_header fails
# because a dict has no .split().
if isinstance(headers, str):
    headers = parse_header(headers)

In [450]:
# JSON body for the category endpoint, serialised up front and sent as-is.
payload = json.dumps(
    {"noRedirect":True,"url":"/catalog/1312/soki-vody-napitki"}
)

# Query string: first page, 60 items, sorted by popularity.
params = {
    "sort": "popularity_desc",
    "limit": "60",
    "page": "1",
}

# `resp` stays a Response object here (not decoded JSON) so its raw text can
# be inspected below. The timeout prevents the cell from hanging indefinitely
# on a stalled connection.
resp = requests.post(
    "https://www.vprok.ru/web/api/v1/catalog/category/1312",
    params=params,
    data=payload,
    headers=headers,
    timeout=30,
)

In [394]:
resp.text

'<!DOCTYPE html><html lang=en><head><meta charset="utf-8" /><meta name=viewport content="width=device-width, initial-scale=1" /><style>body{box-sizing:border-box;display:flex;height:100vh;margin:0;padding:0;text-align:center;font-family:sans-serif;font-weight:normal;font-size:16px;line-height:160%;color:#113a60;background-color:#f2f2f2}.c1{display:flex;flex-direction:column;margin:125px auto}h1{max-width:780px;width:100%;font-weight:normal;font-size:40px;line-height:140%;margin:0 auto;word-break:break-word}h3{font-weight:normal;font-size:16px;line-height:160%;margin:16px auto}.c2{max-width:980px;width:100%;margin:0 auto}.a{max-width:365px;}.b{max-width:680px;}.c{box-sizing:border-box;position:relative;display:flex;align-items:center;flex-wrap:wrap;margin-top:100px;margin-left:auto;padding:15px 0 15px 20px;border:1px solid #d4e0eb;color:#515a63;cursor:pointer}.c:last-child{margin-top:48px;}.c:hover>.v{opacity:1}#q{width:175px}#w{width:380px}#w,#q{margin:0 4px 0 4px;font-family:sans-seri

In [390]:
# First run of digits in the path is the category id. Raw string: "\d" in a
# normal string literal is an invalid escape sequence.
re.search(r"\d+", hrefs[8])[0], hrefs[8]

('1312', '/catalog/1312/soki-vody-napitki')

In [267]:
# resp["products"]

In [432]:
class Vprok:
    """Paginated scraper for the category-search listing endpoint.

    Parameters
    ----------
    hrefs : iterable of str
        Category paths such as ``/catalog/1312/soki-vody-napitki``. The first
        run of digits in each path is used as the category id.
    headers : dict, optional
        Request headers. When omitted, the notebook-level ``headers`` variable
        is looked up at request time, which is what the class did before this
        parameter existed.
    delay : float, optional
        Seconds to pause between consecutive page requests (default 1).
    timeout : float, optional
        Timeout in seconds passed to ``requests.get`` (default 30).

    Attributes
    ----------
    data : list of dict
        Accumulated product records with keys ``name``, ``price``, ``oldPrice``.
    """

    # Items requested per page; also the step used to track pagination.
    PAGE_SIZE = 30

    def __init__(self, hrefs, headers=None, delay=1, timeout=30):
        self.hrefs = hrefs
        self.headers = headers
        self.delay = delay
        self.timeout = timeout
        # Created here so query() can be called on its own; previously the
        # list only existed after query_all() had run.
        self.data = []

    @staticmethod
    def _clean_price(node):
        """Return the node's text reduced to digits, dots and commas.

        A falsy ``node`` (nothing matched) is returned unchanged.
        """
        if node:
            return re.sub(r"[^\d.,]", "", node.text())
        return node

    def _request_headers(self):
        """Return the headers to send, falling back to the notebook global."""
        if self.headers is not None:
            return self.headers
        return headers

    def parse_json(self, resp):
        """Yield one record per product found in ``resp["html"]``.

        Parameters
        ----------
        resp : dict
            Decoded response body; its ``"html"`` entry holds the listing markup.

        Yields
        ------
        dict
            ``{"name": ..., "price": ..., "oldPrice": ...}``; a field keeps the
            falsy lookup result when its element is not found.
        """
        dom = HTMLParser(resp["html"])

        for product in dom.css("ul#catalogItems > li"):
            title = product.css_first("div.xf-product-title > a")
            if title:
                title = title.text().strip()

            yield {
                "name": title,
                "price": self._clean_price(
                    product.css_first("div.xf-product-cost__current")
                ),
                "oldPrice": self._clean_price(
                    product.css_first("div.xf-product-cost__prev")
                ),
            }

    def query(self, href):
        """Fetch every page of one category and append its products to ``data``.

        Parameters
        ----------
        href : str
            Category path; must contain the numeric category id.

        Raises
        ------
        requests.HTTPError
            If a page request returns an error status.
        """
        category_id = re.search(r"\d+", href)[0]
        request_headers = self._request_headers()
        offset, n_page = 0, 1

        while True:
            params = {
                "use_brand_zone": "1",
                "limit": str(self.PAGE_SIZE),
                "category": category_id,
                "page": n_page,
                "sort": "rate_desc",
                "no_html": "false"
            }

            print(f"Sending {n_page} request")

            response = requests.get(
                f"https://www.vprok.ru/webapi/v1/category-search/{category_id}",
                headers=request_headers,
                params=params,
                timeout=self.timeout,
            )
            # Fail on HTTP errors instead of trying to decode an error page.
            response.raise_for_status()
            resp = response.json()

            self.data.extend(self.parse_json(resp))

            offset += self.PAGE_SIZE
            n_page += 1

            # "count" is the total number of items reported for the category.
            if offset >= resp["count"]:
                break

            # Pause only when another request is actually going to be sent.
            time.sleep(self.delay)

    def query_all(self):
        """Reset ``data`` and scrape every category in ``hrefs``."""
        self.data = []

        for href in self.hrefs:
            self.query(href)

In [461]:
# Keep only links whose path mentions "catalog"; other entries are dropped.
hrefs = list(filter(lambda href: "catalog" in href, hrefs))

# parser = Vprok(hrefs)
# parser.query_all()