In [95]:
import requests
import json
import re
import pandas as pd
import nest_asyncio
import time

from urllib.parse import urlencode

from tqdm import tqdm

from playwright.sync_api import sync_playwright
from selenium import webdriver
from selenium.webdriver.common.by import By

from selectolax.parser import HTMLParser

In [224]:
nest_asyncio.apply()

In [14]:
def parse_header(raw_header: str):
    header = dict()

    for line in raw_header.split("\n"):

        if line.startswith(":"):
            a, b = line[1:].split(":", 1)
            a = f":{a}"
        else:
            a, b = line.split(":",1)

        header[a.strip()] = b.strip()

    return header

<h3>Retrieve categories</h3>

In [72]:
category_html = """<div class="CatalogMenu_parents__Krpe1" bis_skin_checked="1"><a class="CatalogMenuLink_parentLink__5IG3T CatalogMenuLink_isActive__acRjg" href="/catalog/3547/skidki"><img class="CatalogMenuLink_icon__Fbn09" src="https://media.vprok.ru/content/orig/as/az/i67wlou2usg4b27omzqrixpd2uedazas.svg" alt="Скидки">Скидки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/6736/novinki">Новинки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1301/ovoschi-frukty-griby">Овощи, фрукты, ягоды</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1303/moloko-syr-yaytsa">Молоко, сыр, яйца</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2726/23-fevralya">23 февраля</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1307/myaso-ptitsa-delikatesy">Мясо, птица, колбасы</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1304/ryba-i-moreprodukty">Рыба, икра </a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/5175/gotovaya-eda">Готовая еда</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1312/soki-vody-napitki">Воды, соки, напитки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1305/tovary-dlya-mam-i-detey">Товары для мам и детей</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1306/krasota-gigiena-bytovaya-himiya">Красота и здоровье</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4371/chipsy-sneki-suhariki">Чипсы, снеки, орехи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4019/sladosti-i-sneki">Сладости</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1300/makarony-krupy-spetsii">Макароны, крупы, специи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1310/konservy-orehi-sousy">Соусы и консервация</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1309/hleb-sladosti-sneki">Хлеб и выпечка</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1302/kofe-chay-sahar">Чай, кофе, сахар</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1311/zamorojennye-produkty">Замороженные продукты</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/3453/zdorovoe-pitanie">Здоровое питание</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2348/bytovaya-himiya-i-hoztovary">Бытовая химия и гигиена</a><a class="CatalogMenuLink_parentLink__5IG3T CatalogMenuLink_isBold__SVw8F" href="https://zoo.vprok.ru/">Зоотовары</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/promo/tovary-dlya-doma">Товары для дома и дачи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4450/aptechka">Аптечка</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2561/bytovaya-tehnika">Бытовая техника</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1997/alkogol">Алкоголь</a></div>"""

In [74]:
dom = HTMLParser(category_html)
hrefs = [el.attributes["href"] for el in dom.css("a.CatalogMenuLink_parentLink__5IG3T")]

In [76]:
categories = [
    el for el in hrefs if "catalog" in el
]

<h3>Promo pages</h3>

In [15]:
headers = """accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
cookie: luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; is_pickup=0; addressChange=1; pickupZone=null; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; _gid=GA1.2.373746217.1676908872; _ym_isad=2; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDZlNjE4MTAxZGY4Y2QyODM1YmM3MTg4MGRmZjNiMzM5MDNlMGViZTIiLCJpYXQiOjE2NzY5MjM1MTAuMDY2NDAxLCJuYmYiOjE2NzY5MjM1MTAuMDY2NDAzLCJleHAiOjE2NzY5Mzc5MTAuMDUxMzU0LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.nVFAsOdeRecsDj-Y7All8KZ7oNGG94QqGwm24qiFG7Lb4_nTCG_eKjR4gG4Ir3BKjdKG-jfeLlkV5MGGuJXNsbdMqIZ-EE8Ec5Iiem0H0N4b7NtuHGmo-V6p-s_ymSXV_LHOfCCYAuajmj3vpnd_hr-_Pbv2J_UEvU6WU3yhdgisU-fUQS9L2imtQuwypphommHmGm8pVbR5HfjJS7h9hRVMUDbCIbQMb2mVSQbtV8iqXBGqzp6i3ZDEU41sGLGBqLnFBZjf-bPxeYZFLJUVMmUK2gsV9f2TITyp1NXfcJ2OofTKlOm_yFS4HyTWwAsPhB_B2l5_YKMOMx9q_ZEpU1EBcmI9L0PNtAOL58e3hEGU6iJLQGh4UoNIaCKHuV6Tw8wM39mPpkdD_-OBtjHcPJBHbddGWM6cwxY_yfF-doa0Ppk2hwlwkS6VAQVmi28Pl-z0wADqOM7chXg4s3438dQIudR0oGT3BbBbbiuIgrqTWDSd84db3BVE78lzu1CNEF8axWqU5MFeQtk86dvQAr0aooq7Bt_gy7Eyu6wCKdc7OF8CfHTbiN4ik5ZiLrmplmnXGtu30cOPkxSyLJVEXrOfuYbTCdEjwhZWzprfIs9sTwV4diAG0oHm11ejM2yYmtDOGQU2J0ZBUAo2LpTJLHh21OQFh1UxNBDIbvukVWo; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; x-next-route-destination=%2Fcatalog%2F1307%2Fmyaso-ptitsa-delikatesy%3Fsort%3Dpopularity_desc%26page%3D4; regionChange=1; _slsession=66873AAE-28CF-46B8-8D99-E5FC835EAD57; _ym_visorc=b; region=2; deliveryTypeId=1; standardShopId=2246; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1676932738858; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; short_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; flat=12; house=14; latLng=60.038537%2C30.344641; addressZone=16120; preview_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; XSRF-TOKEN=eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ%3D%3D; address_id=503576999; isHouse=eyJpdiI6ImtnMCtDaWhvVkthZXdkaWliYjhSQnc9PSIsInZhbHVlIjoiUXVOOG1rWkdIQUZCaFhHcUtGU1IyQT09IiwibWFjIjoiYWU1YjM2ZDczODkyMWY4YTU0NGRlNmUxYmZlYjYxMzliMzQ3ZmFjODM2MzhkZTJkMTFiNDIzNDlhNTFjNTM5NSJ9; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; shop=2246; aid=eyJpdiI6IklHbzNIZFwvXC8rUThMVUxnS21jMWdHUT09IiwidmFsdWUiOiJNMVJXZUpsUjM0aWxDaE1HenBWOGRmSytNdE03Y29abHppcElBdDFLampERkNobFVWenZKQ1wvVzgwcDRkMHdreU5mdGZaRFo0aHF3ZlwvNERrUUJkK2dRPT0iLCJtYWMiOiIzZTc5M2UyMWEyZjY3MjZmNDhlMGM4YzQ1NWQwMDEzNDhhMjgyN2ZjMDcxNWEyOTVjYTBjMzY5MjRjZGIyZDc3In0%3D; _ga_B122VKXXJE=GS1.1.1676932447.20.1.1676932793.2.0.0
referer: https://www.vprok.ru/promo/napitki?page=4&sort=rate_desc
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "macOS"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
x-api-context-address-id: 503576999
x-api-context-delivery-type-id: 1
x-api-context-region-id: 2
x-api-context-shop-id: 2246
x-xsrf-token: eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ=="""

In [456]:
with open("headers.json", "w") as file:
    json.dump(headers, file)

In [53]:
hrefs

NameError: name 'hrefs' is not defined

In [16]:
headers = parse_header(headers)

https://www.vprok.ru/catalog/1307/myaso-ptitsa-delikatesy

In [24]:
params = {
    "use_brand_zone": "1",
    "limit": "30",
    "category": "1307",
    "page": "1",
    "no_html": "false"
}

resp = requests.get(
    "https://www.vprok.ru/webapi/v1/category-search/1307",
    headers=headers,
    params=params
).json()

In [25]:
dom = HTMLParser(resp["html"])

In [26]:
products = dom.css("ul#catalogItems > li")

data = {
    "title": [],
    "prev_price": [],
    "cur_price": []
}

for product in products:
    title = product.css_first("div.xf-product-title > a")
    if title:
        title = title.text().strip()
        
    prev_price = product.css_first("div.xf-product-cost__prev")
    if prev_price:
        prev_price = re.sub("[^\d\.,]", "", prev_price.text())
        
    cur_price = product.css_first("div.xf-product-cost__current")
    if cur_price:
        cur_price = re.sub("[^\d\.,]", "", cur_price.text())
    
    data["title"].append(title)
    data["prev_price"].append(prev_price)
    data["cur_price"].append(cur_price)

In [27]:
df = pd.DataFrame(data)
df

Unnamed: 0,title,prev_price,cur_price
0,Окорок задний свиной 0.4-0.7кг,,419
1,Свинина духовая без кости 0.4-0.7кг,399.0,299
2,Филе куриное 0.8-1.2кг,389.0,289
3,Филе грудки индейки Индилайт 500г,,330
4,Филе грудки Петелинка куриное 0.6-0.9 кг,429.0,349
5,Окорочка куриные,,249
6,Корейка свиная без кости 0.8-1.2кг,439.0,339
7,Колбаса Вязанка Классическая вареная 500г,,269
8,Филе индейки 0.8-1.2кг,,529
9,"Тушка цыпленка-бройлера Петруха 1.8-2,3кг",,210


<h3>Category pages</h3>

In [152]:
headers = """accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
content-length: 65
content-type: application/json
cookie: luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; addressChange=1; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; _gid=GA1.2.373746217.1676908872; regionChange=1; flat=12; region=2; is_pickup=0; deliveryTypeId=1; standardShopId=2246; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2011; latLng=60.036142%2C30.345441; has_elevator=0; is_house=0; short_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2011; house=11; address_id=505072094; addressZone=16120; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; shop=2246; x-next-route-destination=%2Fcatalog%2F1307%2Fmyaso-ptitsa-delikatesy%3Fsort%3Dpopularity_desc%26page%3D9; _gat_UA-93122031-1=1; ngenix_jscv_a68b51100641=cookie_expires=1676990537&cookie_signature=0mMFcceWkOtdVOb5leBCY0qjBHo%3D; XSRF-TOKEN=eyJpdiI6Ik9tWFUrYlM0dWR1ajI4MmszcDZSTFE9PSIsInZhbHVlIjoieGZzRmtWTDNXYlY0M2I0cGNcL2lIcXVxZnV5cmROcjFrenlzd0dzOFBja3hSaHlMRGNyXC9aN1R4ZG5kU1wvbjBhOFNxXC9kaXRcL3Y4V28yeWhGdEROZnQ5Zz09IiwibWFjIjoiYjhhZTI3ODg2YjU5ZmNlY2NkMDk1MGM5ZGE5YWRiOTUyMGFiNjBhNTUzYTI1MGFhZTg1MWEzMDQwNGJjMTgzYyJ9; isHouse=eyJpdiI6IlFacTBnd0crRzgwWkR3SEdzbWpabVE9PSIsInZhbHVlIjoiY0NnQ2lCNlBPR1Q0dTdQcUxjb3ZjZz09IiwibWFjIjoiMmU1ZTNmMjk1ZDhlOTc4NTcxNjlhZjdiYzdjYjQ1YjFlNmFmYTg3ZWQ2Mjc0NjRlNTg3MTUxNDg4MDcxOGYwNSJ9; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDQzMWVmYTVjMWY5YTYyOGVjYmMwNDQyNTAwNmJmZTc0OWU2NTljNmYiLCJpYXQiOjE2NzY5ODY5NDEuNzA0NDgsIm5iZiI6MTY3Njk4Njk0MS43MDQ0ODIsImV4cCI6MTY3NzAwMTM0MS42OTc1NDYsInN1YiI6IiIsInNjb3BlcyI6W10sInNwbGl0X3NlZ21lbnQiOjl9.mOHhnomxZbIiytkUtH5JZ0ElYGaGG4IbqHVPSfOmWlxFe-wgAwUYheQdHXX2dT2doJQh913ieODlgBH7xsrRnna1z87UpNxKqFtRCbFhsu2dBXS-2K2OqRrSLgb8PWw0QZt4qk9wsxaUiGpEUX2hF-kLwpmrKCCI17VDjKZHFiBSFcuSSdcqO3ZXFiMBGZHo9iOFoP4JXtLx-kD9CJ4PPCa6TqsqUp7re9vgSmXQXr0VUx61XHGC0_iXpN0spnC5iAcsqeHlhhQhxg9wT7iQYFX0LxSipiHirRcVI1MCcIG_HnT48wjoXlQeSNoyqUpu7TfXA0m_m9zr95D6VHQuJKJ-SZ4tv9XHnhAlFpE4auzA1wGwmZkJl2iNSSjpO6uybeWrZHopUFtQF2He2pgHSusuOgoA-yNx8na1CPBDDxqPFTlm4jkm6qHu4Dekt-zoBtAX7xLb9jYttBKO9y_aPkqK4-k2_ibB4tDfeJk0o-eptONj6rTtuBXpSbjBN1_tIq4bDC-gF_ATIx4JPi2I9VpbWvjWN-D3pH_3vX58qIO8Ddi8Ui1cuw-xPyQqYh0U2l1ldEhU6Olc0mfcavj7sND1rBtVrpsGRaa4CVMgdM101OA6L25I6LcFO0ItDy7_WC2xNtPP05cFEGc7Ws89DBWJdVw9eVVfhhvCDPcbNDA; aid=eyJpdiI6Im1jV0J6NGJwOGgzYzM3V2pjTTh2UVE9PSIsInZhbHVlIjoiVDdoMGtBbWtFUm4wRlNFRmF2a3FkZnhtVHByZUxOYktoSk1sN3JHditcL2hhTlMwSlNvQU9NRk1jclYyYW5kQXF3a3JReTlxbzJBTE52dVR1WittMzhnPT0iLCJtYWMiOiJiZTY5NjVlYzBiMWJjOTJmNWZjNTVkN2ExZGYzNWE5MjU4NTNkZjVmODExNTZlOTQ5Zjg2OTViMmI2ODQ3ZGJkIn0%3D; cfidsgib-w-vprok=gM4n/S3Q2YXrM4nZHFlsnR8jX1hYdKx7r0od4Ec5XDsl1yXPFr1v66taP98dbOkpIHsASgwdC7haFXTEJ3u6NgX78oJ/5XujWr8mZaKZMVXel9PPQ1J3ijtlc+pa8wGBs5mLSn77ZPx+Dd9xq42F97XgO1hS7RZhnID/ji4=; _ym_isad=2; _ga=GA1.2.880547947.1675346399; cfidsgib-w-vprok=gM4n/S3Q2YXrM4nZHFlsnR8jX1hYdKx7r0od4Ec5XDsl1yXPFr1v66taP98dbOkpIHsASgwdC7haFXTEJ3u6NgX78oJ/5XujWr8mZaKZMVXel9PPQ1J3ijtlc+pa8wGBs5mLSn77ZPx+Dd9xq42F97XgO1hS7RZhnID/ji4=; cfidsgib-w-vprok=gM4n/S3Q2YXrM4nZHFlsnR8jX1hYdKx7r0od4Ec5XDsl1yXPFr1v66taP98dbOkpIHsASgwdC7haFXTEJ3u6NgX78oJ/5XujWr8mZaKZMVXel9PPQ1J3ijtlc+pa8wGBs5mLSn77ZPx+Dd9xq42F97XgO1hS7RZhnID/ji4=; gsscgib-w-vprok=855out4JQAfqdiypaHTUiRQyc/At81yq8QVhAA/JwL3GMHV0+RK9EUVMG2XNcd76luO7vo2cErL82oVyNgjIpYogSuK9OO/TedvkaI7/aISqVUEXW/zURuTFH84VV60AXxRi0xJ0PsqAxSREVEoVysNdHDbHyxYwp+udElI8chT6QkZGkKMQSqhze50aPnEj+clHi6PyUCpIOjyLc7TC8ndVAdJDrAeVID3WfeWXS5DzGGaNtFV5+NVYS3YzaSPeKmHZLe2Tiw==; gsscgib-w-vprok=855out4JQAfqdiypaHTUiRQyc/At81yq8QVhAA/JwL3GMHV0+RK9EUVMG2XNcd76luO7vo2cErL82oVyNgjIpYogSuK9OO/TedvkaI7/aISqVUEXW/zURuTFH84VV60AXxRi0xJ0PsqAxSREVEoVysNdHDbHyxYwp+udElI8chT6QkZGkKMQSqhze50aPnEj+clHi6PyUCpIOjyLc7TC8ndVAdJDrAeVID3WfeWXS5DzGGaNtFV5+NVYS3YzaSPeKmHZLe2Tiw==; _ym_visorc=w; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1676986955780; _ga_B122VKXXJE=GS1.1.1676986938.26.1.1676986964.34.0.0; _POBP_s=rum=1&id=ec53d8fe-b325-4e1a-94c5-140e380f6179&created=1676986516864&expire=1676987867567; fgsscgib-w-vprok=Xt5Lb759f5099c107a2f7da3352fec7a9084cb0c; fgsscgib-w-vprok=Xt5Lb759f5099c107a2f7da3352fec7a9084cb0c
origin: https://www.vprok.ru
referer: https://www.vprok.ru/catalog/1307/myaso-ptitsa-delikatesy?sort=popularity_desc&page=9
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "macOS"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
x-gib-fgsscgib-w-vprok: Xt5Lb759f5099c107a2f7da3352fec7a9084cb0c
x-gib-gsscgib-w-vprok: 855out4JQAfqdiypaHTUiRQyc/At81yq8QVhAA/JwL3GMHV0+RK9EUVMG2XNcd76luO7vo2cErL82oVyNgjIpYogSuK9OO/TedvkaI7/aISqVUEXW/zURuTFH84VV60AXxRi0xJ0PsqAxSREVEoVysNdHDbHyxYwp+udElI8chT6QkZGkKMQSqhze50aPnEj+clHi6PyUCpIOjyLc7TC8ndVAdJDrAeVID3WfeWXS5DzGGaNtFV5+NVYS3YzaSPeKmHZLe2Tiw==
x-xsrf-token: eyJpdiI6Ik9tWFUrYlM0dWR1ajI4MmszcDZSTFE9PSIsInZhbHVlIjoieGZzRmtWTDNXYlY0M2I0cGNcL2lIcXVxZnV5cmROcjFrenlzd0dzOFBja3hSaHlMRGNyXC9aN1R4ZG5kU1wvbjBhOFNxXC9kaXRcL3Y4V28yeWhGdEROZnQ5Zz09IiwibWFjIjoiYjhhZTI3ODg2YjU5ZmNlY2NkMDk1MGM5ZGE5YWRiOTUyMGFiNjBhNTUzYTI1MGFhZTg1MWEzMDQwNGJjMTgzYyJ9"""

In [153]:
headers = parse_header(headers)

In [87]:
import os

os.getcwd()

'/Users/borokoko/Crawlers'

In [154]:
with open("crawlers/Vprok/cfg/headers.json", "w") as file:
    json.dump(headers, file)

In [167]:
href = "/catalog/3547/skidki"

payload = json.dumps(
    {"noRedirect":True,"url": href}
)
    
category_id = re.search("\d+", href)[0]

params = {
    "sort": "popularity_desc",
    "limit": "60",
    "page": 35,
}

resp = requests.post(
    f"https://www.vprok.ru/web/api/v1/catalog/category/{category_id}?{urlencode(params)}",
#         params=params,
    data=payload,
    headers=headers
).json()

In [178]:
def nav_json(data, key_list):
    for key in key_list:
        if data:
            if key in data:
                data = data[key]
        else:
            return None
    return data

In [180]:
nav_json(resp["products"][46], ["category", "name"])

'Мясо'

In [177]:
resp["products"][47]

{'productId': 432009,
 'url': '/product/bitey-t-a-bit-bat-yabl-vish-fryag-25g--432009',
 'name': 'Батончик Bitey Яблоко-вишня без глютена 25г',
 'images': [{'name': 'фото 1',
   'url': 'https://media.vprok.ru/products/<SIZE>/hm/mm/khdx5mr7xfyprvro2bjzbd55dcwammhm.jpeg'},
  {'name': 'фото 2',
   'url': 'https://media.vprok.ru/products/<SIZE>/i7/qz/pgotkigyjgitxwqdregg4mep3iluqzi7.jpeg'}],
 'quantumImages': [],
 'isNew': False,
 'isBought': False,
 'isAdult': False,
 'isAlcohol': False,
 'isFractional': False,
 'isFractionalNominal': False,
 'isSpecialPrice': False,
 'isPersonalPrice': False,
 'rating': 4.8,
 'reviews': 13,
 'isFavorite': False,
 'isSubscribed': False,
 'price': 48.9,
 'oldPrice': 65.9,
 'unitPrice': 0,
 'discount': 17,
 'discountPercent': 26,
 'fractionTextPrice': 'шт',
 'unitFractionTextPrice': 'шт',
 'fractionText': 'шт',
 'qtyMin': 1,
 'qtyMax': 99000,
 'fraction': 1,
 'quantum': None,
 'activityStatus': 'available',
 'delivery': {'minProductDeliveryDate': '2023-02-2

In [173]:
for i, product in enumerate(resp["products"]):
    if not product["category"]:
        print(i)
        print(product)

47
{'productId': 432009, 'url': '/product/bitey-t-a-bit-bat-yabl-vish-fryag-25g--432009', 'name': 'Батончик Bitey Яблоко-вишня без глютена 25г', 'images': [{'name': 'фото 1', 'url': 'https://media.vprok.ru/products/<SIZE>/hm/mm/khdx5mr7xfyprvro2bjzbd55dcwammhm.jpeg'}, {'name': 'фото 2', 'url': 'https://media.vprok.ru/products/<SIZE>/i7/qz/pgotkigyjgitxwqdregg4mep3iluqzi7.jpeg'}], 'quantumImages': [], 'isNew': False, 'isBought': False, 'isAdult': False, 'isAlcohol': False, 'isFractional': False, 'isFractionalNominal': False, 'isSpecialPrice': False, 'isPersonalPrice': False, 'rating': 4.8, 'reviews': 13, 'isFavorite': False, 'isSubscribed': False, 'price': 48.9, 'oldPrice': 65.9, 'unitPrice': 0, 'discount': 17, 'discountPercent': 26, 'fractionTextPrice': 'шт', 'unitFractionTextPrice': 'шт', 'fractionText': 'шт', 'qtyMin': 1, 'qtyMax': 99000, 'fraction': 1, 'quantum': None, 'activityStatus': 'available', 'delivery': {'minProductDeliveryDate': '2023-02-21T00:00:00+00:00', 'lastMileType': 

In [132]:
for href in tqdm(categories):
    
    payload = json.dumps(
        {"noRedirect":True,"url":"/catalog/3547/skidki"}
    )
    
    category_id = re.search("\d+", href)[0]

    params = {
        "sort": "popularity_desc",
        "limit": "60",
        "page": 1,
    }

    resp = requests.post(
        f"https://www.vprok.ru/web/api/v1/catalog/category/{3547}?{urlencode(params)}",
#         params=params,
        data=payload,
        headers=headers
    ).json()

100%|███████████████████████████████████████████| 23/23 [00:12<00:00,  1.86it/s]


In [97]:
/catalog/3547/skidki

'https://www.vprok.ru/web/api/v1/catalog/category/5175?sort=popularity_desc&limit=60&page=1'

In [151]:
href

'/catalog/1997/alkogol'

In [147]:
resp["categories"]

[{'id': 1, 'name': 'Каталог', 'slug': '', 'itemCount': 1177, 'parentId': 0},
 {'id': 1997,
  'name': 'Алкоголь',
  'slug': 'alkogol',
  'itemCount': 1177,
  'parentId': 1},
 {'id': 2104,
  'name': 'Пиво',
  'slug': 'pivo',
  'itemCount': 219,
  'parentId': 1997},
 {'id': 1999,
  'name': 'Вино',
  'slug': 'vino',
  'itemCount': 204,
  'parentId': 1997},
 {'id': 2114,
  'name': 'Водка',
  'slug': 'vodka',
  'itemCount': 187,
  'parentId': 1997},
 {'id': 2117,
  'name': 'Коньяк и коньячные напитки',
  'slug': 'konyak-i-konyachnye-napitki',
  'itemCount': 181,
  'parentId': 1997},
 {'id': 2130,
  'name': 'Коньяк',
  'slug': 'konyak',
  'itemCount': 161,
  'parentId': 2117},
 {'id': 2123,
  'name': 'Виски, Бурбон',
  'slug': 'viski-burbon',
  'itemCount': 153,
  'parentId': 1997},
 {'id': 2000,
  'name': 'Ликеро-водочные напитки',
  'slug': 'likero-vodochnye-napitki',
  'itemCount': 141,
  'parentId': 1997},
 {'id': 2126,
  'name': 'Настойки',
  'slug': 'nastoyki',
  'itemCount': 55,
  'par

In [150]:
resp["products"][2]

{'productId': 306557,
 'url': '/product/strongbow-sidr-strongbou-slad-gaz-4-5-0-4l--306557',
 'name': 'Сидр Strongbow сладкий 4.5% 0.4л',
 'images': [{'name': 'фото 1',
   'url': 'https://media.vprok.ru/products/<SIZE>/jd/54/kdmf3d7n4c5n2ragihigqwk76kvx54jd.jpeg'}],
 'quantumImages': [],
 'isNew': False,
 'isBought': False,
 'isAdult': True,
 'isAlcohol': True,
 'isFractional': False,
 'isFractionalNominal': False,
 'isSpecialPrice': False,
 'isPersonalPrice': False,
 'rating': 4.7,
 'reviews': 56,
 'isFavorite': False,
 'isSubscribed': False,
 'price': 71.9,
 'oldPrice': 0,
 'unitPrice': 0,
 'discount': 0,
 'discountPercent': 0,
 'fractionTextPrice': 'шт',
 'unitFractionTextPrice': 'шт',
 'fractionText': 'шт',
 'qtyMin': 1,
 'qtyMax': 99000,
 'fraction': 1,
 'quantum': None,
 'activityStatus': 'available',
 'delivery': {'minProductDeliveryDate': '2023-02-22T00:00:00+00:00',
  'lastMileType': 'courier',
  'lastMileTypeId': 1,
  'expressAvailable': False,
  'courierAvailable': True},
 '

In [267]:
# resp["products"]

In [432]:
class Vprok:
    
    def __init__(self, hrefs):
        self.hrefs = hrefs
        
    def parse_json(self, resp):
        
        dom = HTMLParser(resp["html"])
        products = dom.css("ul#catalogItems > li")
        
        for product in products:
    
            title = product.css_first("div.xf-product-title > a")
            if title:
                title = title.text().strip()

            prev_price = product.css_first("div.xf-product-cost__prev")
            if prev_price:
                prev_price = re.sub("[^\d\.,]", "", prev_price.text())

            cur_price = product.css_first("div.xf-product-cost__current")
            if cur_price:
                cur_price = re.sub("[^\d\.,]", "", cur_price.text())
            
            yield {
                "name": title,
                "price": cur_price,
                "oldPrice": prev_price,
            }
        
    def query(self, href):
        
        category_id = re.search("\d+", href)[0]
        offset, n_page = 0, 1
    
        while True:
            
            params = {
                "use_brand_zone": "1",
                "limit": "30",
                "category": category_id,
                "page": n_page,
                "sort": "rate_desc",
                "no_html": "false"
            }
                
            print(f"Sending {n_page} request")

            resp = requests.get(
                f"https://www.vprok.ru/webapi/v1/category-search/{category_id}",
                headers=headers,
                params=params
            ).json()
            
            for product in self.parse_json(resp):
                self.data.append(product)
            
            time.sleep(1)
            
            offset += 30
            n_page += 1
            
            if offset >= resp["count"]:
                break
            
    
    def query_all(self):
        self.data = []
        
        for href in self.hrefs:
            self.query(href)

In [461]:
hrefs = [href for href in hrefs if "catalog" in href]

# parser = Vprok(hrefs)
# parser.query_all()

In [463]:
with open("cfg/categories.json", "w") as file:
    json.dump({"categories": hrefs}, file)

In [439]:
with open("data.json", "w") as file:
    json.dump(parser.data, file)

In [364]:
import pandas as pd

In [442]:
df = pd.read_json("data.json")
df.head(2)

Unnamed: 0,name,price,oldPrice
0,Помидоры Розовые 500г упаковка,159,
1,Форель охлажденная потрошеная 1-2кг,639,899.0


In [443]:
df[df.price.notna()]

Unnamed: 0,name,price,oldPrice
0,Помидоры Розовые 500г упаковка,159,
1,Форель охлажденная потрошеная 1-2кг,639,899
2,Окорок задний свиной 0.4-0.7кг,299,
3,Масло сливочное Экомилк 82.5% 180г,1299,199
4,Молоко ЭкоНива ультрапастеризованное 3.2% 1л,799,
...,...,...,...
41928,Вентилятор настольный Centek CT-5003 White 19с...,999,
41929,Вентилятор Centek CT-5040 настольный 14см,699,
41930,Блендер Moulinex LM16L110,8999,
42112,Напиток Bosca Anniversary белый полусухой беза...,566,


In [446]:
cookie_str = "luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; is_pickup=0; addressChange=1; pickupZone=null; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; _gid=GA1.2.373746217.1676908872; _ym_isad=2; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDZlNjE4MTAxZGY4Y2QyODM1YmM3MTg4MGRmZjNiMzM5MDNlMGViZTIiLCJpYXQiOjE2NzY5MjM1MTAuMDY2NDAxLCJuYmYiOjE2NzY5MjM1MTAuMDY2NDAzLCJleHAiOjE2NzY5Mzc5MTAuMDUxMzU0LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.nVFAsOdeRecsDj-Y7All8KZ7oNGG94QqGwm24qiFG7Lb4_nTCG_eKjR4gG4Ir3BKjdKG-jfeLlkV5MGGuJXNsbdMqIZ-EE8Ec5Iiem0H0N4b7NtuHGmo-V6p-s_ymSXV_LHOfCCYAuajmj3vpnd_hr-_Pbv2J_UEvU6WU3yhdgisU-fUQS9L2imtQuwypphommHmGm8pVbR5HfjJS7h9hRVMUDbCIbQMb2mVSQbtV8iqXBGqzp6i3ZDEU41sGLGBqLnFBZjf-bPxeYZFLJUVMmUK2gsV9f2TITyp1NXfcJ2OofTKlOm_yFS4HyTWwAsPhB_B2l5_YKMOMx9q_ZEpU1EBcmI9L0PNtAOL58e3hEGU6iJLQGh4UoNIaCKHuV6Tw8wM39mPpkdD_-OBtjHcPJBHbddGWM6cwxY_yfF-doa0Ppk2hwlwkS6VAQVmi28Pl-z0wADqOM7chXg4s3438dQIudR0oGT3BbBbbiuIgrqTWDSd84db3BVE78lzu1CNEF8axWqU5MFeQtk86dvQAr0aooq7Bt_gy7Eyu6wCKdc7OF8CfHTbiN4ik5ZiLrmplmnXGtu30cOPkxSyLJVEXrOfuYbTCdEjwhZWzprfIs9sTwV4diAG0oHm11ejM2yYmtDOGQU2J0ZBUAo2LpTJLHh21OQFh1UxNBDIbvukVWo; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; x-next-route-destination=%2Fcatalog%2F1307%2Fmyaso-ptitsa-delikatesy%3Fsort%3Dpopularity_desc%26page%3D4; regionChange=1; _slsession=66873AAE-28CF-46B8-8D99-E5FC835EAD57; _ym_visorc=b; region=2; deliveryTypeId=1; standardShopId=2246; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1676932738858; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; short_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; flat=12; house=14; latLng=60.038537%2C30.344641; addressZone=16120; preview_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; XSRF-TOKEN=eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ%3D%3D; address_id=503576999; isHouse=eyJpdiI6ImtnMCtDaWhvVkthZXdkaWliYjhSQnc9PSIsInZhbHVlIjoiUXVOOG1rWkdIQUZCaFhHcUtGU1IyQT09IiwibWFjIjoiYWU1YjM2ZDczODkyMWY4YTU0NGRlNmUxYmZlYjYxMzliMzQ3ZmFjODM2MzhkZTJkMTFiNDIzNDlhNTFjNTM5NSJ9; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; shop=2246; aid=eyJpdiI6IklHbzNIZFwvXC8rUThMVUxnS21jMWdHUT09IiwidmFsdWUiOiJNMVJXZUpsUjM0aWxDaE1HenBWOGRmSytNdE03Y29abHppcElBdDFLampERkNobFVWenZKQ1wvVzgwcDRkMHdreU5mdGZaRFo0aHF3ZlwvNERrUUJkK2dRPT0iLCJtYWMiOiIzZTc5M2UyMWEyZjY3MjZmNDhlMGM4YzQ1NWQwMDEzNDhhMjgyN2ZjMDcxNWEyOTVjYTBjMzY5MjRjZGIyZDc3In0%3D; _ga_B122VKXXJE=GS1.1.1676932447.20.1.1676932793.2.0.0"

In [449]:
cookies = {}

for el in cookie_str.split("; "):
    key, val = el.split("=", 1)
    cookies[key] = val
    
cookies

{'luuid': 'b6415383-72e6-46fb-beb2-8c8c1394cb8d',
 'suuid': 'd4cdf4a8-997e-406f-b961-cc6ba95b832b',
 'split_segment': '9',
 'split_segment_amount': '11',
 'tmr_lvid': 'f140f2d66e129d40e4de4201c33196b8',
 'tmr_lvidTS': '1675346400028',
 '_ym_uid': '1675346400242416480',
 '_ym_d': '1675346400',
 'flocktory-uuid': '0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0',
 'iap.uid': '9365c0a9c73945d28c6909efdbe2b002',
 '__zzatgib-w-vprok': 'MDA0dBA=Fz2+aQ==',
 'noHouse': '0',
 'fcf': '3',
 'isUserAgreeCookiesPolicy': 'true',
 'hide_banner_block_1': 'true',
 'ngenix_valid': '633e3888e19035e396ed68f8522b7e42',
 'is_pickup': '0',
 'addressChange': '1',
 'pickupZone': 'null',
 'pickupAvailable': '0',
 '_slid': '63e4c981c29837d7f10a66f9',
 '_slid_server': '63e4c981c29837d7f10a66f9',
 '_gid': 'GA1.2.373746217.1676908872',
 '_ym_isad': '2',
 'gsscgib-w-vprok': 'cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtn

In [2]:
import os
import pandas as pd

In [4]:
os.chdir("/Users/borokoko/Crawlers/")

In [182]:
df_1 = pd.read_json("data/data_hudozhnikov.json")
df_1 = df_1.drop_duplicates()

In [190]:
df_1.groupby("category")["price"].mean().to_frame()

Unnamed: 0_level_0,price
category,Unnamed: 1_level_1
Алкоголь,535.975
Аптечка,1592.38704
Бытовая техника,2770.300719
Бытовая химия и гигиена,422.037454
"Воды, соки, напитки",135.263109
Готовая еда,190.502613
Замороженные продукты,318.995598
Здоровое питание,196.044946
Зоотовары,496.417637
Каталог,276.264672


In [205]:
df_disc = df_1[df_1["price"].notna() & df_1["oldPrice"] != 0].copy()

df_disc["discount"] = (df_disc["oldPrice"] - df_disc["price"]) / df_disc["price"]

In [209]:
df_disc.groupby("category")["discount"].mean().to_frame()

Unnamed: 0_level_0,discount
category,Unnamed: 1_level_1
Аптечка,0.193966
Бытовая техника,0.529281
Бытовая химия и гигиена,0.615875
"Воды, соки, напитки",0.338813
Готовая еда,0.248852
Замороженные продукты,0.357785
Здоровое питание,0.428619
Зоотовары,0.288556
Каталог,0.347648
Красота и здоровье,0.433481
