In [46]:
import requests
import json
import re
import pandas as pd
import nest_asyncio
import time

from urllib.parse import urlencode

from tqdm import tqdm

from playwright.sync_api import sync_playwright
from selenium import webdriver
from selenium.webdriver.common.by import By

from selectolax.parser import HTMLParser

In [224]:
nest_asyncio.apply()

In [51]:
def parse_header(raw_header: str):
    header = dict()

    for line in raw_header.split("\n"):

        if line.startswith(":"):
            a, b = line[1:].split(":", 1)
            a = f":{a}"
        else:
            a, b = line.split(":",1)

        header[a.strip()] = b.strip()

    return header

<h3>Retrieve categories</h3>

In [72]:
category_html = """<div class="CatalogMenu_parents__Krpe1" bis_skin_checked="1"><a class="CatalogMenuLink_parentLink__5IG3T CatalogMenuLink_isActive__acRjg" href="/catalog/3547/skidki"><img class="CatalogMenuLink_icon__Fbn09" src="https://media.vprok.ru/content/orig/as/az/i67wlou2usg4b27omzqrixpd2uedazas.svg" alt="Скидки">Скидки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/6736/novinki">Новинки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1301/ovoschi-frukty-griby">Овощи, фрукты, ягоды</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1303/moloko-syr-yaytsa">Молоко, сыр, яйца</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2726/23-fevralya">23 февраля</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1307/myaso-ptitsa-delikatesy">Мясо, птица, колбасы</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1304/ryba-i-moreprodukty">Рыба, икра </a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/5175/gotovaya-eda">Готовая еда</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1312/soki-vody-napitki">Воды, соки, напитки</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1305/tovary-dlya-mam-i-detey">Товары для мам и детей</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1306/krasota-gigiena-bytovaya-himiya">Красота и здоровье</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4371/chipsy-sneki-suhariki">Чипсы, снеки, орехи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4019/sladosti-i-sneki">Сладости</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1300/makarony-krupy-spetsii">Макароны, крупы, специи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1310/konservy-orehi-sousy">Соусы и консервация</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1309/hleb-sladosti-sneki">Хлеб и выпечка</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1302/kofe-chay-sahar">Чай, кофе, сахар</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1311/zamorojennye-produkty">Замороженные продукты</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/3453/zdorovoe-pitanie">Здоровое питание</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2348/bytovaya-himiya-i-hoztovary">Бытовая химия и гигиена</a><a class="CatalogMenuLink_parentLink__5IG3T CatalogMenuLink_isBold__SVw8F" href="https://zoo.vprok.ru/">Зоотовары</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/promo/tovary-dlya-doma">Товары для дома и дачи</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/4450/aptechka">Аптечка</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/2561/bytovaya-tehnika">Бытовая техника</a><a class="CatalogMenuLink_parentLink__5IG3T" href="/catalog/1997/alkogol">Алкоголь</a></div>"""

In [74]:
dom = HTMLParser(category_html)
hrefs = [el.attributes["href"] for el in dom.css("a.CatalogMenuLink_parentLink__5IG3T")]

In [76]:
categories = [
    el for el in hrefs if "catalog" in el
]

<h3>Promo pages</h3>

In [50]:
headers = """accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
cookie: luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; addressChange=1; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; regionChange=1; region=2; is_pickup=0; deliveryTypeId=1; standardShopId=2246; has_elevator=0; is_house=0; _gid=GA1.2.1986293396.1677079658; flat=2; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDUzMTdlZDMwNzg0MDNlZTNiNTliNWI3ZmI4ZmI3MmY4ZDUxNjkyOGIiLCJpYXQiOjE2NzcxNDcyMzYuNjYyNDE4LCJuYmYiOjE2NzcxNDcyMzYuNjYyNDIxLCJleHAiOjE2NzcxNjE2MzYuNjU2NjU0LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.ls6DK6ItaiJjBeXrzJuFquHcLeg-93dR86kJl7nTGGOyF4q-pdwWbuJtWVyvBTVequ7nfVShnu2f1UR8b3l-PoKLEuz9Jky914njeqijtSFyeioZzG4iGgNTRTp9udEZZzqM5KYX2aJXjy9dNkuefnQyFWri2LXotUc2vMscOeOfbSRXRkwh0GAnnDGa4-lOTp7tMSEimACh0OadWqcgZwmObGaV6EMSoKtQtVvXH-qb6lXhTheKLWHKtf_sS-YIrxuT0xLDJtr8sZcDg-QVqhnPKsuAf8WydM3Bm4qwv4FeAiDXGp-eNRt-QiCtCoIIX0rLPHot4Vh6yfnARSiEVtR3s4OtqiB6zWQuv4UHs4cZ5BZH0nPDnL-995FLcqr3Hf4iHbyFCPbIe5Arxudb9TQhJH3dz61X62ILEI5FA0Um-7wODQxqwkTTH2XGcG8utrF9T3WnQJoKroubQxhw0VyHZJow0c-rotg4kkxCH-s-ftAFzYGgFPtfWFF7foa2XZ5i4FTy21KNbcj5rDwnmJp8PnYjj2VCORYkaq4z8bsjXZ6tOdhzQ_w8G-IycJBGRs0T3M5GXh9Z-2FyQ3J-5pdUjzqiWwM-mK2oMVDd0ypceC8bunvRPhmDc2iViAtxOj9jwkLDXpfQGDfWpYA3L-mk7hEQCMTLIcTiqio4M3g; _ym_isad=2; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%9D%D0%B0%D1%80%D0%BE%D0%B4%D0%BD%D0%B0%D1%8F%20%D1%83%D0%BB%D0%B8%D1%86%D0%B0%2C%2016; latLng=59.880979%2C30.464351; short_address=%D0%9D%D0%B0%D1%80%D0%BE%D0%B4%D0%BD%D0%B0%D1%8F%20%D1%83%D0%BB%D0%B8%D1%86%D0%B0%2C%2016; house=16; address_id=510727731; addressZone=16123; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%AE%D0%B3%D0%BE-%D0%92%D0%BE%D1%81%D1%82%D0%BE%D0%BA; shop=2246; x-next-route-destination=%2Fcatalog%2F1301%2Fovoschi-frukty-griby%3Fsort%3Dpopularity_desc%26page%3D5; _gat_UA-93122031-1=1; ngenix_jscv_a68b51100641=cookie_signature=eVYbcC1P7H84Ozis3KaCnAzH%2Frg%3D&cookie_expires=1677164684; XSRF-TOKEN=eyJpdiI6IlwvWk9EMmpBXC9hQVVuenZTUmM2WWExUT09IiwidmFsdWUiOiIwTzNKNWVwa0R1bmlYMjZnMElmc3RTaUlxVEFDZ3d3bXNQUVoyNlBBTEh5ZE1BUldJeDdYRExnRHlMVnVyWkV2SlU3N3R6ZWQzRlVvMkZBejlNdDAydz09IiwibWFjIjoiNTU1M2IwZWU0NzI3OTY0NWY1ZDUxMTkyNGM4MDljZDc4NDY0NjVkNGU2M2RiYWZkYWNjMWNjN2I0NWMxN2EwNCJ9; isHouse=eyJpdiI6ImYzUjMwTXVMSVNiNVBTaU9valwvcXFBPT0iLCJ2YWx1ZSI6IkthNUE0SUlmWlpsT1kxR0hFT1lSVUE9PSIsIm1hYyI6ImRlMmY5Mzg1MjIyN2E3Y2E2YTdlMmZiYjZkYjVlNjM2YzZkYmFlMjgwNjM0NWM1M2YwMjdiOGM3ZjFkZWYzODcifQ%3D%3D; aid=eyJpdiI6IlAzWWtqM0dhNFd1V3NVcEhVdzVUXC9nPT0iLCJ2YWx1ZSI6IlBncXhwblhFcEplcnVTVXFOTWdVQmdzckI2bTJGaWJFcHpQRzM3SEt5RVBSOWhBbVV2bFJUSVVMQWQ5dHdEVmp3YmRKT1ltR08ra1RpRDZqbFJKckVnPT0iLCJtYWMiOiJiMWNjNDRiYmIyNDI5MjhkMGQ2OGM0OTQyMWJmNTYyNWQwMTIwYzg2MmI4YzhkMWYzNDhkMTFmNTNmMWE1ZGZkIn0%3D; _ga_B122VKXXJE=GS1.1.1677161084.33.1.1677161097.47.0.0; _ga=GA1.2.880547947.1675346399; cfidsgib-w-vprok=bdMgkwp6y5sVBdNNW1eTBl8T2je+sGvkFGEo2H7FfdLXUmrEjQN9PHsf/GBLEgKTuQMWyWALogzkPXN3Mh+tfmcAKXeOuIa2pU7lKlv12Bq7sivaSWGOQbypJMUkJ5AW2Tn7t+TPKDDys7j4SzSiQHryvXpbnTbFQdaRXXo=; cfidsgib-w-vprok=bdMgkwp6y5sVBdNNW1eTBl8T2je+sGvkFGEo2H7FfdLXUmrEjQN9PHsf/GBLEgKTuQMWyWALogzkPXN3Mh+tfmcAKXeOuIa2pU7lKlv12Bq7sivaSWGOQbypJMUkJ5AW2Tn7t+TPKDDys7j4SzSiQHryvXpbnTbFQdaRXXo=; cfidsgib-w-vprok=bdMgkwp6y5sVBdNNW1eTBl8T2je+sGvkFGEo2H7FfdLXUmrEjQN9PHsf/GBLEgKTuQMWyWALogzkPXN3Mh+tfmcAKXeOuIa2pU7lKlv12Bq7sivaSWGOQbypJMUkJ5AW2Tn7t+TPKDDys7j4SzSiQHryvXpbnTbFQdaRXXo=; gsscgib-w-vprok=uNoYWkKOcBU5a1sKkuZf1krPo6uD3lo86k7B14e5p5ZItdkVAt1DrUFL9J91PGaHeiNwKpowEdsOic7R6rlmReTf14vuwYc4I50EaaZCW6jlPgSXzso7dbyKvBtTDn5Ljd3/0szrxzgrBsnq3T0ul8X6xDpHk5amip4dXfmB0+V62ycxR+a0+kTiyywMlq3AXea9Ec/8FLRTFaAS0TsT5fwwH/LQ2a2ERHgoyvbEwdxmVDw5lGx9oHzQbWAx8GkVSCZQHXTdrSTVk5cOqx4M; gsscgib-w-vprok=uNoYWkKOcBU5a1sKkuZf1krPo6uD3lo86k7B14e5p5ZItdkVAt1DrUFL9J91PGaHeiNwKpowEdsOic7R6rlmReTf14vuwYc4I50EaaZCW6jlPgSXzso7dbyKvBtTDn5Ljd3/0szrxzgrBsnq3T0ul8X6xDpHk5amip4dXfmB0+V62ycxR+a0+kTiyywMlq3AXea9Ec/8FLRTFaAS0TsT5fwwH/LQ2a2ERHgoyvbEwdxmVDw5lGx9oHzQbWAx8GkVSCZQHXTdrSTVk5cOqx4M; _ym_visorc=w; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1677161101520; _POBP_s=rum=1&id=f730c754-4e10-4c70-a732-db6517dbf713&created=1677161062676&expire=1677162015591; fgsscgib-w-vprok=hBtQ97a63b34b641364b314cb9d7bf18c811ca0e; fgsscgib-w-vprok=hBtQ97a63b34b641364b314cb9d7bf18c811ca0e
referer: https://www.vprok.ru/promo/napitki?page=4&sort=rate_desc
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "macOS"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
x-api-context-address-id: 503576999
x-api-context-delivery-type-id: 1
x-api-context-region-id: 2
x-api-context-shop-id: 2246
x-xsrf-token: eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ=="""

In [456]:
with open("headers.json", "w") as file:
    json.dump(headers, file)

In [53]:
hrefs

NameError: name 'hrefs' is not defined

In [16]:
headers = parse_header(headers)

https://www.vprok.ru/catalog/1307/myaso-ptitsa-delikatesy

In [49]:
params = {
    "use_brand_zone": "1",
    "limit": "30",
    "category": "1307",
    "page": "1",
    "no_html": "false"
}

resp = requests.get(
    "https://www.vprok.ru/webapi/v1/category-search/1307",
    headers=headers,
    params=params
)

AttributeError: 'str' object has no attribute 'items'

In [25]:
dom = HTMLParser(resp["html"])

In [26]:
products = dom.css("ul#catalogItems > li")

data = {
    "title": [],
    "prev_price": [],
    "cur_price": []
}

for product in products:
    title = product.css_first("div.xf-product-title > a")
    if title:
        title = title.text().strip()
        
    prev_price = product.css_first("div.xf-product-cost__prev")
    if prev_price:
        prev_price = re.sub("[^\d\.,]", "", prev_price.text())
        
    cur_price = product.css_first("div.xf-product-cost__current")
    if cur_price:
        cur_price = re.sub("[^\d\.,]", "", cur_price.text())
    
    data["title"].append(title)
    data["prev_price"].append(prev_price)
    data["cur_price"].append(cur_price)

In [27]:
df = pd.DataFrame(data)
df

Unnamed: 0,title,prev_price,cur_price
0,Окорок задний свиной 0.4-0.7кг,,419
1,Свинина духовая без кости 0.4-0.7кг,399.0,299
2,Филе куриное 0.8-1.2кг,389.0,289
3,Филе грудки индейки Индилайт 500г,,330
4,Филе грудки Петелинка куриное 0.6-0.9 кг,429.0,349
5,Окорочка куриные,,249
6,Корейка свиная без кости 0.8-1.2кг,439.0,339
7,Колбаса Вязанка Классическая вареная 500г,,269
8,Филе индейки 0.8-1.2кг,,529
9,"Тушка цыпленка-бройлера Петруха 1.8-2,3кг",,210


<h3>Category pages</h3>

In [65]:
headers = """accept: application/json, text/plain, */*
accept-encoding: gzip, deflate, br
accept-language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
content-length: 62
content-type: application/json
cookie: luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; addressChange=1; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; regionChange=1; region=2; is_pickup=0; deliveryTypeId=1; standardShopId=2246; has_elevator=0; is_house=0; _gid=GA1.2.1986293396.1677079658; flat=2; _ym_isad=2; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%9D%D0%B0%D1%80%D0%BE%D0%B4%D0%BD%D0%B0%D1%8F%20%D1%83%D0%BB%D0%B8%D1%86%D0%B0%2C%2016; latLng=59.880979%2C30.464351; short_address=%D0%9D%D0%B0%D1%80%D0%BE%D0%B4%D0%BD%D0%B0%D1%8F%20%D1%83%D0%BB%D0%B8%D1%86%D0%B0%2C%2016; house=16; address_id=510727731; addressZone=16123; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%AE%D0%B3%D0%BE-%D0%92%D0%BE%D1%81%D1%82%D0%BE%D0%BA; shop=2246; ngenix_jscv_a68b51100641=cookie_signature=eVYbcC1P7H84Ozis3KaCnAzH%2Frg%3D&cookie_expires=1677164684; _ga=GA1.2.880547947.1675346399; _ym_visorc=w; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1677161101520; x-next-route-destination=%2Fcatalog%2F1301%2Fovoschi-frukty-griby%3Fsort%3Dpopularity_desc%26page%3D6; _ga_B122VKXXJE=GS1.1.1677161084.33.1.1677161120.24.0.0; gssc777781=; cfidsgib-w-vprok=6nLG8I8+fwiiHDIQsbXjJ7pvKyWmcYbtVtu3wEMrusfU1gFTaMQsbBK2R27uwl+VQ6KKR/Ue/U+iTOWWayDBfv7V6QjVxUU6rYehiweIlDcs2vBZ/X/5APO0t5deScnpsK2cc1EpoqEI0FW5/qAgkrYYA58QOXg8NLTtRu4=; cfidsgib-w-vprok=6nLG8I8+fwiiHDIQsbXjJ7pvKyWmcYbtVtu3wEMrusfU1gFTaMQsbBK2R27uwl+VQ6KKR/Ue/U+iTOWWayDBfv7V6QjVxUU6rYehiweIlDcs2vBZ/X/5APO0t5deScnpsK2cc1EpoqEI0FW5/qAgkrYYA58QOXg8NLTtRu4=; cfidsgib-w-vprok=6nLG8I8+fwiiHDIQsbXjJ7pvKyWmcYbtVtu3wEMrusfU1gFTaMQsbBK2R27uwl+VQ6KKR/Ue/U+iTOWWayDBfv7V6QjVxUU6rYehiweIlDcs2vBZ/X/5APO0t5deScnpsK2cc1EpoqEI0FW5/qAgkrYYA58QOXg8NLTtRu4=; gsscgib-w-vprok=qCB3awXvv2J6VdV91ssAECys3ifE3SOks/DZig/ItIDmmI1j4jBfKm69Ff7KLgp75Pssau3B3HeNjBXz5mMxLZ9njcFpsvtAC9a4nSv7Dksj/SSYO8wW0IUVrVfSz4jNXM82I5/LmSEKzQLVn8nlTEgmMTDzTjDcaO843eXQ/JAb+sHmnA6Gl4kc5je0MvYmSz4uD02bDNrjnIDtnhwxZqJ3G10HGSCBc5CSTTK/AJbI+ENWsF297c3zHlBxKAgPLPObWKbdSTtjZRZiaOKsow==; gsscgib-w-vprok=qCB3awXvv2J6VdV91ssAECys3ifE3SOks/DZig/ItIDmmI1j4jBfKm69Ff7KLgp75Pssau3B3HeNjBXz5mMxLZ9njcFpsvtAC9a4nSv7Dksj/SSYO8wW0IUVrVfSz4jNXM82I5/LmSEKzQLVn8nlTEgmMTDzTjDcaO843eXQ/JAb+sHmnA6Gl4kc5je0MvYmSz4uD02bDNrjnIDtnhwxZqJ3G10HGSCBc5CSTTK/AJbI+ENWsF297c3zHlBxKAgPLPObWKbdSTtjZRZiaOKsow==; _POBP_s=rum=1&id=f730c754-4e10-4c70-a732-db6517dbf713&created=1677161062676&expire=1677163161218; XSRF-TOKEN=eyJpdiI6InNjRWQycHZZd1EyaStrMHM0bko1K2c9PSIsInZhbHVlIjoiSlFOS2JKQ1ExN2tlUlJTUVwvbDlkNkJoeTdzbTNNc2JyY2EycExJYjdFQnc2Vm1ad1UySEJtdkhDalwvK3gwYVFFbXN2dXFhMUhpbCtDM0RKVUJLK0lHZz09IiwibWFjIjoiMGQwYTM2YzAwNWYzZjQ0NDlkOWU3ZjAyNGQ4ZWNiNDMxZTQ5MjgyODkyNzViOTdmNWM5NzA4YWQ5MjMyNjgxYyJ9; isHouse=eyJpdiI6InZHZTM0alRTbE1vRGF0QitXRTYrZ3c9PSIsInZhbHVlIjoieHMzbWx2RHBTaVJiV3lIbFFqVTZ1QT09IiwibWFjIjoiMGVmZmRmNmI5OTJmZDk0MTM5MDU3YTY4NWZhZmY1ZWI3YTA0ODE2MzY5NzM0OTRkZjU2MzE5ODZkMzc1MWJlOSJ9; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDZlNjFkNjZmNTM4N2JiZGRiM2ViODQzZjU5MDkwNmM0MDY5Y2I1ZjAiLCJpYXQiOjE2NzcxNjIyNjEuNzg0OTY5LCJuYmYiOjE2NzcxNjIyNjEuNzg0OTcyLCJleHAiOjE2NzcxNzY2NjEuNzgwMTgxLCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.BnbrbT9LrXy01uJN14GKnsUfIGUBtU6WnM933QqnluyYGJqZvOP8_VyprTFI_3O3H6f4tRIjXRwiJK6SB8RPigOyMNQ8_sin3gPoecAFokH1wAcvU6z2eveQUL6WB3aV9EClOQ0IUzJzFfMC7rx0nUI-v6v1fSHSR7lAoBQZBHUs_rkHOYwRg0GEz_fCMR783ip2p1Mz0R9Y0WDPtZWjj08hQfwQYwzr-bL_sCp-S4A9LT-56IlwnIQHhJ8113p87up9xEyAwIp62WbBU0-C17Sxn2HefpJc_6qq5oOfbRHhSCpfr0vjlm_8BB3tOk8VcK-zGMhh4lCmjwolqxMMsvseuJBbM1JduptItHLmpYuaBrIy1JYLrWZGqlnQIu3VMejad_XybqXRFJ9Zy6bXIn1324N0hlAHhxyB1vEk-JrvxC23iJGSONPk409rQEXqz_eHHfarTde8MNuyQVe6wi0rwGjcwwFvGQS8-34YfswyFYG9qnmP1nT72VSbOGl2Bt79vfujl942VGQk5WHUOaN_q8zWftDuMhQdQ-hzHIzE_1A0deqho28GPje7zhKTi_x-kBtCHebaxauyvK0pbdrXu82mfM0C-E4_7Abe8o8r_kpycaP993X09hwshZX1ebM7vMRumNqaTS22zKm-1GzkqDbQBBKgsNw3Y2R4dSc; aid=eyJpdiI6InBhWjFLVktOcFhqNnBsdVVtRXkwVmc9PSIsInZhbHVlIjoiYlwvWU8zMFFBcnpPczBvSUxoUTlZeU5EXC9FQnRjZm8xYStySnpDa1dYWGlrcDJcLzc4U0NDWGVcL2J0WUVzRGtKYjU5STcxODI4TGtzVVwvc1FWVWF6UFZGZz09IiwibWFjIjoiYzk0MzllZWQ4ZjYzN2UxN2M1ZGFmYjM4ZjZmNjg4NGM4NmY4YjcyNjQ2YWYxZTIxOTdhNTBiZGQyNTI2NjgyNiJ9; fgsscgib-w-vprok=1IF0a6dbdcf84f5be4e8cf4c92f6112d80c8bcd8; fgsscgib-w-vprok=1IF0a6dbdcf84f5be4e8cf4c92f6112d80c8bcd8
origin: https://www.vprok.ru
referer: https://www.vprok.ru/catalog/1301/ovoschi-frukty-griby?sort=popularity_desc&page=6
sec-ch-ua: "Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"
sec-ch-ua-mobile: ?0
sec-ch-ua-platform: "macOS"
sec-fetch-dest: empty
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
x-gib-fgsscgib-w-vprok: 1IF0a6dbdcf84f5be4e8cf4c92f6112d80c8bcd8
x-gib-gsscgib-w-vprok: qCB3awXvv2J6VdV91ssAECys3ifE3SOks/DZig/ItIDmmI1j4jBfKm69Ff7KLgp75Pssau3B3HeNjBXz5mMxLZ9njcFpsvtAC9a4nSv7Dksj/SSYO8wW0IUVrVfSz4jNXM82I5/LmSEKzQLVn8nlTEgmMTDzTjDcaO843eXQ/JAb+sHmnA6Gl4kc5je0MvYmSz4uD02bDNrjnIDtnhwxZqJ3G10HGSCBc5CSTTK/AJbI+ENWsF297c3zHlBxKAgPLPObWKbdSTtjZRZiaOKsow==
x-xsrf-token: eyJpdiI6InNjRWQycHZZd1EyaStrMHM0bko1K2c9PSIsInZhbHVlIjoiSlFOS2JKQ1ExN2tlUlJTUVwvbDlkNkJoeTdzbTNNc2JyY2EycExJYjdFQnc2Vm1ad1UySEJtdkhDalwvK3gwYVFFbXN2dXFhMUhpbCtDM0RKVUJLK0lHZz09IiwibWFjIjoiMGQwYTM2YzAwNWYzZjQ0NDlkOWU3ZjAyNGQ4ZWNiNDMxZTQ5MjgyODkyNzViOTdmNWM5NzA4YWQ5MjMyNjgxYyJ9"""

In [66]:
headers = parse_header(headers)

In [87]:
import os

os.getcwd()

'/Users/borokoko/Crawlers'

In [61]:
os.getcwd()

'/Users/borokoko/Crawlers/crawlers/Vprok/shims'

In [72]:
with open("headers.json", "w") as file:
    json.dump(headers, file)

In [73]:
href = "/catalog/3547/skidki"

payload = json.dumps(
    {"noRedirect":True,"url": href}
)
    
category_id = re.search("\d+", href)[0]

params = {
    "sort": "popularity_desc",
    "limit": "60",
    "page": 35,
}

resp = requests.post(
    f"https://www.vprok.ru/web/api/v1/catalog/category/{category_id}?{urlencode(params)}",
#         params=params,
    data=payload,
    headers=headers
).json()

In [74]:
resp

{'categories': [{'id': 1,
   'name': 'Каталог',
   'slug': '',
   'itemCount': 9723,
   'parentId': 0},
  {'id': 3547,
   'name': 'Скидки',
   'slug': 'skidki',
   'itemCount': 9723,
   'parentId': 1},
  {'id': 1306,
   'name': 'Красота и здоровье',
   'slug': 'krasota-gigiena-bytovaya-himiya',
   'itemCount': 2976,
   'parentId': 1},
  {'id': 6846,
   'name': 'Красота и здоровье',
   'slug': 'krasota-gigiena-bytovaya-himiya',
   'itemCount': 2274,
   'parentId': 3547},
  {'id': 2348,
   'name': 'Бытовая химия и гигиена',
   'slug': 'bytovaya-himiya-i-hoztovary',
   'itemCount': 1494,
   'parentId': 1},
  {'id': 6944,
   'name': 'Бытовая химия и гигиена',
   'slug': 'bytovaya-himiya-i-hoztovary',
   'itemCount': 1243,
   'parentId': 3547},
  {'id': 1305,
   'name': 'Товары для мам и детей',
   'slug': 'tovary-dlya-mam-i-detey',
   'itemCount': 1213,
   'parentId': 1},
  {'id': 6904,
   'name': 'Товары для мам и детей',
   'slug': 'tovary-dlya-mam-i-detey',
   'itemCount': 1139,
   'par

In [178]:
def nav_json(data, key_list):
    for key in key_list:
        if data:
            if key in data:
                data = data[key]
        else:
            return None
    return data

In [180]:
nav_json(resp["products"][46], ["category", "name"])

'Мясо'

In [177]:
resp["products"][47]

{'productId': 432009,
 'url': '/product/bitey-t-a-bit-bat-yabl-vish-fryag-25g--432009',
 'name': 'Батончик Bitey Яблоко-вишня без глютена 25г',
 'images': [{'name': 'фото 1',
   'url': 'https://media.vprok.ru/products/<SIZE>/hm/mm/khdx5mr7xfyprvro2bjzbd55dcwammhm.jpeg'},
  {'name': 'фото 2',
   'url': 'https://media.vprok.ru/products/<SIZE>/i7/qz/pgotkigyjgitxwqdregg4mep3iluqzi7.jpeg'}],
 'quantumImages': [],
 'isNew': False,
 'isBought': False,
 'isAdult': False,
 'isAlcohol': False,
 'isFractional': False,
 'isFractionalNominal': False,
 'isSpecialPrice': False,
 'isPersonalPrice': False,
 'rating': 4.8,
 'reviews': 13,
 'isFavorite': False,
 'isSubscribed': False,
 'price': 48.9,
 'oldPrice': 65.9,
 'unitPrice': 0,
 'discount': 17,
 'discountPercent': 26,
 'fractionTextPrice': 'шт',
 'unitFractionTextPrice': 'шт',
 'fractionText': 'шт',
 'qtyMin': 1,
 'qtyMax': 99000,
 'fraction': 1,
 'quantum': None,
 'activityStatus': 'available',
 'delivery': {'minProductDeliveryDate': '2023-02-2

In [173]:
for i, product in enumerate(resp["products"]):
    if not product["category"]:
        print(i)
        print(product)

47
{'productId': 432009, 'url': '/product/bitey-t-a-bit-bat-yabl-vish-fryag-25g--432009', 'name': 'Батончик Bitey Яблоко-вишня без глютена 25г', 'images': [{'name': 'фото 1', 'url': 'https://media.vprok.ru/products/<SIZE>/hm/mm/khdx5mr7xfyprvro2bjzbd55dcwammhm.jpeg'}, {'name': 'фото 2', 'url': 'https://media.vprok.ru/products/<SIZE>/i7/qz/pgotkigyjgitxwqdregg4mep3iluqzi7.jpeg'}], 'quantumImages': [], 'isNew': False, 'isBought': False, 'isAdult': False, 'isAlcohol': False, 'isFractional': False, 'isFractionalNominal': False, 'isSpecialPrice': False, 'isPersonalPrice': False, 'rating': 4.8, 'reviews': 13, 'isFavorite': False, 'isSubscribed': False, 'price': 48.9, 'oldPrice': 65.9, 'unitPrice': 0, 'discount': 17, 'discountPercent': 26, 'fractionTextPrice': 'шт', 'unitFractionTextPrice': 'шт', 'fractionText': 'шт', 'qtyMin': 1, 'qtyMax': 99000, 'fraction': 1, 'quantum': None, 'activityStatus': 'available', 'delivery': {'minProductDeliveryDate': '2023-02-21T00:00:00+00:00', 'lastMileType': 

In [132]:
for href in tqdm(categories):
    
    payload = json.dumps(
        {"noRedirect":True,"url":"/catalog/3547/skidki"}
    )
    
    category_id = re.search("\d+", href)[0]

    params = {
        "sort": "popularity_desc",
        "limit": "60",
        "page": 1,
    }

    resp = requests.post(
        f"https://www.vprok.ru/web/api/v1/catalog/category/{3547}?{urlencode(params)}",
#         params=params,
        data=payload,
        headers=headers
    ).json()

100%|███████████████████████████████████████████| 23/23 [00:12<00:00,  1.86it/s]


In [97]:
/catalog/3547/skidki

'https://www.vprok.ru/web/api/v1/catalog/category/5175?sort=popularity_desc&limit=60&page=1'

In [151]:
href

'/catalog/1997/alkogol'

In [147]:
resp["categories"]

[{'id': 1, 'name': 'Каталог', 'slug': '', 'itemCount': 1177, 'parentId': 0},
 {'id': 1997,
  'name': 'Алкоголь',
  'slug': 'alkogol',
  'itemCount': 1177,
  'parentId': 1},
 {'id': 2104,
  'name': 'Пиво',
  'slug': 'pivo',
  'itemCount': 219,
  'parentId': 1997},
 {'id': 1999,
  'name': 'Вино',
  'slug': 'vino',
  'itemCount': 204,
  'parentId': 1997},
 {'id': 2114,
  'name': 'Водка',
  'slug': 'vodka',
  'itemCount': 187,
  'parentId': 1997},
 {'id': 2117,
  'name': 'Коньяк и коньячные напитки',
  'slug': 'konyak-i-konyachnye-napitki',
  'itemCount': 181,
  'parentId': 1997},
 {'id': 2130,
  'name': 'Коньяк',
  'slug': 'konyak',
  'itemCount': 161,
  'parentId': 2117},
 {'id': 2123,
  'name': 'Виски, Бурбон',
  'slug': 'viski-burbon',
  'itemCount': 153,
  'parentId': 1997},
 {'id': 2000,
  'name': 'Ликеро-водочные напитки',
  'slug': 'likero-vodochnye-napitki',
  'itemCount': 141,
  'parentId': 1997},
 {'id': 2126,
  'name': 'Настойки',
  'slug': 'nastoyki',
  'itemCount': 55,
  'par

In [150]:
resp["products"][2]

{'productId': 306557,
 'url': '/product/strongbow-sidr-strongbou-slad-gaz-4-5-0-4l--306557',
 'name': 'Сидр Strongbow сладкий 4.5% 0.4л',
 'images': [{'name': 'фото 1',
   'url': 'https://media.vprok.ru/products/<SIZE>/jd/54/kdmf3d7n4c5n2ragihigqwk76kvx54jd.jpeg'}],
 'quantumImages': [],
 'isNew': False,
 'isBought': False,
 'isAdult': True,
 'isAlcohol': True,
 'isFractional': False,
 'isFractionalNominal': False,
 'isSpecialPrice': False,
 'isPersonalPrice': False,
 'rating': 4.7,
 'reviews': 56,
 'isFavorite': False,
 'isSubscribed': False,
 'price': 71.9,
 'oldPrice': 0,
 'unitPrice': 0,
 'discount': 0,
 'discountPercent': 0,
 'fractionTextPrice': 'шт',
 'unitFractionTextPrice': 'шт',
 'fractionText': 'шт',
 'qtyMin': 1,
 'qtyMax': 99000,
 'fraction': 1,
 'quantum': None,
 'activityStatus': 'available',
 'delivery': {'minProductDeliveryDate': '2023-02-22T00:00:00+00:00',
  'lastMileType': 'courier',
  'lastMileTypeId': 1,
  'expressAvailable': False,
  'courierAvailable': True},
 '

In [267]:
# resp["products"]

In [432]:
class Vprok:
    
    def __init__(self, hrefs):
        self.hrefs = hrefs
        
    def parse_json(self, resp):
        
        dom = HTMLParser(resp["html"])
        products = dom.css("ul#catalogItems > li")
        
        for product in products:
    
            title = product.css_first("div.xf-product-title > a")
            if title:
                title = title.text().strip()

            prev_price = product.css_first("div.xf-product-cost__prev")
            if prev_price:
                prev_price = re.sub("[^\d\.,]", "", prev_price.text())

            cur_price = product.css_first("div.xf-product-cost__current")
            if cur_price:
                cur_price = re.sub("[^\d\.,]", "", cur_price.text())
            
            yield {
                "name": title,
                "price": cur_price,
                "oldPrice": prev_price,
            }
        
    def query(self, href):
        
        category_id = re.search("\d+", href)[0]
        offset, n_page = 0, 1
    
        while True:
            
            params = {
                "use_brand_zone": "1",
                "limit": "30",
                "category": category_id,
                "page": n_page,
                "sort": "rate_desc",
                "no_html": "false"
            }
                
            print(f"Sending {n_page} request")

            resp = requests.get(
                f"https://www.vprok.ru/webapi/v1/category-search/{category_id}",
                headers=headers,
                params=params
            ).json()
            
            for product in self.parse_json(resp):
                self.data.append(product)
            
            time.sleep(1)
            
            offset += 30
            n_page += 1
            
            if offset >= resp["count"]:
                break
            
    
    def query_all(self):
        self.data = []
        
        for href in self.hrefs:
            self.query(href)

In [461]:
hrefs = [href for href in hrefs if "catalog" in href]

# parser = Vprok(hrefs)
# parser.query_all()

In [463]:
with open("cfg/categories.json", "w") as file:
    json.dump({"categories": hrefs}, file)

In [439]:
with open("data.json", "w") as file:
    json.dump(parser.data, file)

In [364]:
import pandas as pd

In [442]:
df = pd.read_json("data.json")
df.head(2)

Unnamed: 0,name,price,oldPrice
0,Помидоры Розовые 500г упаковка,159,
1,Форель охлажденная потрошеная 1-2кг,639,899.0


In [443]:
df[df.price.notna()]

Unnamed: 0,name,price,oldPrice
0,Помидоры Розовые 500г упаковка,159,
1,Форель охлажденная потрошеная 1-2кг,639,899
2,Окорок задний свиной 0.4-0.7кг,299,
3,Масло сливочное Экомилк 82.5% 180г,1299,199
4,Молоко ЭкоНива ультрапастеризованное 3.2% 1л,799,
...,...,...,...
41928,Вентилятор настольный Centek CT-5003 White 19с...,999,
41929,Вентилятор Centek CT-5040 настольный 14см,699,
41930,Блендер Moulinex LM16L110,8999,
42112,Напиток Bosca Anniversary белый полусухой беза...,566,


In [446]:
cookie_str = "luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; is_pickup=0; addressChange=1; pickupZone=null; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; _gid=GA1.2.373746217.1676908872; _ym_isad=2; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; gsscgib-w-vprok=cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtnjQweqUPvTUcUaipmODgwNr3MYRtthX0Eeert0sar81QMOCSn8ABL34NzGj82bGgZAQjRFYbY4rQSMeriXmcsQ+4luFl1pxBOpnXAbTONCssgxbiJeg==; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; cfidsgib-w-vprok=b3EYd24fnKAEEaqPhIqwmlKA5C8pZg7K+9WaSkmeAXCOv6hd/C75B1kke828K993qKpLgFdESDrcyaNv8pnJ6R+2z+eZV7BRnlSk44Ky3aMta+Fry6ACeDZNZMeSUu6ROVGDqz1ZH+82duwOQBnr+fwW+tvl2oHplLMbc8k=; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZDZlNjE4MTAxZGY4Y2QyODM1YmM3MTg4MGRmZjNiMzM5MDNlMGViZTIiLCJpYXQiOjE2NzY5MjM1MTAuMDY2NDAxLCJuYmYiOjE2NzY5MjM1MTAuMDY2NDAzLCJleHAiOjE2NzY5Mzc5MTAuMDUxMzU0LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.nVFAsOdeRecsDj-Y7All8KZ7oNGG94QqGwm24qiFG7Lb4_nTCG_eKjR4gG4Ir3BKjdKG-jfeLlkV5MGGuJXNsbdMqIZ-EE8Ec5Iiem0H0N4b7NtuHGmo-V6p-s_ymSXV_LHOfCCYAuajmj3vpnd_hr-_Pbv2J_UEvU6WU3yhdgisU-fUQS9L2imtQuwypphommHmGm8pVbR5HfjJS7h9hRVMUDbCIbQMb2mVSQbtV8iqXBGqzp6i3ZDEU41sGLGBqLnFBZjf-bPxeYZFLJUVMmUK2gsV9f2TITyp1NXfcJ2OofTKlOm_yFS4HyTWwAsPhB_B2l5_YKMOMx9q_ZEpU1EBcmI9L0PNtAOL58e3hEGU6iJLQGh4UoNIaCKHuV6Tw8wM39mPpkdD_-OBtjHcPJBHbddGWM6cwxY_yfF-doa0Ppk2hwlwkS6VAQVmi28Pl-z0wADqOM7chXg4s3438dQIudR0oGT3BbBbbiuIgrqTWDSd84db3BVE78lzu1CNEF8axWqU5MFeQtk86dvQAr0aooq7Bt_gy7Eyu6wCKdc7OF8CfHTbiN4ik5ZiLrmplmnXGtu30cOPkxSyLJVEXrOfuYbTCdEjwhZWzprfIs9sTwV4diAG0oHm11ejM2yYmtDOGQU2J0ZBUAo2LpTJLHh21OQFh1UxNBDIbvukVWo; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; fgsscgib-w-vprok=Z6P112f585c00138b833091d58327ea2c23c6cdd; x-next-route-destination=%2Fcatalog%2F1307%2Fmyaso-ptitsa-delikatesy%3Fsort%3Dpopularity_desc%26page%3D4; regionChange=1; _slsession=66873AAE-28CF-46B8-8D99-E5FC835EAD57; _ym_visorc=b; region=2; deliveryTypeId=1; standardShopId=2246; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1676932738858; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; short_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; flat=12; house=14; latLng=60.038537%2C30.344641; addressZone=16120; preview_address=%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%20%D0%A5%D1%83%D0%B4%D0%BE%D0%B6%D0%BD%D0%B8%D0%BA%D0%BE%D0%B2%2C%2014; XSRF-TOKEN=eyJpdiI6IjBDdUtIOGRGZ2FJQVZIQWhaXC9VVTZRPT0iLCJ2YWx1ZSI6IkRkcnY4OWZKMVY2TDJZazhyYklyTE1UVnlWXC91dDNFRTdlRFhyTklqeldTa1UyUzVcLzhobEYxaDAxSFV3WmNKQktFazlnSThabXhzK1FyUUloK2JBSWc9PSIsIm1hYyI6ImQxMTU4YTQ4ODEzOWZiNjM5YmUzNDZiOWQyYzQxYTQwNDkxMmNiOWJkMTMxMWI2NDQ3MTBmM2M3ZWVkNzQ1YTMifQ%3D%3D; address_id=503576999; isHouse=eyJpdiI6ImtnMCtDaWhvVkthZXdkaWliYjhSQnc9PSIsInZhbHVlIjoiUXVOOG1rWkdIQUZCaFhHcUtGU1IyQT09IiwibWFjIjoiYWU1YjM2ZDczODkyMWY4YTU0NGRlNmUxYmZlYjYxMzliMzQ3ZmFjODM2MzhkZTJkMTFiNDIzNDlhNTFjNTM5NSJ9; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; shop=2246; aid=eyJpdiI6IklHbzNIZFwvXC8rUThMVUxnS21jMWdHUT09IiwidmFsdWUiOiJNMVJXZUpsUjM0aWxDaE1HenBWOGRmSytNdE03Y29abHppcElBdDFLampERkNobFVWenZKQ1wvVzgwcDRkMHdreU5mdGZaRFo0aHF3ZlwvNERrUUJkK2dRPT0iLCJtYWMiOiIzZTc5M2UyMWEyZjY3MjZmNDhlMGM4YzQ1NWQwMDEzNDhhMjgyN2ZjMDcxNWEyOTVjYTBjMzY5MjRjZGIyZDc3In0%3D; _ga_B122VKXXJE=GS1.1.1676932447.20.1.1676932793.2.0.0"

In [449]:
cookies = {}

for el in cookie_str.split("; "):
    key, val = el.split("=", 1)
    cookies[key] = val
    
cookies

{'luuid': 'b6415383-72e6-46fb-beb2-8c8c1394cb8d',
 'suuid': 'd4cdf4a8-997e-406f-b961-cc6ba95b832b',
 'split_segment': '9',
 'split_segment_amount': '11',
 'tmr_lvid': 'f140f2d66e129d40e4de4201c33196b8',
 'tmr_lvidTS': '1675346400028',
 '_ym_uid': '1675346400242416480',
 '_ym_d': '1675346400',
 'flocktory-uuid': '0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0',
 'iap.uid': '9365c0a9c73945d28c6909efdbe2b002',
 '__zzatgib-w-vprok': 'MDA0dBA=Fz2+aQ==',
 'noHouse': '0',
 'fcf': '3',
 'isUserAgreeCookiesPolicy': 'true',
 'hide_banner_block_1': 'true',
 'ngenix_valid': '633e3888e19035e396ed68f8522b7e42',
 'is_pickup': '0',
 'addressChange': '1',
 'pickupZone': 'null',
 'pickupAvailable': '0',
 '_slid': '63e4c981c29837d7f10a66f9',
 '_slid_server': '63e4c981c29837d7f10a66f9',
 '_gid': 'GA1.2.373746217.1676908872',
 '_ym_isad': '2',
 'gsscgib-w-vprok': 'cPjjH1Pagu7Osr64tIs1tYCkRhvIM/5BKS7jqtq3zkIJlr9jW7V5gyfMhOE19llnX7Mcel/U7i7PTm0heNit1/4Owjg6O7791nvrB6+yoYf4MbGGYdcjarPAKbZTcZOpFRVFKEefj0dlnokPI/fMfjtn

In [2]:
import os
import pandas as pd

In [216]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.1-py2.py3-none-any.whl (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.8/249.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting et-xmlfile
  Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
os.chdir("/Users/borokoko/Crawlers/")

In [182]:
df_1 = pd.read_json("data/data_hudozhnikov.json")
df_1 = df_1.drop_duplicates()

In [217]:
df_1.to_excel("data/data_hudozhnikov.xlsx", index=False)

In [205]:
df_disc = df_1[df_1["price"].notna() & df_1["oldPrice"] != 0].copy()

df_disc["discount"] = (df_disc["oldPrice"] - df_disc["price"]) / df_disc["price"]

In [209]:
df_disc.groupby("category")["discount"].mean().to_frame()

Unnamed: 0_level_0,discount
category,Unnamed: 1_level_1
Аптечка,0.193966
Бытовая техника,0.529281
Бытовая химия и гигиена,0.615875
"Воды, соки, напитки",0.338813
Готовая еда,0.248852
Замороженные продукты,0.357785
Здоровое питание,0.428619
Зоотовары,0.288556
Каталог,0.347648
Красота и здоровье,0.433481


In [232]:
cookie_str = """luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; addressChange=1; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; regionChange=1; region=2; is_pickup=0; deliveryTypeId=1; standardShopId=2246; has_elevator=0; is_house=0; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZGNjNDAxNGEyYTVhMTQ5OTIzNWUzYTdiMjY2NGZhNWQ2M2U5YjMzYmIiLCJpYXQiOjE2NzcwNzk2NTUuNTUzMDYyLCJuYmYiOjE2NzcwNzk2NTUuNTUzMDY0LCJleHAiOjE2NzcwOTQwNTUuNTQzNDg5LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.BaynD5_FZWrNiO44VKz5GDujDxHdWSpU3C5Lhv7XWjHLI5cuaIRejB-ZR9-VrRAl7KyN7S0LNVg3TKeb9GrXbP_5AHToPnULKVBy_Y0jx4tl_dJjO9TmWVO9qqHpa8VFf2UMMaaM9hC-8cHmP55LWKAGN63ObpZPFBif1OnzAFdYmpcZV8mNYBG0F5RcW-xmgDaI6wZ4-hLmqkYynSLPvDzDqvBQ2LT27QAQxWEsGsvhrY7Ulpm7iQn1-vqMrx8ltqzzrof-VoCYm6WkHBqmo84xnmpecnJiNM10SuU-QDMpqEVMyq-yWpl-bgAkInRJc0VClptNy-BJWDBxDgU490N2V8GzE3AmbGvBg45Bm3zzUMzXdTKMgQEd7COV355sKz1xAAPakmlWQUcEeow1vce-W__xONr_5iuSljwYPfIhiCABwDOQABSqQobxjwFAKt0JGZORE6Xq-WA-93C-6rNxcO74MQ7X1zQmzSpJYQ-5FmUpcAZEQs7j8dRdBU8TBNWZkVKL7CGBNEHcPPJb8Q2J_vye53VtIX2n2lFS8H_JF7b5BdEnlEtY0Ha3pl-irJEHjvXHdVfD-dZaj1g7T_pI7lEtK9QD1MtIeZ5EJw09XzYjViQ6n5GLN6TRBbF6heV6YyAOlHuMC3_-IvTLXXgV6QlDSSs5Lv9H962UBGQ; _gid=GA1.2.1986293396.1677079658; _ym_isad=1; flat=2; ngenix_jscv_a68b51100641=cookie_signature=Zgb3B9iWdTpA8P7HGVD9%2Br1RBKo%3D&cookie_expires=1677087143; _ym_visorc=b; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%93%D1%80%D0%B0%D0%B6%D0%B4%D0%B0%D0%BD%D1%81%D0%BA%D0%B8%D0%B9%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%2C%2041%D0%BA2%D0%91; latLng=60.012251%2C30.398576; short_address=%D0%93%D1%80%D0%B0%D0%B6%D0%B4%D0%B0%D0%BD%D1%81%D0%BA%D0%B8%D0%B9%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%2C%2041%D0%BA2%D0%91; house=41%D0%BA2%D0%91; address_id=508407750; addressZone=16120; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; shop=2246; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1677083727660; XSRF-TOKEN=eyJpdiI6IjdmYXBSck9hK0gzMks0bnBpeHU1UlE9PSIsInZhbHVlIjoiV0hBZzdCakpXSXZhZXIyM0ZIdjhVTmdFSEt4a2ZPUVNrcVNWMUR1dElsR3FHMk41ZFBsdE5rMElSdk9PS0xpVWpwQ1hUZ0F3MTdGWGZcLzVIbmVJN2h3PT0iLCJtYWMiOiJlYTc3NmQwZTg3NzE2MWFhNWEwM2EyZDliYzNkZjdjNmQ5ODA2ZDE1OWQ3MjU5NTIwZjUwYjI1ZWZiYzg2MjlmIn0%3D; isHouse=eyJpdiI6Im0xckhEMkdTQzBlc05mVWFodFRlaWc9PSIsInZhbHVlIjoia29XRlhuSGh3TjV2ZmxtU2YrWldZQT09IiwibWFjIjoiN2RkZTlmNTJkNDViMmY5MWNmNTRlMDQyOTgyODhlNWQyYmQxNzg1NjViNTcwNjM3ZTdmNGJlYWRmODk2ZGVjZSJ9; aid=eyJpdiI6InU1RUlzK1oyVWR5SkZoXC9wRitxczN3PT0iLCJ2YWx1ZSI6IlwvMGlKSkNodko4SlMzT3BrR2FLWmF6YlYraGFQWTN2N0pTQzZzejNhVlByYVB1eVhYUTdPVHpRem5rQkpSdlFsYmZVV05jWUZQVjRBcVwvbm9LVlFzdkE9PSIsIm1hYyI6IjZhYzQwMzA2YWQ3YmFhNjExMzQ3MzY1NTFjYjQyYmRiODExMWE5ZTk1Y2UxMmRhNzlhNzk5MjcxYjVhNjczYzQifQ%3D%3D; _gat_UA-93122031-1=1; x-next-route-destination=%2Fcatalog%2F6736%2Fnovinki; cfidsgib-w-vprok=iv8amQL/oUlyaZAL8jbJ3oS2MLvb6gDKyoMG39U4TjsoR4uKj1Rl+oIXl02z4pNA/SmnsroJQ3U7ZrPe/FAJCZqLphr0hQZWlSlLtdgggKA/zmWgawM3ns00XyekAsvnGIISRr90pcUjzIXF+h5ctRsnyiSNvYnFbLwocQI=; cfidsgib-w-vprok=iv8amQL/oUlyaZAL8jbJ3oS2MLvb6gDKyoMG39U4TjsoR4uKj1Rl+oIXl02z4pNA/SmnsroJQ3U7ZrPe/FAJCZqLphr0hQZWlSlLtdgggKA/zmWgawM3ns00XyekAsvnGIISRr90pcUjzIXF+h5ctRsnyiSNvYnFbLwocQI=; cfidsgib-w-vprok=iv8amQL/oUlyaZAL8jbJ3oS2MLvb6gDKyoMG39U4TjsoR4uKj1Rl+oIXl02z4pNA/SmnsroJQ3U7ZrPe/FAJCZqLphr0hQZWlSlLtdgggKA/zmWgawM3ns00XyekAsvnGIISRr90pcUjzIXF+h5ctRsnyiSNvYnFbLwocQI=; gsscgib-w-vprok=nb3FrOwtcjjKCcuclcmsaDsyXAoWVZhqlteseX8MccGa+Sm0FWSxacn42nagf1yaMWaoK5dRhuyF8GvSSuSsCQumAbbXjqOaD886mox/LNaMCg73kblZXg1IyIrKtvPna7wzca2AxTUJpqVMatI7nK5leNwAog3rnK8DhCkv/bdhBtcQeBbPpSgcQRkTCXYD/g3RJVZVCsagvqRxuUdu2KwRjI+5dU6P+lSc2lM2U4s+LhphFTF+bNnyv0TfpxICeA4/0y+92taMY38elGjW; gsscgib-w-vprok=nb3FrOwtcjjKCcuclcmsaDsyXAoWVZhqlteseX8MccGa+Sm0FWSxacn42nagf1yaMWaoK5dRhuyF8GvSSuSsCQumAbbXjqOaD886mox/LNaMCg73kblZXg1IyIrKtvPna7wzca2AxTUJpqVMatI7nK5leNwAog3rnK8DhCkv/bdhBtcQeBbPpSgcQRkTCXYD/g3RJVZVCsagvqRxuUdu2KwRjI+5dU6P+lSc2lM2U4s+LhphFTF+bNnyv0TfpxICeA4/0y+92taMY38elGjW; _ga_B122VKXXJE=GS1.1.1677083572.28.1.1677083812.28.0.0; _POBP_s=rum=1&id=a45064e1-22d3-435f-9114-4ec44ae9e673&created=1677083566994&expire=1677084716800; fgsscgib-w-vprok=qZGW5d2917096ec81d48edccbdcca2a78b1ab678; fgsscgib-w-vprok=qZGW5d2917096ec81d48edccbdcca2a78b1ab678"""

In [239]:
cookies_1 = {}

for el in cookie_str.split("; "):
    key, val = el.split("=", 1)
    cookies_1[key] = val

In [236]:
cookie_str_2 = "luuid=b6415383-72e6-46fb-beb2-8c8c1394cb8d; suuid=d4cdf4a8-997e-406f-b961-cc6ba95b832b; split_segment=9; split_segment_amount=11; tmr_lvid=f140f2d66e129d40e4de4201c33196b8; tmr_lvidTS=1675346400028; _ym_uid=1675346400242416480; _ym_d=1675346400; flocktory-uuid=0f155bb5-6a5d-4a16-bf8e-890a2bd727e5-0; iap.uid=9365c0a9c73945d28c6909efdbe2b002; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; __zzatgib-w-vprok=MDA0dBA=Fz2+aQ==; noHouse=0; fcf=3; isUserAgreeCookiesPolicy=true; hide_banner_block_1=true; ngenix_valid=633e3888e19035e396ed68f8522b7e42; addressChange=1; pickupAvailable=0; _slid=63e4c981c29837d7f10a66f9; _slid_server=63e4c981c29837d7f10a66f9; regionChange=1; region=2; is_pickup=0; deliveryTypeId=1; standardShopId=2246; has_elevator=0; is_house=0; deliveryZone=%D0%93%D0%9E%D0%A0%D0%9E%D0%94%20%D0%A1%D0%9F%D0%91%20%D0%A1%D0%B5%D0%B2%D0%B5%D1%80; access_token=eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiI5IiwianRpIjoidXVpZGI2NDE1MzgzLTcyZTYtNDZmYi1iZWIyLThjOGMxMzk0Y2I4ZGNjNDAxNGEyYTVhMTQ5OTIzNWUzYTdiMjY2NGZhNWQ2M2U5YjMzYmIiLCJpYXQiOjE2NzcwNzk2NTUuNTUzMDYyLCJuYmYiOjE2NzcwNzk2NTUuNTUzMDY0LCJleHAiOjE2NzcwOTQwNTUuNTQzNDg5LCJzdWIiOiIiLCJzY29wZXMiOltdLCJzcGxpdF9zZWdtZW50Ijo5fQ.BaynD5_FZWrNiO44VKz5GDujDxHdWSpU3C5Lhv7XWjHLI5cuaIRejB-ZR9-VrRAl7KyN7S0LNVg3TKeb9GrXbP_5AHToPnULKVBy_Y0jx4tl_dJjO9TmWVO9qqHpa8VFf2UMMaaM9hC-8cHmP55LWKAGN63ObpZPFBif1OnzAFdYmpcZV8mNYBG0F5RcW-xmgDaI6wZ4-hLmqkYynSLPvDzDqvBQ2LT27QAQxWEsGsvhrY7Ulpm7iQn1-vqMrx8ltqzzrof-VoCYm6WkHBqmo84xnmpecnJiNM10SuU-QDMpqEVMyq-yWpl-bgAkInRJc0VClptNy-BJWDBxDgU490N2V8GzE3AmbGvBg45Bm3zzUMzXdTKMgQEd7COV355sKz1xAAPakmlWQUcEeow1vce-W__xONr_5iuSljwYPfIhiCABwDOQABSqQobxjwFAKt0JGZORE6Xq-WA-93C-6rNxcO74MQ7X1zQmzSpJYQ-5FmUpcAZEQs7j8dRdBU8TBNWZkVKL7CGBNEHcPPJb8Q2J_vye53VtIX2n2lFS8H_JF7b5BdEnlEtY0Ha3pl-irJEHjvXHdVfD-dZaj1g7T_pI7lEtK9QD1MtIeZ5EJw09XzYjViQ6n5GLN6TRBbF6heV6YyAOlHuMC3_-IvTLXXgV6QlDSSs5Lv9H962UBGQ; _gid=GA1.2.1986293396.1677079658; _ym_isad=1; _ym_visorc=b; ngenix_jscv_a68b51100641=cookie_signature=quCZR8wYufIMGZJ4PVDnJkMFs9w%3D&cookie_expires=1677083414; x-next-route-destination=%2Fcatalog%2F1304%2Fryba-i-moreprodukty%3Fsort%3Dpopularity_desc%26page%3D2; address=%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3%2C%20%D0%93%D1%80%D0%B0%D0%B6%D0%B4%D0%B0%D0%BD%D1%81%D0%BA%D0%B8%D0%B9%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%2C%2041%D0%BA2%D0%91; latLng=60.012251%2C30.398576; flat=2; XSRF-TOKEN=eyJpdiI6ImJPUlVabTFwdjVRSFZadzNnSm1aXC9nPT0iLCJ2YWx1ZSI6ImNxUUxVSjdncUpzZ1dpcjJaRUV0Y1VZZ2kxSGo1RzV2bjhFY2VOSENpR1oreVhUbDN1OGd2U0R3OGNGMVhtOWFGXC83YzhpYzhPaDlEYWJpNkdiVWJmUT09IiwibWFjIjoiMWZlN2JiMTA5YWQ1ZDUwYjFmMDY3MTgxZDM0NTQ0NjEzZGRjOGMyOWM2YWY4M2FmYmEzZDUzMGM0YWMxMTVhOSJ9; short_address=%D0%93%D1%80%D0%B0%D0%B6%D0%B4%D0%B0%D0%BD%D1%81%D0%BA%D0%B8%D0%B9%20%D0%BF%D1%80%D0%BE%D1%81%D0%BF%D0%B5%D0%BA%D1%82%2C%2041%D0%BA2%D0%91; house=41%D0%BA2%D0%91; address_id=508307604; isHouse=eyJpdiI6ImJvdHFLWnVtVlNKM3NuOXhsa2Fwa3c9PSIsInZhbHVlIjoiSnJvTWZYNUFlMGNmbG5tQUdad0ZDUT09IiwibWFjIjoiMmQyNmU3MjNhMjE2MDY5M2IyOTMyZTEzYjE5YTljOTlmNWYxYzA4ZWQ4Y2ZiNmU2NmE4YjE5NTIzNDFjYzFmYSJ9; addressZone=16120; shop=2246; aid=eyJpdiI6IitmeDhUUjlGc3JiYnFVcitpRmFtNVE9PSIsInZhbHVlIjoiZ3VqOG9PQTZZN0cyVExkV0FrRFp6OXBidks5QXdpQXZmNU9HejlzVmRmZzBJTWFNWkxrMUJtblBUZzhyQ3JFdERaVHNTdHlucW1iUGp4TnVicW8zanc9PSIsIm1hYyI6IjM5ZGMxODcyNGQ0ZGZjZmM0MWFjODczZTM1ODM2Zjg5NTcyYjQzYmQ4MDQxYWVmZjU3MTQzODM0ZDQxYWYyMjMifQ%3D%3D; _gat_UA-93122031-1=1; cfidsgib-w-vprok=ojsqvd3oSlEq8AOJ8/+hGjvOIDZ+qJxgaWRjEYXCov7DiMMezG6lg1i8mCPDy6O6Ar9GfZmCHVkEyClChG5rFPxdz51nOLxF98Tv8vCPN8nzNGme5jpKonLhhezDBg1FKqEcZD3UDwtg/wtp1XUiEsRON28ZJp0QymoQKu4=; cfidsgib-w-vprok=ojsqvd3oSlEq8AOJ8/+hGjvOIDZ+qJxgaWRjEYXCov7DiMMezG6lg1i8mCPDy6O6Ar9GfZmCHVkEyClChG5rFPxdz51nOLxF98Tv8vCPN8nzNGme5jpKonLhhezDBg1FKqEcZD3UDwtg/wtp1XUiEsRON28ZJp0QymoQKu4=; cfidsgib-w-vprok=ojsqvd3oSlEq8AOJ8/+hGjvOIDZ+qJxgaWRjEYXCov7DiMMezG6lg1i8mCPDy6O6Ar9GfZmCHVkEyClChG5rFPxdz51nOLxF98Tv8vCPN8nzNGme5jpKonLhhezDBg1FKqEcZD3UDwtg/wtp1XUiEsRON28ZJp0QymoQKu4=; gsscgib-w-vprok=0f9/HUBtTZRbv/dUYJanuOTwBow5XcbGiRA+SLcQHbHA840X6lTQu7QuFJVmtrhlmqr6NGzmvN1nCZebPncLQ0wsNdHwk3GyKpFS/vdevmdK9ACYV2N81IhtrxY5qB7tVUik71ER6UxjKxQYhWM/mI52n6BXX8WSJZF3Jd+Gigb+H3dlCmRLWkZhygZz6Y9qgWafHZNsAIA5CWPHhDz9f1r6mS5lafRSMJ6+9vGfBlFwn69FJapAjxkLR2/d+ZrSoXDk9n2sq1nRtXx8XS5/HrH3LQ==; gsscgib-w-vprok=0f9/HUBtTZRbv/dUYJanuOTwBow5XcbGiRA+SLcQHbHA840X6lTQu7QuFJVmtrhlmqr6NGzmvN1nCZebPncLQ0wsNdHwk3GyKpFS/vdevmdK9ACYV2N81IhtrxY5qB7tVUik71ER6UxjKxQYhWM/mI52n6BXX8WSJZF3Jd+Gigb+H3dlCmRLWkZhygZz6Y9qgWafHZNsAIA5CWPHhDz9f1r6mS5lafRSMJ6+9vGfBlFwn69FJapAjxkLR2/d+ZrSoXDk9n2sq1nRtXx8XS5/HrH3LQ==; _ga=GA1.2.880547947.1675346399; mindboxDeviceUUID=4929db8d-93f6-47d7-ac97-fa45d82811c4; directCrm-session=%7B%22deviceGuid%22%3A%224929db8d-93f6-47d7-ac97-fa45d82811c4%22%7D; tmr_detect=0%7C1677080503244; _ga_B122VKXXJE=GS1.1.1677079663.27.1.1677080514.33.0.0; _POBP_s=rum=1&id=af97d328-900f-4d2c-87e3-eff2c6ea6232&created=1677079657033&expire=1677081418125; fgsscgib-w-vprok=n68746ffa950dd10f7c25a906b45a870f8709ee6; fgsscgib-w-vprok=n68746ffa950dd10f7c25a906b45a870f8709ee6"

In [238]:
cookies_2 = {}

for el in cookie_str_2.split("; "):
    key, val = el.split("=", 1)
    cookies_2[key] = val

In [242]:
for key in cookies_2:
    if cookies_1[key] != cookies_2[key]:
        print(key, sep="\n", end="\n\n")

ngenix_jscv_a68b51100641

x-next-route-destination

XSRF-TOKEN

address_id

isHouse

aid

cfidsgib-w-vprok

gsscgib-w-vprok

tmr_detect

_ga_B122VKXXJE

_POBP_s

fgsscgib-w-vprok



In [3]:
import os

In [37]:
df_1 = pd.read_json("/Users/borokoko/Crawlers/data/Vprok/vprok_2.json")
df_2 = pd.read_json("/Users/borokoko/Crawlers/data/Vprok/vprok_3.json")

In [38]:
df_1 = df_1.drop_duplicates()
df_2 = df_2.drop_duplicates()

In [40]:
df_1.shape, df_2.shape

((29848, 10), (29847, 10))

In [42]:
df_merge = df_1.merge(df_2, on="name", how="inner")

In [44]:
df_merge.head(3)

Unnamed: 0,name,price_x,oldPrice_x,rating_x,reviews_x,category_x,subcategory_x,delivery_type_x,express_available_x,courier_available_x,price_y,oldPrice_y,rating_y,reviews_y,category_y,subcategory_y,delivery_type_y,express_available_y,courier_available_y
0,Помидоры Черри медовые красные круглые 200г уп...,179.0,209.0,4.9,977,"Овощи, фрукты, ягоды",Овощи,courier,False,True,179.0,209.0,4.9,977,"Овощи, фрукты, ягоды",Овощи,courier,False,True
1,Молоко Parmalat Natura Premium ультрапастеризо...,89.9,114.0,4.9,1183,"Молоко, сыр, яйца",Молоко,courier,False,True,89.9,114.0,4.9,1183,"Молоко, сыр, яйца",Молоко,courier,False,True
2,Филе куриное 0.8-1.2кг,289.0,389.0,4.8,294,"Мясо, птица, колбасы",Птица,courier,False,True,289.0,389.0,4.8,294,"Мясо, птица, колбасы",Птица,courier,False,True


In [45]:
df_merge[df_merge.price_x != df_merge.price_y]

Unnamed: 0,name,price_x,oldPrice_x,rating_x,reviews_x,category_x,subcategory_x,delivery_type_x,express_available_x,courier_available_x,price_y,oldPrice_y,rating_y,reviews_y,category_y,subcategory_y,delivery_type_y,express_available_y,courier_available_y
6,Зубная паста Colgate Total 12 Профессиональная...,139.00,251.00,4.9,48,Красота и здоровье,Уход за полостью рта,courier,False,True,189.00,299.00,4.5,6,Красота и здоровье,Уход за полостью рта,courier,False,True
7,Зубная паста Colgate Total 12 Профессиональная...,189.00,299.00,4.5,6,Красота и здоровье,Уход за полостью рта,courier,False,True,139.00,251.00,4.9,48,Красота и здоровье,Уход за полостью рта,courier,False,True
93,Вода Borjomi минеральная лечебно-столовая гази...,79.99,99.99,4.7,394,"Воды, соки, напитки",Вода,courier,False,True,65.00,79.90,4.7,145,"Воды, соки, напитки",Вода,courier,False,True
94,Вода Borjomi минеральная лечебно-столовая гази...,65.00,79.90,4.7,145,"Воды, соки, напитки",Вода,courier,False,True,79.99,99.99,4.7,394,"Воды, соки, напитки",Вода,courier,False,True
103,Колбаса Мясной дом Бородина Докторская вареная...,38.90,99.90,4.6,246,"Мясо, птица, колбасы",Деликатесы и колбасные изделия,courier,False,True,987.00,0.00,4.3,3,"Мясо, птица, колбасы",Деликатесы и колбасные изделия,courier,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29842,Водка Kremlin Award 40% 0.5л,739.00,0.00,5.0,5,Алкоголь,Водка,courier,False,True,929.00,0.00,4.8,5,Алкоголь,Водка,courier,False,True
29943,Коньяк Российский Пять звездочек 5 лет 40% 0.5л,439.00,0.00,4.5,4,Алкоголь,Коньяк и коньячные напитки,courier,False,True,499.00,0.00,4.8,5,Алкоголь,Коньяк и коньячные напитки,courier,False,True
29944,Коньяк Российский Пять звездочек 5 лет 40% 0.5л,499.00,0.00,4.8,5,Алкоголь,Коньяк и коньячные напитки,courier,False,True,439.00,0.00,4.5,4,Алкоголь,Коньяк и коньячные напитки,courier,False,True
30052,Пиво Степан Разин Петровское 4.7% 0.45л,47.90,0.00,4.8,4,Алкоголь,Пиво,courier,False,True,44.90,0.00,4.0,3,Алкоголь,Пиво,courier,False,True
