In [2]:
!pip install loguru

Collecting loguru
  Downloading loguru-0.5.3-py3-none-any.whl (57 kB)
Collecting win32-setctime>=1.0.0; sys_platform == "win32"
  Downloading win32_setctime-1.0.3-py3-none-any.whl (3.5 kB)
Installing collected packages: win32-setctime, loguru
Successfully installed loguru-0.5.3 win32-setctime-1.0.3


In [3]:
import argparse
from collections import namedtuple, defaultdict
from csv import DictWriter
import json
from pathlib import Path

import requests
from loguru import logger

In [4]:
# Field names of one scraped offer. The order is significant: it defines both the
# CarInfo namedtuple fields and the column order of the CSV files.
columns = [
    'bodyType',
    'brand',
    'car_url',
    'color',
    'complectation_dict',
    'description',
    'engineDisplacement',
    'enginePower',
    'location',
    'equipment_dict',
    'fuelType',
    'image',
    'mileage',
    'modelDate',
    'model_info',
    'model_name',
    'name',
    'numberOfDoors',
    'parsing_unixtime',
    'priceCurrency',
    'productionDate',
    'sell_id',
    'super_gen',
    'vehicleConfiguration',
    'vehicleTransmission',
    'vendor',
    'Владельцы',
    'Владение',
    'ПТС',
    'Привод',
    'Руль',
    'Состояние',
    'Таможня',
    'price',
]

In [5]:
# Immutable record for one scraped offer; its fields are exactly `columns`.
# The generated class is named 'Car' although it is bound to the name CarInfo.
CarInfo = namedtuple(typename='Car', field_names=columns)

In [8]:
# Per-brand counts (presumably the number of rows of each brand in the test set —
# TODO confirm the source). The crawler uses them as the target brand mix.
brand_in_test = dict(
    BMW=4473,
    VOLKSWAGEN=4404,
    NISSAN=4393,
    MERCEDES=4180,
    TOYOTA=3913,
    AUDI=3421,
    MITSUBISHI=2843,
    SKODA=2741,
    VOLVO=1463,
    HONDA=1150,
    INFINITI=871,
    LEXUS=834,
)

In [9]:
# Total number of cars over all brands listed in brand_in_test.
cars_in_test = sum(brand_in_test.values())
print(cars_in_test)

34686


In [10]:
# Share of every brand in the total (brand -> fraction of cars_in_test).
# pickup_brand() compares the crawl progress against these fractions.
proportion_in_test = dict(
    (brand_name, brand_count / cars_in_test)
    for brand_name, brand_count in brand_in_test.items()
)
print(proportion_in_test)

{'BMW': 0.12895692786715102, 'VOLKSWAGEN': 0.12696765265524995, 'NISSAN': 0.12665052182436717, 'MERCEDES': 0.12050971573545523, 'TOYOTA': 0.11281208556766419, 'AUDI': 0.09862768840454363, 'MITSUBISHI': 0.08196390474543043, 'SKODA': 0.07902323704088104, 'VOLVO': 0.04217840050740933, 'HONDA': 0.033154586865017585, 'INFINITI': 0.02511099579080897, 'LEXUS': 0.02404428299602145}


In [11]:
def get_headers():
    """Build the HTTP headers sent with every listing request to auto.ru.

    The headers are kept as raw ``Name: value`` lines (one header per line) and
    parsed into a dict.

    NOTE(security): the block embeds a session cookie and a CSRF token as
    literals. They are kept because the request is built around them, but they
    should be loaded from the environment or a local, untracked file instead of
    being committed with the notebook.

    Returns:
        dict: header name -> header value, both ``str``.
    """
    raw_headers = '''
Host: auto.ru
Connection: keep-alive
Content-Length: 99
x-requested-with: fetch
x-client-date: 1603066469874
x-csrf-token: c23073bb4cd65413662a41bd460fd8317459fe3ce6d83db1
x-page-request-id: 3c4800b60eb9e8c568e5a515f5cd4872
content-type: application/json
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36
x-client-app-version: 202010.16.122434
Accept: */*
Origin: https://auto.ru
Sec-Fetch-Site: same-origin
Sec-Fetch-Mode: same-origin
Sec-Fetch-Dest: empty
Referer: https://auto.ru/cars/bmw/all/?output_type=list&page=1
Accept-Encoding: gzip, deflate, br
Accept-Language: ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7
Cookie: autoru_sid=a%3Ag5f88f7d72b1bif6t0m99h0krigjgs2e.a783a85b576c8a7acaea4faafaa81ffc%7C1602811863959.604800.QhhqH0HWfM4BPWrsvjyfIg.8EXrpUR7Bq1a2gOSCnsb0HnXxvmHbYB9eF5Uz5o_bZE; autoruuid=g5f88f7d72b1bif6t0m99h0krigjgs2e.a783a85b576c8a7acaea4faafaa81ffc; suid=63abf8672f4a9e550bb96dd00b95ad21.8e155bfb838a5227bdd9dea1d2cbdc3e; _ym_uid=1602811867719278329; yuidcs=1; crookie=PZwS3/iYq2PFIw/dbrsqDVB/0e2v79Xe/8RsG6ySC8Djcl+mh/UCjYohgODaSkw7rMa6O9v7+RD56YSQKE2fhSkWxV8=; cmtchd=MTYwMjgyODMxMjMwNQ==; bltsr=1; yuidlt=1; yandexuid=360949521578055883; my=YwA%3D; _ym_isad=1; promo-app-banner-shown=1; promo-header-counter=4; _csrf_token=c23073bb4cd65413662a41bd460fd8317459fe3ce6d83db1; from=direct; X-Vertis-DC=myt; _ym_wasSynced=%7B%22time%22%3A1603066366683%2C%22params%22%3A%7B%22eu%22%3A0%7D%2C%22bkParams%22%3A%7B%7D%7D; gdpr=0; _ym_visorc_22753222=b; from_lifetime=1603066463083; _ym_d=1603066470'''
    headers = {}
    for line in raw_headers.strip().split('\n'):
        # Split on the FIRST ': ' only, so a value that itself contains ': '
        # is kept whole instead of being truncated.
        name, separator, value = line.partition(': ')
        if not separator:
            # Not a "Name: value" line (e.g. a blank line) - nothing to add.
            continue
        headers[name] = value
    return headers

In [13]:
def get_data_from_site(pange_num: int, brand: str, timeout: float = 30) -> dict:
    """Fetch one page of car listings for ``brand`` from auto.ru.

    Args:
        pange_num: number of the listing page to request (sent as ``page``).
            The misspelled name is kept because it is part of the signature.
        brand: brand code sent as ``mark`` in the catalog filter, e.g. ``'BMW'``.
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would stall the
            whole crawl on a single unresponsive connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: the server answered with an error status.
        requests.Timeout: the server did not answer within ``timeout`` seconds.
    """

    base_url = 'https://auto.ru/-/ajax/desktop/listing/'
    # geo_id / geo_radius restrict the search area (presumably region 213 and a
    # radius in km - TODO confirm against the site's API).
    params = dict(category='cars', section="all", output_type="list", page=pange_num,
                  catalog_filter=[{'mark': brand}], geo_id=[213], geo_radius=800)
    r = requests.post(base_url, json=params, headers=get_headers(), timeout=timeout)
    r.raise_for_status()
    return r.json()

In [23]:
# Probe against the live site: purchase date of the first BMW offer on page 1.
# The value is bound to purchase_date so the name is defined on both paths
# (previously the looked-up value was discarded and only the fallback assigned).
try:
    purchase_date = get_data_from_site(1, 'BMW')['offers'][0]['documents']['purchase_date']
except KeyError:
    purchase_date = None

In [24]:
def _dig(data, *path):
    """Follow ``path`` (dict keys / list indexes) into ``data``; return None if a step is absent."""
    node = data
    try:
        for step in path:
            node = node[step]
    except (KeyError, IndexError):
        return None
    return node


def add_car_data(car_data: list, data_json: dict):
    """Convert the offers of one listing response into CarInfo records.

    Every entry of ``data_json['offers']`` is turned into a ``CarInfo`` and
    appended to ``car_data`` (the list is modified in place).

    Skipped silently:
      * a response without an ``'offers'`` key (nothing is added);
      * an offer whose ``vehicle_info`` has no ``'configuration'``;
      * an offer without a ``price_info['RUR']`` price.

    Optional fields (purchase date, number of owners, description, PTS and the
    preview image) become ``None`` when absent. Any other missing key still
    raises ``KeyError``.

    Args:
        car_data: list that receives the created CarInfo objects.
        data_json: decoded JSON of one listing page.
    """

    if 'offers' not in data_json:
        return

    for car in data_json['offers']:
        vehicle = car['vehicle_info']
        if 'configuration' not in vehicle:
            continue

        configuration = vehicle['configuration']
        tech_param = vehicle['tech_param']

        body_type_human = configuration['human_name']
        body_type = configuration['body_type']
        transmission = tech_param['transmission']
        # The displacement is divided by 1000 and rounded to one decimal
        # (presumably cm3 -> litres - TODO confirm the unit).
        engine_volume = round(float(tech_param['displacement']) / 1000, 1)

        # Optional fields: absent -> None.
        purchase_date = _dig(car, 'documents', 'purchase_date')
        owners_number = _dig(car, 'documents', 'owners_number')
        description = _dig(car, 'description')
        pts = _dig(car, 'documents', 'pts')
        # An offer without photos has no image_urls[0]; keep the offer and store None.
        image = _dig(car, 'state', 'image_urls', 0, 'sizes', 'small')

        # An offer without a rouble price is dropped.
        try:
            price = car['price_info']['RUR']
        except KeyError:
            continue

        brand = vehicle['mark_info']['name']
        model = vehicle['model_info']['name']
        sell_id = car['saleId']
        section = car['section']
        car_url = f'https://auto.ru/cars/{section}/sale/{brand.lower()}/{model.lower()}/{sell_id}/'

        info = CarInfo(bodyType=body_type_human,
                       brand=brand,
                       car_url=car_url,
                       image=image,
                       color=car['color_hex'],
                       complectation_dict=vehicle['complectation'],
                       equipment_dict=vehicle['equipment'],
                       model_info=vehicle['model_info'],
                       model_name=model,
                       location=car['seller']['location']['region_info']['name'],
                       parsing_unixtime=car['additional_info']['fresh_date'],
                       priceCurrency=car['price_info']['currency'],
                       sell_id=sell_id,
                       super_gen=vehicle['super_gen'],
                       vendor=vehicle['vendor'],
                       fuelType=tech_param['engine_type'],
                       modelDate=vehicle['super_gen']['year_from'],
                       name=tech_param['human_name'],
                       numberOfDoors=configuration['doors_count'],
                       productionDate=car['documents']['year'],
                       vehicleConfiguration=body_type + " " + transmission + " " + str(engine_volume),
                       vehicleTransmission=transmission,
                       engineDisplacement=str(engine_volume) + ' LTR',
                       # ' N12' suffix presumably mirrors the format of the reference
                       # dataset - TODO confirm.
                       enginePower=str(tech_param['power']) + ' N12',
                       description=description,
                       mileage=car['state']['mileage'],
                       Привод=tech_param['gear_type'],
                       Руль=vehicle['steering_wheel'],
                       Состояние=car['state']['state_not_beaten'],
                       Владельцы=owners_number,
                       ПТС=pts,
                       Таможня=car['documents']['custom_cleared'],
                       Владение=purchase_date,
                       price=price
                       )
        car_data.append(info)

In [25]:
def write_to_csv(car_data: list, output_folder_path: Path, page_num: int):
    """Dump the collected cars into ``train_<page_num>.csv``.

    Args:
        car_data: CarInfo records to write; when empty, no file is created.
        output_folder_path: folder that receives the CSV file.
        page_num: used only to build the file name.
    """
    if car_data:
        target = output_folder_path.joinpath(f'train_{page_num}.csv')
        # newline='' lets the csv module control line endings itself.
        with target.open('w', encoding='utf-8', newline='') as csv_file:
            logger.info(f'writing {target}, содержащий {len(car_data)} записей')
            csv_writer = DictWriter(csv_file, fieldnames=columns)
            csv_writer.writeheader()
            csv_writer.writerows(list(record._asdict() for record in car_data))

In [26]:
def pickup_brand(brand_stats: dict) -> str:
    """Pick the brand that is currently most under-represented.

    Each brand's share of the ``brand_stats`` total is subtracted from its
    target share in ``proportion_in_test``; the brand with the largest
    remaining gap is returned. On a tie the brand that comes first in
    ``proportion_in_test`` wins.

    Args:
        brand_stats: brand -> numeric counter accumulated so far.

    Returns:
        The name of the selected brand.
    """
    total = sum(brand_stats.values())
    shares = {name: count / total for name, count in brand_stats.items()}

    gap = dict(proportion_in_test)
    for name in shares:
        gap[name] -= shares[name]

    # Stable descending sort: equal gaps keep the order of proportion_in_test.
    ranked = sorted(gap, key=gap.get, reverse=True)
    return ranked[0]

In [27]:
def parse_data(n_pages, output_folder, save_json, json_folder):
    """Top-level crawl loop: download, parse and store car listings from auto.ru.

    On every iteration the brand chosen by ``pickup_brand`` is requested, the
    offers are converted by ``add_car_data`` and, after every 50th iteration,
    the accumulated records are written to a CSV file and the buffer is reset.
    Whatever is left after the loop is written at the end.

    A request that raises is logged and skipped. The brand's page counter is
    not advanced in that case, so the same page is requested again the next
    time that brand is selected.

    Args:
        n_pages: number of requests (iterations) to perform.
        output_folder: folder for the CSV files; created if missing.
        save_json: when truthy, the raw JSON of every response is stored too.
        json_folder: folder for the raw JSON files; only used (and created)
            when ``save_json`` is truthy.
    """

    logger.info('Starting parse data from auto.ru about cars')

    json_folder_path = None
    if save_json:
        json_folder_path = Path(json_folder)
        json_folder_path.mkdir(parents=True, exist_ok=True)

    output_folder_path = Path(output_folder)
    output_folder_path.mkdir(parents=True, exist_ok=True)

    car_data = []
    # brand -> number of the next listing page to request for that brand (starts at 1).
    brand_stats = defaultdict(lambda: 1)

    # Keeps the final flush below valid when the loop body never runs
    # (n_pages < 1); otherwise page_num would be unbound there.
    page_num = 0

    for page_num in range(1, n_pages + 1):

        brand = pickup_brand(brand_stats)

        logger.info(f'processing page: {page_num} {brand} {brand_stats[brand]}')
        try:
            data_json = get_data_from_site(brand_stats[brand], brand)
        except Exception as e:
            logger.error(f"Error in parsing: {e}")
            continue

        brand_stats[brand] += 1

        if save_json:
            with open(json_folder_path / f'page_{page_num}.json', 'w', encoding='utf-8') as f:
                json.dump(data_json, f)

        add_car_data(car_data, data_json)

        # Flush to disk in chunks so an interrupted crawl loses at most 50 iterations.
        if page_num % 50 == 0:
            write_to_csv(car_data, output_folder_path, page_num)
            car_data = []

    # Remainder that did not fill a whole chunk (write_to_csv ignores an empty list).
    write_to_csv(car_data, output_folder_path, page_num)

    logger.info('parsing successfully finished')

In [48]:
def main(argv=None):
    """Command-line entry point: parse the arguments and run the crawl.

    Args:
        argv: list of argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``. Pass an explicit list when there is
            no usable command line, e.g. ``main(['100', '-o', 'train'])`` from
            a notebook cell.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("n_pages", help="number of pages to parse", type=int)
    # '--output_folder' gives the option the destination that parse_data's call
    # below reads; '-train_folder' is kept as an alias for existing command lines.
    parser.add_argument("-o", "--output_folder", "-train_folder", dest="output_folder",
                        help='folder to save parsed csv files', type=str, default='.')
    parser.add_argument("-j", "--save_json", help="save json files", action="store_true")
    parser.add_argument("--json_folder", help='folder for json files', type=str, default='json')
    args = parser.parse_args(argv)
    parse_data(args.n_pages,
               args.output_folder,
               args.save_json,
               args.json_folder)

In [46]:
# Script entry point. NOTE: inside a Jupyter kernel the process arguments belong to
# the kernel launcher, so argparse rejects them - the recorded run of this cell
# fails with "argument n_pages: invalid int value" on the kernel connection-file path.
if __name__ == '__main__':
    main()

usage: ipykernel_launcher.py [-h] [-o O] [-j] [--json_folder JSON_FOLDER] n_pages
ipykernel_launcher.py: error: argument n_pages: invalid int value: 'C:\\Users\\User-PC\\AppData\\Roaming\\jupyter\\runtime\\kernel-0674999f-5e03-4dbe-ba45-b670ca61e533.json'
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\User-PC\anaconda3\lib\argparse.py", line 2422, in _get_value
    result = type_func(arg_string)
ValueError: invalid literal for int() with base 10: 'C:\\Users\\User-PC\\AppData\\Roaming\\jupyter\\runtime\\kernel-0674999f-5e03-4dbe-ba45-b670ca61e533.json'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\User-PC\anaconda3\lib\argparse.py", line 1800, in parse_known_args
    namespace, args = self._parse_known_args(args, namespace)
  File "C:\Users\User-PC\anaconda3\lib\argparse.py", line 2009, in _parse_known_args
    stop_index = consume_positionals(start_index)
  File "C:\Users\User-PC\anaconda3\lib\argparse.py", line 1965, in consume_positionals
    take_action(action, args)
  File "C:\Users\User-PC\anaconda3\lib\argparse.py", line 1858, in take_action
    argument_values = self._get_values(action, argument_strings)
  File "C:\Users\User-PC\anaconda3\lib\argpars

TypeError: object of type 'NoneType' has no len()

In [57]:
# Run the crawl for 2000 iterations: CSV chunks go to train_folder,
# the raw JSON of every response goes to json_folder.
parse_data(
    n_pages=2000,
    output_folder='train_folder',
    save_json=True,
    json_folder='json_folder',
)

2021-03-06 12:37:01.195 | INFO     | __main__:parse_data:5 - Starting parse data from auto.ru about cars
2021-03-06 12:37:01.196 | INFO     | __main__:parse_data:21 - processing page: 1 BMW 1
2021-03-06 12:37:01.837 | INFO     | __main__:parse_data:21 - processing page: 2 VOLKSWAGEN 1
2021-03-06 12:37:02.350 | INFO     | __main__:parse_data:21 - processing page: 3 NISSAN 1
2021-03-06 12:37:03.023 | INFO     | __main__:parse_data:21 - processing page: 4 MERCEDES 1
2021-03-06 12:37:03.523 | INFO     | __main__:parse_data:21 - processing page: 5 TOYOTA 1
2021-03-06 12:37:04.275 | INFO     | __main__:parse_data:21 - processing page: 6 AUDI 1
2021-03-06 12:37:04.973 | INFO     | __main__:parse_data:21 - processing page: 7 MITSUBISHI 1
2021-03-06 12:37:05.596 | INFO     | __main__:parse_data:21 - processing page: 8 SKODA 1
2021-03-06 12:37:06.213 | INFO     | __main__:parse_data:21 - processing page: 9 VOLVO 1
2021-03-06 12:37:06.812 | INFO     | __main__:parse_data:21 - processing page: 10 

2021-03-06 12:37:56.905 | INFO     | __main__:parse_data:21 - processing page: 89 LEXUS 2
2021-03-06 12:37:57.730 | INFO     | __main__:parse_data:21 - processing page: 90 TOYOTA 11
2021-03-06 12:37:58.462 | INFO     | __main__:parse_data:21 - processing page: 91 HONDA 3
2021-03-06 12:37:58.994 | INFO     | __main__:parse_data:21 - processing page: 92 MITSUBISHI 8
2021-03-06 12:37:59.552 | INFO     | __main__:parse_data:21 - processing page: 93 MERCEDES 12
2021-03-06 12:38:00.108 | INFO     | __main__:parse_data:21 - processing page: 94 BMW 13
2021-03-06 12:38:00.740 | INFO     | __main__:parse_data:21 - processing page: 95 VOLVO 4
2021-03-06 12:38:01.499 | INFO     | __main__:parse_data:21 - processing page: 96 VOLKSWAGEN 13
2021-03-06 12:38:02.019 | INFO     | __main__:parse_data:21 - processing page: 97 NISSAN 13
2021-03-06 12:38:02.680 | INFO     | __main__:parse_data:21 - processing page: 98 AUDI 10
2021-03-06 12:38:03.257 | INFO     | __main__:parse_data:21 - processing page: 99 

2021-03-06 12:38:51.977 | INFO     | __main__:parse_data:21 - processing page: 175 NISSAN 23
2021-03-06 12:38:52.545 | INFO     | __main__:parse_data:21 - processing page: 176 MERCEDES 22
2021-03-06 12:38:53.150 | INFO     | __main__:parse_data:21 - processing page: 177 AUDI 18
2021-03-06 12:38:53.654 | INFO     | __main__:parse_data:21 - processing page: 178 MITSUBISHI 15
2021-03-06 12:38:54.141 | INFO     | __main__:parse_data:21 - processing page: 179 BMW 24
2021-03-06 12:38:54.678 | INFO     | __main__:parse_data:21 - processing page: 180 TOYOTA 21
2021-03-06 12:38:55.301 | INFO     | __main__:parse_data:21 - processing page: 181 VOLKSWAGEN 24
2021-03-06 12:38:55.828 | INFO     | __main__:parse_data:21 - processing page: 182 NISSAN 24
2021-03-06 12:38:56.399 | INFO     | __main__:parse_data:21 - processing page: 183 HONDA 6
2021-03-06 12:38:56.935 | INFO     | __main__:parse_data:21 - processing page: 184 MERCEDES 23
2021-03-06 12:38:57.468 | INFO     | __main__:parse_data:21 - pro

2021-03-06 12:39:39.743 | INFO     | __main__:parse_data:21 - processing page: 261 VOLKSWAGEN 34
2021-03-06 12:39:40.157 | INFO     | __main__:parse_data:21 - processing page: 262 NISSAN 34
2021-03-06 12:39:40.713 | INFO     | __main__:parse_data:21 - processing page: 263 SKODA 21
2021-03-06 12:39:41.298 | INFO     | __main__:parse_data:21 - processing page: 264 MITSUBISHI 22
2021-03-06 12:39:41.641 | INFO     | __main__:parse_data:21 - processing page: 265 BMW 35
2021-03-06 12:39:42.135 | INFO     | __main__:parse_data:21 - processing page: 266 MERCEDES 33
2021-03-06 12:39:42.709 | INFO     | __main__:parse_data:21 - processing page: 267 AUDI 27
2021-03-06 12:39:43.127 | INFO     | __main__:parse_data:21 - processing page: 268 TOYOTA 31
2021-03-06 12:39:43.644 | INFO     | __main__:parse_data:21 - processing page: 269 VOLKSWAGEN 35
2021-03-06 12:39:44.070 | INFO     | __main__:parse_data:21 - processing page: 270 NISSAN 35
2021-03-06 12:39:44.513 | INFO     | __main__:parse_data:21 - 

2021-03-06 12:40:22.341 | INFO     | __main__:parse_data:21 - processing page: 348 NISSAN 45
2021-03-06 12:40:22.804 | INFO     | __main__:parse_data:21 - processing page: 349 AUDI 35
2021-03-06 12:40:23.275 | INFO     | __main__:parse_data:21 - processing page: 350 BMW 46
2021-03-06 12:40:23.732 | INFO     | __main__:write_to_csv:11 - writing train_folder\train_350.csv, содержащий 1850 записей
2021-03-06 12:40:23.996 | INFO     | __main__:parse_data:21 - processing page: 351 MERCEDES 43
2021-03-06 12:40:24.524 | INFO     | __main__:parse_data:21 - processing page: 352 SKODA 28
2021-03-06 12:40:24.917 | INFO     | __main__:parse_data:21 - processing page: 353 VOLVO 15
2021-03-06 12:40:25.450 | INFO     | __main__:parse_data:21 - processing page: 354 VOLKSWAGEN 46
2021-03-06 12:40:25.896 | INFO     | __main__:parse_data:21 - processing page: 355 NISSAN 46
2021-03-06 12:40:26.265 | INFO     | __main__:parse_data:21 - processing page: 356 TOYOTA 41
2021-03-06 12:40:26.743 | INFO     | __m

2021-03-06 12:41:03.334 | INFO     | __main__:parse_data:21 - processing page: 434 VOLKSWAGEN 56
2021-03-06 12:41:03.896 | INFO     | __main__:parse_data:21 - processing page: 435 BMW 57
2021-03-06 12:41:04.397 | INFO     | __main__:parse_data:21 - processing page: 436 NISSAN 56
2021-03-06 12:41:04.848 | INFO     | __main__:parse_data:21 - processing page: 437 TOYOTA 50
2021-03-06 12:41:05.229 | INFO     | __main__:parse_data:21 - processing page: 438 SKODA 35
2021-03-06 12:41:05.683 | INFO     | __main__:parse_data:21 - processing page: 439 AUDI 44
2021-03-06 12:41:06.072 | INFO     | __main__:parse_data:21 - processing page: 440 MERCEDES 54
2021-03-06 12:41:06.505 | INFO     | __main__:parse_data:21 - processing page: 441 VOLKSWAGEN 57
2021-03-06 12:41:06.948 | INFO     | __main__:parse_data:21 - processing page: 442 BMW 58
2021-03-06 12:41:07.438 | INFO     | __main__:parse_data:21 - processing page: 443 NISSAN 57
2021-03-06 12:41:07.832 | INFO     | __main__:parse_data:21 - process

2021-03-06 12:41:44.307 | INFO     | __main__:parse_data:21 - processing page: 520 BMW 68
2021-03-06 12:41:44.740 | INFO     | __main__:parse_data:21 - processing page: 521 VOLKSWAGEN 67
2021-03-06 12:41:45.100 | INFO     | __main__:parse_data:21 - processing page: 522 AUDI 52
2021-03-06 12:41:45.687 | INFO     | __main__:parse_data:21 - processing page: 523 NISSAN 67
2021-03-06 12:41:46.208 | INFO     | __main__:parse_data:21 - processing page: 524 MERCEDES 64
2021-03-06 12:41:46.681 | INFO     | __main__:parse_data:21 - processing page: 525 TOYOTA 60
2021-03-06 12:41:47.034 | INFO     | __main__:parse_data:21 - processing page: 526 INFINITI 13
2021-03-06 12:41:47.503 | INFO     | __main__:parse_data:21 - processing page: 527 SKODA 42
2021-03-06 12:41:47.895 | INFO     | __main__:parse_data:21 - processing page: 528 BMW 69
2021-03-06 12:41:48.461 | INFO     | __main__:parse_data:21 - processing page: 529 VOLKSWAGEN 68
2021-03-06 12:41:48.927 | INFO     | __main__:parse_data:21 - proce

2021-03-06 12:42:29.212 | INFO     | __main__:parse_data:21 - processing page: 606 BMW 79
2021-03-06 12:42:29.616 | INFO     | __main__:parse_data:21 - processing page: 607 HONDA 20
2021-03-06 12:42:30.121 | INFO     | __main__:parse_data:21 - processing page: 608 MERCEDES 74
2021-03-06 12:42:30.941 | INFO     | __main__:parse_data:21 - processing page: 609 VOLKSWAGEN 78
2021-03-06 12:42:31.328 | INFO     | __main__:parse_data:21 - processing page: 610 NISSAN 78
2021-03-06 12:42:32.049 | INFO     | __main__:parse_data:21 - processing page: 611 AUDI 61
2021-03-06 12:42:32.746 | INFO     | __main__:parse_data:21 - processing page: 612 BMW 80
2021-03-06 12:42:33.185 | INFO     | __main__:parse_data:21 - processing page: 613 TOYOTA 70
2021-03-06 12:42:33.756 | INFO     | __main__:parse_data:21 - processing page: 614 SKODA 49
2021-03-06 12:42:34.266 | INFO     | __main__:parse_data:21 - processing page: 615 VOLKSWAGEN 79
2021-03-06 12:42:34.631 | INFO     | __main__:parse_data:21 - processi

2021-03-06 12:43:17.877 | INFO     | __main__:parse_data:21 - processing page: 693 AUDI 69
2021-03-06 12:43:18.284 | INFO     | __main__:parse_data:21 - processing page: 694 TOYOTA 79
2021-03-06 12:43:18.756 | INFO     | __main__:parse_data:21 - processing page: 695 VOLKSWAGEN 89
2021-03-06 12:43:19.329 | INFO     | __main__:parse_data:21 - processing page: 696 NISSAN 89
2021-03-06 12:43:19.762 | INFO     | __main__:parse_data:21 - processing page: 697 HONDA 23
2021-03-06 12:43:20.128 | INFO     | __main__:parse_data:21 - processing page: 698 MERCEDES 85
2021-03-06 12:43:20.662 | INFO     | __main__:parse_data:21 - processing page: 699 BMW 91
2021-03-06 12:43:21.163 | INFO     | __main__:parse_data:21 - processing page: 700 MITSUBISHI 58
2021-03-06 12:43:21.536 | INFO     | __main__:write_to_csv:11 - writing train_folder\train_700.csv, содержащий 1850 записей
2021-03-06 12:43:21.828 | INFO     | __main__:parse_data:21 - processing page: 701 VOLKSWAGEN 90
2021-03-06 12:43:22.340 | INFO 

2021-03-06 12:44:00.964 | INFO     | __main__:parse_data:21 - processing page: 779 VOLVO 33
2021-03-06 12:44:01.356 | INFO     | __main__:parse_data:21 - processing page: 780 VOLKSWAGEN 100
2021-03-06 12:44:01.874 | INFO     | __main__:parse_data:21 - processing page: 781 MERCEDES 95
2021-03-06 12:44:02.314 | INFO     | __main__:parse_data:21 - processing page: 782 TOYOTA 89
2021-03-06 12:44:02.863 | INFO     | __main__:parse_data:21 - processing page: 783 NISSAN 100
2021-03-06 12:44:03.390 | INFO     | __main__:parse_data:21 - processing page: 784 BMW 102
2021-03-06 12:44:04.549 | INFO     | __main__:parse_data:21 - processing page: 785 AUDI 78
2021-03-06 12:44:04.978 | INFO     | __main__:parse_data:21 - processing page: 786 HONDA 26
2021-03-06 12:44:05.448 | INFO     | __main__:parse_data:21 - processing page: 787 MITSUBISHI 65
2021-03-06 12:44:05.808 | INFO     | __main__:parse_data:21 - processing page: 788 VOLKSWAGEN 101
2021-03-06 12:44:06.553 | INFO     | __main__:parse_data:21

2021-03-06 12:44:50.397 | INFO     | __main__:parse_data:21 - processing page: 864 MERCEDES 105
2021-03-06 12:44:50.984 | INFO     | __main__:parse_data:21 - processing page: 865 AUDI 86
2021-03-06 12:44:51.449 | INFO     | __main__:parse_data:21 - processing page: 866 VOLKSWAGEN 111
2021-03-06 12:44:52.073 | INFO     | __main__:parse_data:21 - processing page: 867 SKODA 69
2021-03-06 12:44:52.489 | INFO     | __main__:parse_data:21 - processing page: 868 BMW 113
2021-03-06 12:44:53.126 | INFO     | __main__:parse_data:21 - processing page: 869 NISSAN 111
2021-03-06 12:44:53.630 | INFO     | __main__:parse_data:21 - processing page: 870 TOYOTA 99
2021-03-06 12:44:54.198 | INFO     | __main__:parse_data:21 - processing page: 871 MITSUBISHI 72
2021-03-06 12:44:54.756 | INFO     | __main__:parse_data:21 - processing page: 872 MERCEDES 106
2021-03-06 12:44:55.264 | INFO     | __main__:parse_data:21 - processing page: 873 HONDA 29
2021-03-06 12:44:55.643 | INFO     | __main__:parse_data:21 

2021-03-06 12:45:39.749 | INFO     | __main__:parse_data:21 - processing page: 950 NISSAN 121
2021-03-06 12:45:40.416 | INFO     | __main__:write_to_csv:11 - writing train_folder\train_950.csv, содержащий 1850 записей
2021-03-06 12:45:40.702 | INFO     | __main__:parse_data:21 - processing page: 951 TOYOTA 108
2021-03-06 12:45:41.377 | INFO     | __main__:parse_data:21 - processing page: 952 VOLKSWAGEN 122
2021-03-06 12:45:41.920 | INFO     | __main__:parse_data:21 - processing page: 953 BMW 124
2021-03-06 12:45:42.511 | INFO     | __main__:parse_data:21 - processing page: 954 MERCEDES 116
2021-03-06 12:45:43.204 | INFO     | __main__:parse_data:21 - processing page: 955 NISSAN 122
2021-03-06 12:45:43.842 | INFO     | __main__:parse_data:21 - processing page: 956 SKODA 76
2021-03-06 12:45:44.282 | INFO     | __main__:parse_data:21 - processing page: 957 AUDI 95
2021-03-06 12:45:44.806 | INFO     | __main__:parse_data:21 - processing page: 958 MITSUBISHI 79
2021-03-06 12:45:45.231 | INF

2021-03-06 12:46:29.316 | INFO     | __main__:parse_data:21 - processing page: 1035 NISSAN 132
2021-03-06 12:46:29.831 | INFO     | __main__:parse_data:21 - processing page: 1036 INFINITI 26
2021-03-06 12:46:30.259 | INFO     | __main__:parse_data:21 - processing page: 1037 AUDI 103
2021-03-06 12:46:30.944 | INFO     | __main__:parse_data:21 - processing page: 1038 MERCEDES 126
2021-03-06 12:46:31.511 | INFO     | __main__:parse_data:21 - processing page: 1039 TOYOTA 118
2021-03-06 12:46:32.087 | INFO     | __main__:parse_data:21 - processing page: 1040 BMW 135
2021-03-06 12:46:32.809 | INFO     | __main__:parse_data:21 - processing page: 1041 VOLKSWAGEN 133
2021-03-06 12:46:33.516 | INFO     | __main__:parse_data:21 - processing page: 1042 VOLVO 44
2021-03-06 12:46:33.981 | INFO     | __main__:parse_data:21 - processing page: 1043 NISSAN 133
2021-03-06 12:46:34.645 | INFO     | __main__:parse_data:21 - processing page: 1044 MITSUBISHI 86
2021-03-06 12:46:35.122 | INFO     | __main__:p

2021-03-06 12:47:17.296 | INFO     | __main__:parse_data:21 - processing page: 1119 VOLKSWAGEN 143
2021-03-06 12:47:17.938 | INFO     | __main__:parse_data:21 - processing page: 1120 AUDI 111
2021-03-06 12:47:18.626 | INFO     | __main__:parse_data:21 - processing page: 1121 SKODA 89
2021-03-06 12:47:19.137 | INFO     | __main__:parse_data:21 - processing page: 1122 MERCEDES 136
2021-03-06 12:47:19.777 | INFO     | __main__:parse_data:21 - processing page: 1123 NISSAN 143
2021-03-06 12:47:20.261 | INFO     | __main__:parse_data:21 - processing page: 1124 INFINITI 28
2021-03-06 12:47:20.550 | INFO     | __main__:parse_data:21 - processing page: 1125 BMW 146
2021-03-06 12:47:21.261 | INFO     | __main__:parse_data:21 - processing page: 1126 VOLKSWAGEN 144
2021-03-06 12:47:21.815 | INFO     | __main__:parse_data:21 - processing page: 1127 TOYOTA 128
2021-03-06 12:47:22.391 | INFO     | __main__:parse_data:21 - processing page: 1128 LEXUS 27
2021-03-06 12:47:22.835 | INFO     | __main__:pa

2021-03-06 12:48:05.310 | INFO     | __main__:parse_data:21 - processing page: 1203 MITSUBISHI 99
2021-03-06 12:48:05.750 | INFO     | __main__:parse_data:21 - processing page: 1204 MERCEDES 146
2021-03-06 12:48:06.362 | INFO     | __main__:parse_data:21 - processing page: 1205 VOLKSWAGEN 154
2021-03-06 12:48:07.048 | INFO     | __main__:parse_data:21 - processing page: 1206 HONDA 40
2021-03-06 12:48:07.424 | INFO     | __main__:parse_data:21 - processing page: 1207 TOYOTA 137
2021-03-06 12:48:08.041 | INFO     | __main__:parse_data:21 - processing page: 1208 VOLVO 51
2021-03-06 12:48:08.524 | INFO     | __main__:parse_data:21 - processing page: 1209 NISSAN 154
2021-03-06 12:48:09.166 | INFO     | __main__:parse_data:21 - processing page: 1210 SKODA 96
2021-03-06 12:48:09.549 | INFO     | __main__:parse_data:21 - processing page: 1211 BMW 157
2021-03-06 12:48:10.269 | INFO     | __main__:parse_data:21 - processing page: 1212 AUDI 120
2021-03-06 12:48:10.912 | INFO     | __main__:parse_

2021-03-06 12:48:54.767 | INFO     | __main__:parse_data:21 - processing page: 1288 NISSAN 164
2021-03-06 12:48:55.294 | INFO     | __main__:parse_data:21 - processing page: 1289 BMW 167
2021-03-06 12:48:56.004 | INFO     | __main__:parse_data:21 - processing page: 1290 MITSUBISHI 106
2021-03-06 12:48:56.525 | INFO     | __main__:parse_data:21 - processing page: 1291 AUDI 128
2021-03-06 12:48:57.098 | INFO     | __main__:parse_data:21 - processing page: 1292 VOLKSWAGEN 165
2021-03-06 12:48:57.652 | INFO     | __main__:parse_data:21 - processing page: 1293 LEXUS 31
2021-03-06 12:48:58.088 | INFO     | __main__:parse_data:21 - processing page: 1294 BMW 168
2021-03-06 12:48:58.665 | INFO     | __main__:parse_data:21 - processing page: 1295 NISSAN 165
2021-03-06 12:48:59.330 | INFO     | __main__:parse_data:21 - processing page: 1296 MERCEDES 157
2021-03-06 12:48:59.834 | INFO     | __main__:parse_data:21 - processing page: 1297 TOYOTA 147
2021-03-06 12:49:00.344 | INFO     | __main__:pars

2021-03-06 12:49:45.505 | INFO     | __main__:parse_data:21 - processing page: 1372 MITSUBISHI 113
2021-03-06 12:49:46.300 | INFO     | __main__:parse_data:21 - processing page: 1373 BMW 178
2021-03-06 12:49:47.022 | INFO     | __main__:parse_data:21 - processing page: 1374 SKODA 109
2021-03-06 12:49:47.623 | INFO     | __main__:parse_data:21 - processing page: 1375 NISSAN 175
2021-03-06 12:49:48.311 | INFO     | __main__:parse_data:21 - processing page: 1376 VOLVO 58
2021-03-06 12:49:48.966 | INFO     | __main__:parse_data:21 - processing page: 1377 TOYOTA 156
2021-03-06 12:49:49.643 | INFO     | __main__:parse_data:21 - processing page: 1378 LEXUS 33
2021-03-06 12:49:50.156 | INFO     | __main__:parse_data:21 - processing page: 1379 MERCEDES 167
2021-03-06 12:49:50.871 | INFO     | __main__:parse_data:21 - processing page: 1380 VOLKSWAGEN 176
2021-03-06 12:49:51.433 | INFO     | __main__:parse_data:21 - processing page: 1381 BMW 179
2021-03-06 12:49:52.160 | INFO     | __main__:parse

2021-03-06 12:50:36.029 | INFO     | __main__:parse_data:21 - processing page: 1456 TOYOTA 165
2021-03-06 12:50:36.740 | INFO     | __main__:parse_data:21 - processing page: 1457 VOLKSWAGEN 186
2021-03-06 12:50:37.457 | INFO     | __main__:parse_data:21 - processing page: 1458 BMW 189
2021-03-06 12:50:38.120 | INFO     | __main__:parse_data:21 - processing page: 1459 MITSUBISHI 120
2021-03-06 12:50:38.760 | INFO     | __main__:parse_data:21 - processing page: 1460 LEXUS 35
2021-03-06 12:50:39.275 | INFO     | __main__:parse_data:21 - processing page: 1461 NISSAN 186
2021-03-06 12:50:39.893 | INFO     | __main__:parse_data:21 - processing page: 1462 MERCEDES 177
2021-03-06 12:50:40.577 | INFO     | __main__:parse_data:21 - processing page: 1463 SKODA 116
2021-03-06 12:50:41.199 | INFO     | __main__:parse_data:21 - processing page: 1464 AUDI 145
2021-03-06 12:50:41.729 | INFO     | __main__:parse_data:21 - processing page: 1465 TOYOTA 166
2021-03-06 12:50:42.327 | INFO     | __main__:pa

KeyboardInterrupt: 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1849 entries, 0 to 1848
Data columns (total 34 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   bodyType              1849 non-null   object 
 1   brand                 1849 non-null   object 
 2   car_url               1849 non-null   object 
 3   color                 1849 non-null   object 
 4   complectation_dict    1849 non-null   object 
 5   description           1847 non-null   object 
 6   engineDisplacement    1849 non-null   object 
 7   enginePower           1849 non-null   object 
 8   location              1849 non-null   object 
 9   equipment_dict        1849 non-null   object 
 10  fuelType              1849 non-null   object 
 11  image                 1849 non-null   object 
 12  mileage               1849 non-null   int64  
 13  modelDate             1849 non-null   int64  
 14  model_info            1849 non-null   object 
 15  model_name           