# In this notebook I am going to collect (scrape) data about tires from emex.ru

### First of all, we need to import the basic libraries for working with dataframes and for reading and writing CSV files

In [1]:
import pandas as pd
import numpy as np
import csv

### Then we need to install the libraries for web scraping
- Selenium: opens the URL in a real browser, so the page's JavaScript runs, and gives us the resulting HTML as a string
- BeautifulSoup: parses that HTML string so we can pick out individual tags and items
- WebDriver-manager: downloads and manages the driver Selenium needs to control the browser (Chrome in this case)

In [2]:
pip install beautifulsoup4


Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install selenium

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install webdriver-manager


Note: you may need to restart the kernel to use updated packages.


In [5]:

import re
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager


### Now we are ready to work with the web page
1) Select the URL
<br>2) Open that URL in the browser
<br>3) Click the 'Ещё' ("More") button five times (five is enough in this case)
<br>4) Get the page's HTML code as a string
<br>5) Parse the HTML into tags with BeautifulSoup


In [6]:
# Address of the listing page to scrape. The query string is percent-encoded
# (%5B / %5D are "[" and "]"), i.e. it reads:
#   CAT_ID=17, s[0].i=Price, s[0].d=0, s[1].i=Rating, s[1].d=2
# NOTE(review): the s[...] parameters look like sort settings - confirm.
url = (
    'https://emex.ru/Accessories/Accessories'
    '?CAT_ID=17'
    '&s%5B0%5D.i=Price&s%5B0%5D.d=0'
    '&s%5B1%5D.i=Rating&s%5B1%5D.d=2'
)



In [7]:
# Start a Chrome session and open the listing page.
# ChromeDriverManager().install() returns the path of a chromedriver binary
# (downloaded, or taken from its local cache). Selenium 4 expects that path to
# be wrapped in a Service object; recent releases no longer accept it as the
# first positional argument of webdriver.Chrome.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)
driver.get(url)

[WDM] - Current google-chrome version is 83.0.4103
[WDM] - Get LATEST driver version for 83.0.4103
[WDM] - Driver [/Users/sabina/.wdm/drivers/chromedriver/mac64/83.0.4103.39/chromedriver] found in cache


 


In [8]:
# Click the 'Ещё' ("more") button several times so that additional batches of
# tires are added to the page before its HTML is read.
MAX_LOAD_MORE_CLICKS = 5   # five extra batches is enough for this notebook
PAUSE_BEFORE_CLICK = 2     # seconds to wait before clicking
PAUSE_AFTER_CLICK = 5      # seconds to wait after clicking (presumably for the new items to load)

for _ in range(MAX_LOAD_MORE_CLICKS):
    try:
        # find_element(By.XPATH, ...) exists in both Selenium 3 and 4, whereas
        # find_element_by_xpath was removed in Selenium 4.3.
        load_more_button = driver.find_element(By.XPATH, "//div[contains(text(), 'Ещё')]")
    except NoSuchElementException:
        # No button on the page, so retrying immediately cannot help; stop here.
        print("'Ещё' button not found - stopping.")
        break

    try:
        time.sleep(PAUSE_BEFORE_CLICK)
        load_more_button.click()
        time.sleep(PAUSE_AFTER_CLICK)
    except WebDriverException as error:
        # Best effort: report a failed click and try again on the next pass.
        # Only Selenium errors are caught, so programming mistakes still surface.
        print(error)


In [9]:
# Take the HTML of the page as the browser currently has it and parse it with
# Python's built-in 'html.parser' backend.
# NOTE(review): the browser session is not closed in the cells shown here;
# call driver.quit() once it is no longer needed.
page = driver.page_source
page_soup = BeautifulSoup(markup=page, features='html.parser')

In [10]:
# Sanity check: show the <title> tag of the parsed page.
title = page_soup.find('title')
title

<title>Шины для легковых автомобилей от ведущих производителей: Michelin, Pirelli, Dunlop, Good year</title>

### Now we need to get the following info about each tire:
1) Title
<br>2) Link to image
<br>3) Width
<br>4) Height
<br>5) Diameter
<br>6) Cost
<br><br>We will store this info as a list of dictionaries, one dictionary per tire.

In [11]:
# Positions of the size values among the <span> tags of a 'desc' block.
WIDTH_SPAN, HEIGHT_SPAN, DIAMETER_SPAN = 0, 2, 5


def _parse_tire_card(item):
    """Convert one product card (a ``<div class="item">`` tag) into a record.

    Parameters
    ----------
    item : bs4.element.Tag
        The card element taken from the parsed page.

    Returns
    -------
    dict
        Keys ``title``, ``link``, ``Width``, ``Height``, ``Diameter``, ``Cost``.
        ``link`` is a list with the ``src`` of every image in the card. Any
        other field that cannot be found in the card is ``None``; it is never
        carried over from a previously parsed card.
    """
    # Images without a src attribute are skipped instead of raising KeyError.
    link = [img['src'] for img in item.find_all('img') if img.has_attr('src')]

    name_tag = item.find('span', 'name')
    title = name_tag.get_text() if name_tag is not None else None

    # Start from None for every card so a card without a usable 'desc' block
    # cannot silently inherit the sizes of the card parsed before it.
    width = height = diameter = None
    for desc in item.find_all('div', 'desc'):
        spans = desc.find_all('span')
        if len(spans) <= DIAMETER_SPAN:
            continue  # not enough <span> tags to hold all three sizes
        # If a card has several 'desc' blocks the last one wins, as before.
        width = spans[WIDTH_SPAN].get_text()
        height = spans[HEIGHT_SPAN].get_text()
        diameter = spans[DIAMETER_SPAN].get_text()

    price_tag = item.find('a', 'styledbutton')
    cost = None
    if price_tag is not None:
        # Remove non-breaking spaces and the line breaks/indentation that
        # surround the price text in the markup.
        cost = (
            price_tag.get_text()
            .replace('\xa0', '')
            .replace('\n            ', '')
            .replace('\n', '')
        )

    return {
        'title': title,
        'link': link,
        'Width': width,
        'Height': height,
        'Diameter': diameter,
        'Cost': cost
    }


content = page_soup.find_all('div', 'item')
rows = [_parse_tire_card(item) for item in content]

# Show a small sample rather than the whole list.
rows[:5]
    


[{'title': 'R15 185/60 Cordiant SPORT-2 PS-501 (лето)',
  'link': ['https://www.emex.ru/Catalog/Catalog/GetImage?id=10164250&h=150'],
  'Width': '185',
  'Height': '60',
  'Diameter': '15',
  'Cost': 'Цены от 2583 ₽'},
 {'title': '195/65R15 91T AE01',
  'link': ['https://www.emex.ru/Catalog/Catalog/GetImage?id=5092633&h=150'],
  'Width': '195',
  'Height': '65',
  'Diameter': '15',
  'Cost': 'Цены от 4431 ₽'},
 {'title': 'Michelin 91V Energy Saver',
  'link': ['https://www.emex.ru/Catalog/Catalog/GetImage?id=5093385&h=150'],
  'Width': '205',
  'Height': '55',
  'Diameter': '16',
  'Cost': 'Цены от 4307 ₽'},
 {'title': 'R13 175/70 Amtel Planet DC В-105 (лето)',
  'link': ['https://www.emex.ru/Catalog/Catalog/GetImage?id=5098448&h=150'],
  'Width': '175',
  'Height': '70',
  'Diameter': '13',
  'Cost': 'Цены от 2808 ₽'},
 {'title': '185/60R14 82T AE01',
  'link': ['https://www.emex.ru/Catalog/Catalog/GetImage?id=5092614&h=150'],
  'Width': '185',
  'Height': '60',
  'Diameter': '14',
  

### And the last step: save our list of dictionaries to a CSV file, row by row

In [12]:
# Append the scraped rows to tires.csv.
# * newline='' is what the csv module asks for, so it can control line endings
#   itself (otherwise extra blank lines can appear on Windows).
# * encoding='utf-8' because the titles and prices contain Cyrillic text and
#   the ruble sign, which the platform default encoding may not support.
# * The header is written only when the file is still empty, so re-running
#   this cell does not put a second header row in the middle of the data.
# Note: the file is opened in append mode, so running the cell again adds the
# same rows again; delete tires.csv first if you want a clean file.
fieldnames = ['title', 'link', 'Width', 'Height', 'Diameter', 'Cost']
with open('tires.csv', 'a', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if csvfile.tell() == 0:  # append mode starts at end of file; 0 means empty
        writer.writeheader()
    writer.writerows(rows)
        

### In the future we can automate this process by writing functions, however first of all we nee