# Scraping [IKEA](https://www.ikea.com/fr/fr/campaigns/les-indispensables-a-petits-prix-pub0d9cd6c0)

## Initialisation

In [None]:
import time
import random
import re
from pprint import pprint

from selenium import webdriver 
from selenium.webdriver.common.keys import Keys
from pymongo import MongoClient

In [None]:
# Start a Chrome session driven by the chromedriver binary at this local path.
browser = webdriver.Chrome('/usr/local/bin/chromedriver')

# Page opened first; parse_urls() later clicks the links found on the current page.
url_to_scrap = "https://www.ikea.com/fr/fr/campaigns/les-indispensables-a-petits-prix-pub0d9cd6c0?icid=fr|20210315|menu|indispensables"

browser.get(url_to_scrap)

# Click to accept cookies
# Fixed 2-second pause before looking up the consent button
# (presumably to let the cookie banner render — TODO confirm it is always enough).
time.sleep(2)
accept_cookie_button = browser.find_element_by_css_selector('#onetrust-accept-btn-handler')
accept_cookie_button.click()

## Parcourir un site

In [None]:
def random_sleep():
    """Pause for a random duration of at most 3 seconds."""
    max_pause_seconds = 3
    # random.random() lies in [0, 1), so the pause lies in [0, 3) seconds.
    time.sleep(max_pause_seconds * random.random())

In [None]:
def get_products(browser, title):
    """Collect one record per product card found on the current page.

    Args:
        browser: Selenium WebDriver (or any object exposing
            ``find_elements_by_css_selector``) showing a product listing.
        title: Label of the listing page; copied into every record.

    Returns:
        A list of dicts with the keys ``title``, ``brand``, ``price``,
        ``name``, ``rating`` and ``comments``. Scraped values are strings;
        a value is ``None`` when the matching element (or the expected
        pattern inside its text) is absent from the card, so one incomplete
        card no longer aborts the whole page.
    """
    # Raw strings: '\d' and '\s' are invalid escapes in a regular literal.
    # The '+' inside each character class is a literal plus sign, kept so the
    # patterns match exactly what they matched before.
    price_pattern = re.compile(r'([\d+,]+)')
    rating_pattern = re.compile(r'Avis:\s([\d+.]+)')

    def first_element(card, class_name):
        # The plural lookup returns a list, so an absent element is simply
        # an empty result rather than an exception.
        matches = card.find_elements_by_class_name(class_name)
        return matches[0] if matches else None

    def text_of(card, class_name):
        element = first_element(card, class_name)
        return element.text if element is not None else None

    def first_match(pattern, value):
        if value is None:
            return None
        found = pattern.findall(value)
        return found[0] if found else None

    data_elements = []
    for elt in browser.find_elements_by_css_selector('.range-revamp-product-compact'):
        rating_element = first_element(elt, 'range-revamp-average-rating')
        rating_label = (
            rating_element.get_attribute('aria-label')
            if rating_element is not None
            else None
        )
        comments = text_of(elt, 'range-revamp-average-rating__reviews')

        data_elements.append({
            'title': title,
            'brand': text_of(elt, 'range-revamp-header-section__title--small'),
            # Keep only the numeric part of the displayed price.
            'price': first_match(price_pattern, text_of(elt, 'range-revamp-price')),
            'name': text_of(elt, 'range-revamp-header-section__description-text'),
            # The score is read from the aria-label, after the "Avis: " prefix.
            'rating': first_match(rating_pattern, rating_label),
            # The review count is displayed wrapped in parentheses.
            'comments': comments.strip('()') if comments is not None else None,
        })

    return data_elements

In [None]:
def parse_urls():
    """Click each category link on the current page and scrape its products.

    Works on the module-level ``browser``. Every link matched by the CSS
    selector is clicked in turn, the resulting page is scraped with
    ``get_products`` (using the link text as the page title), and the
    browser then navigates back before moving to the next link.

    Returns:
        A single list holding the product records of all visited pages,
        in visiting order.
    """
    print("Browsing...")

    selector_css = ".c130l69h .hqs7k6k > a"

    # An implicit wait tells WebDriver to poll the DOM for a certain amount
    # of time when trying to find any element (or elements) not immediately
    # available. It applies to the whole session, so it is configured once,
    # before the first lookup, rather than on every iteration.
    browser.implicitly_wait(10)

    link_count = len(browser.find_elements_by_css_selector(selector_css))
    data_list = []

    for index in range(link_count):
        random_sleep()
        # Look the links up again on every pass instead of reusing the first
        # result (presumably because element references do not survive the
        # click()/back() navigation — confirm against the Selenium docs).
        current_links = browser.find_elements_by_css_selector(selector_css)
        if index >= len(current_links):
            # Fewer links than on the first pass: keep what was collected
            # instead of failing with an IndexError.
            print(f"Link {index} not found after navigating back; stopping early.")
            break

        link = current_links[index]
        title = link.text
        link.click()
        # extend() appends in place; list concatenation copied the whole
        # accumulated list on every page.
        data_list.extend(get_products(browser, title))
        browser.back()

    print("Browsing done.")

    return data_list

# Execution: run the crawl and pretty-print the records it returned.
IKEA_data = parse_urls()
pprint(IKEA_data)

## MongoDB

In [None]:
# MongoDB connection
from pymongo.errors import PyMongoError

try:
    client = MongoClient('localhost', 27017)
    # MongoClient() does not contact the server when it is constructed, so a
    # cheap "ping" command is issued to make an unreachable server fail here
    # rather than on the first insert.
    client.admin.command('ping')
    db = client.IKEA_scraping
    products = db.products
except PyMongoError as error:
    # Only driver errors are reported; anything else (e.g. KeyboardInterrupt)
    # is no longer swallowed by a bare except.
    print("Ooups! la connexion n'a pas pu abouti", error)

In [None]:
# Send the scraped records to MongoDB in a single batch.
# insert_many() rejects an empty document list, so the call is skipped when
# nothing was scraped (the previous per-record loop was a no-op in that case).
if IKEA_data:
    products.insert_many(IKEA_data)

In [154]:
# Read back from MongoDB: display the first 12 stored products.
my_products_mongo = products.find({}).limit(12)
# Plain loop rather than a list comprehension: pprint() returns None, so the
# comprehension only built (and echoed) a throwaway list of None values.
for product in my_products_mongo:
    pprint(product)

# Best-rated product for each page
# NOTE(review): the inserted documents store the score under "rating" (as a
# string), not "data.ratings" — adjust the sort key before enabling this query.
#[product for product in products.find({},{ "_id": 0, "title": 0}).sort("data.ratings", -1)]

{'_id': ObjectId('60659bbbcaee7d1cc9dd62f1'),
 'brand': 'BÄSTIS',
 'comments': '164',
 'name': 'Brosse adhésive',
 'price': '1',
 'rating': '4.6',
 'title': 'Indispensables à moins de 5€'}
{'_id': ObjectId('60659bbbcaee7d1cc9dd62f2'),
 'brand': 'BLASKA',
 'comments': '60',
 'name': 'Panier à linge',
 'price': '3,99',
 'rating': '4.8',
 'title': 'Indispensables à moins de 5€'}
{'_id': ObjectId('60659bbbcaee7d1cc9dd62f3'),
 'brand': 'BLASKA',
 'comments': '184',
 'name': 'Pelle et balayette',
 'price': '1,50',
 'rating': '3.9',
 'title': 'Indispensables à moins de 5€'}
{'_id': ObjectId('60659bbbcaee7d1cc9dd62f4'),
 'brand': 'TORKIS',
 'comments': '59',
 'name': 'Panier avec 30 pinces à linge',
 'price': '2,99',
 'rating': '4.1',
 'title': 'Indispensables à moins de 5€'}
{'_id': ObjectId('60659bbbcaee7d1cc9dd62f5'),
 'brand': 'ÅBYÅN',
 'comments': '117',
 'name': 'Éponge de toilette',
 'price': '2,99',
 'rating': '4.2',
 'title': 'Indispensables à moins de 5€'}
{'_id': ObjectId('60659bbbc

[None, None, None, None, None, None, None, None, None, None, None, None]