In [5]:
from config import *
from db import *

In [6]:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import pymongo
import os

from time import sleep

In [7]:
# Connect to the MongoDB deployment described by the `mongo_*` settings and
# select the database named "test".
# NOTE(review): the credentials are interpolated into the URI verbatim —
# confirm they are already percent-escaped if they may contain reserved
# characters such as '@', ':' or '/'.
client = pymongo.MongoClient(
    f"mongodb+srv://{mongo_user}:{mongo_password}@{mongo_url}"
)
db = client["test"]

In [3]:
# Directory containing this file; used as the base directory for the
# geckodriver executable. `__file__` is not defined when the code runs as
# notebook cells, which raises NameError — fall back to the working directory.
# Only NameError is caught so that unrelated failures are not silently hidden.
try:
    current_path = os.path.dirname(os.path.abspath(__file__))
except NameError:
    current_path = '.'

In [4]:
def init_driver(gecko_driver, load_images=True, user_agent='', is_headless=False):
    """Create a Firefox WebDriver with a profile tuned for scraping.

    The profile disables the Flash plugin, sets the media volume scale to
    "0.0" and turns off web notifications.

    Args:
        gecko_driver: Path of the geckodriver executable. A relative path is
            resolved against the module-level ``current_path``; an absolute
            path is used as given.
        load_images: When False, ``permissions.default.image`` is set to 2 so
            that pages are loaded without images.
        user_agent: Value for ``general.useragent.override``. An empty string
            or None leaves the browser's default user agent untouched.
        is_headless: Assigned to ``Options.headless``.

    Returns:
        The started ``webdriver.Firefox`` instance.
    """
    profile = webdriver.FirefoxProfile()

    profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so', False)
    profile.set_preference('media.volume_scale', "0.0")
    profile.set_preference("dom.webnotifications.enabled", False)

    if not load_images:
        profile.set_preference('permissions.default.image', 2)
    # Truthiness check so that None (not just '') skips the override.
    if user_agent:
        profile.set_preference("general.useragent.override", user_agent)

    options = Options()
    options.headless = is_headless

    # os.path.join keeps an absolute gecko_driver path intact instead of
    # gluing it behind current_path.
    return webdriver.Firefox(
        executable_path=os.path.join(current_path, gecko_driver),
        firefox_profile=profile,
        options=options,
    )

In [5]:
def get_url(page_url, driver):
    """Navigate ``driver`` to ``page_url`` and dismiss a popup if one shows up.

    After the navigation the function sleeps for ``page_load`` seconds (a
    module-level setting), then looks for elements matching the CSS selector
    ``.acsFocusFirst`` and clicks the first one when any are found.
    """
    driver.get(page_url)
    sleep(page_load)

    popup_buttons = driver.find_elements_by_css_selector('.acsFocusFirst')
    if not popup_buttons:
        return
    popup_buttons[0].click()

def _first_match(element, selector):
    """Return the first descendant of ``element`` matching ``selector``, or None."""
    matches = element.find_elements_by_css_selector(selector)
    return matches[0] if matches else None


def _parse_price(price_text):
    """Convert scraped price text (e.g. '$1,299.00', 'Was $24.97') to a number.

    Newlines, '$', thousands separators and the 'Was ' prefix are removed.
    For a range such as '19.97 to 29.97' the part before 'to' is used.
    Returns 0 when nothing is left after cleaning; raises ValueError when the
    remaining text is not a valid number.
    """
    cleaned = price_text
    for noise in ('\n', '$', ',', 'Was '):
        cleaned = cleaned.replace(noise, '')
    if 'to' in cleaned:
        # Split on the range separator, not on '.', so the cents are kept.
        cleaned = cleaned.split('to')[0]
    cleaned = cleaned.strip()
    return float(cleaned) if cleaned else 0


def get_products(driver):
    """Collect the product tiles on the page currently loaded in ``driver``.

    Every element matching ``div.shelf-thumbs .standard-thumb`` is inspected
    for a title, a link, a current price and an optional previous ("was")
    price. Tiles without a title, a URL or a non-zero current price are
    skipped.

    Args:
        driver: Object exposing ``find_elements_by_css_selector`` (a Selenium
            WebDriver in this notebook).

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``current_p``,
        ``old_p``, ``discount_n`` (rounded absolute discount) and
        ``discount_p`` (rounded discount in percent). Both discount fields are
        0 unless the old price is greater than the current price.

    Raises:
        ValueError: If a price element contains text that cannot be parsed as
            a number.
    """
    title_selector = 'div.product-details-container .details .title .thumb-header'
    link_selector = 'a.product-link'
    current_price_selector = 'div.product-details-container .all-price-sections .price-current'
    old_price_selector = 'div.product-details-container .all-price-sections .pricing-spacer .price-was'

    products_info = []

    for product in driver.find_elements_by_css_selector('div.shelf-thumbs .standard-thumb'):
        # Each selector is queried once; the matched element is reused.
        title_element = _first_match(product, title_selector)
        product_title = title_element.text if title_element is not None else ''

        link_element = _first_match(product, link_selector)
        # get_attribute returns None when the attribute is missing.
        product_url = (link_element.get_attribute('href') or '') if link_element is not None else ''

        current_element = _first_match(product, current_price_selector)
        current_price = _parse_price(current_element.text) if current_element is not None else 0

        old_element = _first_match(product, old_price_selector)
        old_price = _parse_price(old_element.text) if old_element is not None else 0

        discount_number = 0
        discount_percent = 0

        if current_price != 0 and old_price != 0 and old_price > current_price:
            discount_number = round(old_price - current_price)
            discount_percent = round(100 - (current_price / old_price) * 100)

        # A usable record needs a price, a link and a title.
        if current_price == 0 or not product_url or not product_title:
            continue

        products_info.append({
            'title': product_title,
            'url': product_url,
            'current_p': current_price,
            'old_p': old_price,
            'discount_n': discount_number,
            'discount_p': discount_percent
        })

    return products_info


In [6]:
# Start the Firefox session that the scraping cells below operate on.
# `gecko_driver` and `user_agent` are not defined in this notebook's cells —
# presumably they come from the star-imported `config` module.
driver = init_driver(gecko_driver=gecko_driver, user_agent=user_agent)

In [7]:
# Category paths to scrape; each one is appended to `walmart_base_url`.
# Example listing: https://www.walmart.ca/en/top-deals-in-electronics/N-33983
categories = [
    'top-deals-in-electronics/N-33983',
]

In [9]:
# Scrape result pages 1-3 of every category.
# `products` holds only the most recently scraped page, so the results of all
# pages are additionally accumulated in `all_products`; it is reset here so
# re-running the cell does not duplicate entries.
all_products = []

for category in categories:
    category_url = f"{walmart_base_url}/{category}"

    for page in range(1, 4):
        page_url = f"{category_url}/page-{page}"
        get_url(page_url, driver)

        products = get_products(driver)
        all_products.extend(products)

In [47]:
# Ad-hoc navigation to a single, hard-coded listing page (page 2 of the
# electronics deals), independent of the category loop.
get_url(
    page_url='https://www.walmart.ca/en/top-deals-in-electronics/N-33983/page-2',
    driver=driver,
)

In [15]:
# Shut down the browser and end the WebDriver session held in `driver`.
driver.quit()