In [27]:
import selenium
from selenium.webdriver import Chrome
from webdriver_manager.chrome import ChromeDriverManager
#import requests
from selenium import webdriver
# from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.common import service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
#from time import sleep, time
import time
from bs4 import BeautifulSoup as bs
#import pandas as pd
from pathlib import Path
import os
import csv
import uuid

'''
This module contains the scraper class and its methods.
'''

class Scraper:
    def __init__(self, url, headless=False):
        options = Options()
        if headless:
            options.add_argument('--headless')
            self.driver = Chrome(ChromeDriverManager().install(), options=options)
        else:
            self.driver = Chrome(ChromeDriverManager().install())
        self.url = url
        self.driver.get(self.url)
   
    def open_url(self, url):
        self.driver.get(url)
    
    def search(self, name=str, search_term=str):
        search_bar = self.driver.find_element(By.NAME, name)
        search_bar.click()
        search_bar.send_keys(search_term)
        search_bar.send_keys(u'\ue007')

    def click_button(self, XPATH):
        button = self.driver.find_element(By.XPATH, XPATH)
        button.click()

    def scroll_up_top(self):
        self.driver.execute_script("window.scrollTo(0,document.body.scrollTop)")

    def scroll_down_bottom(self):
        self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")

    def accept_cookies(self, frame_id, XPATH):
        #time.sleep(2)
        try:
            if frame_id!=None:
                self.switch_frame(frame_id)
            else: pass
            self.wait_for(XPATH)
            self.click_button(XPATH)
        except NoSuchElementException:
            pass

    def wait_for(self, XPATH, delay = 10):
        try:    
            WebDriverWait(self.driver, delay).until(EC.presence_of_element_located((By.XPATH, XPATH)))
        except TimeoutException:
            print('Loading took too long. Timeout occurred.')

    def switch_frame(self, frame_id):
        self.wait_for(frame_id)
        self.driver.switchTo().frame(frame_id)

    def quit(self):
        self.driver.quit()

    def next_page(self, url):
        self.open_url(url)

    def see_more(self, XPATH):
        self.scroll_down_bottom()
        self.click_button(XPATH)
        
    def explore_product_ideas(self, XPATH1, XPATH2):
        self.click_button(XPATH1)
        self.click_button(XPATH2)
    
    def infinite_scroll(self):
        last_height = self.driver.execute_script("return document.body.scrollHeight")
        while True:
            self.scroll_down_bottom()
            time.sleep(3)   
            new_height = self.driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

    def get_list_links(self, XPATH_container, XPATH_search_results, delay=10):
        try: 
            self.scroll_down_bottom()
            self.see_more('//*[@id="search-more"]/a')
            self.infinite_scroll()
            container = self.driver.find_element(By.XPATH, XPATH_container)
            search_list = container.find_elements(By.XPATH, XPATH_search_results)

            self.link_list = []

            for result in search_list:
                a_tag = result.find_element(By.TAG_NAME, 'a')
                link = a_tag.get_attribute('href')
                self.link_list.append(link)
            
            print(self.link_list)
            print(len(self.link_list))

        except NoSuchElementException:
            print('No results found.')
            pass

    def create_id(self):
        self.link_id = []
        self.link_uuid = []
        for i in range(len(self.link_list)):
            ID = self.link_list[i][-12:]
            UUID = uuid.uuid4()
            self.link_id.append(ID)
            self.link_uuid.append(UUID)
    
    def collate_info(self):
        self.info = {"id": self.link_id,
                "uuid": self.link_uuid,
                "URL": self.link_list}
        print(self.info)
        return self.info




if __name__ == '__main__': 

    def web_scraper():
        scraper = Scraper('https://ideas.lego.com')
        try:
            scraper.accept_cookies(frame_id=None, XPATH= '//button[@aria-label="Reject cookies"]')
            #scraper.explore_product_ideas('//a[@class="sub-menu"][1]', '//div[@class="header-link"][1]')
            scraper.search(name='query', search_term='piano')
            scraper.get_list_links('//*[@id="search_results"]', './div')
            time.sleep(2)
            scraper.create_id()
            scraper.collate_info()
            #scraper.create_uuid()
            # scraper.scroll_down_bottom()
            # time.sleep(2)
            # scraper.see_more('//*[@id="search-more"]/a')
            # #scraper.scroll_up_top()
            # time.sleep(4)
        finally: scraper.quit()


    web_scraper()





Current google-chrome version is 100.0.4896
Get LATEST chromedriver version for 100.0.4896 google-chrome
Driver [/Users/ESheldon/.wdm/drivers/chromedriver/mac64/100.0.4896.60/chromedriver] found in cache
  self.driver = Chrome(ChromeDriverManager().install())


['https://ideas.lego.com/projects/24f16de1-496f-4b1c-a94b-0ffa31b26b7d', 'https://ideas.lego.com/projects/cd090350-67ba-4035-b15d-b42bd19d1f8e', 'https://ideas.lego.com/projects/41dbbf6b-b4da-4411-8ebe-5a82833d6f6d', 'https://ideas.lego.com/projects/edfb549b-09dc-4553-b6e2-432557e8cd46', 'https://ideas.lego.com/projects/eacb8034-7bda-425c-b822-df8fe35718a9', 'https://ideas.lego.com/projects/df545ace-2d23-4d70-a9c9-e22e15fa681e', 'https://ideas.lego.com/projects/bc1bd414-b55e-45d9-a3df-88ad24af9997', 'https://ideas.lego.com/projects/fd979f9c-e246-42f6-8f41-a60933654ddd', 'https://ideas.lego.com/projects/c55cb25c-e9b0-4630-96c9-f9a9ba3d1620', 'https://ideas.lego.com/projects/36dd7532-2174-46e5-8a72-7ea26a30b937', 'https://ideas.lego.com/projects/2c815d02-e32d-4b4d-8c34-a5970e587b86', 'https://ideas.lego.com/projects/5c2712c4-11b0-4e0c-9182-f43cd702eee2', 'https://ideas.lego.com/projects/9193e8c1-0546-4e73-99a2-2f89c5e2ddd3', 'https://ideas.lego.com/projects/40a5de26-68e6-4767-b521-330768