# In [28]:  (notebook cell prompt, kept as a comment so the file parses as Python)
# Python Web Scraper

# Uses Selenium to scrape image links from a website by searching for each item by its SKU number, read from an Excel file.
# Extracts the image links for each item, organizes them into one row per item, and saves the result to a new spreadsheet.
# This simplifies collecting and organizing image links, saving time and improving efficiency.
#---------------------------------------------------------------------------------------------------------------


# import libraries --- using selenium and pandas
import os
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time

# page that the browser loads as soon as it starts
url = "https://www.xxxxxxxxxxxxx.com/"

# location of the chromedriver executable handed to the Selenium service
chromedriver_path = "/usr/local/bin/chromedriver"

# build the Chrome options; the arguments are added in the order listed
options = webdriver.ChromeOptions()
for chrome_flag in (
    "start-maximized",
    "disable-infobars",
    "--disable-extensions",
    "--disable-dev-shm-usage",
    "--no-sandbox",
):
    options.add_argument(chrome_flag)

# launch the browser through the chromedriver service and open the site
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service, options=options)
driver.get(url)

# read the item numbers from the "list" excel file in the Downloads folder
list_path = os.path.expanduser("~/Downloads/list.xlsx")
try:
    df_list = pd.read_excel(list_path)
except Exception as e:
    print(f"Failed to read list.xlsx: {e}")
    # the browser is already running at this point, so close it before bailing
    driver.quit()
    # SystemExit instead of exit(): exit() is an interactive helper injected by
    # the `site` module, and a non-zero status reports the failure to the caller
    raise SystemExit(1) from e

# fail early, with the browser closed, if the column the scraper iterates over
# is missing; otherwise the lookup below would raise KeyError and leave the
# browser open
if "Item Number" not in df_list.columns:
    print('Failed to read list.xlsx: no "Item Number" column found')
    driver.quit()
    raise SystemExit(1)

# collect one list of image links per item, in the same order as the
# "Item Number" column, so the results line up row-for-row with the input
image_links_list = []

for item_number in df_list["Item Number"]:
    # start from "no images": whatever goes wrong below, exactly one entry is
    # appended for this item, so image_links_list always has the same length
    # as the input column (pd.DataFrame raises ValueError on unequal lengths)
    image_links = []

    try:
        # find the search input field and enter the item number
        search_input = driver.find_element(By.ID, "J_search")
        search_input.clear()
        search_input.send_keys(item_number)
        search_input.submit()

        # wait for the search results to load
        time.sleep(2)

        # find the first search result and click on it
        product_link = driver.find_element(By.CSS_SELECTOR, ".product-item__img.square.banner__area.img--zoom-in")
        product_link.click()

        # wait for the product page to load
        time.sleep(3)

        # find the image elements and extract their src attribute values
        image_elements = driver.find_elements(By.CSS_SELECTOR, "#UpperBox img")
        image_links = [element.get_attribute("src") for element in image_elements]

        # step back one page in the browser history before the next search
        driver.back()

    except NoSuchElementException:
        # raised when either the search input or the first search result is
        # missing; the item keeps its empty list of links
        print(f"Item number {item_number} not found. Appending empty list...")

    except Exception as e:
        # any other failure is reported; the item keeps whatever links were
        # collected before the error (an empty list if none were)
        print(f"Error processing item number {item_number}: {e}")

    # always record exactly one entry per item (see note at the top of the loop)
    image_links_list.append(image_links)

# build the result table and save it to an Excel file in the Downloads folder
result_path = os.path.expanduser("~/Downloads/result.xlsx")
try:
    # create a pandas dataframe with the item numbers and image links;
    # this sits inside the try/finally so the browser is closed even if the
    # two columns cannot be combined (e.g. their lengths differ)
    df_result = pd.DataFrame({"Item Number": df_list["Item Number"], "Image Links": image_links_list})

    try:
        df_result.to_excel(result_path, index=False)
    except Exception as e:
        print(f"Failed to save result.xlsx: {e}")
        # SystemExit instead of exit(): exit() is an interactive helper from
        # the `site` module, and a non-zero status reports the failure
        raise SystemExit(1) from e
finally:
    # close the browser on every path: success, save failure, or any other error
    driver.quit()
