In [None]:
# Notebook-wide settings used by the cells below.

# Maximum time, in seconds, that each explicit Selenium wait may block.
WAIT_TIME = 10

# Values Chrome understands for its content-setting preferences.
ALLOW = 1
BLOCK = 2

# When True the browser is started with the --headless argument.
HEADLESS = False

In [None]:
import getpass
import os

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Build the Chrome options that are passed to webdriver.Chrome() later on.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-infobars")
if HEADLESS:
    chrome_options.add_argument("--headless")
chrome_options.add_argument("--disable-gpu")

# Set every listed content setting to BLOCK (the ALLOW / BLOCK constants are
# declared with the other global settings) instead of repeating the raw value.
chrome_options.add_experimental_option("prefs", {
    "profile.default_content_setting_values.media_stream_mic": BLOCK,
    "profile.default_content_setting_values.media_stream_camera": BLOCK,
    "profile.default_content_setting_values.geolocation": BLOCK,
    "profile.default_content_setting_values.notifications": BLOCK,
})

import pandas as pd

In [None]:
# default input & output file names
# input_file: optional CSV with a 'Variant SKU' column; leave empty to use the
# built-in product list.
input_file = ''
output_file = 'black_lion_output.csv'
url = 'https://eriksonaudioonline.com'

# Login credentials are intentionally NOT stored in the notebook.
# Set ERIKSON_USERNAME / ERIKSON_PASSWORD in the environment before starting
# the kernel; when a variable is missing or empty the value is asked for
# interactively (the password prompt does not echo what is typed).
# NOTE(review): any password that was previously committed in this cell
# should be treated as exposed and rotated.
username = os.environ.get('ERIKSON_USERNAME') or input('Erikson Audio username: ')
password = os.environ.get('ERIKSON_PASSWORD') or getpass.getpass('Erikson Audio password: ')

In [None]:
# Output frame; the scraping loop appends one row per product it finds.
df = pd.DataFrame(columns=['Variant SKU', 'Variant Price'])

if input_file:
    # Read the SKU column as text so values such as '00123' keep their leading
    # zeros instead of being parsed as numbers.
    product_df = pd.read_csv(input_file, dtype={'Variant SKU': str})
    # Rows without a SKU are read as NaN and would otherwise be typed into the
    # search box as 'nan'; drop them together with whitespace-only values.
    sku_values = product_df['Variant SKU'].dropna().str.strip()
    product_list = sku_values[sku_values != ''].tolist()
else:
    # No input file configured: fall back to this hand-maintained list.
    product_list = [
        'PBR-TRS-3',
    ]

In [None]:
# Announce how many products will be scraped, then list them one per line,
# each indented with a tab.
product_count = len(product_list)
print(f'Scraping data for {product_count} products:')
for sku in product_list:
    indented_sku = f'\t{sku}'
    print(indented_sku)

In [None]:
# Start a Chrome session configured with the options built earlier.
service = Service()
driver = webdriver.Chrome(options=chrome_options, service=service)

# Load the site's landing page.
print(f'Opening {url} in Chrome browser...')
driver.get(url)

# Bookkeeping for SKUs the search cannot locate: a counter plus the SKUs themselves.
products_not_found, products_not_found_list = 0, []

In [None]:
# click login link
# Keep the element itself in `login_link` (click() returns None, so chaining it
# onto the wait would bind None), mirroring how the input cells hold their elements.
login_link = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[id="loginLink"]')))
login_link.click()

In [None]:
# Wait for the username field to become clickable, empty it, then type the
# configured username.
username_locator = (By.CSS_SELECTOR, 'input[id="Username"]')
username_ready = EC.element_to_be_clickable(username_locator)
username_input = WebDriverWait(driver, WAIT_TIME).until(username_ready)
username_input.clear()
username_input.send_keys(username)

In [None]:
# Wait for the password field to become clickable, empty it, then type the
# configured password.
password_locator = (By.CSS_SELECTOR, 'input[id="Password"]')
password_ready = EC.element_to_be_clickable(password_locator)
password_input = WebDriverWait(driver, WAIT_TIME).until(password_ready)
password_input.clear()
password_input.send_keys(password)
    

In [None]:
# click login button
# Hold the element in `login_button` and click it separately; click() returns
# None, so chaining it onto the wait would leave the variable bound to None.
login_button = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[value="Login"]')))
login_button.click()

In [None]:
def _wait_clickable(by, selector):
    """Wait up to WAIT_TIME seconds for the element to be clickable and return it.

    Raises selenium's TimeoutException (a WebDriverException) when the element
    does not become clickable in time.
    """
    return WebDriverWait(driver, WAIT_TIME).until(
        EC.element_to_be_clickable((by, selector)))


def _element_text(element):
    """Return the element's text with surrounding whitespace removed."""
    return element.text.strip()


def _price_after_colon(element):
    """Return the segment following the first ':' of the element's text,
    stripped of whitespace and '$' signs.

    Raises IndexError when the text contains no ':'.
    """
    return element.text.split(':')[1].strip().replace('$', '')


def _read_field(css_selector, default, extract=_element_text, missing_message=None):
    """Read one value from the current page, falling back to `default`.

    css_selector    -- CSS selector of the element to wait for.
    default         -- value returned when the element cannot be read.
    extract         -- callable turning the located element into the value.
    missing_message -- optional text printed when the fallback is used.

    Only WebDriver errors (timeouts, stale elements, ...) and the IndexError
    raised by the price parser are treated as "field missing"; anything else,
    including KeyboardInterrupt, propagates to the caller.
    """
    try:
        return extract(_wait_clickable(By.CSS_SELECTOR, css_selector))
    except (WebDriverException, IndexError):
        if missing_message:
            print(missing_message)
        return default


# Rows are collected in a list and appended to `df` in a single concat; the
# `finally` keeps whatever was scraped even if the loop stops early.
scraped_rows = []
try:
    for product_model in product_list:
        # enter product model in search box
        search_input = _wait_clickable(By.CSS_SELECTOR, 'input[id="searchBox"]')
        search_input.clear()
        search_input.send_keys(product_model)
        search_input.send_keys(Keys.RETURN)

        # click product link (first link whose href contains the model)
        try:
            _wait_clickable(By.XPATH, f"//a[contains(@href, '{product_model}')]").click()
        except WebDriverException:
            print(f'Product {product_model} not found')
            products_not_found += 1
            products_not_found_list.append(product_model)
            continue

        product_vendor = _read_field(
            "div[class='catalogTileBrand']", 'Not found',
            missing_message='Product vendor not found')

        product_title = _read_field(
            "div[id='itemTitle']", '',
            missing_message='Product title not found')

        dealer_price = _read_field(
            "div[id='dealerPrice']", 0,
            extract=lambda element: element.text.strip().replace('$', ''),
            missing_message='Dealer price not found')

        # The MAP price is only looked up when the list price element could not
        # be read. That fallback is guarded as well, so a product showing
        # neither price no longer aborts the whole run.
        list_price = _read_field(
            "div[class='productDetailListPrice']", None,
            extract=_price_after_colon)
        if list_price is None:
            list_price = ''
            map_price = _read_field(
                "div[class='productDetailMapPrice']", '',
                extract=_price_after_colon,
                missing_message='List/MAP price not found')
        else:
            map_price = ''

        # NOTE(review): the class name spelling 'prodcutDescriptionLong' is kept
        # exactly as it was; presumably it mirrors the site's markup - confirm.
        product_description = _read_field(
            "div[class='prodcutDescriptionLong']", '',
            extract=lambda element: element.text,
            missing_message='Product description not found')

        product_bullets = _read_field(
            "ul[id='itemBullets']", '',
            extract=lambda element: element.text)

        img_url = _read_field(
            "img[id='mainImage']", 'Not found',
            extract=lambda element: element.get_attribute('src'),
            missing_message='Image url not found')

        print(f'vendor: {product_vendor}')
        print(f'title: {product_title}')
        print(f'model: {product_model}')
        print(f'dealer price: {dealer_price}')
        if map_price:
            print(f'map price: {map_price}')
        else:
            print(f'list price: {list_price}')
        print(f'description: {product_description + product_bullets}')
        print(f'img url: {img_url}')
        print('------\n')

        # NOTE(review): 'Variant Price' is filled from map_price only, so it is
        # empty whenever a list price was found instead - confirm this is intended.
        scraped_rows.append({
            'Variant SKU': product_model,
            'Vendor': product_vendor,
            'Title': f'{product_vendor} {product_title}',
            'Body (HTML)': product_description,
            'Product Image': img_url,
            'Variant Price': map_price,
            'Variant Inventory Tracker': 'shopify',
            'Variant Inventory Policy': 'continue',
            'Cost per item': dealer_price,
        })
finally:
    # add the collected rows to the dataframe
    if scraped_rows:
        df = pd.concat([df, pd.DataFrame(scraped_rows)], axis=0, ignore_index=True)



In [None]:
# Save the results and print a summary. The browser is shut down in `finally`
# so the Chrome session does not stay open when saving or printing fails.
try:
    # save dataframe to CSV file
    df.to_csv(output_file, index=False)
    print('Results saved to CSV file\n')

    print('Results:')
    print(df)

    print(f'Number of products found: {len(df)}\n')
    print(f'Number of products not found: {products_not_found}\n')
    if products_not_found > 0:
        print('Products not found:')
        for product in products_not_found_list:
            print(f'\t{product}')
finally:
    # close browser: quit() closes every window and ends the WebDriver session,
    # so a separate close() call beforehand is not needed.
    driver.quit()