In [12]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument("--disable-infobars")
# options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_experimental_option("prefs", { \
    "profile.default_content_setting_values.media_stream_mic": 2,     # 1:allow, 2:block 
    "profile.default_content_setting_values.media_stream_camera": 2,  # 1:allow, 2:block 
    "profile.default_content_setting_values.geolocation": 2,          # 1:allow, 2:block 
    "profile.default_content_setting_values.notifications": 2         # 1:allow, 2:block 
  })

import pandas as pd

In [13]:
# declare global variables

WAIT_TIME = 10 # seconds

# default input & output file names
input_file = 'rhythm_tech_products.csv'
output_file = 'rhythm_tech_output.csv'
url = 'https://coastmusiconline.com'

username = ''
password = ''

In [14]:
# create empty dataframe
df = pd.DataFrame(columns=['Product Name', 'Product Vendor', 'Product Model', 'Product List Price', 'Product MAP Price', 'Product Cost', 'Product Description', 'Product Image URL'])

product_df = pd.read_csv(input_file)
product_list = product_df['Variant SKU'].tolist()

In [15]:
print(f'Scraping data for {len(product_list)} products:')
for product in product_list:
    print(f'\t{product}')

Scraping data for 2 products:
	MC2
	TCFML


In [16]:
driver = webdriver.Chrome('./chromedriver.exe', chrome_options=options)
print(f'Opening {url} in Chrome browser...')
driver.get(url)
products_not_found = 0
products_not_found_list = []

  driver = webdriver.Chrome('./chromedriver.exe', chrome_options=options)
  driver = webdriver.Chrome('./chromedriver.exe', chrome_options=options)


Opening https://coastmusiconline.com in Chrome browser...


In [17]:
#click login link
login_link = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'a[id="loginLink"]'))).click()


In [18]:
# enter username
username_input = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[id="Username"]')))
username_input.clear()
username_input.send_keys(username)


In [19]:
# enter password
password_input = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[id="Password"]')))
password_input.clear()
password_input.send_keys(password)
    

In [20]:
# click login button
login_button = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[value="Login"]'))).click()

In [21]:
for product_model in product_list:
    # enter product model in search box
    search_input = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[id="searchBox"]')))
    search_input.clear()
    search_input.send_keys(product_model)
    search_input.send_keys(Keys.RETURN)

    # click product link
    try:
        product_link = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.XPATH, f"//a[contains(@href, '{product_model}')]"))).click()
    except:
            print(f'Product {product_model} not found')
            products_not_found += 1
            products_not_found_list.append(product_model)
            continue
    
    try:
        product_vendor = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[class='catalogTileBrand']"))).text.strip()
    except:
        product_vendor = 'Not found'
        print('Product vendor not found')
    
    try:
        product_title = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[id='itemTitle']"))).text.strip()
    except:
        product_title = ''
        print('Product title not found')

    # product_model = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[class='catalogTileID ']"))).text.split(':')[1].strip()

    try:
        dealer_price = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[id='dealerPrice']"))).text.split(':')[1].strip().replace('$','')
    except:
        dealer_price = 0
        print('Dealer price not found')

    try:
        list_price = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[class='productDetailListPrice']"))).text.split(':')[1].strip().replace('$','')
        map_price = ''
    except:
        list_price = ''
        map_price = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[class='productDetailMapPrice']"))).text.split(':')[1].strip().replace('$','')

    try:
        product_description = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div[class='prodcutDescriptionLong']"))).text
    except:
        product_description = ''
        print('Product description not found')

    try:
        product_bullets = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "ul[id='itemBullets']"))).text
    except:
        product_bullets = ''
    
    try:
        img_url = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "img[id='mainImage']"))).get_attribute('src')
    except:
        img_url = 'Not found'
        print('Image url not found')

    print(f'vendor: {product_vendor}')
    print(f'title: {product_title}')
    print(f'model: {product_model}')
    print(f'dealer price: {dealer_price}')
    if len(map_price) > 0:
        print(f'map price: {map_price}')
    else:
        print(f'list price: {list_price}')
    print(f'description: {product_description + product_bullets}')
    print(f'img url: {img_url}')
    print('------\n')

    # add row to dataframe
    row = {'Product Name': product_title, 'Product Vendor': product_vendor, 'Product Model': product_model, 'Product List Price': list_price, 'Product MAP Price': map_price, 'Product Cost': dealer_price, 'Product Description': product_description + product_bullets, 'Product Image URL': img_url}
    df_new_row = pd.DataFrame([row])
    df = pd.concat([df, df_new_row], axis=0, ignore_index=True)



vendor: Boblen
title: Archtop A-style Mandolin Case Pear Shaped With Plush Lining
model: MC2
dealer price: 88.56
list price: 164.00
description: Made in Canada, Sturdy and Solid. Boblen MC221 is an archtop A-style mandolin case, with plush lining. Stylish with Plush Lining to protect your instrument.Fits Archtop A-Style Mandolins
Hardshell Case
Plush Lining
Multiple Closure Points
Sturdy
MADE IN CANADA
Dimensions: 27.5" (length), 10.5" (width), 2.5" (depth).
img url: https://s3.amazonaws.com/b2b-coastmusic/items/Boblen/MC221/MC221~lg.jpg
------

vendor: Rhythm Tech
title: Fiberglass Maracas - Large - Black
model: TCFML
dealer price: 10.70
list price: 26.75
description: Our Fiber Maracas come in two sizes and they each have a bright, present sound and lively attack.
img url: https://s3.amazonaws.com/b2b-coastmusic/items/RhythmTech/TCFML/TCFML~lg.jpg
------



In [22]:
# save dataframe to CSV file
df.to_csv(output_file, index=False)
print('Results saved to CSV file\n')

print('Results:')
print(df)

print(f'Number of products found: {len(df)}\n')
print(f'Number of products not found: {products_not_found}\n')
if products_not_found > 0:
    print('Products not found:')
    for product in products_not_found_list:
        print(f'\t{product}')

# close browser
driver.close()
driver.quit()

Results saved to CSV file

Results:
                                        Product Name Product Vendor  \
0  Archtop A-style Mandolin Case Pear Shaped With...         Boblen   
1                 Fiberglass Maracas - Large - Black    Rhythm Tech   

  Product Model Product List Price Product MAP Price Product Cost  \
0           MC2             164.00                          88.56   
1         TCFML              26.75                          10.70   

                                 Product Description  \
0  Made in Canada, Sturdy and Solid. Boblen MC221...   
1  Our Fiber Maracas come in two sizes and they e...   

                                   Product Image URL  
0  https://s3.amazonaws.com/b2b-coastmusic/items/...  
1  https://s3.amazonaws.com/b2b-coastmusic/items/...  
Number of products found: 2

Number of products not found: 0

