In [90]:
import os

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

In [91]:
# Global configuration: everything a reader might need to tune lives in this cell.

WAIT_TIME = 3  # seconds each WebDriverWait may block before timing out

# default input & output file names
input_file = 'coast_products.csv'           # CSV of products to look up (read via its 'SKU' column)
output_file = 'coast_webscrape_output.csv'  # where the scraped results are written
url = 'https://coastmusiconline.com'

# Login credentials come from the environment so they never have to be typed
# into (and saved with) the notebook. When a variable is unset the value falls
# back to the previous default of an empty string.
username = os.environ.get('COAST_USERNAME', '')
password = os.environ.get('COAST_PASSWORD', '')

In [92]:
# create empty dataframe holding the output columns; scraped rows are added to it later
df = pd.DataFrame(columns=['Product Name', 'Product Vendor', 'Product Model', 'Product List Price', 'Product MAP Price', 'Product Cost', 'Product Description', 'Product Image URL'])

# Read SKUs as text so numeric-looking values (e.g. "00123") keep their exact
# spelling, and skip blank cells, which would otherwise become NaN search terms.
product_df = pd.read_csv(input_file, dtype={'SKU': str})
product_list = [sku.strip() for sku in product_df['SKU'].dropna() if sku.strip()]

In [93]:
# Show how many SKUs were loaded, then list them one per line, tab-indented.
print(f'Scraping data for {len(product_list)} products:')
if product_list:
    print('\n'.join(f'\t{sku}' for sku in product_list))

Scraping data for 6 products:
	TR3356
	W05TX
	G05TX
	EJ16
	TR11B
	PRB-T13


In [94]:
# Bookkeeping for SKUs whose product page cannot be located while scraping.
products_not_found_list = []
products_not_found = 0

# Start Chrome using the chromedriver binary in the working directory, then load the site.
# NOTE(review): passing the driver path positionally depends on the installed
# Selenium version - confirm it is still accepted before upgrading.
driver = webdriver.Chrome('./chromedriver')
print(f'Opening {url} in Chrome browser...')
driver.get(url)

Opening https://coastmusiconline.com in Chrome browser...


In [95]:
# Wait for the login link to become clickable, then click it.
login_link_locator = (By.CSS_SELECTOR, 'a[id="loginLink"]')
login_anchor = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable(login_link_locator))
login_link = login_anchor.click()  # click() returns None, so login_link holds None


In [96]:
# Fill in the username field once it is ready for input.
username_locator = (By.CSS_SELECTOR, 'input[id="Username"]')
username_input = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable(username_locator))
username_input.clear()  # remove any text already in the field before typing
username_input.send_keys(username)


In [97]:
# Fill in the password field once it is ready for input.
password_locator = (By.CSS_SELECTOR, 'input[id="Password"]')
password_input = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable(password_locator))
password_input.clear()  # remove any text already in the field before typing
password_input.send_keys(password)
    

In [98]:
# Wait for the Login button to become clickable, then submit the form by clicking it.
login_button_locator = (By.CSS_SELECTOR, 'input[value="Login"]')
login_submit = WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable(login_button_locator))
login_button = login_submit.click()  # click() returns None, so login_button holds None

In [99]:
def _wait_for_css(selector):
    """Return the element matching the CSS `selector` once it is clickable.

    Raises TimeoutException if that does not happen within WAIT_TIME seconds.
    """
    return WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))


def _value_after_colon(selector):
    """Return the stripped second ':'-separated field of the element's text.

    Raises IndexError if the text contains no ':'.
    """
    return _wait_for_css(selector).text.split(':')[1].strip()


# Scrape every SKU: search for it, open its product page, read the fields and
# collect one record per product. Records are appended to `df` in a single step
# instead of one pd.concat per iteration.
scraped_rows = []
try:
    for sku in product_list:
        # enter product model in search box
        search_input = _wait_for_css('input[id="searchBox"]')
        search_input.clear()
        search_input.send_keys(sku)
        search_input.send_keys(Keys.RETURN)

        # click product link; a timeout means no result links to this SKU
        try:
            WebDriverWait(driver, WAIT_TIME).until(EC.element_to_be_clickable((By.XPATH, f"//a[contains(@href, '{sku}')]"))).click()
        except TimeoutException:
            print(f'Product {sku} not found')
            products_not_found += 1
            products_not_found_list.append(sku)
            continue

        product_vendor = _wait_for_css("div[class='catalogTileBrand']").text.strip()
        product_title = _wait_for_css("div[id='itemTitle']").text.strip()
        # the model shown on the page is what gets recorded, not the search term
        product_model = _value_after_colon("div[class='catalogTileID ']")
        dealer_price = _value_after_colon("div[id='dealerPrice']").replace('$', '')

        # The code reads a list price first and only looks for a MAP price when
        # the list price is missing or its text has no ':' to split on.
        try:
            list_price = _value_after_colon("div[class='productDetailListPrice']").replace('$', '')
            map_price = ''
        except (TimeoutException, IndexError):
            list_price = ''
            map_price = _value_after_colon("div[class='productDetailMapPrice']").replace('$', '')

        product_description = _wait_for_css("div[class='prodcutDescriptionLong']").text
        # the bullet list is optional; fall back to an empty string when absent
        try:
            product_bullets = _wait_for_css("ul[id='itemBullets']").text
        except TimeoutException:
            product_bullets = ''
        img_url = _wait_for_css("img[id='mainImage']").get_attribute('src')
        full_description = product_description + product_bullets

        print(f'vendor: {product_vendor}')
        print(f'title: {product_title}')
        print(f'model: {product_model}')
        print(f'dealer price: {dealer_price}')
        if len(map_price) > 0:
            print(f'map price: {map_price}')
        else:
            print(f'list price: {list_price}')
        print(f'description: {full_description}')
        print(f'img url: {img_url}')
        print('------\n')

        # collect the row; it is added to the dataframe after the loop
        scraped_rows.append({'Product Name': product_title, 'Product Vendor': product_vendor, 'Product Model': product_model, 'Product List Price': list_price, 'Product MAP Price': map_price, 'Product Cost': dealer_price, 'Product Description': full_description, 'Product Image URL': img_url})
finally:
    # Runs even when the loop is interrupted or raises, so rows scraped before
    # the failure are still added to `df`.
    if scraped_rows:
        df = pd.concat([df, pd.DataFrame(scraped_rows)], axis=0, ignore_index=True)



vendor: Grover
title: 14" Snappy Snare, 16 Strand
model: TR3356
dealer price: 7.25
list price: 14.50
description: Trophy Snappy Snares™ are made of special quality coiled steel wire. Trenched butt plates provide for better response, chrome plated, individually packaged.
img url: https://s3.amazonaws.com/b2b-coastmusic/items/Grover/TR3356/TR3356~lg.jpg
------

vendor: Profile
title: Dreadnought Guitar Bag for Beginners
model: W05TX
dealer price: 20.50
map price: 35.99
description: Profile® 05 Series bags offer great value in a classic black design.5mm foam padding• Deluxe Profile® zippers
Satin nylon lining
Adjustable shoulder straps and padded leatherette carrying handle
Dimesional zippered accessory pouch at the headstock with external safety reflector strip
Zippered storage front pouches for sheet music or books
112.98 x 43.99 x 14.99 cm; 997.9 Grams
img url: https://s3.amazonaws.com/b2b-coastmusic/items/Profile/W05TX/W05TX~lg.jpg
------

vendor: Profile
title: Soft Electric Guitar C

In [100]:
# Write the collected rows to the output CSV (without the index column).
df.to_csv(output_file, index=False)
print('Results saved to CSV file\n')

# Echo the full table, then the found / not-found tallies.
print('Results:')
print(df)

print(f'Number of products found: {len(df)}\n')
print(f'Number of products not found: {products_not_found}\n')
if products_not_found > 0:
    print('Products not found:')
    if products_not_found_list:
        # one tab-indented SKU per line
        print('\n'.join(f'\t{missing_sku}' for missing_sku in products_not_found_list))

# Close the current window, then end the WebDriver session.
driver.close()
driver.quit()

Results saved to CSV file

Results:
                              Product Name Product Vendor Product Model  \
0              14" Snappy Snare, 16 Strand         Grover        TR3356   
1     Dreadnought Guitar Bag for Beginners        Profile         W05TX   
2  Soft Electric Guitar Case for Beginners        Profile         G05TX   
3            Trophy Maestro Baton Pear 12"         Grover         TR11B   
4                          13” Tom Tom Bag        Profile       PRB-T13   

  Product List Price Product MAP Price Product Cost  \
0              14.50                           7.25   
1                                35.99        20.50   
2                                35.99        20.50   
3              19.50                           9.75   
4              85.00                          42.50   

                                 Product Description  \
0  Trophy Snappy Snares™ are made of special qual...   
1  Profile® 05 Series bags offer great value in a...   
2  Quality ele