# Coast Music Webscrape v2.0
### Updated January 2024 for new B2B site

Script for scraping product information from the Coast Music B2B website and saving it as a CSV file suitable for importing into Shopify.

In [None]:
# Scraper-wide constants

# timeout, in seconds
WAIT_TIME = 5

# values for chrome experimental options
ALLOW = 1
BLOCK = 2

HEADLESS = False

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.service import Service

# Browser options handed to webdriver.Chrome when the driver is created.
chrome_options = webdriver.ChromeOptions()
# add_argument must actually be called to have any effect; the HEADLESS
# constant decides whether Chrome runs without a visible window.
if HEADLESS:
    chrome_options.add_argument('--headless=new')

import pandas as pd

In [None]:
# Run configuration.
import os

# CSV file providing a 'Variant SKU' column; when left empty the inline
# product list is used instead.
input_file = ''
output_file = 'coast_test_products.csv'

url = 'https://coastmusiconline.com/account/login'

# Credentials are read from the environment so they do not have to be stored
# in the notebook; both default to an empty string when the variable is unset.
username = os.environ.get('COAST_USERNAME', '')
password = os.environ.get('COAST_PASSWORD', '')

In [None]:
# Create the (initially empty) results dataframe; the remaining columns are
# added automatically when scraped rows are concatenated onto it.
df = pd.DataFrame(columns=['Variant SKU', 'Variant Price'])

if input_file:
    # Read SKUs as text so numeric-looking values keep any leading zeros,
    # and skip blank cells instead of searching for NaN.
    product_df = pd.read_csv(input_file, dtype={'Variant SKU': str})
    product_list = [
        sku.strip()
        for sku in product_df['Variant SKU'].dropna()
        if sku.strip()
    ]
else:
    product_list = [  # use product_list array if input_file is not specified
        '11075',
        'CR505',
    ]

In [None]:
# Show how many SKUs are queued, then list them one per line (tab-indented).
print(f'Scraping data for {len(product_list)} products:')
for product in product_list:
    queued_line = f'\t{product}'
    print(queued_line)

In [None]:
# Start Chrome with the configured options and load the login page.
service = Service()
driver = webdriver.Chrome(options=chrome_options, service=service)

print(f'Opening {url} in Chrome browser...')
driver.get(url)

# Running tally (count and SKU list) of products that could not be located.
products_not_found, products_not_found_list = 0, []

In [None]:
# Wait for the login e-mail field to become clickable, then type the username.
# The attribute selector is closed with ']' to match the password selector.
email_input = WebDriverWait(driver, WAIT_TIME).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[id="login-email_address"]'))
)
email_input.clear()
email_input.send_keys(username)

In [None]:
# Wait for the password field to become clickable, then type the password.
password_field_ready = EC.element_to_be_clickable(
    (By.CSS_SELECTOR, 'input[id="login-password"]')
)
password_input = WebDriverWait(driver, WAIT_TIME).until(password_field_ready)
password_input.clear()
password_input.send_keys(password)

In [None]:
# Wait for the login button to become clickable and submit the form.
# The element is kept in login_button; click() returns None, so chaining it
# onto the assignment would store None instead of the element.
login_button = WebDriverWait(driver, WAIT_TIME).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.center-buttons .btn-2'))
)
login_button.click()

In [None]:
# Scrape every product: search for the SKU, open its product page, read each
# field (falling back to a placeholder when a field is missing) and append one
# row to `df`.
for product_model in product_list:

    # enter product model in search box
    search_input = WebDriverWait(driver, WAIT_TIME).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="search-input"]'))
    )
    search_input.clear()
    search_input.send_keys(product_model)
    search_input.send_keys(Keys.RETURN)

    # click product link
    try:
        # Find the product container by data-jam-itemid
        product_container = WebDriverWait(driver, WAIT_TIME).until(
            EC.visibility_of_element_located(
                (By.XPATH, f"//div[contains(@class, 'item') and @data-jam-itemid='{product_model}']")
            )
        )

        # Within that container, find the "View" button
        view_link = product_container.find_element(By.XPATH, ".//a[@class='btn view-button']")
        view_link.click()
    except Exception:
        print(f'Product {product_model} not found')
        products_not_found += 1
        products_not_found_list.append(product_model)
        # No product page was opened, so there is nothing to scrape; skip the
        # field lookups rather than recording a row from the search page.
        continue

    try:
        product_vendor = WebDriverWait(driver, WAIT_TIME).until(
            EC.visibility_of_element_located(
                (By.XPATH, "//div[@class='box w-product-brand-name' and @data-name='product\\BrandName']/a")
            )
        ).text.strip().title()
    except Exception:
        product_vendor = 'Not found'
        print(f'Product vendor for {product_model} not found')

    try:
        product_title = WebDriverWait(driver, WAIT_TIME).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "h1"))
        ).text.strip()
    except Exception:
        product_title = 'Not found'
        print(f'Product title for {product_model} not found')

    try:
        product_barcode_element = WebDriverWait(driver, WAIT_TIME).until(
            EC.visibility_of_element_located((By.XPATH, "//div[@class='upc']/span[@itemprop='gtin12']"))
        )

        # Extract the UPC code from the span's text content
        product_barcode = product_barcode_element.text
    except Exception:
        product_barcode = 'Not found'
        print(f'Product barcode for {product_model} not found')

    try:
        # Wait until the price element is visible
        price_element = WebDriverWait(driver, WAIT_TIME).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.price span[itemprop='price']")
            )
        )

        dealer_price = price_element.text
    except Exception:
        dealer_price = 0
        print('Dealer price not found')

    try:
        # Wait for the list-price element to be visible
        list_price_element = WebDriverWait(driver, WAIT_TIME).until(
            EC.visibility_of_element_located(
                (By.XPATH, "//div[@class='product-price-list']//span[@itemprop='price']")
            )
        )

        list_price = list_price_element.text
    except Exception:
        list_price = ''
        print('List price not found')

    try:
        # Wait for the map-price element to be visible
        map_price_element = WebDriverWait(driver, WAIT_TIME).until(
            EC.visibility_of_element_located(
                (By.XPATH, "//div[@class='product-price-map']//span[@itemprop='price']")
            )
        )

        map_price = map_price_element.text
    except Exception:
        # None (rather than '') marks "no MAP price" so the list price is used below
        map_price = None
        print('MAP price not found')

    try:
        # Wait until the description element is visible
        description_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.XPATH, "//div[@class='description-short' and @itemprop='disambiguatingDescription']")
            )
        )

        # Extract the text from the div
        product_description = description_element.text
    except Exception:
        product_description = ''
        print('Product description not found')

    try:
        # 1. Wait for the bullet-point container to be in the DOM
        bullet_container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, "div.box.w-product-bullet-points div.description-short")
            )
        )

        # 2. Click "More" button if it's displayed
        try:
            more_button = driver.find_element(By.CSS_SELECTOR, "div.more_less span.more")
            if more_button.is_displayed():
                more_button.click()
        except Exception:
            # Best effort: if there's no "More" button or it can't be clicked,
            # continue with whatever bullet points are already in the page
            pass

        # 3. Now collect all <p> elements
        p_elements = bullet_container.find_elements(By.TAG_NAME, "p")

        # 4. Extract the text from each paragraph
        bullet_points = [p.text.strip() for p in p_elements if p.text.strip()]

        print("Bullet Points:")
        for bp in bullet_points:
            print("-", bp)
        if bullet_points:
            # Bullet points are appended to the description, one per line
            product_description += "\n\n" + "\n".join(bullet_points)
    except Exception:
        bullet_points = []
        print('no bullet points found')

    try:
        img_url = WebDriverWait(driver, WAIT_TIME).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "img[class='main-image']"))
        ).get_attribute('src')
    except Exception:
        img_url = 'Not found'
        print('Image url not found')

    try:
        # Wait for the stock element to be visible
        stock_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.box.w-product-stock .js-stock span")
            )
        )

        stock_text = stock_element.text.strip().lower()
        in_stock = "in stock" in stock_text
    except Exception:
        # Always assign a value: otherwise the first product raises NameError
        # and later products silently reuse the previous product's status.
        in_stock = None
        print('Stock status not found')

    print(f'vendor: {product_vendor}')
    print(f'title: {product_title}')
    print(f'model: {product_model}')
    print(f'barcode: {product_barcode}')
    print(f'dealer price: {dealer_price}')
    if map_price is not None:
        print(f'map price: {map_price}')
    else:
        print(f'list price: {list_price}')
    print(f'description: {product_description}')
    print(f'img url: {img_url}')
    print('in stock: ', in_stock)
    print('------\n')

    # add row to dataframe
    row = {
        'Variant SKU': product_model,
        'Vendor': product_vendor,
        'Title': f'{product_vendor} {product_title}',
        'Body (HTML)': product_description,
        'Variant Barcode': product_barcode,
        'Product Image': img_url,
        # Prefer the MAP price; fall back to the list price when there is none
        'Variant Price': map_price if map_price is not None else list_price,
        'Variant Inventory Tracker': 'shopify',
        'Variant Inventory Policy': 'continue',
        'Cost per item': dealer_price,
        'In stock': in_stock,
    }
    df_new_row = pd.DataFrame([row])
    df = pd.concat([df, df_new_row], axis=0, ignore_index=True)

In [None]:
# Write the scraped rows to disk, print a summary, and always shut the browser
# down afterwards, even if saving or printing fails.
try:
    # save dataframe to CSV file
    df.to_csv(output_file, index=False)
    print('Results saved to CSV file\n')

    print('Results:')
    print(df)

    # Count only rows whose SKU was actually located, so the figure stays
    # correct even if rows exist for SKUs recorded as not found.
    products_found = int((~df['Variant SKU'].isin(products_not_found_list)).sum())
    print(f'Number of products found: {products_found}\n')
    print(f'Number of products not found: {products_not_found}\n')
    if products_not_found > 0:
        print('Products not found:')
        for product in products_not_found_list:
            print(f'\t{product}')
finally:
    # close browser: quit() closes every window and ends the driver session
    driver.quit()