# Godin Guitars Webscraping Script

In [None]:
# Script-wide settings.

# Timeout, in seconds, handed to every WebDriverWait.
WAIT_TIME = 5

# Values used for Chrome's experimental content-setting preferences.
ALLOW = 1
BLOCK = 2

# When true, the "--headless" switch is added to the Chrome options.
HEADLESS = False

In [None]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.service import Service
from selenium.webdriver import ActionChains

# Build the Chrome configuration: command-line switches first, then prefs.
chrome_options = webdriver.ChromeOptions()

# Switches are collected in the order they must be added; "--headless" is
# only included when HEADLESS is set.
chrome_args = ["--no-sandbox", "--disable-infobars"]
if HEADLESS:
    chrome_args.append("--headless")
chrome_args.append("--disable-gpu")

for chrome_arg in chrome_args:
    chrome_options.add_argument(chrome_arg)

# Set the microphone, camera, geolocation and notification content settings
# to BLOCK.
content_prefs = {
    "profile.default_content_setting_values.media_stream_mic": BLOCK,
    "profile.default_content_setting_values.media_stream_camera": BLOCK,
    "profile.default_content_setting_values.geolocation": BLOCK,
    "profile.default_content_setting_values.notifications": BLOCK,
}
chrome_options.add_experimental_option("prefs", content_prefs)

import pandas as pd
from time import sleep

In [None]:
# Site to scrape.
url = 'https://godinguitars.com'

# Optional CSV of products to look up; leave empty to use the built-in list.
input_file = ''

# Destination CSV for the scraped results.
output_file = 'godin_guitars1.csv'


In [None]:
# Results accumulator. Only two columns are declared up front; the scraping
# loop concatenates rows carrying additional columns onto this frame.
df = pd.DataFrame(columns=['Variant SKU', 'Variant Price'])

if input_file:
    # Read the SKU column as text. Without an explicit dtype pandas parses
    # values such as "051793" as the integer 51793, which drops the leading
    # zero and can never equal the SKU string shown on the product page.
    product_df = pd.read_csv(input_file, dtype={'Product Model': str})
    # Discard blank cells (NaN) and surrounding whitespace so that only real
    # SKUs are searched for.
    product_list = [
        sku
        for sku in product_df['Product Model'].dropna().str.strip().tolist()
        if sku
    ]
else:
    # No input file given: scrape the SKUs listed here instead.
    product_list = [
        '051793',
        '049691',
        '050987',
        '051649',
        '052561',
        '050147',
    ]

In [None]:
# Announce how many products will be scraped, then list them one per line,
# each indented with a tab.
product_count = len(product_list)
print(f'Scraping data for {product_count} products:')
print(''.join(f'\t{sku}\n' for sku in product_list), end='')

In [None]:
# Launch Chrome with the options configured earlier and load the site.
service = Service()
driver = webdriver.Chrome(options=chrome_options, service=service)
print(f'Opening {url} in Chrome browser...')
driver.get(url)

# Counter and list named for products that are not found; both start empty.
products_not_found, products_not_found_list = 0, []

In [None]:

# The search form is opened by a link identified by its data-dt-toggle
# attribute; wait up to WAIT_TIME seconds for it to become clickable.
search_toggle_locator = (By.CSS_SELECTOR, "a[data-dt-toggle='search-form-open']")
search_link = WebDriverWait(driver, WAIT_TIME).until(
    EC.element_to_be_clickable(search_toggle_locator)
)

# Open the search form.
search_link.click()

In [None]:

# Trial run: search for the first product in the list.
search_term = product_list[0]

# Wait up to WAIT_TIME seconds for the search box to become visible.
search_field_locator = (By.ID, "woocommerce-product-search-field-0")
search_wait = WebDriverWait(driver, WAIT_TIME)
search_input = search_wait.until(
    EC.visibility_of_element_located(search_field_locator)
)

# Empty the box, type the term, then submit with ENTER.
search_input.clear()
search_input.send_keys(search_term)
search_input.send_keys(Keys.ENTER)

In [None]:

def _wait_visible(by, value):
    """Wait up to WAIT_TIME seconds for the element to be visible; return it."""
    return WebDriverWait(driver, WAIT_TIME).until(
        EC.visibility_of_element_located((by, value))
    )


# Search the site for every SKU in product_list and append one row per
# successfully scraped product to df. SKUs that error out or whose page shows
# a different SKU are recorded in products_not_found / products_not_found_list.
for product_sku in product_list:
    # Normalise to text: the on-page SKU is always a string, so a list that
    # holds ints (e.g. read from a CSV) would otherwise never compare equal.
    product_sku = str(product_sku).strip()

    try:
        # 1. Open the search form and submit the SKU. This sits inside the
        #    try so that a timeout here skips this product only instead of
        #    aborting the whole run.
        search_link = WebDriverWait(driver, WAIT_TIME).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "a[data-dt-toggle='search-form-open']"))
        )
        search_link.click()

        search_term = product_sku
        search_input = _wait_visible(By.ID, "woocommerce-product-search-field-0")
        search_input.clear()  # Clear if needed
        search_input.send_keys(search_term)
        search_input.send_keys(Keys.ENTER)

        # 2. Wait for the main product container to ensure the page has loaded
        # NOTE(review): if the previous product page is still in the DOM when
        # this wait runs it may match the old container - confirm that the
        # search always triggers a full page load.
        WebDriverWait(driver, WAIT_TIME).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.product.type-product"))
        )

        # 3. Wait for the SKU element, then extract its text
        found_sku = _wait_visible(By.CSS_SELECTOR, "span.sku").text.strip()

        # 4. Compare with our desired SKU
        if found_sku != product_sku:
            print(f"SKU mismatch! Expected {product_sku} but found {found_sku}. Skipping.")
            products_not_found += 1
            products_not_found_list.append(product_sku)
            continue

        # 5. If correct product, proceed to extract the rest

        # Title
        product_title = _wait_visible(
            By.CSS_SELECTOR, "h2.product_title.title.entry-title"
        ).text.strip()

        # UPC
        product_upc = _wait_visible(By.CSS_SELECTOR, "span.upc").text.strip()

        # Description: every non-empty <p> inside .product-single-details
        # except those with class "stock", joined with newlines.
        desc_paragraphs = WebDriverWait(driver, WAIT_TIME).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".product-single-details p:not(.stock)"))
        )
        description_text = "\n".join(el.text.strip() for el in desc_paragraphs if el.text.strip())

        # Main Image URL
        image_elem = _wait_visible(By.CSS_SELECTOR, ".woocommerce-product-gallery__image img")
        product_image_url = image_elem.get_attribute("src")

        # Price is optional: fall back to an empty string when the element is
        # missing. `except Exception` rather than a bare `except` so that
        # Ctrl-C (KeyboardInterrupt) still stops the run.
        try:
            price_elem = _wait_visible(By.CSS_SELECTOR, "ins .woocommerce-Price-amount bdi")
            product_price = price_elem.text.strip()  # e.g. "$899.00 CAD"
        except Exception:
            product_price = ""  # handle if no sale price or not found

        row_data = {
            'Variant SKU': found_sku,
            'Vendor': 'Godin Guitars',
            'Title': f"Godin {product_title}",
            'Body (HTML)': description_text,
            'Product Image': product_image_url,
            'Variant Price': product_price,
            'Variant Barcode': product_upc,
            'Variant Inventory Tracker': 'shopify',
            'Variant Inventory Policy': 'continue'
        }
        df_new_row = pd.DataFrame([row_data])
        df = pd.concat([df, df_new_row], axis=0, ignore_index=True)
        print(f"Successfully scraped {found_sku} ({product_title}).")

    except Exception as e:
        # Any failure for this product is reported and recorded; the loop
        # then moves on to the next SKU.
        print(f"Error scraping SKU {product_sku}: {e}")
        products_not_found += 1
        products_not_found_list.append(product_sku)

print(df)

In [None]:
# Save the results, then shut the browser down. The try/finally guarantees the
# Chrome session is ended even when writing the CSV fails (for example when
# the output file is locked or the path is not writable).
try:
    # save dataframe to CSV file
    df.to_csv(output_file, index=False)
    print('Results saved to CSV file\n')

    print('Results:')
    print(df)

    # Summary output, currently disabled:
    # print(f'Number of products found: {len(df)}\n')
    # print(f'Number of products not found: {products_not_found}\n')
    # if products_not_found > 0:
    #     print('Products not found:')
    #     for product in products_not_found_list:
    #         print(f'\t{product}')
finally:
    # close browser: quit() closes every window and ends the WebDriver
    # session, so a separate close() beforehand is unnecessary.
    driver.quit()