In [2]:
import os
import csv
import time
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.common.by import By

# Date stamp (YYYY-MM-DD) that is stored alongside the scraped values
current_date = format(datetime.now(), '%Y-%m-%d')

# Headless Firefox session; the geckodriver path comes from webdriver_manager
firefox_options = Options()
firefox_options.add_argument("--headless")  # no visible browser window
driver = webdriver.Firefox(
    service=Service(GeckoDriverManager().install()),
    options=firefox_options,
)

# Product page that will be scraped
URL = 'https://www.amazon.ca/Science-Analytics-T-Shirt-Sleeve-Sweatshirt/dp/B0CK6B9T9V/ref=sr_1_5?crid=NKGDZP04HQ32&dib=eyJ2IjoiMSJ9.EHRviXvbnjvWNnL_WCUXdNItkSMXCb4yYHNfzrFzLio_Sm-P9YGPDBS7paMep-cC46KZUSX0dbv36vdtTsMKQkvI9HZ3fXegcK12K1umlUNICTcEN1OTZRlBHMKPcohcyUUs5GVt31NZCVx4u8mu8pWJKRaVp7iTj9p3EI84koh4dNp6q-1Zw89A7KxGaB-G3Za3O2lCjQeAZRuJSLlM5ft9y2ra-X-AosEZPQlIZd7yJ1MG1HoyCNhztm8MkFW5-WMJsAi8nLT6njTAd8iUZFEMl33K8iUcIexj-WgHFpg.32MDLkas_4ALAe-r9NN3OrkGS8dilY3SLl578pWBiXs&dib_tag=se&keywords=got+data+tshirt&qid=1722282658&sprefix=%2Caps%2C354&sr=8-5'

# Fetch the page and scrape it.
# Everything that uses the browser runs inside try/finally so that
# driver.quit() is reached even when navigation or scraping raises;
# otherwise the headless browser session would be left open.
try:
    driver.get(URL)

    # Extract the product title
    try:
        title_element = driver.find_element(By.ID, 'productTitle')
        title = title_element.text.strip()
        print(f"Title: {title}")
    except Exception as e:
        # A missing title is reported and recorded as None rather than aborting
        print(f"An error occurred while getting the title: {e}")
        title = None

    # Extract the product price
    try:
        # Locate the price element using class names
        price_element = driver.find_element(By.CSS_SELECTOR, 'span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay')

        # Extract the text from the child spans
        price_symbol = price_element.find_element(By.CSS_SELECTOR, '.a-price-symbol').text
        price_whole = price_element.find_element(By.CSS_SELECTOR, '.a-price-whole').text
        price_fraction = price_element.find_element(By.CSS_SELECTOR, '.a-price-fraction').text

        # Combine parts into a correctly formatted price string;
        # the fractional part is appended only when it is non-empty
        if price_fraction:
            price_text = f"{price_symbol}{price_whole}.{price_fraction}"
        else:
            price_text = f"{price_symbol}{price_whole}"

        # Print the price
        print(f"Price: {price_text}")
    except Exception as e:
        # A missing price is reported and recorded as None rather than aborting
        print(f"An error occurred while getting the price: {e}")
        price_text = None
finally:
    # Close the browser, whether or not the scrape succeeded
    driver.quit()

# Row to append: scraped title, scraped price, and the date of this run
data = [title, price_text, current_date]

# Append the row to the CSV file, preceded by a header row when the file is empty
csv_file = 'product_data.csv'
try:
    with open(csv_file, mode='a+', newline='', encoding='UTF8') as out_file:
        csv_writer = csv.writer(out_file)

        # A position of 0 right after opening is used as the "file is empty" check
        if out_file.tell() != 0:
            pending_rows = [data]
        else:
            pending_rows = [['Title', 'Price', 'Date'], data]

        csv_writer.writerows(pending_rows)
    print(f"Data saved to {csv_file}")
except Exception as e:
    print(f"An error occurred while saving to CSV: {e}")

# Echo the whole CSV file back, one parsed row (a list of strings) per line
try:
    with open(csv_file, mode='r', newline='', encoding='UTF8') as in_file:
        for record in csv.reader(in_file):
            print(record)
except Exception as e:
    print(f"An error occurred while reading the CSV file: {e}")


Title: Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie
Price: $26.70
Data saved to product_data.csv
['Title', 'Price', 'Date']
['Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie', '$26.70', '2024-07-30']


In [5]:
def scrape_amazon_product(url, csv_file='product_data.csv'):
    """Scrape a product page's title and price and append them to a CSV file.

    A headless Firefox session is started, ``url`` is loaded, and the title
    and price are read from the page. A row ``[title, price, date]`` is then
    appended to ``csv_file`` (a header row is written first when the file is
    empty), and the full contents of the file are printed.

    Args:
        url: Address of the product page to load.
        csv_file: Path of the CSV file to append to. Defaults to
            'product_data.csv'.

    Returns:
        None. Results are printed and written to ``csv_file``.

    Raises:
        Exceptions raised while starting the browser or loading ``url`` are
        not caught here and propagate to the caller. Failures while locating
        the title or price, or while writing/reading the CSV file, are caught
        and reported with a printed message; a value that could not be
        scraped is recorded as None.
    """
    # Get the current date
    current_date = datetime.now().strftime('%Y-%m-%d')

    # Set up Selenium WebDriver for Firefox
    firefox_options = Options()
    firefox_options.add_argument("--headless")  # Run in headless mode
    driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()), options=firefox_options)

    # Everything that uses the browser runs inside try/finally so that
    # driver.quit() is reached even when navigation or scraping raises.
    # This function is meant to be called repeatedly, so a skipped quit()
    # would leave one more browser session open on every failed run.
    try:
        # Fetch the page
        driver.get(url)

        # Extract the product title
        try:
            title_element = driver.find_element(By.ID, 'productTitle')
            title = title_element.text.strip()
            print(f"Title: {title}")
        except Exception as e:
            print(f"An error occurred while getting the title: {e}")
            title = None

        # Extract the product price
        try:
            # Locate the price element using class names
            price_element = driver.find_element(By.CSS_SELECTOR, 'span.a-price.aok-align-center.reinventPricePriceToPayMargin.priceToPay')

            # Extract the text from the child spans
            price_symbol = price_element.find_element(By.CSS_SELECTOR, '.a-price-symbol').text
            price_whole = price_element.find_element(By.CSS_SELECTOR, '.a-price-whole').text
            price_fraction = price_element.find_element(By.CSS_SELECTOR, '.a-price-fraction').text

            # Combine parts into a correctly formatted price string;
            # the fractional part is appended only when it is non-empty
            if price_fraction:
                price_text = f"{price_symbol}{price_whole}.{price_fraction}"
            else:
                price_text = f"{price_symbol}{price_whole}"

            # Print the price
            print(f"Price: {price_text}")
        except Exception as e:
            print(f"An error occurred while getting the price: {e}")
            price_text = None
    finally:
        # Close the browser, whether or not the scrape succeeded
        driver.quit()

    # Prepare the data to be saved
    data = [title, price_text, current_date]

    # Save the data to a CSV file
    try:
        with open(csv_file, mode='a+', newline='', encoding='UTF8') as f:
            writer = csv.writer(f)

            # Check if file is empty to write header
            if f.tell() == 0:
                writer.writerow(['Title', 'Price', 'Date'])

            writer.writerow(data)
        print(f"Data saved to {csv_file}")
    except Exception as e:
        print(f"An error occurred while saving to CSV: {e}")

    # Read and print the contents of the CSV file
    try:
        with open(csv_file, mode='r', newline='', encoding='UTF8') as f:
            reader = csv.reader(f)

            # Print each row in the CSV file
            for row in reader:
                print(row)
    except Exception as e:
        print(f"An error occurred while reading the CSV file: {e}")

def print_csv_contents(csv_file='product_data.csv'):
    """Print a heading followed by every row of ``csv_file``.

    Each row is printed as the list of strings produced by ``csv.reader``.

    Args:
        csv_file: Path of the CSV file to display. Defaults to
            'product_data.csv'.

    Returns:
        None. Any exception raised while opening or reading the file is
        caught and reported with a printed message instead of propagating.
    """
    try:
        with open(csv_file, mode='r', newline='', encoding='UTF8') as handle:
            records = csv.reader(handle)

            # The heading is only printed once the file has been opened successfully
            print("\nContents of CSV file:")
            for record in records:
                print(record)
    except Exception as err:
        print(f"An error occurred while reading the CSV file: {err}")

# Example usage: scrape the product page below. No csv_file argument is
# passed, so the row is appended to the default file 'product_data.csv'.
url = 'https://www.amazon.ca/Science-Analytics-T-Shirt-Sleeve-Sweatshirt/dp/B0CK6B9T9V/ref=sr_1_5?crid=NKGDZP04HQ32&dib=eyJ2IjoiMSJ9.EHRviXvbnjvWNnL_WCUXdNItkSMXCb4yYHNfzrFzLio_Sm-P9YGPDBS7paMep-cC46KZUSX0dbv36vdtTsMKQkvI9HZ3fXegcK12K1umlUNICTcEN1OTZRlBHMKPcohcyUUs5GVt31NZCVx4u8mu8pWJKRaVp7iTj9p3EI84koh4dNp6q-1Zw89A7KxGaB-G3Za3O2lCjQeAZRuJSLlM5ft9y2ra-X-AosEZPQlIZd7yJ1MG1HoyCNhztm8MkFW5-WMJsAi8nLT6njTAd8iUZFEMl33K8iUcIexj-WgHFpg.32MDLkas_4ALAe-r9NN3OrkGS8dilY3SLl578pWBiXs&dib_tag=se&keywords=got+data+tshirt&qid=1722282658&sprefix=%2Caps%2C354&sr=8-5'
scrape_amazon_product(url)

Title: Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie
Price: $26.70
Data saved to product_data.csv
['Title', 'Price', 'Date']
['Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie', '$26.70', '2024-07-30']
['Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie', '$26.70', '2024-07-30']


In [None]:
# Automate the scrape to run once a day.
# NOTE: this loop never exits on its own; interrupt the kernel to stop it.
SECONDS_PER_DAY = 24 * 60 * 60

while True:
    try:
        scrape_amazon_product(url)
    except Exception as e:
        # A single failed run must not end the daily schedule: report it and
        # try again after the usual wait. KeyboardInterrupt is not an
        # Exception subclass, so interrupting the kernel still stops the loop.
        print(f"An error occurred during the scheduled scrape: {e}")
    time.sleep(SECONDS_PER_DAY)

In [9]:
# View the current CSV: prints a heading, then each row of
# 'product_data.csv' as a list of strings.
print_csv_contents('product_data.csv')


Contents of CSV file:
['Title', 'Price', 'Date']
['Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie', '$26.70', '2024-07-30']
['Data Science Analytics Funny Got Data T-Shirt, Long Sleeve Shirt, Sweatshirt, Hoodie', '$26.70', '2024-07-30']
