In [65]:
import csv
import os
import time
from datetime import date
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from openpyxl.utils import get_column_letter


# Function to autofit columns in Excel before saving
def auto_fit_columns(ws):
    for column in ws.iter_cols():
        max_length = 0
        column = [cell for cell in column]
        for cell in column:
            try:
                if len(str(cell.value)) > max_length:
                    max_length = len(cell.value)
            except:
                pass
        adjusted_width = (max_length + 2) * 1.2
        ws.column_dimensions[get_column_letter(column[0].col_idx)].width = adjusted_width
    return ws

# Scraping rules for the two websites
def scrape_website(url, word):
    driver = webdriver.Chrome()
    driver.get(url)

    if "livingspaces.com" in url:
        input_field = driver.find_element(By.ID, 'search')
        input_field.send_keys(word)
        form = input_field.find_element(By.XPATH, './ancestor::form')
        form.submit()
    elif "rcwilley.com" in url:
        input_field = driver.find_element(By.ID, 'searchBox')
        input_field.send_keys(word)
        submit_button = driver.find_element(By.ID, 'searchSubmit')
        submit_button.click()

    time.sleep(5)  
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')
    prices = []

    if "livingspaces.com" in url:
        product_items = soup.find_all('div', class_='product-item-container')
        for item in product_items:
            name_element = item.find('span', class_='name')
            price_element = item.find('span', class_='price')
            if name_element and price_element:
                name = name_element.text.strip()
                price = price_element.text.strip()
                prices.append({'name': name, 'price': float(price.replace('$', '').replace(',', ''))})

    elif "rcwilley.com" in url:
        product_items = soup.find_all('div', class_='productContent')
        for item in product_items:
            name_element = item.find('div', class_='productName')
            price_element = item.find('span', class_='price')
            if name_element and price_element:
                name = name_element.text.strip()
                price = price_element.text.strip()
                prices.append({'name': name, 'price': float(price.replace('$', '').replace(',', ''))})

    driver.quit()

    return prices

# Calculating average price of items scraped
def calculate_average(prices):
    prices_float = [price['price'] for price in prices]
    average = sum(prices_float) / len(prices_float) if prices_float else 0
    rounded_average = round(average, 2)
    return rounded_average

# Get both min and max prices from the item list
def get_min_max(prices):
    if not prices:
        return None, None
    min_price = min(prices, key=lambda x: x['price'])
    max_price = max(prices, key=lambda x: x['price'])
    return min_price, max_price

# Function to add searchID to each search so we can better track our info over time.
def get_search_id(file1, file2):
    search_id = 0  # Initialize search_id as 0
    if os.path.isfile(file1) and os.path.getsize(file1) > 0:
        with open(file1, 'r') as f:
            last_line = f.readlines()[-1]
            try:
                search_id = max(int(last_line.split(',')[0]), search_id)
            except ValueError:  # handles case where value is not a valid integer
                pass
    if os.path.isfile(file2) and os.path.getsize(file2) > 0:
        with open(file2, 'r') as f:
            last_line = f.readlines()[-1]
            try:
                search_id = max(int(last_line.split(',')[0]), search_id)
            except ValueError:  # handles case where value is not a valid integer
                pass
    return search_id + 1  # Add 1 to the search_id after checking the files


# Final function that prints the comparisons, and adds them to the excel file
def compare_prices(word):
    website1_url = 'https://www.livingspaces.com/'
    website2_url = 'https://www.rcwilley.com/'

    prices_website1 = scrape_website(website1_url, word)
    prices_website2 = scrape_website(website2_url, word)

    average_website1 = calculate_average(prices_website1)
    average_website2 = calculate_average(prices_website2)

    # Currently not using. Was using in an earlier version of the code
    # rounded_average_website1 = round(average_website1, 2)
    # rounded_average_website2 = round(average_website2, 2)

    website1_name = website1_url.replace('https://www.', '').replace('.com/', '').capitalize()
    website2_name = website2_url.replace('https://www.', '').replace('.com/', '').capitalize()

    print(f"Item: {search_word}")
    print()

    # To print a list of all items on the first page.

    # print(f"Average price on {website1_name}: ${average_website1:.2f}")
    # for item in prices_website1:
    #     print(f"Name: {item['name']}, Price: {item['price']}")

    # print(f"\nAverage price on {website2_name}: ${average_website2:.2f}")
    # for item in prices_website2:
    #     print(f"Name: {item['name']}, Price: {item['price']}")

    print(f"Number of {word}s on {website1_name} front page: {len(prices_website1)}")
    print(f"Average price on {website1_name}: ${average_website1:.2f}")
    min_price, max_price = get_min_max(prices_website1)
    if min_price and max_price:
        print(f"Lowest price on {website1_name}: {min_price['name']} at ${min_price['price']:.2f}")
        print(f"Highest price on {website1_name}: {max_price['name']} at ${max_price['price']:.2f}")

    print(f"\nNumber of {word}s on {website2_name} front page: {len(prices_website2)}")
    print(f"Average price on {website2_name}: ${average_website2:.2f}")
    min_price, max_price = get_min_max(prices_website2)
    if min_price and max_price:
        print(f"Lowest price on {website2_name}: {min_price['name']} at ${min_price['price']:.2f}")
        print(f"Highest price on {website2_name}: {max_price['name']} at ${max_price['price']:.2f}")

    price_diff = abs(average_website1 - average_website2)
    print(f"\nPrice Comparison: {website1_name} is {'cheaper' if average_website1 < average_website2 else 'more expensive' if average_website1 > average_website2 else 'equally priced'} than {website2_name} by ${price_diff:.2f}")


    current_date = date.today().strftime('%m/%d/%Y')

    filename1 = 'item_prices.csv'
    filename2 = 'compared_prices.csv'

    search_id = get_search_id(filename1, filename2)

    # First CSV
    with open(filename1, 'a', newline='') as f:
        writer = csv.writer(f)
        if not os.path.getsize(filename1):  # file is empty, write headers
            writer.writerow(['SearchID', 'Website', 'Item', 'Name', 'Price', 'Date Added'])
        for item in prices_website1:
            writer.writerow([search_id, website1_name, word, item['name'], item['price'], current_date])
        for item in prices_website2:
            writer.writerow([search_id, website2_name, word, item['name'], item['price'], current_date])

    # Second CSV
    with open(filename2, 'a', newline='') as f:
        writer = csv.writer(f)
        if not os.path.getsize(filename2):  # file is empty, write headers
            writer.writerow(['SearchID', 'Website', 'Item', 'Lowest Price', 'Highest Price', 'Avg Price', 'Price Difference', 'Date Added'])
        if prices_website1:
            writer.writerow([search_id, website1_name, word, min_price['price'], max_price['price'], average_website1, price_diff, current_date])
        if prices_website2:
            writer.writerow([search_id, website2_name, word, min_price['price'], max_price['price'], average_website2, price_diff, current_date])
        

# The search_word function that runs first and gets our user input search word
# We have to first define all the functions before we start the calling process and executing the code.
# this looks like it is last in our code
# But it is actually first since everything else above is just defined
search_word = input("Enter word to search: ")
compare_prices(search_word)

Item: sofa

Number of sofas on Livingspaces front page: 36
Average price on Livingspaces: $920.28
Lowest price on Livingspaces: Reid Buff 80" Sofa at $350.00
Highest price on Livingspaces: Mason Leather 89" Sofa at $1695.00

Number of sofas on Rcwilley front page: 30
Average price on Rcwilley: $869.62
Lowest price on Rcwilley: Hannah Dark Blue Sofa at $399.99
Highest price on Rcwilley: Clearance Cade Sable Brown Leather-Match Power Reclining Sofa at $2699.99

Price Comparison: Livingspaces is more expensive than Rcwilley by $50.66


In [30]:
# Test mysql local connection

import mysql.connector

# You will need to install mysql and run mysql server locally

mydb = None  # Declare the variable outside the try block

try:
    mydb = mysql.connector.connect(
        host="localhost",
        user="root",  # Your username may be different
        passwd="admin" # Your password may be different
    )
    print("Connection to MySQL database successful.")
except mysql.connector.Error as error:
    print("Error connecting to MySQL database:", error)
finally:
    if mydb is not None and mydb.is_connected():
        mydb.close()
        print("Connection closed.")


Connection to MySQL database successful.
Connection closed.


In [72]:
import csv
from datetime import datetime
import mysql.connector
from mysql.connector import Error

def create_database():
    conn = None
    try:
        conn = mysql.connector.connect(
            host="localhost",
            user="root",
            passwd="admin"
        )
        if conn.is_connected():
            cursor = conn.cursor()
            cursor.execute("CREATE DATABASE PricesDB")
            print('Database created successfully....')
    except Error as e:
        print(e)
    finally:
        if conn:
            conn.close()

create_database()


def create_connection():
    conn = None
    try:
        conn = mysql.connector.connect(
            host="localhost",
            user="root",
            passwd="admin",
            database="PricesDB"
        )
        if conn.is_connected():
            print('Connected to MySQL database')
    except Error as e:
        print(e)
    return conn


def create_table(conn):
    try:
        cursor = conn.cursor()
        item_prices_table_query = """
            CREATE TABLE IF NOT EXISTS item_prices (
                ID INT AUTO_INCREMENT PRIMARY KEY,
                SearchID INT,
                Website VARCHAR(255),
                Item VARCHAR(255),
                Name VARCHAR(255),
                Price DECIMAL(10, 2),
                DateAdded DATE
            )
        """
        cursor.execute(item_prices_table_query)

        compared_prices_table_query = """
            CREATE TABLE IF NOT EXISTS compared_prices (
                ID INT AUTO_INCREMENT PRIMARY KEY,
                SearchID INT,
                Website VARCHAR(255),
                Item VARCHAR(255),
                LowestPrice DECIMAL(10, 2),
                HighestPrice DECIMAL(10, 2),
                AvgPrice DECIMAL(10, 2),
                PriceDifference DECIMAL(10, 2),
                DateAdded DATE
            )
        """
        cursor.execute(compared_prices_table_query)

        print('Tables Updated...')

    except Error as e:
        print(e)


# Check if SearchID exists
def search_id_exists(conn, search_id, table_name):
    query = f"SELECT 1 FROM {table_name} WHERE SearchID = %s LIMIT 1"
    try:
        cursor = conn.cursor()
        cursor.execute(query, (search_id,))
        return cursor.fetchone() is not None
    except Error as e:
        print(e)
        return False


def insert_into_item_prices(conn, data):
    search_id, _, _, _, _, _ = data
    query = """
        INSERT INTO item_prices(SearchID, Website, Item, Name, Price, DateAdded)
        VALUES (%s, %s, %s, %s, %s, %s)
    """
    try:
        cursor = conn.cursor()
        cursor.execute(query, data)
        conn.commit()
        print('Data inserted successfully...')
    except Error as e:
        print(e)


def close_connection(conn):
    if conn:
        conn.close()
        print('Database connection closed.')


def insert_into_compared_prices(conn, data):
    search_id, _, _, _, _, _, _, _ = data
    query = """
        INSERT INTO compared_prices(SearchID, Website, Item, LowestPrice, HighestPrice, AvgPrice, PriceDifference, DateAdded)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """
    try:
        cursor = conn.cursor()
        cursor.execute(query, data)
        conn.commit()
        print('Data inserted successfully...')
    except Error as e:
        print(e)


# Establish the MySQL connection
conn = create_connection()

# Create tables if they don't exist
create_table(conn)


# Read and insert data for each CSV
def read_csv_and_insert_item_prices(filename, insert_function):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        next(reader)  # Skip header row
        items = []
        unique_search_ids = set()
        for row in reader:
            # Convert price related data to float and date to proper date format
            for i in [4]:  # Convert 'Price' to float
                if '.' in row[i]:
                    row[i] = float(row[i])
            if '/' in row[5]:  # Convert 'Date Added' to date format
                row[5] = datetime.strptime(row[5], '%m/%d/%Y').date()
            items.append(row)
            unique_search_ids.add(row[0])  # Store unique SearchIDs

        for search_id in unique_search_ids:
            if search_id_exists(conn, search_id, "item_prices"):
                print(f"SearchID {search_id} already exists. Skipping...")
                continue
            for item in items:
                if item[0] == search_id:
                    insert_function(conn, tuple(item))


def read_csv_and_insert_compared_prices(filename, insert_function):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        next(reader)  # Skip header row
        items = []
        unique_search_ids = set()
        for row in reader:
            # Convert price related data to float and date to proper date format
            for i in [3, 4, 5, 6]:  # Convert 'Lowest Price', 'Highest Price', 'Avg Price', 'Price Difference' to float
                if '.' in row[i]:
                    row[i] = float(row[i])
            if '/' in row[7]:  # Convert 'Date Added' to date format
                row[7] = datetime.strptime(row[7], '%m/%d/%Y').date()
            items.append(row)
            unique_search_ids.add(row[0])  # Store unique SearchIDs

        for search_id in unique_search_ids:
            if search_id_exists(conn, search_id, "compared_prices"):
                print(f"SearchID {search_id} already exists. Skipping...")
                continue
            for item in items:
                if item[0] == search_id:
                    insert_function(conn, tuple(item))


# Read and insert data for each CSV
read_csv_and_insert_item_prices('item_prices.csv', insert_into_item_prices)
read_csv_and_insert_compared_prices('compared_prices.csv', insert_into_compared_prices)


# Close the database connection
close_connection(conn)


1007 (HY000): Can't create database 'pricesdb'; database exists
Connected to MySQL database
Tables Updated...
SearchID 2 already exists. Skipping...
SearchID 1 already exists. Skipping...
SearchID 2 already exists. Skipping...
SearchID 1 already exists. Skipping...
Database connection closed.
