In [7]:
import os
import time
from datetime import date
from selenium import webdriver
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.utils import get_column_letter

# Function to autofit columns in Excel before saving
def auto_fit_columns(ws):
    """Widen every column of *ws* to fit its longest cell and return *ws*.

    The width is the length of the longest cell text in the column, padded
    by two characters and scaled by 1.2.

    Args:
        ws: Worksheet exposing ``iter_cols()`` and ``column_dimensions``.

    Returns:
        The same worksheet object, so the call can be re-assigned or chained.
    """
    for column_cells in ws.iter_cols():
        if not column_cells:
            continue
        # Measure the text form of each value: numeric cells (such as prices)
        # have no len() of their own, and empty cells contribute no width.
        max_length = max(
            (len(str(cell.value)) for cell in column_cells if cell.value is not None),
            default=0,
        )
        adjusted_width = (max_length + 2) * 1.2
        column_letter = get_column_letter(column_cells[0].col_idx)
        ws.column_dimensions[column_letter].width = adjusted_width
    return ws

# Scraping rules for the two websites
def scrape_website(url, word):
    """Search *url* for *word* and return the products found on the results page.

    Only livingspaces.com and rcwilley.com have scraping rules. For any other
    URL the page is still loaded, but no search is submitted and an empty
    list is returned.

    Args:
        url: Home page of the store to search.
        word: Search term typed into the store's search box.

    Returns:
        A list of ``{'name': str, 'price': float}`` dicts, one per product
        that had both a name and a parseable price.
    """
    # Per-site rules: how to submit the search and where names live in the
    # result markup. A submit_id of None means "submit the enclosing form".
    site_rules = {
        "livingspaces.com": {
            'search_id': 'search',
            'submit_id': None,
            'item_class': 'product-item-container',
            'name_tag': 'span',
            'name_class': 'name',
        },
        "rcwilley.com": {
            'search_id': 'searchBox',
            'submit_id': 'searchSubmit',
            'item_class': 'productContent',
            'name_tag': 'div',
            'name_class': 'productName',
        },
    }
    rules = next(
        (site for domain, site in site_rules.items() if domain in url),
        None,
    )

    driver = webdriver.Chrome()
    try:
        driver.get(url)

        if rules is not None:
            input_field = driver.find_element(By.ID, rules['search_id'])
            input_field.send_keys(word)
            if rules['submit_id'] is None:
                input_field.find_element(By.XPATH, './ancestor::form').submit()
            else:
                driver.find_element(By.ID, rules['submit_id']).click()

        # Give the results page time to load before reading its HTML.
        time.sleep(5)
        page_source = driver.page_source
    finally:
        # Always release the browser, even if a lookup above raised.
        driver.quit()

    if rules is None:
        return []

    soup = BeautifulSoup(page_source, 'html.parser')
    prices = []
    for item in soup.find_all('div', class_=rules['item_class']):
        name_element = item.find(rules['name_tag'], class_=rules['name_class'])
        price_element = item.find('span', class_='price')
        if not (name_element and price_element):
            continue

        price_text = price_element.text.strip()
        try:
            price = float(price_text.replace('$', '').replace(',', ''))
        except ValueError:
            # Skip prices that are not a single number (e.g. a range or a
            # label) instead of aborting the whole scrape.
            continue

        prices.append({'name': name_element.text.strip(), 'price': price})

    return prices

# Calculating average price of items scraped
def calculate_average(prices):
    """Return the mean of the 'price' values in *prices*, rounded to 2 places.

    An empty list yields 0.
    """
    total = 0
    count = 0
    for entry in prices:
        total += entry['price']
        count += 1

    mean = total / count if count else 0
    return round(mean, 2)

# Get both min and max prices from the item list
def get_min_max(prices):
    """Return the (cheapest, most expensive) entries of *prices*.

    Each entry is a dict with a 'price' key. When several entries share the
    lowest or highest price, the first one encountered is returned. An empty
    input yields ``(None, None)``.
    """
    if not prices:
        return None, None

    cheapest = None
    priciest = None
    for entry in prices:
        # Strict comparisons keep the earliest entry on ties.
        if cheapest is None or entry['price'] < cheapest['price']:
            cheapest = entry
        if priciest is None or entry['price'] > priciest['price']:
            priciest = entry
    return cheapest, priciest


# Final function that prints the comparisons, and adds them to the excel file
def compare_prices(word):
    """Scrape both stores for *word*, print a comparison, and log it to Excel.

    Results are appended to ``prices.xlsx`` in the current directory. The
    file is created, with header rows, on first use; later runs load it and
    add new rows below the existing data.

    Args:
        word: Search term to look up on both websites.
    """
    # Local import: loading an existing workbook is only needed here.
    from openpyxl import load_workbook

    website1_url = 'https://www.livingspaces.com/'
    website2_url = 'https://www.rcwilley.com/'

    prices_website1 = scrape_website(website1_url, word)
    prices_website2 = scrape_website(website2_url, word)

    # calculate_average already rounds to two decimals.
    average_website1 = calculate_average(prices_website1)
    average_website2 = calculate_average(prices_website2)
    price_diff = round(abs(average_website1 - average_website2), 2)

    website1_name = website1_url.replace('https://www.', '').replace('.com/', '').capitalize()
    website2_name = website2_url.replace('https://www.', '').replace('.com/', '').capitalize()

    # One record per store so printing and the Excel export share the data.
    sites = [
        (website1_name, prices_website1, average_website1, get_min_max(prices_website1)),
        (website2_name, prices_website2, average_website2, get_min_max(prices_website2)),
    ]

    # Use the parameter, not a module-level global, so the function works
    # for any caller.
    print(f"Item: {word}")
    print()

    for index, (site_name, site_prices, site_average, (min_price, max_price)) in enumerate(sites):
        # Blank line between the two stores' summaries.
        lead = "\n" if index else ""
        print(f"{lead}Number of {word}s on {site_name} front page: {len(site_prices)}")
        print(f"Average price on {site_name}: ${site_average:.2f}")
        if min_price and max_price:
            print(f"Lowest price on {site_name}: {min_price['name']} at ${min_price['price']:.2f}")
            print(f"Highest price on {site_name}: {max_price['name']} at ${max_price['price']:.2f}")

    if average_website1 < average_website2:
        verdict = 'cheaper'
    elif average_website1 > average_website2:
        verdict = 'more expensive'
    else:
        verdict = 'equally priced'
    print(f"\nPrice Comparison: {website1_name} is {verdict} than {website2_name} by ${price_diff:.2f}")

    # The filename is fixed; it could instead be requested from the user with
    #   filename = input("Enter filename to save as (without extension): ") + '.xlsx'
    filename = 'prices.xlsx'

    if os.path.exists(filename):
        # Keep earlier runs: load the workbook and append to it.
        wb = load_workbook(filename)
    else:
        wb = Workbook()
        # A new workbook starts with one default sheet; drop it so both
        # result sheets are created the same way below.
        wb.remove(wb.active)

    current_date = date.today().strftime('%m/%d/%Y')

    # First sheet: one row per scraped item.
    sheet1 = _get_or_create_sheet(
        wb, 'Item Prices',
        ['Website', 'Item', 'Name', 'Price', 'Date Added'],
    )
    for site_name, site_prices, _, _ in sites:
        for item in site_prices:
            sheet1.append([site_name, word, item['name'], item['price'], current_date])

    # Second sheet: one summary row per store.
    sheet2 = _get_or_create_sheet(
        wb, 'Compared',
        ['Website', 'Item', 'Lowest Price', 'Highest Price', 'Avg Price', 'Price Difference', 'Date Added'],
    )
    for site_name, _, site_average, (min_price, max_price) in sites:
        sheet2.append([
            site_name,
            word,
            min_price['price'] if min_price else None,
            max_price['price'] if max_price else None,
            site_average,
            price_diff,
            current_date,
        ])

    # Autofit columns after appending all the data.
    auto_fit_columns(sheet1)
    auto_fit_columns(sheet2)

    wb.save(filename)


def _get_or_create_sheet(wb, title, header):
    """Return the sheet named *title*, creating it with *header* if missing."""
    if title in wb.sheetnames:
        return wb[title]
    sheet = wb.create_sheet(title=title)
    sheet.append(header)
    return sheet


# Entry point: ask the user for a search word and run the comparison.
# Every function above must be defined before it can be called here.
# Although this code appears last in the file, it is the first to actually
# run; everything above it only defines functions.
# Run only when executed directly (as a script or notebook cell), so that
# importing this module does not prompt for input or launch a browser.
if __name__ == "__main__":
    search_word = input("Enter word to search: ")
    compare_prices(search_word)

Item: patio set

Number of patio sets on Livingspaces front page: 24
Average price on Livingspaces: $1194.38
Lowest price on Livingspaces: Cora Sand 3 Piece Outdoor Conversation Set at $350.00
Highest price on Livingspaces: Provence 79" Outdoor Dining Set For 6 at $2995.00

Number of patio sets on Rcwilley front page: 30
Average price on Rcwilley: $2084.82
Lowest price on Rcwilley: Bali 5 Piece Round Patio Dining Set with 4 Sling Chairs at $249.99
Highest price on Rcwilley: Montreal 9 Piece Patio Swivel Chair Dining Set at $4299.99

Price Comparison: Livingspaces is cheaper than Rcwilley by $890.44
