# Jewel Osco meat prices

### Find data 

In [4]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import os
import sys
import time
from datetime import datetime

header = { 
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0'
}
page_url = 'https://www.jewelosco.com/aisle-vs/meat-seafood/meat-essentials.html?sort=price'

# Set up Selenium
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0")

# Get page with Selenium
driver = webdriver.Chrome(options=options)
driver.get(page_url)

# Handle cookie consent popup
try:
    cookie_accept = WebDriverWait(driver, 3).until(
        EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Accept") or contains(text(), "Allow")]'))
    )
    cookie_accept.click()
    print("Accepted cookies")
except:
    print("No cookie popup found")

# exhaust load more button
last_count = 0
same_count = 0
max_attempts = 5

while same_count < 3 and max_attempts > 0:
    max_attempts -=1

    # load products and find total count of products
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a[data-qa="prd-itm-pttl"]')))
    current_products = driver.find_elements(By.CSS_SELECTOR, 'a[data-qa="prd-itm-pttl"]')
    current_count = len(current_products)

    # check product count to see if load more worked (loaded more items)
    if current_count == last_count:
        same_count += 1
    else:
        same_count = 0
        last_count = current_count

    # 'load more' button handling
    try:
        load_more_button = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable((By.XPATH, '//button[contains(text(), "Load more")]'))
        )
        load_more_button.click()
        print("Clicked 'Load more' button successfully")
    except Exception as e:
        print(f"Could not find/click 'Load more' button: {str(e)}")
        break


# Parse with BeautifulSoup
product_name_list =[]
product_price_list=[]

# find product name
soup = BeautifulSoup(driver.page_source, 'lxml')
product_name = soup.findAll('a', attrs={'data-qa':'prd-itm-pttl'})

for product in product_name:
    product_name_list.append(product.text.strip())

# find all product price
price_div = soup.findAll('div', attrs={'class':'product-comp-v1__price'})

for product in price_div:
    item_price = product.find('span', attrs={'aria-hidden': 'true', 'class': 'color-neutral-90'})
    if item_price:
        product_price_list.append((item_price.text.strip()))

# check if previous scrape data exists 
date = datetime.now()
date = date.strftime('%Y-%m-%d')
data_filename = 'jewelosco_meat_products.csv'

# add data into existing file if it exists
if os.path.exists(data_filename):
    # import existing data
    existing_df = pd.read_csv(data_filename)
        
    # get the new data
    new_data = pd.DataFrame({
        'Product Name': product_name_list,
        f'{date}': product_price_list
    })
    # check if data has been taken with the same date
    if existing_df.columns[-1] == new_data.columns[-1]:
        print(f'data for {date} has already been taken')
        driver.quit()
        sys.exit()

    #merge the two df
    merged_df = pd.merge(existing_df, new_data, on='Product Name', how='outer')
    
    # Save merged data
    merged_df.to_csv(data_filename, index=False)
    print(f"Updated existing data in '{data_filename}'")
else:
    # Create DataFrame
    df = pd.DataFrame({
        'Product Name': product_name_list,
        f'{date}': product_price_list
    })
    
    # Export to CSV
    df.to_csv(data_filename, index=False)
    print(f"Data saved to '{data_filename}'")

driver.quit()

Accepted cookies
Clicked 'Load more' button successfully
Clicked 'Load more' button successfully
Clicked 'Load more' button successfully
Clicked 'Load more' button successfully
Could not find/click 'Load more' button: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff78ccbcda5+78885]
	GetHandleVerifier [0x0x7ff78ccbce00+78976]
	(No symbol) [0x0x7ff78ca79bca]
	(No symbol) [0x0x7ff78cad0766]
	(No symbol) [0x0x7ff78cad0a1c]
	(No symbol) [0x0x7ff78cb24467]
	(No symbol) [0x0x7ff78caf8bcf]
	(No symbol) [0x0x7ff78cb2122f]
	(No symbol) [0x0x7ff78caf8963]
	(No symbol) [0x0x7ff78cac16b1]
	(No symbol) [0x0x7ff78cac2443]
	GetHandleVerifier [0x0x7ff78cf94eed+3061101]
	GetHandleVerifier [0x0x7ff78cf8f33d+3037629]
	GetHandleVerifier [0x0x7ff78cfae592+3165202]
	GetHandleVerifier [0x0x7ff78ccd730e+186766]
	GetHandleVerifier [0x0x7ff78ccdeb3f+217535]
	GetHandleVerifier [0x0x7ff78ccc59b4+114740]
	GetHandleVerifier [0x0x7ff78ccc5b69+115177]
	GetHandleVerifier [0x0x7ff78ccac368+10728]
	BaseThreadInitThunk [

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
