In [2]:
# Import necessary libraries
from splinter import Browser
from bs4 import BeautifulSoup as soup
import wget
import os
import re
import requests
import pandas as pd
import matplotlib.pyplot as plt
import time

from splinter import Browser
from selenium.webdriver.firefox.service import Service

In [3]:
def create_facebook_marketplace_query(params):
    '''Returns a URL for Facebook Marketplace given user parameters'''
    # check for the two required parameters, if they are not specified, then exit
    if 'location' not in params or 'item' not in params:
        raise ValueError('location or item was not specified.')
    
    # make default params if they are not specified
    if 'condition' not in params:
        params['condition'] = []
    
    if 'min_price' not in params:
        params['min_price'] = 0
    
    if 'max_price' not in params:
        params['max_price'] = 1000
    
    if 'days_since_listed' not in params:
        params['days_since_listed'] = 10

    # lookup table for conditions 
    condition_separator = "%2C"
    conditions = {0: "new", 1: "used_like_new", 2: "used_good", 3: "used_fair"}

    # create the conditions part of url
    condition = "" if params['condition'] == [] else "&itemCondition=" + conditions[params['condition'][0]]
    for index in params['condition']:
        condition += condition_separator + conditions[index]

    # create the query string
    base_url = f"https://www.facebook.com/marketplace/{params['location']}/search?"
    url = f"{base_url}minPrice={params['min_price']}&maxPrice={params['max_price']}&daysSinceListed={params['days_since_listed']}{condition}&query={params['item']}"
    return url

In [4]:
def fetch_marketplace_html(url):
    """Returns Facebook Marketplace Data in the format of BeautifulSoup Object"""
    browser = Browser('firefox')
    browser.visit(url)

    # close pop out window
    if browser.is_element_present_by_css('div[aria-label="Close"]', wait_time=10):
        # Click on the element once it's found
        browser.find_by_css('div[aria-label="Close"]').first.click()
    
    # Scroll down to load more results
    
    # Define the number of times to scroll the page
    scroll_count = 4

    # Define the delay (in seconds) between each scroll
    scroll_delay = 2

    # Loop to perform scrolling
    for _ in range(scroll_count):
        # Execute JavaScript to scroll to the bottom of the page
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        
        # Pause for a moment to allow the content to load
        time.sleep(scroll_delay)
    
    # Parse the HTML
    html = browser.html

    # close browser window
    time.sleep(1)
    browser.quit()

    # Create a BeautifulSoup object from the scraped HTML
    return soup(html, 'html.parser')

In [52]:
def parse_marketplace_data(market_soup):
    '''Returns a Pandas Data Frame with information about Facebook Marketplace listings'''

    # get listings

    listing_divs = market_soup.find_all('div', class_="x9f619 x78zum5 x1r8uery xdt5ytf x1iyjqo2 xs83m0k x1e558r4 x150jy0e x1iorvi4 xjkvuk6 xnpuxes x291uyu x1uepa24")
    data = []
    for listing_div in listing_divs:
        
        # Extract all the necessary info and insert into lists
        picture_div = listing_div.find('img', class_="xt7dq6l xl1xv1r x6ikm8r x10wlt62 xh8yej3")
        if picture_div == None:
            continue
        picture_url = picture_div.get('src')

        title_div = listing_div.find('span', class_="x1lliihq x6ikm8r x10wlt62 x1n2onr6")
        if title_div == None:
            continue
        title = title_div.text.strip()

        price_div = listing_div.find('span', class_="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x676frb x1lkfr7t x1lbecb7 x1s688f xzsf02u")
        if price_div == None:
            continue
        price = price_div.text.strip()

        location_div = listing_div.find('span', class_="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x4zkp8e x3x7a5m x1nxh6w3 x1sibtaa xo1l8bm xi81zsa")
        if location_div == None:
            continue
        location = location_div.text.strip()

        url_div = listing_div.find('a', class_="x1i10hfl xjbqb8w x1ejq31n xd10rxx x1sy0etr x17r0tee x972fbf xcfux6l x1qhh985 xm0m39n x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz x1heor9g x1sur9pj xkrqix3 x1lku1pv")
        if url_div == None:
            continue
        url = "www.facebook.com" + url_div.get('href')

        listing = {
            'title': title,
            'price': price,
            'location': location,
            'image_url': picture_url,
            'url': url
        }
        data.append(listing)

    return pd.DataFrame(data)
    

In [6]:
def get_ebay_listings(item):
    '''Returns a Pandas Data Frame with information about Ebay listings'''
    ebay_url = f"https://www.ebay.com/sch/i.html?_nkw={item}"
    response = requests.get(ebay_url)

    ebay_soup = soup(response.text, 'html.parser')
    
    item_divs = ebay_soup.find_all('li', class_='s-item s-item__pl-on-bottom')

    prices = [div.find('span', class_='s-item__price').text for div in item_divs]

    urls = [div.find('a', class_='s-item__link').get('href') for div in item_divs]

    
    image_divs = [div.find('div', class_='s-item__image-wrapper image-treatment') for div in item_divs]
    image_tags = [div.find('img') for div in image_divs]
    image_urls = [url.get('src') for url in image_tags]

    titles_div = [div.find('div', class_='s-item__title') for div in item_divs]
    titles = [div.find('span').text for div in titles_div]

    data = {
        'title': titles,
        'price': prices,
        'image_url': image_urls,
        'url': urls
    }

    return pd.DataFrame(data)

In [7]:
def filter_ebay_listings(df):
    """Returns a cleaned data frame without any outliers"""
    # remove any special characters and convert to float
    df['price'] = df['price'].replace('[\$,]', '', regex=True).astype(float)

    # remove the first two rows
    df = df.iloc[2:].reset_index(drop=True)

    # Remove outliers in the 'Price' column
    # Here we use the IQR method to identify outliers

    Q1 = df['price'].quantile(0.25)
    Q3 = df['price'].quantile(0.60)
    IQR = Q3 - Q1

    # Define lower and upper bound for outliers
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Filter out the outliers
    df_cleaned = df[(df['price'] >= lower_bound) & (df['price'] <= upper_bound)]

    return df_cleaned
    

In [8]:
def calc_ebay_stats(df):
    """Returns a data frame with statistics of ebay listings"""
    # Calculate key statistics
    mean_price = df['price'].mean()
    median_price = df['price'].median()
    std_dev_price = df['price'].std()
    fair_price = median_price * 1.05  # Adjust by 5% for current market trends

    # store and return states in data frame
    stats = {
        'mean_price': [mean_price],
        'median_price': [median_price],
        'std_dev': [std_dev_price],
        'fair_price': [fair_price]
    }

    return pd.DataFrame(stats).round(2)


In [9]:
def search_google_lens(image_url):
    """Returns Google Lens Data in the format of BeautifulSoup Object"""
    google_lens_url = f"https://lens.google.com/uploadbyurl?url={image_url}"
    browser = Browser('firefox')
    browser.visit(google_lens_url)
    
    time.sleep(1)
    # Scroll down to load more results
    
    # Define the number of times to scroll the page
    scroll_count = 2

    # Define the delay (in seconds) between each scroll
    scroll_delay = 2

    # Loop to perform scrolling
    for _ in range(scroll_count):
        # Execute JavaScript to scroll to the bottom of the page
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        
        # Pause for a moment to allow the content to load
        time.sleep(scroll_delay)
    
    # Parse the HTML
    html = browser.html

    # close browser window
    time.sleep(1)
    browser.quit()

    # Create a BeautifulSoup object from the scraped HTML
    return soup(html, 'html.parser')

In [10]:
# specify the parameters
params = {
    'location': "pittsburgh",
    'item': "jersey",
    'condition': [],
    'min_price': 0,
    'max_price': 100,
    'days_since_listed': 10
}

# create the facebook marketplace query
url = create_facebook_marketplace_query(params)

# Visit the website and gather data
market_soup = fetch_marketplace_html(url)

In [53]:
# parse html to create structured listing data
facebook_listings = parse_marketplace_data(market_soup)
facebook_listings

Unnamed: 0,title,price,location,image_url,url
0,Jerseys for sale,$20,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/868062668532...
1,Soccer jerseys,$20,"Carnegie, PA",https://scontent-iad3-1.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/452439987599...
2,"Pirates ""Bae"" Jersey",$30,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/162519227499...
3,Paul Skenes Pittsburgh Pirates Jersey,$71,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/217612536610...
4,Pittsburgh Penguins Sidney Crosby Jersey,$20,"Enterprise, WV",https://scontent-iad3-1.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/876438061182...
...,...,...,...,...,...
115,dog jersey,$5,"Tarentum, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/117036414086...
116,Official Steeler Jersy (Med),$50,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/460539336827...
117,**Like New** Harrison Smith Vikings #22 Purple...,$25,"Carnegie, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/503561708868...
118,Authentic Reebok Pittsburgh Steelers On Field ...,$75,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/521558273771...


In [54]:
ebay_listings = get_ebay_listings(facebook_listings['title'][0])
ebay_listings = filter_ebay_listings(ebay_listings)
ebay_listings

ValueError: could not convert string to float: '9.90 to 35.90'

In [None]:
ebay_stats = calc_ebay_stats(ebay_listings)
ebay_stats

Unnamed: 0,mean_price,median_price,std_dev,fair_price
0,83.57,79.0,43.96,82.95


In [None]:
def upload_image(image_path):
    url = "https://lens.google.com/upload"
    headers = {
        'Content-Type': 'multipart/form-data',
    }

    with open(image_path, 'rb') as image_file:
        files = {
            'image': (image_path, image_file, 'image/jpeg')
        }
        
        response = requests.post(url, headers=headers, files=files)

    if response.status_code == 200:
        print('Image uploaded successfully')
        print('Response:', response.json())
    else:
        print('Failed to upload image')
        print('Status code:', response.status_code)
        print('Response:', response.text)
    return response.json()

# Replace 'your_image.jpg' with the path to your image file
# Replace 'your_oauth_token' with your actual OAuth token
response = upload_image('image.jpg')
response


Failed to upload image
Status code: 400
Response: <html lang=en><meta charset=utf-8><meta name=viewport content="initial-scale=1, minimum-scale=1, width=device-width"><title>Error 400 (Bad Request)!!1</title><style nonce="UCXaO5XPaGz3wdU3w45zzQ">*{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{background:#fff;color:#222;padding:15px}body{color:#222;text-align:unset;margin:7% auto 0;max-width:390px;min-height:180px;padding:30px 0 15px;}* > body{background:url(//www.google.com/images/errors/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflow:hidden}pre{white-space:pre-wrap;}ins{color:#777;text-decoration:none}a img{border:0}@media screen and (max-width:772px){body{background:none;margin-top:0;max-width:none;padding-right:0}}#logo{background:url(//www.google.com/images/branding/googlelogo/1x/googlelogo_color_150x54dp.png) no-repeat;margin-left:-5px}@media only screen and (min-resolution:192dpi){#logo{background:url(//www.google.com/images/bra

Exception ignored in: <function Service.__del__ at 0x00000229980BFCE0>
Traceback (most recent call last):
  File "c:\Users\shake\Desktop\Projects\Flip-Finder\server\env\Lib\site-packages\selenium\webdriver\common\service.py", line 189, in __del__
    self.stop()
  File "c:\Users\shake\Desktop\Projects\Flip-Finder\server\env\Lib\site-packages\selenium\webdriver\common\service.py", line 146, in stop
    self.send_remote_shutdown_command()
  File "c:\Users\shake\Desktop\Projects\Flip-Finder\server\env\Lib\site-packages\selenium\webdriver\common\service.py", line 126, in send_remote_shutdown_command
    request.urlopen(f"{self.service_url}/shutdown")
  File "C:\Program Files\Python311\Lib\urllib\request.py", line 216, in urlopen
    return opener.open(url, data, timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\Python311\Lib\urllib\request.py", line 519, in open
    response = self._open(req, data)
               ^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\Pyt

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
# Start a browser session (this example uses Chrome, but you can use Firefox by changing 'chrome' to 'firefox')
with Browser('firefox') as browser:
    # Open the URL where the form is located
    url = "https://www.google.com"
    browser.visit(url)
    time.sleep(3)
    # find the search by image button
    image_button = browser.find_by_css('nDcEnd')
    image_button.click()

    time.sleep(1)
    # Find the file input element and upload the image
    file_input = browser.find_by_name('encoded_image')  # Adjust the selector based on your form
    print(file_input)
    
    file_input.fill('image.jpg')
    time.sleep(3)
    # Optionally, fill other form fields if necessary
    # browser.find_by_name('other_field').fill('value')

    # Submit the form
    # submit_button = browser.find_by_name('submit')  # Adjust the selector based on your form
    # submit_button.click()

    # Optionally, wait for some response or next page to load
    browser.is_text_present('Success', wait_time=10)  # Adjust based on the expected success message

    # Print the URL of the current page (can be used to verify the navigation)
    print(browser.url)


NameError: name 'Browser' is not defined

In [None]:
# NOTE: ONLY EBAY IMAGES ARE WORKING RIGHT NOW
# FACEBOOK IMAGES RETURN ERROR (Probably have bot auth.)
# MAYBE PASS THEM THROUGH A LINK GENERATOR FIRST

image_url = facebook_listings["image_url"][0]
print(image_url)
# image_filename = wget.download(image_url, "image.jpg")
image_url = "https://i.imgur.com/RQC5FHC.jpeg"
google_lens_soup = search_google_lens(image_url)

https://scontent-iad3-1.xx.fbcdn.net/v/t45.5328-4/451217840_498240572573817_8769391673754606793_n.jpg?stp=c1.0.260.260a_dst-jpg_p261x260&_nc_cat=104&ccb=1-7&_nc_sid=247b10&_nc_ohc=NdrGafwVUcAQ7kNvgFoDr3C&_nc_ht=scontent-iad3-1.xx&oh=00_AYCm4sE3NU_67iEHilS0Je-MLtQLmRPXTgCBtyYwBZNLng&oe=669CECEB
