In [60]:
# Import necessary libraries
from splinter import Browser
from bs4 import BeautifulSoup as soup
import re
import requests
import pandas as pd
import matplotlib.pyplot as plt
import time

from splinter import Browser
from selenium.webdriver.firefox.service import Service

In [61]:
def create_facebook_marketplace_query(params):
    '''Returns a URL for Facebook Marketplace given user parameters'''
    # check for the two required parameters, if they are not specified, then exit
    if 'location' not in params or 'item' not in params:
        raise ValueError('location or item was not specified.')
    
    # make default params if they are not specified
    if 'condition' not in params:
        params['condition'] = []
    
    if 'min_price' not in params:
        params['min_price'] = 0
    
    if 'max_price' not in params:
        params['max_price'] = 1000
    
    if 'days_since_listed' not in params:
        params['days_since_listed'] = 10

    # lookup table for conditions 
    condition_separator = "%2C"
    conditions = {0: "new", 1: "used_like_new", 2: "used_good", 3: "used_fair"}

    # create the conditions part of url
    condition = "" if params['condition'] == [] else "&itemCondition=" + conditions[params['condition'][0]]
    for index in params['condition']:
        condition += condition_separator + conditions[index]

    # create the query string
    base_url = f"https://www.facebook.com/marketplace/{params['location']}/search?"
    url = f"{base_url}minPrice={params['min_price']}&maxPrice={params['max_price']}&daysSinceListed={params['days_since_listed']}{condition}&query={params['item']}"
    return url

In [62]:
def fetch_marketplace_html(url):
    """Returns Facebook Marketplace Data in the format of BeautifulSoup Object"""
    browser = Browser('firefox')
    browser.visit(url)

    # close pop out window
    if browser.is_element_present_by_css('div[aria-label="Close"]', wait_time=10):
        # Click on the element once it's found
        browser.find_by_css('div[aria-label="Close"]').first.click()
    
    # Scroll down to load more results
    
    # Define the number of times to scroll the page
    scroll_count = 4

    # Define the delay (in seconds) between each scroll
    scroll_delay = 2

    # Loop to perform scrolling
    for _ in range(scroll_count):
        # Execute JavaScript to scroll to the bottom of the page
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        
        # Pause for a moment to allow the content to load
        time.sleep(scroll_delay)
    
    # Parse the HTML
    html = browser.html

    # close browser window
    time.sleep(1)
    browser.quit()

    # Create a BeautifulSoup object from the scraped HTML
    return soup(html, 'html.parser')

In [82]:
def parse_marketplace_data(market_soup):
    '''Returns a Pandas Data Frame with information about Facebook Marketplace listings'''
    # Extract all the necessary info and insert into lists
    pictures_div = market_soup.find_all('img', class_="xt7dq6l xl1xv1r x6ikm8r x10wlt62 xh8yej3")
    pictures_url_list = [url.get('src') for url in pictures_div]

    titles_div = market_soup.find_all('span', class_="x1lliihq x6ikm8r x10wlt62 x1n2onr6")
    titles_list = [title.text.strip() for title in titles_div]

    prices_div = market_soup.find_all('span', class_="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x676frb x1lkfr7t x1lbecb7 x1s688f xzsf02u")
    prices_list = [price.text.strip() for price in prices_div]

    location_div = market_soup.find_all('span', class_="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x4zkp8e x3x7a5m x1nxh6w3 x1sibtaa xo1l8bm xi81zsa")
    location_list = [location.text.strip() for location in location_div]

    urls_div = market_soup.find_all('a', class_="x1i10hfl xjbqb8w x1ejq31n xd10rxx x1sy0etr x17r0tee x972fbf xcfux6l x1qhh985 xm0m39n x9f619 x1ypdohk xt0psk2 xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r xexx8yu x4uap5 x18d9i69 xkhd6sd x16tdsg8 x1hl2dhg xggy1nq x1a2a7pz x1heor9g x1sur9pj xkrqix3 x1lku1pv")
    urls_list = [url.get('href') for url in urls_div]
    urls_list = ["www.facebook.com" + url for url in urls_list]

    data = {
        'title': titles_list,
        'price': prices_list,
        'location': location_list,
        'image_url': pictures_url_list,
        'url': urls_list
    }

    return pd.DataFrame(data)
    

In [64]:
def get_ebay_listings(item):
    '''Returns a Pandas Data Frame with information about Ebay listings'''
    ebay_url = f"https://www.ebay.com/sch/i.html?_nkw={item}"
    response = requests.get(ebay_url)

    ebay_soup = soup(response.text, 'html.parser')
    
    item_divs = ebay_soup.find_all('li', class_='s-item s-item__pl-on-bottom')

    prices = [div.find('span', class_='s-item__price').text for div in item_divs]

    urls = [div.find('a', class_='s-item__link').get('href') for div in item_divs]

    
    image_divs = [div.find('div', class_='s-item__image-wrapper image-treatment') for div in item_divs]
    image_tags = [div.find('img') for div in image_divs]
    image_urls = [url.get('src') for url in image_tags]

    titles_div = [div.find('div', class_='s-item__title') for div in item_divs]
    titles = [div.find('span').text for div in titles_div]

    data = {
        'title': titles,
        'price': prices,
        'image_url': image_urls,
        'url': urls
    }

    return pd.DataFrame(data)

In [65]:
def filter_ebay_listings(df):
    """Returns a cleaned data frame without any outliers"""
    # remove any special characters and convert to float
    df['price'] = df['price'].replace('[\$,]', '', regex=True).astype(float)

    # remove the first two rows
    df = df.iloc[2:].reset_index(drop=True)

    # Remove outliers in the 'Price' column
    # Here we use the IQR method to identify outliers

    Q1 = df['price'].quantile(0.25)
    Q3 = df['price'].quantile(0.60)
    IQR = Q3 - Q1

    # Define lower and upper bound for outliers
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Filter out the outliers
    df_cleaned = df[(df['price'] >= lower_bound) & (df['price'] <= upper_bound)]

    return df_cleaned
    

In [79]:
def calc_ebay_stats(df):
    """Returns a data frame with statistics of ebay listings"""
    # Calculate key statistics
    mean_price = df['price'].mean()
    median_price = df['price'].median()
    std_dev_price = df['price'].std()
    fair_price = median_price * 1.05  # Adjust by 5% for current market trends

    # store and return states in data frame
    stats = {
        'mean_price': [mean_price],
        'median_price': [median_price],
        'std_dev': [std_dev_price],
        'fair_price': [fair_price]
    }

    return pd.DataFrame(stats).round(2)


In [92]:
def search_google_lens(image_url):
    """Returns Google Lens Data in the format of BeautifulSoup Object"""
    google_lens_url = f"https://lens.google.com/uploadbyurl?url={image_url}"
    browser = Browser('firefox')
    browser.visit(google_lens_url)
    
    time.sleep(1)
    # Scroll down to load more results
    
    # Define the number of times to scroll the page
    scroll_count = 2

    # Define the delay (in seconds) between each scroll
    scroll_delay = 2

    # Loop to perform scrolling
    for _ in range(scroll_count):
        # Execute JavaScript to scroll to the bottom of the page
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        
        # Pause for a moment to allow the content to load
        time.sleep(scroll_delay)
    
    # Parse the HTML
    html = browser.html

    # close browser window
    time.sleep(1)
    browser.quit()

    # Create a BeautifulSoup object from the scraped HTML
    return soup(html, 'html.parser')

In [68]:
# specify the parameters
params = {
    'location': "pittsburgh",
    'item': "jersey",
    'condition': [],
    'min_price': 0,
    'max_price': 100,
    'days_since_listed': 10
}

# create the facebook marketplace query
url = create_facebook_marketplace_query(params)

# Visit the website and gather data
market_soup = fetch_marketplace_html(url)

In [83]:
# parse html to create structured listing data
facebook_listings = parse_marketplace_data(market_soup)
facebook_listings


Unnamed: 0,title,price,location,image_url,url
0,PGH Pirates Jersey,$40,"Pittsburgh, PA",https://scontent-iad3-1.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/475728978439...
1,Jerome Bettis Jersey,$50,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/825731706230...
2,Andrew Mccutchen Pirates Jersey,$30,"Cranberry Twp, PA",https://scontent-iad3-1.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/104124033422...
3,Larry Bird Jersey,$35,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/116362450817...
4,Russel Wilson Steeler Jersey,$60,"Pittsburgh, PA",https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/994103005791...
...,...,...,...,...,...
115,Dwayne Haskins Jr Autographed Jersey,$100,Ships to you,https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/486593460394...
116,Dirt Bike Racing Jersey and Pants,$60,"Aliquippa, PA",https://scontent-iad3-1.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/181536211562...
117,Penguins 4x Big men’s shirt,$10,"Pittsburgh, PA",https://scontent-iad3-1.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/123973004406...
118,Jagermeister 56 Yard Line Official Football Je...,$15,Ships to you,https://scontent-iad3-2.xx.fbcdn.net/v/t45.532...,www.facebook.com/marketplace/item/117906575995...


In [72]:
ebay_listings = get_ebay_listings(facebook_listings['title'][0])
ebay_listings = filter_ebay_listings(ebay_listings)
ebay_listings

Unnamed: 0,title,price,image_url,url
0,Pittsburgh Pirates PGH Authentic 2023 City Con...,157.79,https://i.ebayimg.com/images/g/RRMAAOSwPexmg1b...,https://www.ebay.com/itm/116236591605?hash=ite...
1,NWT adult customized jersey Pirates @21 Robert...,49.99,https://i.ebayimg.com/images/g/ts8AAOSw35ZmY4N...,https://www.ebay.com/itm/305607142297?hash=ite...
2,PGH #21 Embroidery Men’s Retro Casual V-neck C...,80.00,https://i.ebayimg.com/images/g/iEsAAOSwBRlmVnx...,https://www.ebay.com/itm/186464232672?hash=ite...
4,Rawlings Pittsburgh Pirates Stitched Grey MILB...,17.99,https://i.ebayimg.com/images/g/K4sAAOSwhUxmjDg...,https://www.ebay.com/itm/186549101941?hash=ite...
5,Pittsburgh Pirates Freddy Sanchez Authentic M...,135.00,https://i.ebayimg.com/images/g/y2AAAOSwNJNmVQi...,https://www.ebay.com/itm/156285084313?hash=ite...
...,...,...,...,...
66,Pittsburgh Pirates 1979 WS CELEBRATION AUTHENT...,124.79,https://i.ebayimg.com/images/g/M-4AAOSwWQ1lrz3...,https://www.ebay.com/itm/156022551282?hash=ite...
67,PITTSBURGH PIRATES JERSEY Authentic Majestic J...,44.99,https://i.ebayimg.com/images/g/j8UAAOSw719ffg-...,https://www.ebay.com/itm/386824648468?hash=ite...
68,Pittsburgh Pirates #7 Black Alternate Majestic...,40.00,https://i.ebayimg.com/images/g/CZoAAOSwMGhjFpO...,https://www.ebay.com/itm/265871704772?hash=ite...
69,majestic pittsburgh pirates jersey Cool Base XL,59.99,https://i.ebayimg.com/images/g/IIEAAOSwR7Rmeuh...,https://www.ebay.com/itm/395493683967?hash=ite...


In [80]:
ebay_stats = calc_ebay_stats(ebay_listings)
ebay_stats

Unnamed: 0,mean_price,median_price,std_dev,fair_price
0,81.83,75.0,43.92,78.74


In [95]:
# NOTE: ONLY EBAY IMAGES ARE WORKING RIGHT NOW
# FACEBOOK IMAGES RETURN ERROR (Probably have bot auth.)
# MAYBE PASS THEM THROUGH A LINK GENERATOR FIRST

image_url = ebay_listings["image_url"][0]
# print(image_url)
google_lens_soup = search_google_lens(image_url)