## Ebay Phone Listings Web Scrapping using selenium + MongoDB  
- **Author**  : sumashree Javaji

#### Importing necessary libraries 

In [1]:
# install required packages
# pip install pymongo

In [2]:
import os
from pymongo import MongoClient
import bson
import requests
import time

from bs4 import BeautifulSoup
import lxml

import re
import pandas as pd

### Database Programming

In [3]:
# Setting the path to my SampleCollections folder (where the data resides)
# root_dir='../SampleCollections/'
client = MongoClient("localhost", 27017) # establishing connection to the MongoDB server. facilitates querying with the MongoDB 

database = client["Ebay"] #Creates a database in mongodb using python
database_name = 'Ebay' #storing my database name in mongoDB

collection = database["ebay_phone_listings"] #Creating a table in Ebay DB

In [4]:
from selenium import webdriver

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

driver = webdriver.Chrome() # you will need to put your chromedriver here
# driver = webdriver.Chrome(service=Service('chromedriver/chromedriver121.exe')) # you will need to put your chromedriver here

time.sleep(4)

driver.get('https://www.ebay.com/')

time.sleep(1)

input = driver.find_element(By.CSS_SELECTOR, 'input[aria-label="Search for anything"]')
input.send_keys('Cell Phones\n')

time.sleep(5)

checkbox = driver.find_element(By.XPATH, '//input[@aria-label="Unlocked"]')
checkbox.click()
time.sleep(3)

checkbox = driver.find_element(By.XPATH, '//input[@aria-label="LG"]')
checkbox.click()
time.sleep(3)


checkbox = driver.find_element(By.XPATH, '//input[@aria-label="6 in or More"]')
checkbox.click()
time.sleep(3)

checkbox = driver.find_element(By.XPATH, '//input[@aria-label="128 GB"]')
checkbox.click()
time.sleep(3)

checkbox = driver.find_element(By.XPATH, '//input[@aria-label="Factory Unlocked"]')
checkbox.click()
time.sleep(3)

checkbox = driver.find_element(By.XPATH, '//input[@aria-label="Used"]')
checkbox.click()
time.sleep(3)

html_content = driver.page_source

# Save the HTML content to a file
with open("unlocked-lg-128gb-used.html", "w", encoding="utf-8") as file:
    file.write(html_content)
    
driver.quit()


In [5]:
file = '/Users/sumashreejavaji/PycharmProjects/DDR/unlocked-lg-128gb-used.html'
with open(file, 'r') as file : #reading from locally saved html file
    html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')

In [6]:
from bs4 import BeautifulSoup
from itertools import zip_longest

title_elements = soup.find_all('div', class_='s-item__title')
seller_elements = soup.find_all(class_="s-item__seller-info-text")
price_elements = soup.find_all(class_="s-item__price")
shipping_elements = soup.find_all(class_="s-item__shipping")
returns_elements = soup.find_all(class_="s-item__free-returns")
bidding_elements = soup.find_all(class_="s-item__bids")

rows = 0
for i, (title, seller, price, shipping, returns, bidding) in enumerate(zip_longest(title_elements, seller_elements, price_elements, shipping_elements, returns_elements, bidding_elements, fillvalue='na')):
    rows  = rows + 1
    if bidding == 'na':
        bidding = BeautifulSoup('<div> </div>', 'html.parser')
    if shipping == 'na':
        shipping = BeautifulSoup('<div> </div>', 'html.parser')
    if seller == 'na':
        seller = BeautifulSoup('<div> </div>', 'html.parser')
    if price == 'na':
        price = BeautifulSoup('<div> </div>', 'html.parser')
    if returns == 'na':
        returns = BeautifulSoup('<div> </div>', 'html.parser')

    seller_name = re.search(r'(.*?)\s*\((.*?)\)(.*)', seller.get_text())
    if seller_name:
        seller_name, seller_info = seller_name.group(1), seller_name.group(3)

    # Inserting the extracted data into the MongoDB table       
    listing_data = {
        "Title": title.get_text(),
        "Seller": seller_name,
        "Seller Rating": seller_info,
        "Price": price.get_text(),
        "Shipping": shipping.get_text(),
        "Returns": returns.get_text(),
        "No of Biddings": bidding.get_text()
    }
    collection.insert_one(listing_data)

    # Printing the results to the screen
    print("Title:", title.get_text())
    print("Seller:", seller_name)
    print("Seller Rating:", seller_info)
    print("Price:", price.get_text())
    print("Shipping:", shipping.get_text())
    print("Returns:", returns.get_text())
    print("No of Biddings:", bidding.get_text())
    print()
print("Total",rows)

Title: Shop on eBay
Seller: discountphonedepot
Seller Rating:  97.7%
Price: $20.00
Shipping: Free shipping
Returns: Free returns
No of Biddings:  

Title: LG Velvet 5G - LM-G900VMP 128GB Gray Verizon + GSM Unlocked LTE Smartphone GREAT
Seller: devicegiant
Seller Rating:  98.3%
Price: $119.32
Shipping: Free shipping
Returns: Free returns
No of Biddings:  

Title: LG G8 ThinQ 128GB G820UM CDMA/GSM Unlocked Smartphone, Good-Read
Seller: wedeliverwireless
Seller Rating:  98.8%
Price: $74.39
Shipping: Free shipping
Returns: Free returns
No of Biddings:  

Title: LG Velvet 5G VERIZON AT&T - 128GB Smartphone Aurora Gray / Silver  *Excellent*
Seller: teeerashop
Seller Rating:  98.9%
Price: $139.00
Shipping: Free shipping
Returns: Free returns
No of Biddings:  

Title: LG G8 ThinQ 6.1 In Screen Black Gray 128GB (Verizon) B Grade W/ Screen Shadow
Seller: wikiwoo
Seller Rating:  98.7%
Price: $85.00
Shipping: Free shipping
Returns: Free returns
No of Biddings:  

Title: New ListingLG Velvet 5G LM-

In [7]:
print(client.list_database_names()) # checking if the new DB is added in mongodb

['Ebay', 'Pokemon', 'admin', 'config', 'local', 'msba']


In [8]:
# checkin if the data is loaded into the DB successfully
cursor = collection.find().limit(5)
documents = list(cursor)
df = pd.DataFrame(documents)
df

Unnamed: 0,_id,Title,Seller,Seller Rating,Price,Shipping,Returns,No of Biddings
0,6604b30fa32beb8b6d882c20,Shop on eBay,discountphonedepot,97.7%,$20.00,Free shipping,Free returns,
1,6604b30fa32beb8b6d882c21,LG Velvet 5G - LM-G900VMP 128GB Gray Verizon +...,devicegiant,98.3%,$119.32,Free shipping,Free returns,
2,6604b30fa32beb8b6d882c22,LG G8 ThinQ 128GB G820UM CDMA/GSM Unlocked Sma...,wedeliverwireless,98.8%,$74.39,Free shipping,Free returns,
3,6604b30fa32beb8b6d882c23,LG Velvet 5G VERIZON AT&T - 128GB Smartphone A...,teeerashop,98.9%,$139.00,Free shipping,Free returns,
4,6604b30fa32beb8b6d882c24,LG G8 ThinQ 6.1 In Screen Black Gray 128GB (Ve...,wikiwoo,98.7%,$85.00,Free shipping,Free returns,
