In [12]:
#Import necessary libraries
import os
import time
import datetime
from collections import namedtuple
import selenium.webdriver as webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import pandas as pd

# Chrome options: send a desktop Chrome user agent string with every request
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36'
chrome_options = Options()
chrome_options.add_argument(f'user-agent={user_agent}')

# Driver service: chromedriver.exe is looked up in the current working directory
chrome_driver_path = os.path.join(os.getcwd(), 'chromedriver.exe')
chrome_service = Service(chrome_driver_path)

# Start the browser session; element lookups wait up to 7 seconds before failing
browser = webdriver.Chrome(service=chrome_service, options=chrome_options)
browser.implicitly_wait(7)

# Prefix of the output Excel file name
search_query = 'Laptop_Craigslist'

# Open the Craigslist Atlanta home page and pause while it loads
url = 'https://atlanta.craigslist.org/'
browser.get(url)
time.sleep(2)

# Navigation steps: the XPaths of one step are clicked in order,
# then the script pauses for 2 seconds before the next step.
navigation_steps = (
    # The "For Sale" hyperlink
    ("//a[@data-alltitle='all for sale']",),
    # The neighborhood dropdown, then its focused entry ("All Atlanta")
    ("//*[@class='cl-breadcrumb subarea-selector bd-combo-box static bd-vStat-valid']",
     "//*[@class='bd-button bd-list-box-focused-item']"),
    # The item category dropdown, then its "Computer" entry
    ("//*[@class='cl-breadcrumb category-selector bd-combo-box static bd-vStat-valid']",
     "//*[@class='bd-button sya']"),
)
for step_xpaths in navigation_steps:
    for step_xpath in step_xpaths:
        browser.find_element(By.XPATH, step_xpath).click()
    time.sleep(2)

# Locate the search box by its placeholder, type the query and submit it with Enter
search_input = browser.find_element(By.XPATH, "//input [@placeholder='search computers']")
search_input.send_keys("laptop")
search_input.send_keys(Keys.ENTER)
time.sleep(0.5)

# Accumulates the <li> result elements gathered from every results page
posts_html = []

# Walk through the result pages until the "Next Page" button lookup fails
while True:

    # Parse the results container and keep its gallery-mode listing items
    results_container = browser.find_element(By.ID, 'search-results-page-1')
    page_soup = BeautifulSoup(results_container.get_attribute('innerHTML'), 'html.parser')
    posts_html += page_soup.find_all('li', {'class': 'cl-search-result cl-search-view-mode-gallery'})

    # Scroll back to the top of the page before looking for the button
    browser.execute_script('window.scrollTo(0, 0)')
    try:
        browser.find_element(By.XPATH, "//*[@class='bd-button cl-next-page icon-only']").click()
    except NoSuchElementException:
        # No element carries exactly this class string: stop paginating
        break
    time.sleep(0.5)

# Report how many listings were collected
print('Collected {0} listings'.format(len(posts_html)))
    

# Fields recorded for each Craigslist post
CraigslistPost = namedtuple('CraigslistPost', ['Title', 'Price','Date', 'Location', 'Post_url', 'Image_url'])


def _parse_post(post_html):
    """Build a CraigslistPost from one parsed search-result element.

    Args:
        post_html: A BeautifulSoup tag for a single listing.

    Returns:
        A CraigslistPost. Title, Price, Date, Location and Post_url are
        None when the corresponding markup is missing; Image_url is ''
        when the listing has no <img> tag.
    """
    # The title anchor carries both the title text and the post URL, so it
    # is looked up once and guarded once. Calling .get('href') on a missing
    # anchor would raise AttributeError.
    title_link = post_html.find('a', {'class': 'titlestring'})
    if title_link is not None:
        title = title_link.text
        post_url = title_link.get('href')
    else:
        title = None
        post_url = None

    price_tag = post_html.find('span', {'class': 'priceinfo'})
    price = price_tag.text if price_tag is not None else None

    # The meta text is "<date> · <location>"; the location part (and the
    # separator) may be absent, so only index parts that actually exist.
    date = None
    location = None
    meta_tag = post_html.find('div', {'class': 'meta'})
    if meta_tag is not None:
        parts = [part.strip() for part in meta_tag.text.strip().split('·')]
        date = parts[0]
        if len(parts) > 1:
            location = parts[1]

    # Look the image up once instead of searching the tree twice
    image_tag = post_html.find('img')
    image_url = image_tag.get('src') if image_tag is not None else ''

    return CraigslistPost(title, price, date, location, post_url, image_url)


# Parse every collected listing into a CraigslistPost
craigslist_posts = [_parse_post(post_html) for post_html in posts_html]


# Convert the named tuple list to a Pandas DataFrame. Passing the field names
# explicitly keeps the column headers even when no posts were collected.
df = pd.DataFrame(craigslist_posts, columns=list(CraigslistPost._fields))

# Timestamp the file name so repeated runs do not overwrite each other
current_time = datetime.datetime.now().strftime("%m_%d_%Y %I_%M %p")
file_name = f'{search_query} ({current_time}).xlsx'

try:
    # Save the results to an Excel file
    df.to_excel(file_name, index=False)
finally:
    # Shut down the webdriver even if the export fails. quit() ends the whole
    # WebDriver session and its driver process, whereas close() only closes
    # the current window and leaves the driver running.
    browser.quit()

# Open the file in Microsoft Excel (os.startfile exists only on Windows)
if os.name == 'nt':
    os.startfile(file_name, 'open')


Collected 170 listings
