# Homes.com Web Scraper Using Selenium


This notebook scrapes homes.com listings (houses, condos, apartments, etc.) from the search-results URL you provide. For each listing it records the address, listing price, number of beds and baths, square footage, description of the property, the realtor selling it, and the agency selling it. The property type is not scraped; it is entered by the user and stored alongside every listing.


In [None]:
import pandas as pd
import numpy as np
import re
import selenium 
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

In [None]:
# Scrape every search-results page reachable from a homes.com URL and save
# the collected listings to ``NJ_<house_type>.csv``.
#
# Flow: prompt for the URL and a house-type label, open Chrome, walk the
# result pages by clicking the "next" control until it can no longer be
# found/clicked, then build a DataFrame and write it to CSV.
from selenium.common.exceptions import WebDriverException


def _text_or_nan(parent, class_name):
    """Return the stripped text of the first child with ``class_name``.

    Returns ``np.nan`` when Selenium cannot locate or read the element, so a
    missing field never aborts the scrape.
    """
    try:
        return parent.find_element(By.CLASS_NAME, class_name).text.strip()
    except WebDriverException:
        return np.nan


def _beds_baths_sqft(listing):
    """Return ``[beds, baths, sq_ft]`` texts for a listing (``np.nan`` if absent)."""
    info = [np.nan, np.nan, np.nan]
    try:
        container = listing.find_element(By.CLASS_NAME, 'detailed-info-container')
        li_texts = [li.text for li in container.find_elements(By.TAG_NAME, 'li')]
    except WebDriverException:
        # No details container on this listing: keep all three values as NaN
        # instead of crashing the whole run.
        return info

    for li_text in li_texts:
        # 'Bed' / 'Bath' also match the plural labels the original looked for
        # ('Beds' / 'Baths'), and additionally catch a singular label if the
        # page renders one.
        if 'Bed' in li_text:
            info[0] = li_text
        elif 'Bath' in li_text:
            info[1] = li_text
        elif 'Sq Ft' in li_text:
            info[2] = li_text
    return info


def _scrape_listing(listing, house_type):
    """Return one output row (list of 9 values) for a listing element."""
    address = _text_or_nan(listing, 'property-name')
    price = _text_or_nan(listing, 'price-container')
    beds, baths, sq_ft = _beds_baths_sqft(listing)
    description = _text_or_nan(listing, 'property-description')
    realtor = _text_or_nan(listing, 'agent-name')
    agency = _text_or_nan(listing, 'agency-name')
    return [address, price, beds, baths, sq_ft, description, realtor, agency, house_type]


# Ask for the inputs before launching the browser so that cancelling a prompt
# does not leave a Chrome window behind.
url = input('URL to scrape: ')
house_type = input('House type: ')

path = "/Users/student/Downloads/chromedriver.exe"
# NOTE(review): recent Selenium 4 releases no longer accept the driver path as
# a positional argument (they expect ``service=Service(path)``) -- confirm
# against the installed Selenium version before changing this call.
driver = webdriver.Chrome(path)

listings = []
current_page = 1

try:
    # Link to homes.com website
    driver.get(url)
    time.sleep(10)

    while True:
        # Wait for the search results to load
        time.sleep(5)

        # Announce the start of the run on the first page only; later pages
        # are announced right after the "next" click below.
        if current_page == 1:
            print(f"Current Page:{current_page}")
            print("# of Listings Scraped:",len(listings))
            print('-------------------------------------')

        # Scrape every listing on the page. No fixed per-page count is
        # assumed, so short or long pages are handled without dropping rows.
        for listing in driver.find_elements(By.CLASS_NAME, 'placard-container'):
            listings.append(_scrape_listing(listing, house_type))

        # Move to the next page; failing to find/click the control ends the run.
        try:
            driver.find_element(By.CLASS_NAME, 'next').click()
        except WebDriverException:
            print("# of Listings Scraped:",len(listings))
            print('Scraping Finished')
            break

        current_page += 1
        print(f"Current Page:{current_page}")
        print("# of Listings Scraped:",len(listings))
        print('-------------------------------------')
finally:
    # Always shut the browser down, even if scraping raised.
    driver.quit()

#Save the information to a dataframe and then to a csv
NJ_house = pd.DataFrame(
    listings,
    columns=['address', 'price', 'beds', 'baths', 'sq_ft', 'description', 'realtor', 'agency', 'house_type'],
)

NJ_house.to_csv(f'NJ_{house_type}.csv')