In [38]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# ✅ CHROMEDRIVER PATH
# Location of a manually downloaded chromedriver binary. The start-up code in
# this cell does not read this value: the binary is obtained through
# ChromeDriverManager instead.
CHROMEDRIVER_PATH = "chromedriver.exe"  # Update with full path if needed

# ✅ BROWSER OPTIONS
# Flags are applied in the order listed.
chrome_options = Options()
for chrome_flag in (
    "--headless",  # Run without opening browser
    "--disable-blink-features=AutomationControlled",
    "start-maximized",
    "disable-infobars",
    "--disable-extensions",
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3",
):
    chrome_options.add_argument(chrome_flag)

# ✅ START CHROME
# ChromeDriverManager().install() returns the path handed to the Service.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(options=chrome_options, service=service)

# ✅ TARGET REALTOR.COM SEARCH PAGE (Modify for your city)
realtor_url = "https://www.realtor.com/realestateandhomes-search/New-York_NY"

CARD_CLASS = "component_property-card"  # class name used to locate listing cards
CARD_WAIT_SECONDS = 15                  # upper bound on waiting for the first card
MAX_LISTINGS = 10                       # number of cards to extract
NOT_AVAILABLE = "Not Available"         # placeholder for optional fields


def _wait_for_cards(timeout_s):
    """Wait until at least one listing card is present in the page.

    Returns True when a card showed up within `timeout_s` seconds and False
    when the wait timed out.
    """
    from selenium.common.exceptions import TimeoutException

    try:
        WebDriverWait(driver, timeout_s).until(
            EC.presence_of_element_located((By.CLASS_NAME, CARD_CLASS))
        )
        return True
    except TimeoutException:
        return False


def _text_or_default(card, by, selector, default=NOT_AVAILABLE):
    """Return the text of the first element under `card` matching the locator.

    Uses a single `find_elements` lookup so a missing element yields `default`
    instead of raising.
    """
    matches = card.find_elements(by, selector)
    return matches[0].text if matches else default


# ✅ SCRAPE PROPERTY LISTINGS
data = []
try:
    driver.get(realtor_url)

    # Explicit wait instead of a fixed sleep: returns as soon as cards exist
    # and makes the "nothing was rendered" case visible instead of silent.
    if not _wait_for_cards(CARD_WAIT_SECONDS):
        print(f"No listing cards found at {realtor_url} within {CARD_WAIT_SECONDS} seconds")

    properties = driver.find_elements(By.CLASS_NAME, CARD_CLASS)

    for prop in properties[:MAX_LISTINGS]:  # Extract first 10 listings
        try:
            # Price and address are required: a card without them is reported and skipped.
            price = prop.find_element(By.CLASS_NAME, "price").text
            address = prop.find_element(By.CLASS_NAME, "address").text

            # Bedrooms, bathrooms and square footage share the "property-meta"
            # class; the leading token of each element's text is kept as the value.
            bedrooms, bathrooms, sqft = None, None, None
            for detail in prop.find_elements(By.CLASS_NAME, "property-meta"):
                text = detail.text.lower()
                if "bed" in text:
                    bedrooms = text.split()[0]
                elif "bath" in text:
                    bathrooms = text.split()[0]
                elif "sqft" in text:
                    sqft = text.split()[0].replace(",", "")

            property_type = _text_or_default(prop, By.CLASS_NAME, "property-type")

            # A class-name locator takes a single class name; matching an element
            # that carries both classes requires a CSS selector.
            lot_size = _text_or_default(prop, By.CSS_SELECTOR, ".property-meta.lot-size")

            year_built = _text_or_default(prop, By.CLASS_NAME, "year-built")

            # Price per square foot is only computed when both values are plain
            # positive integers; anything else falls back to the placeholder
            # rather than raising and discarding the whole listing.
            price_digits = price.replace("$", "").replace(",", "")
            price_cleaned = int(price_digits) if price_digits.isdigit() else None
            sqft_value = int(sqft) if sqft and sqft.isdigit() else None
            if price_cleaned and sqft_value:
                price_per_sqft = round(price_cleaned / sqft_value, 2)
            else:
                price_per_sqft = NOT_AVAILABLE

            features = [
                feature.text
                for feature in prop.find_elements(By.CLASS_NAME, "property-feature")
            ] or [NOT_AVAILABLE]

            # ✅ STORE DATA IN DICTIONARY
            data.append({
                "Price": price,
                "Address": address,
                "Bedrooms": bedrooms,
                "Bathrooms": bathrooms,
                "Square Footage": sqft,
                "Property Type": property_type,
                "Lot Size": lot_size,
                "Year Built": year_built,
                "Price per Sq Ft": price_per_sqft,
                "Property Features & Amenities": ", ".join(features)
            })

        except Exception as e:
            # Best effort: report the failing card and move on to the next one.
            print(f"Error extracting data: {e}")
            continue
finally:
    # Always release the browser process, even when scraping fails part-way.
    driver.quit()

# ✅ PRINT FIRST 10 RESULTS
df = pd.DataFrame(data)
print(df.head(10))


Empty DataFrame
Columns: []
Index: []


In [32]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# Browser options for the Selenium-driven Chrome instance.
# Flags are applied in the order listed.
chrome_options = Options()
for chrome_flag in (
    "--headless",  # Run in headless mode
    "--disable-blink-features=AutomationControlled",
    "start-maximized",
    "disable-infobars",
    "--disable-extensions",
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.3",
):
    chrome_options.add_argument(chrome_flag)

# Start Chrome; ChromeDriverManager().install() returns the path handed to the Service.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(options=chrome_options, service=service)

# Function to extract details from the individual listing page
def extract_property_details(url):
    """Open a listing page and collect whichever detail fields it exposes.

    Each field is looked up independently, so a page that lacks one field
    (for example no "Last Sold" entry) still yields the fields that are there.

    Args:
        url: Address of the individual listing page. An empty or None value
            is not loaded and produces an empty result.

    Returns:
        dict containing any of the keys "status", "house_size" and
        "prev_sold_date" whose element was found, mapped to that element's
        text. Empty when nothing was found or when loading/querying the page
        raised; the error is printed in that case.

    Side effects:
        Navigates the module-level `driver` to `url`.
    """
    if not url:
        return {}

    # XPath used for each output field; the first match on the page wins.
    field_xpaths = {
        "status": "//span[contains(@class, 'status')]",
        "house_size": "//li[contains(text(),'sqft')]",
        "prev_sold_date": "//li[contains(text(),'Last Sold')]",
    }

    try:
        driver.get(url)
        details = {}
        for field, xpath in field_xpaths.items():
            # find_elements returns an empty list instead of raising when the
            # element is absent, so one missing field cannot discard the rest.
            matches = driver.find_elements(By.XPATH, xpath)
            if matches:
                details[field] = matches[0].text
        return details
    except Exception as e:
        print(f"Error extracting details from {url}: {e}")
        return {}

# Function to scrape search results
def scrape_realtor(search_url, num_properties=10):
    """Scrape listing cards from a search page and print them as a table.

    Works in two passes. Pass one reads every card on the search page; pass
    two visits each listing URL for the extra details. The passes are kept
    apart because visiting a listing navigates the shared driver away from the
    search page, which invalidates every card element that has not been read yet.

    Args:
        search_url: URL of the search results page to load.
        num_properties: Maximum number of result cards to process.

    Returns:
        None. The first 10 collected rows are printed as a DataFrame.

    Side effects:
        Drives the module-level `driver` and quits it before returning, also
        when an error occurs, so a new driver is needed for another call.
    """
    from selenium.common.exceptions import TimeoutException

    card_xpath = "//li[@data-testid='result-card']"
    wait_seconds = 15  # upper bound on waiting for the first result card

    def _first_text(card, xpath):
        """Return the text of the first match under `card`, or None if absent."""
        matches = card.find_elements(By.XPATH, xpath)
        return matches[0].text if matches else None

    try:
        driver.get(search_url)

        # Explicit wait instead of a fixed sleep: continues as soon as cards
        # exist and reports the case where none were rendered.
        try:
            WebDriverWait(driver, wait_seconds).until(
                EC.presence_of_element_located((By.XPATH, card_xpath))
            )
        except TimeoutException:
            print(f"No result cards found at {search_url} within {wait_seconds} seconds")

        # Pass 1: read everything needed from the cards while the search page
        # is still the loaded document.
        summaries = []
        for prop in driver.find_elements(By.XPATH, card_xpath)[:num_properties]:
            try:
                price = _first_text(prop, ".//span[contains(@data-label, 'pc-price')]")
                address = _first_text(prop, ".//span[contains(@data-label, 'pc-address')]")
                if price is None and address is None:
                    # Nothing identifying on this card; do not emit an empty row.
                    continue

                city, state, zip_code = _split_city_state_zip(address or "")
                anchors = prop.find_elements(By.TAG_NAME, "a")

                summaries.append({
                    "bed": _first_text(prop, ".//li[contains(@data-label, 'pc-meta-beds')]"),
                    "bath": _first_text(prop, ".//li[contains(@data-label, 'pc-meta-baths')]"),
                    "acre_lot": _first_text(prop, ".//li[contains(@data-label, 'pc-acre-lot')]"),
                    "city": city,
                    "state": state,
                    "zip_code": zip_code,
                    "price": price,
                    "Listing URL": anchors[0].get_attribute("href") if anchors else None,
                })
            except Exception as e:
                print(f"Error extracting search result: {e}")
                continue

        # Pass 2: visit each listing page. A failure here keeps the row and
        # leaves the detail columns empty.
        data = []
        for summary in summaries:
            link = summary["Listing URL"]
            details = {}
            if link:
                try:
                    details = extract_property_details(link)
                except Exception as e:
                    print(f"Error extracting details from {link}: {e}")

            data.append({
                "status": details.get("status", None),
                "bed": summary["bed"],
                "bath": summary["bath"],
                "acre_lot": summary["acre_lot"],
                "city": summary["city"],
                "state": summary["state"],
                "zip_code": summary["zip_code"],
                "house_size": details.get("house_size", None),
                "prev_sold_date": details.get("prev_sold_date", None),
                "price": summary["price"],
                "Listing URL": link
            })

        # Convert to DataFrame and print first 10 rows
        df = pd.DataFrame(data)
        print(df.head(10))
    finally:
        # Close WebDriver even when scraping fails part-way.
        driver.quit()


def _split_city_state_zip(address):
    """Split address text into a (city, state, zip_code) tuple.

    The last comma-separated part is read as "<state> <zip>" and the part
    before it as the city, so both "City, ST 12345" and
    "Street, City, ST 12345" resolve to the same city. Line breaks are treated
    like commas. Components that cannot be determined are returned as None.
    """
    parts = [part.strip() for part in address.replace("\n", ",").split(",")]
    parts = [part for part in parts if part]
    if not parts:
        return None, None, None
    if len(parts) == 1:
        return parts[0], None, None

    state_zip = parts[-1].split()
    state = state_zip[0] if state_zip else None
    zip_code = state_zip[1] if len(state_zip) > 1 else None
    return parts[-2], state, zip_code

# Run the scraper against the New York, NY search results page.
search_url = "https://www.realtor.com/realestateandhomes-search/New-York_NY"
scrape_realtor(search_url=search_url)


Empty DataFrame
Columns: []
Index: []
