In [31]:
import math
import os
import re
import time
from datetime import datetime, timedelta
from getpass import getpass

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait

In [32]:
# Move the working directory up one level so the relative paths used below
# (e.g. the CSV output path) resolve from the parent folder.
# NOTE(review): not idempotent - re-running this cell climbs one more level.
os.chdir(os.pardir)

In [33]:
# Login page opened by login_to_propstream() at the start of a session.
WEBSITE = "https://www.propstream.com/login"
# Destination CSV (relative to the current working directory) for the scraped listings.
OUTPUT_DATA = "data/auto_search.csv"

In [34]:
def login_to_propstream(username, password, driver):
    """Open the login page, submit the credentials and wait for the redirect.

    Args:
        username: Text typed into the form field named ``username``.
        password: Text typed into the form field named ``password``.
        driver: Selenium WebDriver that runs the session.

    Raises:
        selenium.common.exceptions.TimeoutException: If a form element does not
            show up, or the URL has not changed, within 10 seconds.
    """
    driver.get(WEBSITE)
    wait = WebDriverWait(driver, 10)  # Wait for up to 10 seconds

    # Wait for the username and password fields to be present
    username_field = wait.until(EC.presence_of_element_located((By.NAME, "username")))
    password_field = wait.until(EC.presence_of_element_located((By.NAME, "password")))

    username_field.send_keys(username)
    password_field.send_keys(password)

    # Locate the submit button by its text rather than by ID
    login_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Login']")))

    # Remember the URL *before* clicking. Reading driver.current_url after the
    # click races with the redirect: if the browser has already navigated, the
    # wait below would be comparing against the post-login URL and time out.
    login_url = driver.current_url
    login_button.click()

    # Wait for the URL to move away from the login page
    wait.until(EC.url_changes(login_url))
    print("Login successful!")

In [35]:
def search_city(city, driver):
    """Type a location into the search box and confirm it with Enter.

    Args:
        city: Text entered into the location search box.
        driver: Selenium WebDriver showing the page with the search box.
    """
    waiter = WebDriverWait(driver, 10)
    box_xpath = "//input[@placeholder='Enter County, City, Zip Code(s) or APN #']"
    first_hit_xpath = "//input[@aria-activedescendant='react-autowhatever-1--item-0']"

    # Replace whatever is in the search box with the requested location
    location_box = waiter.until(EC.presence_of_element_located((By.XPATH, box_xpath)))
    location_box.clear()
    location_box.send_keys(city)

    # Block until an input whose aria-activedescendant names suggestion item 0
    # exists, then press Enter in the search box to submit.
    waiter.until(EC.presence_of_element_located((By.XPATH, first_hit_xpath)))
    location_box.send_keys(Keys.RETURN)

    print("First suggestion selected and search triggered.")
    print(f"Search completed for city: {city}")

In [36]:
def click_filter_button(driver):
    """Click the filter button in the search header.

    Waits up to 15 seconds for the button to become clickable. Any exception
    raised while waiting or clicking is caught and reported with ``print``;
    nothing is re-raised.

    Args:
        driver: Selenium WebDriver showing the search page.
    """
    waiter = WebDriverWait(driver, 15)
    button_xpath = "//*[@id='root']/div/div[2]/div/div/div[3]/div[1]/div/header/div[1]/div[1]/div[2]/div"

    try:
        waiter.until(EC.element_to_be_clickable((By.XPATH, button_xpath))).click()
        print("Filter button clicked and menu opened.")
    except Exception as e:
        print(f"An error occurred: {e}")


In [37]:
def select_filter_option(driver, option):
    """Fill in the filter panel and close it.

    Steps, in order (section names follow the original comments/variable names):
      1. Choose "Pre-Foreclosures" in the first ``<select>``.
      2. Property type: tick the "single" and "condo" checkboxes.
      3. On-market (MLS) status: tick the "no" option.
      4. Pre-foreclosure: tick "notice" and "auction", then type the date from
         90 days ago (MM/DD/YY) into the date input.
      5. Only when ``option`` is truthy: owner section ("individual", "trust")
         and the loan section, where '75' is typed into an input
         (NOTE(review): presumably a percentage threshold - confirm).
      6. Click the panel's close icon.

    Any exception is caught and reported with ``print``; nothing is re-raised,
    so a failure part-way through leaves the remaining steps undone.

    Args:
        driver: Selenium WebDriver with the filter menu already opened.
        option: Truthy to also apply the owner and loan filters.
    """
    wait = WebDriverWait(driver, 15)  # Up to 15 seconds per element

    def clickable(xpath):
        """Wait for the element at ``xpath`` to be clickable and return it."""
        return wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))

    def press(xpath):
        """Wait for the element at ``xpath``, click it once and return it."""
        element = clickable(xpath)
        element.click()
        return element

    # Every control below lives under this container of the filter panel.
    panel = '//*[@id="root"]/div/div[2]/div/div/div[3]/div[1]/div/header/div[1]/div[1]/div[2]/div/div[2]/div[2]/div[1]/div[1]'

    try:
        # 1. First dropdown: click, pick the entry by visible text, click again
        lead_select = press("//*[@id='root']/div/div[2]/div/div/div[3]/div[1]/div/header/div[1]/div[1]/div[2]/div/div[2]/div[2]/div[1]/div[1]/div[1]/div[1]/div[2]/div[2]/div/div/div/select")
        Select(lead_select).select_by_visible_text("Pre-Foreclosures")
        lead_select.click()

        # 2. Property type: open section, open list, tick two boxes, click list again
        press(panel + '/div[2]/div[1]/span')
        type_list = press(panel + '/div[2]/div[2]/div[2]/div[2]/div/div/div/div[2]')
        press(panel + '/div[2]/div[2]/div[2]/div[2]/div/div/div/div[1]/div[2]/div/div[1]/label/span')  # single
        press(panel + '/div[2]/div[2]/div[2]/div[2]/div/div/div/div[1]/div[2]/div/div[2]/label/span')  # condo
        type_list.click()

        # 3. On-market status: open section, pick "no", click the header again
        mls_header = press(panel + '/div[3]/div[1]/span')
        press(panel + '/div[3]/div[2]/div/div[1]/div[3]/div/label/span')
        mls_header.click()

        # 4. Pre-foreclosure: statuses plus a date 90 days in the past
        pfc_header = press(panel + '/div[4]/div[1]/span')
        status_list = press(panel + '/div[4]/div[2]/div[1]/div[2]/div/div/div/div[2]')
        press(panel + '/div[4]/div[2]/div[1]/div[2]/div/div/div/div[1]/div/div/div[1]/label/span')  # notice
        press(panel + '/div[4]/div[2]/div[1]/div[2]/div/div/div/div[1]/div/div/div[2]/label/span')  # auction
        status_list.click()
        date_box = clickable(panel + '/div[4]/div[2]/div[2]/div[2]/div/div/div/div/div/input')
        cutoff = (datetime.now() - timedelta(days=90)).strftime("%m/%d/%y")  # MM/DD/YY, ~3 months back
        date_box.clear()
        date_box.send_keys(cutoff)
        pfc_header.click()

        if option:
            # 5a. Owner section: tick "individual" and "trust"
            owner_header = press(panel + '/div[5]/div[1]/span')
            owner_list = press(panel + '/div[5]/div[2]/div[2]/div[2]/div/div/div/div[2]')
            press(panel + '/div[5]/div[2]/div[2]/div[2]/div/div/div/div[1]/div/div/div[1]/label/span')  # individual
            press(panel + '/div[5]/div[2]/div[2]/div[2]/div/div/div/div[1]/div/div/div[4]/label/span')  # trust
            owner_list.click()
            owner_header.click()

            # 5b. Loan section: type 75 into its input
            loan_header = press(panel + '/div[7]/div[1]/span')
            loan_box = clickable(panel + '/div[7]/div[2]/div[8]/div[3]/div/div/input')
            loan_box.clear()
            loan_box.send_keys('75')
            loan_header.click()

        # 6. Close the panel; the icon is located by CSS class with a 10 second wait
        close_wait = WebDriverWait(driver, 10)
        close_icon = close_wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".icon-iconClose.src-app-Search-Header-style__SAkaa__iconClose"))
        )
        close_icon.click()
    except Exception as e:
        print(f"An error occurred: {e}")

In [38]:
def get_total_listings(driver):
    """Read the result caption and return the listing count shown in parentheses.

    The count is taken from the first ``(`` in the caption text. Thousands
    separators are accepted, so a caption ending in ``(1,234)`` yields 1234.

    Args:
        driver: Selenium WebDriver showing the search results.

    Returns:
        int: The number of listings parsed from the caption.

    Raises:
        ValueError: If the caption contains no parenthesised number.
    """
    caption_element = driver.find_element(By.XPATH, "//*[@id='root']/div/div[2]/div/div/div[3]/div[1]/div/section/div[2]/div/div/div/div/div[1]/div[1]/div[2]")
    caption_text = caption_element.text

    # Digits (optionally comma-grouped) right after the first opening parenthesis
    match = re.search(r"\(\s*(\d[\d,]*)", caption_text)
    if match is None:
        raise ValueError(f"Could not find a listing count in caption: {caption_text!r}")

    total_listings = int(match.group(1).replace(",", ""))
    print(f"Total listings: {total_listings}")
    return total_listings

In [39]:
def go_to_page(driver, page_number, max_retries=3):
    """Jump to a results page by typing its number into the pager input.

    The input is re-located on every attempt, so a
    ``StaleElementReferenceException`` raised while interacting with it is
    retried up to ``max_retries`` times instead of recursing without bound.

    Args:
        driver: Selenium WebDriver showing the search results.
        page_number: Page to navigate to; converted with ``str()`` before typing.
        max_retries: How many times to retry after a stale-element error.

    Raises:
        StaleElementReferenceException: If the element is still stale after
            ``max_retries`` retries.
    """
    page_input_xpath = "//*[@id='root']/div/div[2]/div/div/div[3]/div[1]/div/section/div[2]/div/div/div/div/div[3]/div/input"
    retries_used = 0

    while True:
        try:
            input_field = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, page_input_xpath))
            )
            # Clear the current input, enter the new page number, and confirm with Enter
            input_field.clear()
            input_field.send_keys(str(page_number))
            input_field.send_keys(Keys.RETURN)
            print(f"Navigated to page {page_number}")
            return
        except StaleElementReferenceException:
            if retries_used >= max_retries:
                raise  # give up rather than loop forever
            retries_used += 1
            print(f"StaleElementReferenceException on page {page_number}, retrying...")

In [40]:
def save_to_csv(data, filename):
    """Write ``data`` to ``filename`` as CSV without the index column.

    When ``filename`` is a path, its parent directory is created first so a
    missing output folder does not discard the collected data at the very end
    of a run.

    Args:
        data: Anything ``pandas.DataFrame`` accepts (here: a list of dicts).
        filename: Target path, or any other target ``DataFrame.to_csv`` accepts.
    """
    # Only path-like targets have a directory to create; buffers are passed through.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)

    df = pd.DataFrame(data)
    df.to_csv(filename, index=False)
    print(f"Data saved to {filename}")

In [41]:
def extract_details(driver):
    """Collect label/value pairs from the open property-detail view, then close it.

    Pairs are read in this order: four owner rows, estimated value, estimated
    balance, equity, four rows from the second tab (which is clicked open and
    then left again by clicking the first tab), eleven property rows, bed,
    bath, and four more property rows. Each lookup waits up to 15 seconds for
    the element to be present; tab clicks wait up to 10 seconds.

    Args:
        driver: Selenium WebDriver with a property-detail view open.

    Returns:
        dict: Label text mapped to value text. A label that occurs more than
        once keeps the value read last.
    """
    wait = WebDriverWait(driver, 15)
    listings_data = {}

    def read_text(xpath):
        """Return the text of the element at ``xpath`` once it is present."""
        return wait.until(EC.presence_of_element_located((By.XPATH, xpath))).text

    def record(label_xpath, value_xpath):
        """Read the label first, then the value, and store the pair."""
        label = read_text(label_xpath)
        listings_data[label] = read_text(value_xpath)

    def open_tab(xpath):
        """Click the tab at ``xpath`` once it is clickable (10 second wait)."""
        tab = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, xpath)))
        tab.click()

    # Two containers that all lookups hang off
    pane = '/html/body/div[2]/div/div[2]/div/div/div[1]/div/div[2]/div/div/div[2]/div/div/div/div[2]/div/div'
    card = '//*[@id="propertyDetail"]/div/div/div[2]/div/div/div/div[1]'

    # Owner Information
    for i in range(1, 5):
        row = f'{pane}/div[1]/div[1]/div/div/div/div/div[2]/div[{i}]/div'
        record(row + '/div[1]', row + '/div[2]/div')

    # Estimated Value
    box = card + '/div[2]/div/div/div/div/div/div[2]/div[1]'
    record(box + '/div[1]', box + '/div[2]')

    # Estimated Balance
    box = card + '/div[3]/div/div/div/div/div/div[1]/div[2]/div[2]'
    record(box + '/div[1]', box + '/div[2]')

    # Equity
    box = card + '/div[4]/div/div/div/div/div'
    record(box + '/div[2]', box + '/ul/li[1]')

    # Recording Date: these rows live on the second tab
    open_tab(pane + '/ul/li[2]')
    for i in range(1, 5):
        row = f'{pane}/div[2]/div[1]/div/div/div/div/div[3]/div[{i}]/div'
        record(row + '/div[1]', row + '/div[2]/div')
    open_tab(pane + '/ul/li[1]')

    # Property information (lower table)
    facts = card + '/div[1]/div/div/div/div'
    for i in range(1, 12):
        row = f'{facts}/div[2]/div[{i}]'
        record(row + '/div[1]', row + '/div[2]')

    # Bed (slot 1) and Bath (slot 2)
    for slot in (1, 2):
        cell = f'{facts}/div[1]/div[2]/div[1]/div[{slot}]/div'
        record(cell + '/span[1]', cell + '/span[2]')

    # Property information (upper rows 2-5)
    for i in range(2, 6):
        row = f'{facts}/div[1]/div[2]/div[{i}]'
        record(row + '/div[1]', row + '/div[2]')

    # Close the detail view so the result list is reachable again
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="propertyDetail"]/div/div/div[1]/button'))).click()

    return listings_data

In [42]:
def get_listings_data(driver, listings):
    """Open result rows 1..``listings`` one by one and collect their details.

    For each row the detail link is clicked, the code pauses two seconds, and
    ``extract_details`` is called. A row that raises any exception is reported
    with ``print`` and skipped; the loop continues with the next row.

    Args:
        driver: Selenium WebDriver showing the result list.
        listings: Number of rows to attempt, counted from 1.

    Returns:
        list: One dict per row that was extracted successfully.
    """
    waiter = WebDriverWait(driver, 15)
    collected = []

    for row_number in range(1, listings + 1):
        try:
            link_xpath = f'//*[@id="root"]/div/div[2]/div/div/div[3]/div[1]/div/section/div[2]/div/div/div/div/div[2]/div/div[{row_number}]/div[3]/div[1]/div[1]/a'
            waiter.until(EC.element_to_be_clickable((By.XPATH, link_xpath))).click()

            # Fixed pause to let the detail view render before reading it
            time.sleep(2)
            collected.append(extract_details(driver))
        except Exception as e:
            print('Error Occured!', e)

    return collected

In [43]:
# --- Scrape run -------------------------------------------------------------
# Credentials are read from the environment, falling back to an interactive
# prompt, so they never live in the notebook.
# NOTE(review): an earlier revision of this cell contained the account
# password in plain text - rotate that password.
username = os.environ.get("PROPSTREAM_USERNAME") or input("PropStream username: ")
password = os.environ.get("PROPSTREAM_PASSWORD") or getpass("PropStream password: ")

# Location to search for
city = "Boston, MA"

listings_per_page = 50  # Assuming 50 listings per page
all_listings_data = []

driver = webdriver.Chrome()
try:
    driver.set_window_size(1920, 1080)

    # Log in, run the search and apply the filters
    login_to_propstream(username, password, driver)
    search_city(city, driver)
    click_filter_button(driver)
    select_filter_option(driver, True)

    # Read the result caption; its last space-separated token is "(<count>)"
    total_listings_text = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//*[@id='root']/div/div[2]/div/div/div[3]/div[1]/div/section/div[2]/div/div/div/div/div[1]/div[1]/div[2]"))
    ).text

    # Strip the parentheses (and any thousands separators) before converting
    total_listings = int(total_listings_text.split(" ")[-1].strip("()").replace(",", ""))
    total_pages = math.ceil(total_listings / listings_per_page)

    # Loop through all pages
    for page in range(1, total_pages + 1):
        print(f"Scraping page {page} of {total_pages}...")

        if total_pages != 1:
            go_to_page(driver, page)
            # Give the new page time to render before touching its rows
            time.sleep(2)

        # Only ask for the rows that exist on this page. Passing the overall
        # total made every page wait on row indexes that are never rendered.
        already_scraped = (page - 1) * listings_per_page
        listings_on_page = min(listings_per_page, total_listings - already_scraped)

        listings_data = get_listings_data(driver, listings_on_page)
        all_listings_data.extend(listings_data)  # Add data to the overall list
finally:
    # Always release the browser, even when a step above fails
    driver.quit()

# Save all the data to a CSV file
save_to_csv(all_listings_data, OUTPUT_DATA)

Login successful!
First suggestion selected and search triggered.
Search completed for city: Boston, MA
Filter button clicked and menu opened.
Scraping page 1 of 1...
Document Number *
Document Type Notice of Trustee's Sale
Lender Name US BANK NA
Recording Date 08/27/2024
Document Number 00024 SM 003
Document Type PreForeclosure Newly Filed Complaint
Lender Name TRUIST BK
Recording Date 09/06/2024
Document Number 00024 SM 003
Document Type PreForeclosure Newly Filed Complaint
Lender Name M&T BK
Recording Date 09/11/2024
Document Number *
Document Type Notice of Trustee's Sale
Lender Name FIDELITY BANK NA
Recording Date 08/13/2024
Document Number 00024 SM 002
Document Type PreForeclosure Newly Filed Complaint
Lender Name DEUTSCHE BK NATL TR CO
Recording Date 08/08/2024
Data saved to data/auto_search.csv
