In [21]:
import time

import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def wait_for_page_to_load(driver, wait):
    """Wait for the current document to reach ``readyState == "complete"``.

    The outcome is only reported: one status line containing the page title
    is printed for success and one for failure. Nothing is returned and a
    failed wait is not re-raised.

    Args:
        driver: Object exposing ``title`` and ``execute_script``; the script
            passes its Selenium Chrome driver.
        wait: Object exposing ``until(condition)``; the script passes its
            ``WebDriverWait``, whose timeout bounds how long this call blocks.
    """
    # Read the title up front so both messages name the same page even if the
    # document changes while waiting.
    title = driver.title
    try:
        wait.until(lambda d: d.execute_script("return document.readyState") == "complete")
    except Exception:
        # Best-effort by design: any failure of the wait is just reported.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate so the script can still be stopped.
        print(f'The webpage "{title}" did not get fully loaded.')
    else:
        print(f'The webpage "{title}" did get fully loaded.')


# Chrome options
chrome_options = Options()
# Command-line switches passed to Chrome, applied in the order listed.
for chrome_flag in (
    "--disable-http2",
    "--incognito",
    "--disable-blink-features=AutomationControlled",
    "--ignore-certificate-errors",
    "--enable-features=NetworkServiceInProcess",
    "--disable-features=NetworkService",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
):
    chrome_options.add_argument(chrome_flag)

# Initialize driver
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()
# One explicit wait (5-second timeout) shared by every lookup further down.
wait = WebDriverWait(driver, 5)

# Go to the website
url = "https://www.99acres.com/"
driver.get(url)
wait_for_page_to_load(driver, wait)

# Enter search term
search_box = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="keyword2"]')))
search_box.send_keys("Chennai")
time.sleep(2)

# Element with id "0" -- presumably the first autocomplete suggestion; verify against the page.
suggestion = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="0"]')))
suggestion.click()
time.sleep(2)

# Submit the search and wait for the results document to finish loading.
search_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="searchform_search_btn"]')))
search_button.click()
wait_for_page_to_load(driver, wait)

# Adjust budget slider
slider = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="budgetLeftFilter_max_node"]')))
actions = ActionChains(driver)
# Queue the drag one step at a time: press the handle, move 73 px to the
# left, release, then run the queued gesture.
actions.click_and_hold(slider)
actions.move_by_offset(-73, 0)
actions.release()
actions.perform()
time.sleep(2)

# Filters
# Each entry is clicked once it becomes clickable, followed by a short pause.
for filter_xpath in (
    '/html/body/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/span[2]',
    '/html/body/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[5]/span[2]',
):
    wait.until(EC.element_to_be_clickable((By.XPATH, filter_xpath))).click()
    time.sleep(1)

# Move to next filters
# Click the right-arrow icon repeatedly until the wait can no longer find it
# in the DOM (presumably the control that scrolls the filter strip -- verify
# against the page).
while True:
    try:
        btn = wait.until(EC.presence_of_element_located((By.XPATH, "//i[contains(@class,'iconS_Common_24 icon_upArrow cc__rightArrow')]")))
    except TimeoutException:
        # Only a timed-out wait means "no arrow left". A bare ``except`` here
        # would also turn Ctrl-C or a dead browser session into this message
        # and let the script carry on with the wrong page state.
        print("All filters visible.")
        break
    btn.click()
    time.sleep(1)

# Remaining filters, reachable once the loop above has finished.
wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[6]/span[2]'))).click()
time.sleep(1)
wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[7]/span[2]'))).click()
# Longer pause before scraping starts.
time.sleep(3)

# Scrape data
page_count = 0
data = []


def _text_or_nan(parent, class_name):
    """Return the text of the first ``class_name`` element under ``parent``.

    Falls back to ``np.nan`` when the element is missing or ``parent`` has
    gone stale, so one incomplete listing never aborts the page.
    """
    try:
        return parent.find_element(By.CLASS_NAME, class_name).text
    except (NoSuchElementException, StaleElementReferenceException):
        return np.nan


try:
    while True:
        page_count += 1
        print(f"Scraping page {page_count}...")

        rows = driver.find_elements(By.CLASS_NAME, "tupleNew__TupleContent")
        for row in rows:
            name = _text_or_nan(row, "tupleNew__headingNrera")
            location = _text_or_nan(row, "tupleNew__propType")
            price = _text_or_nan(row, "tupleNew__priceValWrap")

            try:
                elements = row.find_elements(By.CLASS_NAME, "tupleNew__area1Type")
                # Exactly two matches are expected; any other count raises
                # ValueError on unpacking and both fields become NaN.
                area, bhk = [ele.text for ele in elements]
            except (ValueError, StaleElementReferenceException):
                area, bhk = np.nan, np.nan

            data.append({"name": name, "location": location, "price": price, "area": area, "bhk": bhk})

        # Scroll to bottom to trigger next page button load
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)

        next_page_xpath = "//a[normalize-space()='Next Page >']"
        try:
            next_page_button = driver.find_element(By.XPATH, next_page_xpath)
        except NoSuchElementException:
            # No "Next Page" link on this page: normal end of pagination.
            print(f"Reached last page {page_count}. Exiting.")
            break

        try:
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", next_page_button)
            time.sleep(1)
            wait.until(EC.element_to_be_clickable((By.XPATH, next_page_xpath))).click()
            time.sleep(5)
        except WebDriverException:
            # Any Selenium failure while paging (timeout, intercepted click,
            # stale link, ...) ends the crawl; Ctrl-C still propagates.
            print(f"Could not click Next Page at page {page_count}. Exiting.")
            break
finally:
    # Runs on normal exit and on errors alike: whatever was collected is
    # written out, and the browser is closed even if the export itself fails.
    try:
        # Save to Excel
        pd.DataFrame(data).drop_duplicates().to_excel("chennai-properties-99acres.xlsx", index=False)
    finally:
        driver.quit()

print("Scraping finished.")


The webpage "India Real Estate Property Site - Buy Sell Rent Properties Portal - 99acres.com" did get fully loaded.
The webpage "Property in Chennai - Real Estate in Chennai" did get fully loaded.
All filters visible.
Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping pa