In [51]:
import time
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

In [52]:
# ----- SCRAPING THE DATA -----

def wait_for_page_to_load(driver, wait):
	title = driver.title
	try:
		wait.until(
			lambda d: d.execute_script("return document.readyState") == "complete"
		)
	except:
		print(f"The webpage \"{title}\" did not get fully laoded.\n")
	else:
		print(f"The webpage \"{title}\" did get fully laoded.\n")

In [53]:
# options
chrome_options = Options()
chrome_options.add_argument("--disable-http2")
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--enable-features=NetworkServiceInProcess")
chrome_options.add_argument("--disable-features=NetworkService")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
)

driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()

# explicit wait
wait = WebDriverWait(driver, 5)

# accessing the target webpage
url = "https://www.99acres.com/"
driver.get(url)

In [54]:
# identify and enter text into search bar
try:
	search_bar = wait.until(
		EC.presence_of_element_located((By.XPATH, '//*[@id="keyword2"]'))
	)
except:
	print("Timeout while locating Search Bar.\n")
else:
	search_bar.send_keys("Kolkata")
	time.sleep(5)
 
# selecting valid option from list
try:
	valid_option = wait.until(
		EC.element_to_be_clickable((By.XPATH, '//*[@id="0"]'))
	)
except:
	print("Timeout while locating valid search option.\n")
else:
	valid_option.click()
	time.sleep(2)

# click on Search button
try:
	search_button = wait.until(
		EC.element_to_be_clickable((By.XPATH, '//*[@id="searchform_search_btn"]'))
	)
except:
	print("Timeout while clicking on \"Search\" button.\n")
else:
	search_button.click()
	wait_for_page_to_load(driver, wait)

The webpage "Property in Kolkata - Real Estate in Kolkata" did get fully laoded.



In [55]:
import numpy as np
# adjust the Budget slider
try:
	slider = wait.until(
		EC.element_to_be_clickable((By.XPATH, '//*[@id="budgetLeftFilter_max_node"]'))
	)
except:
	print("Timeout while clicking on Budget slider circle.\n")
else:
	actions = ActionChains(driver)
	(
		actions
		.click_and_hold(slider)
		.move_by_offset(-73, 0)
		.release()
		.perform()
	)
	time.sleep(2)

# filter results to show genuine listings
# 1. Verified
verified = wait.until(
	EC.element_to_be_clickable((By.XPATH, '/html[1]/body[1]/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/span[2]'))
)
verified.click()
time.sleep(1)

# 2. Ready To Move
ready_to_move = wait.until(
	EC.element_to_be_clickable((By.XPATH, '/html[1]/body[1]/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/span[2]'))
)
ready_to_move.click()
time.sleep(1)


# moving to the right side to unhide remaining filters
while True:
	try:
		filter_right_button = wait.until(
			EC.presence_of_element_located((By.XPATH, "/html[1]/body[1]/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[2]/i[1]"))
		)
	except:
		print("Timeout because we have uncovered all filters.\n")
		break
	else:
		filter_right_button.click()
		time.sleep(1)
  

# 3. With Photos
with_photos = wait.until(
	EC.element_to_be_clickable((By.XPATH, '/html[1]/body[1]/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[6]/span[2]'))
)
with_photos.click()
time.sleep(1)

# 4. With Videos
with_videos = wait.until(
	EC.element_to_be_clickable((By.XPATH, '/html[1]/body[1]/div[1]/div[1]/div[1]/div[4]/div[3]/div[1]/div[3]/section[1]/div[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[7]/span[2]'))
)
with_videos.click()
time.sleep(4)

#navigate pages and extract data
data = []
page_count = 0
while True:
    page_count = page_count + 1
    try:
        next_page_button = driver.find_element(By.XPATH, "//a[normalize-space()='Next Page >']")
    except:
        print("Timeout beacuse we have navigated all the {page_count} pages. \n")
        break
    else:
        try:
           driver.execute_script("window.scrollBy(0, arguments[0].getBoundingClientRect().top - 100);", next_page_button)
           time.sleep(2)
           
           #Scrapping Data
           rows = driver.find_elements(By.CSS_SELECTOR, ".tupleNew__contentWrap, .PseudoTupleRevamp__contentWrapAb")
           
           for row in rows:
                        # property name
                        try:
                            name = row.find_element(By.CSS_SELECTOR, ".tupleNew__headingNrera, .PseudoTupleRevamp__headNrating").text
                        except:
                               name = np.nan
                        # property location
                        try:
                            location = row.find_element(By.CSS_SELECTOR, ".tupleNew__propType, .PseudoTupleRevamp__w400Ml4").text
                        except:
                               location = np.nan
                    
                        # property price
                        try:
                            price = row.find_element(By.CSS_SELECTOR, ".tupleNew__priceValWrap, .configs__ccl2").text
                        except:
                               price = np.nan
                        # for some cases bhk and area = tupleNew_area1Type and in some cases bhk's are = configs_ccl1
                        # property area and size
                        try:
    # Try to extract both area and BHK from tupleNew__area1Type (2 spans expected)
                            elements = row.find_elements(By.CSS_SELECTOR, ".tupleNew__area1Type")

                            if len(elements) >= 2:
                               area, bhk = [ele.text for ele in elements[:2]]
                            elif len(elements) == 1:
                               area = elements[0].text
        # Fallback: try to extract BHK from configs__ccl1
                            try:
                                bhk = row.find_element(By.CSS_SELECTOR, ".configs__ccl1").text
                            except:
                                  bhk = np.nan
                            else:
        # If neither tupleNew__area1Type found, fallback to configs__ccl1 only
                               area = np.nan
                               try:
                                   bhk = row.find_element(By.CSS_SELECTOR, ".configs__ccl1").text
                               except:
                                     bhk = np.nan

                        except:
    # If entire block fails (row broken or elements missing), fill both as NaN
                            area, bhk = [np.nan, np.nan]

                        
                        property = {
							"Name": name,
                            "Location": location,
                            "Price" : price,
                            "Area": area,
                            "BHK" : bhk
						}
                        data.append(property)
           
           
           wait.until(
			EC.element_to_be_clickable((By.XPATH, "//a[normalize-space()='Next Page >']"))
		   ).click()
           time.sleep(5)
        except:
              print("TimeOut on clicking on \"Next Page\".\n")
              break
 
 #Scrapping Data from the last page
rows = driver.find_elements(By.CSS_SELECTOR, ".tupleNew__contentWrap, .PseudoTupleRevamp__contentWrapAb")
           
for row in rows:
                        # property name
                        try:
                            name = row.find_element(By.CSS_SELECTOR, ".tupleNew__headingNrera, .PseudoTupleRevamp__headNrating").text
                        except:
                               name = np.nan
                        # property location
                        try:
                            location = row.find_element(By.CSS_SELECTOR, ".tupleNew__propType, .PseudoTupleRevamp__w400Ml4").text
                        except:
                               location = np.nan
                    
                        # property price
                        try:
                            price = row.find_element(By.CSS_SELECTOR, ".tupleNew__priceValWrap, .configs__ccl2").text
                        except:
                               price = np.nan
                        # for some cases bhk and area = tupleNew_area1Type and in some cases bhk's are = configs_ccl1
                        # property area and size
                        try:
    # Try to extract both area and BHK from tupleNew__area1Type (2 spans expected)
                            elements = row.find_elements(By.CSS_SELECTOR, ".tupleNew__area1Type")

                            if len(elements) >= 2:
                               area, bhk = [ele.text for ele in elements[:2]]
                            elif len(elements) == 1:
                               area = elements[0].text
        # Fallback: try to extract BHK from configs__ccl1
                            try:
                                bhk = row.find_element(By.CSS_SELECTOR, ".configs__ccl1").text
                            except:
                                  bhk = np.nan
                            else:
        # If neither tupleNew__area1Type found, fallback to configs__ccl1 only
                               area = np.nan
                               try:
                                   bhk = row.find_element(By.CSS_SELECTOR, ".configs__ccl1").text
                               except:
                                     bhk = np.nan

                        except:
    # If entire block fails (row broken or elements missing), fill both as NaN
                            area, bhk = [np.nan, np.nan]

                        
                        property = {
							"Name": name,
                            "Location": location,
                            "Price" : price,
                            "Area": area,
                            "BHK" : bhk
						}
                        data.append(property)

time.sleep(3)
driver.quit()

Timeout because we have uncovered all filters.

TimeOut on clicking on "Next Page".



In [56]:
len(data)

154

In [None]:
#print(page_count)

1
