In [50]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC

In [51]:
def wait_for_page_to_load(driver, wait):
    title = driver.title
    
    try:
        wait.until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
        
    except:
        print(f"The webpage \"{title}\" did not get fully loaded.\n")
        
    else:
        print(f"The webpage \"{title}\" did get fully loaded.\n")

In [52]:
# Setting Options to bypass the website Automation block.

chrome_options = Options()
chrome_options.add_argument("--disable-http2")
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--enable-features=NetworkServiceInProcess")
chrome_options.add_argument("--disable-features=NetworkService")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
)

In [53]:
driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()

In [54]:
# Explicit Wait
wait = WebDriverWait(driver, 10)

In [55]:
# Accessing the Target Webpage
url = "https://www.99acres.com/"
driver.get(url)
wait_for_page_to_load(driver, wait)

The webpage "India Real Estate Property Site - Buy Sell Rent Properties Portal - 99acres.com" did get fully loaded.



In [56]:
# Identifying and Entering Text to the Search Bar
try:
    search_bar = wait.until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="keyword2"]'))
    )
except:
    print("Timeout while locating Search Bar.\n")
else:
    search_bar.send_keys("Kolkata")
    time.sleep(5)

In [57]:
# Selecting the Valid option from the List
try:
    valid_option = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="0"]'))
    )
except:
    print("Timeout while locating valid Search Option.\n")
else:
    valid_option.click()
    time.sleep(5)

In [58]:
# Click on Search Button
try:
    search_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="searchform_search_btn"]'))
    )
except:
    print("Timeout while clicking \"Search\" Button.\n")
else:
    search_button.click()
    wait_for_page_to_load(driver, wait)

The webpage "Property in Kolkata - Real Estate in Kolkata" did get fully loaded.



In [59]:
# Adjust the Budget Slider
try:
    slider = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="budgetLeftFilter_max_node"]'))
    )
except:
    print("Timeout while clicking in Budget Slider Circle")
else:
    actions = ActionChains(driver)
    (
        actions
        .click_and_hold(slider)
        .move_by_offset(-73,0)
        .release()
        .perform()
    )
    time.sleep(5)

In [60]:
# Filter Results

# 1. Verified
verified = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[1]/div/div[3]'))
)
verified.click()
time.sleep(2)

#. 2. Ready To Move
ready_to_move = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[1]/div/div[5]'))
)
ready_to_move.click()
time.sleep(2)

# Moving the slider the the right to unhide the remaining filters.

# For First Click
filter_right_button_1st = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[2]'))
)
filter_right_button_1st.click()
time.sleep(2)

# For Second Click
filter_right_button_2nd = wait.until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[3]'))
)
filter_right_button_2nd.click()
time.sleep(2)


#. 3. With Photos
with_photos = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[2]/div/div[6]'))
)
with_photos.click()
time.sleep(2)


#. 4. With Videos
with_videos = wait.until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[2]/div/div[7]'))
)
with_videos.click()
time.sleep(5)

In [None]:
# Navigate Pages and Extract Data

while True:
    try:
        time.sleep(6)
        next_page_button = driver.find_element(By.XPATH, "//a[normalize-space()='Next Page >']")
    except:
        print("Timeout because we have navigated all the pages.\n")
        break
    else:
        print("ELSE\n")
        try:
            driver.execute_script("window.scrollBy(0, arguments[0].getBoundingClientRect().top - 100);", next_page_button)
            time.sleep(6)
        
        
            # Scraping Code
        
        
        
            wait.until(
                EC.element_to_be_clickable((By.XPATH, "//a[normalize-space()='Next Page >']"))
            ).click()
        except:
            print("Timeout while clicking on \"Next Page\".\n")

ELSE

Timeout while clicking on "Next Page".

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

ELSE

Timeout because we have navigated all the pages.

