In [21]:
pip install selenium webdriver-manager




In [14]:
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager


def setup_driver():
    """Build and return a Chrome WebDriver instance.

    The chromedriver executable path is obtained from webdriver-manager's
    ``ChromeDriverManager().install()`` and handed to Selenium through a
    ``Service`` object.

    Returns:
        A started ``webdriver.Chrome`` session; the caller is responsible
        for calling ``quit()`` on it.
    """
    chrome_options = webdriver.ChromeOptions()
    # Command-line switches forwarded to the Chrome process as-is.
    for flag in ("--no-sandbox", "--disable-dev-shm-usage"):
        chrome_options.add_argument(flag)

    chromedriver_path = ChromeDriverManager().install()
    chrome_service = Service(chromedriver_path)
    return webdriver.Chrome(service=chrome_service, options=chrome_options)


def extract_text(driver, xpath):
    """Return the stripped text of the first element matching *xpath*.

    Args:
        driver: Selenium WebDriver (anything exposing ``find_element``).
        xpath: XPath expression identifying the element to read.

    Returns:
        The element's ``.text`` with surrounding whitespace removed, or the
        sentinel string ``"Not Found"`` when Selenium cannot locate or read
        the element.
    """
    try:
        return driver.find_element(By.XPATH, xpath).text.strip()
    except WebDriverException:
        # Best-effort lookup: any Selenium failure (missing element, stale
        # reference, ...) maps to the sentinel. Unlike a bare ``except``,
        # KeyboardInterrupt/SystemExit and programming errors still surface.
        return "Not Found"


def _extract_project_details(driver):
    """Read the project and promoter fields from the open detail page.

    Assumes the detail page is already loaded in *driver*. Fields that
    cannot be located come back as the ``"Not Found"`` sentinel produced by
    ``extract_text``.
    """
    rera_no = extract_text(driver, '//strong[contains(text(), "RERA Regd. No")]/following-sibling::span')
    project_name = extract_text(driver, '//strong[contains(text(), "Project Name")]/following-sibling::span')

    try:
        promoter_tab = driver.find_element(By.XPATH, '//a[contains(text(), "Promoter Details")]')
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", promoter_tab)
        time.sleep(2)
    except WebDriverException:
        # Keep going: the promoter fields will simply read "Not Found".
        print("Promoter tab not clickable")

    promoter_name = extract_text(driver, '//strong[contains(text(), "Company Name")]/following-sibling::span')
    promoter_address = extract_text(driver, '//strong[contains(text(), "Registered Office Address")]/following-sibling::span')
    gst_no = extract_text(driver, '//strong[contains(text(), "GST No")]/following-sibling::span')

    return {
        "RERA Regd. No": rera_no,
        "Project Name": project_name,
        "Promoter Name": promoter_name,
        "Promoter Address": promoter_address,
        "GST No": gst_no,
    }


def scrape_projects(max_projects=6):
    """Scrape details of the first projects listed on the Odisha RERA site.

    Opens the project list, then for each of the first *max_projects*
    "View Details" links: clicks it, waits for the detail page, collects
    the fields, and navigates back to the list.

    Args:
        max_projects: Upper bound on how many listed projects to visit.
            Defaults to 6; ``None`` visits every link found on the page.

    Returns:
        A list of dicts, one per successfully loaded project, with keys
        "RERA Regd. No", "Project Name", "Promoter Name",
        "Promoter Address" and "GST No". Projects whose detail page times
        out are skipped.

    Raises:
        selenium.common.exceptions.TimeoutException: if no "View Details"
            link appears on the initial list page within the wait timeout.
    """
    url = "https://rera.odisha.gov.in/projects/project-list"
    links_locator = (By.XPATH, '//a[contains(text(), "View Details")]')
    project_data = []

    driver = setup_driver()
    try:
        wait = WebDriverWait(driver, 20)

        print("Opening main page...")
        driver.get(url)
        time.sleep(3)

        wait.until(EC.presence_of_element_located(links_locator))
        time.sleep(1)
        total = len(driver.find_elements(*links_locator)[:max_projects])

        print(f"Found {total} projects to scrape.")

        for idx in range(total):
            print(f"\nScraping project {idx + 1}...")

            def nth_link(drv, position=idx):
                # Element references from before the navigation are no
                # longer usable, so look the links up again each time.
                links = drv.find_elements(*links_locator)
                return links[position] if position < len(links) else False

            try:
                # Wait until the list has re-rendered far enough to contain
                # this link instead of indexing blindly (IndexError risk).
                link = wait.until(nth_link)
            except TimeoutException:
                print("❌ Project list did not reload; stopping early")
                break

            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", link)
            time.sleep(1)  # give the smooth scroll time to finish
            driver.execute_script("arguments[0].click();", link)

            try:
                wait.until(EC.presence_of_element_located((By.XPATH, '//strong[contains(text(), "RERA Regd. No")]')))
            except TimeoutException:
                print("❌ Timeout loading project page")
            else:
                project_data.append(_extract_project_details(driver))
                print("✅ Data extracted")

            # Return to the list page whether or not the details loaded.
            driver.back()
            time.sleep(3)
    finally:
        # Always shut the browser down, even when scraping fails midway,
        # so no Chrome/chromedriver process is left behind.
        driver.quit()

    return project_data


if __name__ == "__main__":
    # Run the scraper, then print each collected record field by field.
    results = scrape_projects()
    print("\n======== FINAL RESULTS ========")
    for position, record in enumerate(results, start=1):
        print(f"\nProject {position}:")
        for field_name in record:
            print(f"{field_name}: {record[field_name]}")


Opening main page...
Found 6 projects to scrape.

Scraping project 1...
❌ Timeout loading project page

Scraping project 2...
❌ Timeout loading project page

Scraping project 3...
❌ Timeout loading project page

Scraping project 4...
❌ Timeout loading project page

Scraping project 5...
❌ Timeout loading project page

Scraping project 6...
❌ Timeout loading project page

