In [11]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

def setup_driver():
    chrome_options = Options()
    chrome_options.add_argument("--headless=new")  # For headless mode
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
    return driver

def scrape_odisha_rera_projects():
    driver = setup_driver()
    url = "https://rera.odisha.gov.in/projects/project-list"
    driver.get(url)

    try:
        # Wait for any project row to appear
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "#projectListTable tbody tr"))
        )
    except:
        driver.save_screenshot("error_screenshot.png")
        driver.quit()
        raise Exception("⚠️ Failed to load project table. Screenshot saved as 'error_screenshot.png'")

    # Select first 6 projects
    rows = driver.find_elements(By.CSS_SELECTOR, "#projectListTable tbody tr")[:6]
    project_data = []

    for row in rows:
        try:
            view_button = row.find_element(By.XPATH, ".//a[contains(text(), 'View Details')]")
            project_url = view_button.get_attribute("href")

            driver.execute_script("window.open(arguments[0]);", project_url)
            driver.switch_to.window(driver.window_handles[-1])

            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'project-details')]"))
            )

            # Extract fields
            rera_reg_no = driver.find_element(By.XPATH, "//strong[contains(text(),'RERA Regd. No.')]/following-sibling::span").text.strip()
            project_name = driver.find_element(By.XPATH, "//strong[contains(text(),'Project Name')]/following-sibling::span").text.strip()

            # Switch to Promoter Details tab
            promoter_tab = driver.find_element(By.XPATH, "//a[contains(text(),'Promoter Details')]")
            promoter_tab.click()
            time.sleep(1)  # Wait for content

            promoter_name = driver.find_element(By.XPATH, "//strong[contains(text(),'Company Name')]/following-sibling::span").text.strip()
            promoter_address = driver.find_element(By.XPATH, "//strong[contains(text(),'Registered Office Address')]/following-sibling::span").text.strip()
            try:
                gst_no = driver.find_element(By.XPATH, "//strong[contains(text(),'GST No.')]/following-sibling::span").text.strip()
            except:
                gst_no = "N/A"

            project_data.append({
                "RERA Regd. No": rera_reg_no,
                "Project Name": project_name,
                "Promoter Name": promoter_name,
                "Promoter Address": promoter_address,
                "GST No": gst_no,
                "URL": project_url
            })

            driver.close()
            driver.switch_to.window(driver.window_handles[0])

        except Exception as e:
            print(f"Error while processing a project: {e}")
            driver.close()
            driver.switch_to.window(driver.window_handles[0])

    driver.quit()

    # Save to CSV
    df = pd.DataFrame(project_data)
    df.to_csv("odisha_rera_projects.csv", index=False)
    print("✅ Scraped data saved to 'odisha_rera_projects.csv'")

if __name__ == "__main__":
    scrape_odisha_rera_projects()


TypeError: WebDriver.__init__() got multiple values for argument 'options'