In [4]:
import time
import warnings
from io import BytesIO, StringIO
from urllib.request import urlopen

import pandas as pd
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Suppress every FutureWarning process-wide. NOTE(review): presumably meant to
# quiet deprecation notices from the libraries used below -- confirm which
# warning this hides before removing it.
warnings.filterwarnings('ignore', category=FutureWarning)

class PropertyScraper:
    """Drive a Chrome browser through a captcha-protected search form and
    collect the paginated result tables into pandas DataFrames.

    The captcha is solved by a human: the image is displayed and the text is
    read from standard input.

    Attributes:
        driver: The Selenium Chrome WebDriver used for every page interaction.
        url: Address of the search form; reloaded before each locality search.
        all_data: Accumulator DataFrame. Only ``scrape_all_data`` assigns it;
            ``scrape_locality`` leaves it untouched so callers can safely
            build up results across several localities.
        locality_list: The localities supplied by the caller.
    """

    # Locators of the page elements the scraper interacts with.
    SRO_FIELD_ID = "ctl00_ContentPlaceHolder1_ddl_sro_s"
    LOCALITY_FIELD_ID = "ctl00_ContentPlaceHolder1_ddl_loc_s"
    YEAR_FIELD_ID = "ctl00_ContentPlaceHolder1_ddl_year_s"
    CAPTCHA_INPUT_ID = "ctl00_ContentPlaceHolder1_txtcaptcha_s"
    SEARCH_BUTTON_ID = "ctl00_ContentPlaceHolder1_btn_search_s"
    NEXT_BUTTON_ID = "ctl00_ContentPlaceHolder1_gv_search_ctl13_Button2"
    CAPTCHA_IMAGE_XPATH = "//div[@class='btn btn-sm']//img"

    # Seconds to pause before and after clicking the "next page" button.
    PAGE_LOAD_DELAY = 2

    def __init__(self, url, locality_list):
        """Start Chrome, open ``url`` and initialise an empty accumulator.

        Args:
            url: Address of the search form.
            locality_list: Localities the caller intends to scrape.
        """
        self.driver = webdriver.Chrome()
        self.url = url
        self.driver.get(url)
        self.all_data = pd.DataFrame()
        self.locality_list = locality_list

    def set_search_parameters(self, sro, local, year):
        """Fill in the SRO, locality and year fields of the search form."""
        self.select_element_by_id(self.SRO_FIELD_ID, sro)
        self.select_element_by_id(self.LOCALITY_FIELD_ID, local)
        self.select_element_by_id(self.YEAR_FIELD_ID, year)

    def scrape_locality(self, sro, locality, year):
        """Run one search and return every result page for ``locality``.

        Reloads the form, fills it in, shows the captcha, reads the answer
        from standard input, submits, and then walks all result pages.

        Args:
            sro: Value typed into the SRO field.
            locality: Value typed into the locality field.
            year: Value typed into the year field.

        Returns:
            A DataFrame holding only this locality's rows, or ``None`` when
            the page shown after submitting has no table with a
            'First Party' column.

        Note:
            ``self.all_data`` is deliberately NOT modified here. Previously
            it was overwritten on every call, which discarded the results of
            earlier localities and made callers that concatenate the return
            value onto ``all_data`` duplicate the latest locality.
        """
        self.driver.get(self.url)
        self.set_search_parameters(sro, locality, year)
        self.get_captcha_image()
        captcha_text = input(f"Please enter the captcha text for locality '{locality}': ")
        self.submit_captcha(captcha_text)
        return self._collect_pages()

    def select_element_by_id(self, element_id, value):
        """Type ``value`` into the element whose ID is ``element_id``.

        Raises:
            NoSuchElementException: If no element has that ID.
        """
        element = self.driver.find_element(By.ID, element_id)
        element.send_keys(value)

    def get_captcha_image(self):
        """Display the captcha image exactly as the browser rendered it.

        The image is captured with an element screenshot rather than
        re-downloaded from its ``src`` URL: a separate HTTP request runs
        outside the browser session and can therefore yield a different
        captcha than the one the form will validate.
        """
        captcha_img_element = self.driver.find_element(By.XPATH, self.CAPTCHA_IMAGE_XPATH)
        image = Image.open(BytesIO(captcha_img_element.screenshot_as_png))
        image.show()

    def submit_captcha(self, captcha_text):
        """Enter ``captcha_text`` into the captcha box and click search."""
        self.driver.find_element(By.ID, self.CAPTCHA_INPUT_ID).send_keys(captcha_text)
        self.driver.find_element(By.ID, self.SEARCH_BUTTON_ID).click()

    def scrape_current_page(self):
        """Parse the first ``<table>`` of the current page into a DataFrame.

        Returns:
            The cleaned results (see ``_clean_results_table``), or ``None``
            when the page has no parsable table or the table lacks a
            'First Party' column.
        """
        soup = BeautifulSoup(self.driver.page_source, "html.parser")
        table = soup.find("table")
        if table is None:
            return None
        try:
            # Wrap the markup in StringIO: passing literal HTML to read_html
            # is deprecated in recent pandas releases.
            frames = pd.read_html(StringIO(str(table)))
        except ValueError:
            # read_html raises ValueError when it finds no table to parse.
            return None
        return self._clean_results_table(frames[0])

    @staticmethod
    def _clean_results_table(df):
        """Normalise a raw results table.

        Drops rows without a 'First Party', drops the last remaining row,
        removes the 'Reg.No' and 'Deed Type' columns, renames 'Area' to
        'Area (sq. feet)' and strips the literal "Sq. Feet" text from it.

        Args:
            df: The DataFrame parsed from the results table.

        Returns:
            A new, re-indexed DataFrame, or ``None`` if ``df`` has no
            'First Party' column. ``df`` itself is not modified.
        """
        if "First Party" not in df.columns:
            return None

        cleaned = (
            df.dropna(subset=["First Party"])
            # NOTE(review): the trailing row is presumably the pager/footer
            # row of the grid -- confirm against the live page.
            .iloc[:-1]
            .drop(columns=["Reg.No", "Deed Type"])
            .rename(columns={"Area": "Area (sq. feet)"})
            .reset_index(drop=True)
        )
        # regex=False: "Sq. Feet" is a literal, and the default of `regex`
        # differs between pandas versions ('.' would otherwise be a wildcard).
        cleaned["Area (sq. feet)"] = (
            cleaned["Area (sq. feet)"].str.replace("Sq. Feet", "", regex=False).str.strip()
        )
        return cleaned

    def wait_for_element(self, by, value, timeout=10):
        """Wait up to ``timeout`` seconds for an element to be present.

        Returns:
            The located element, or ``None`` if it did not appear in time.
        """
        try:
            # until() returns the element found by the condition, so no second
            # lookup (which could race with a page change) is needed.
            return WebDriverWait(self.driver, timeout).until(
                EC.presence_of_element_located((by, value))
            )
        except TimeoutException:
            return None

    def click_next_page(self):
        """Click the "next page" button if it is present and usable.

        Returns:
            ``True`` if the button was clicked, ``False`` otherwise.
        """
        next_page = self.wait_for_element(By.ID, self.NEXT_BUTTON_ID)
        if next_page and next_page.is_enabled() and next_page.is_displayed():
            # Click through JavaScript rather than WebElement.click().
            self.driver.execute_script("arguments[0].click();", next_page)
            return True
        return False

    def _collect_pages(self):
        """Scrape the current page and every following page of one search.

        Returns:
            All pages concatenated into one DataFrame, or ``None`` when the
            current page has no usable results table.
        """
        page = self.scrape_current_page()
        if page is None:
            return None

        pages = [page]
        while True:
            time.sleep(self.PAGE_LOAD_DELAY)
            try:
                advanced = self.click_next_page()
            except NoSuchElementException:
                break
            if not advanced:
                break
            time.sleep(self.PAGE_LOAD_DELAY)
            page = self.scrape_current_page()
            if page is None:
                # The new page carries no results table; nothing more to read.
                break
            pages.append(page)
        # Concatenate once at the end instead of once per page.
        return pd.concat(pages, ignore_index=True)

    def scrape_all_data(self):
        """Scrape all pages of the current search into ``self.all_data``.

        Replaces ``self.all_data`` with the rows of the search currently
        shown in the browser, or with an empty DataFrame if there are none.
        """
        collected = self._collect_pages()
        self.all_data = pd.DataFrame() if collected is None else collected

    def close_driver(self):
        """Quit the browser and end the WebDriver session."""
        self.driver.quit()

def main():
    """Scrape a fixed set of localities and export them to an Excel file.

    For each locality the user is prompted for the captcha. Localities for
    which the scraper returns ``None`` are reported and skipped. The combined
    rows are stored on ``scraper.all_data`` and written to
    ``property_data.xlsx``.
    """
    url = "https://esearch.delhigovt.nic.in/Complete_search.aspx"
    sro = "Central -Asaf Ali (SR III)"
    year = "2021-2022"
    localities = ['Abul Fazal Enclave*', 'Adarsh Nagar*', 'Ahata kidara*', 'Ajmal Khan Road', 'Ajmeri Gate']

    scraper = PropertyScraper(url, localities)

    # Per-locality frames are kept in a local list rather than accumulated
    # through scraper.all_data: the scraper may reassign that attribute while
    # scraping, which would drop earlier localities and duplicate the latest.
    frames = []
    try:
        total = len(scraper.locality_list)
        for position, locality in enumerate(scraper.locality_list, start=1):
            print(f"Scraping data for locality {position}/{total}: {locality}")
            locality_data = scraper.scrape_locality(sro, locality, year)
            if locality_data is None:
                print(f"No 'First Party' column found for locality '{locality}', moving to the next locality.")
                continue
            if not locality_data.empty:
                frames.append(locality_data)
    finally:
        # Always release the browser, even if a scrape fails part-way.
        scraper.close_driver()

    scraper.all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    scraper.all_data.to_excel("property_data.xlsx", index=False)

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Scraping data for locality 1/5: Abul Fazal Enclave*
No 'First Party' column found for locality 'Abul Fazal Enclave*', moving to the next locality.
Scraping data for locality 2/5: Adarsh Nagar*
No 'First Party' column found for locality 'Adarsh Nagar*', moving to the next locality.
Scraping data for locality 3/5: Ahata kidara*
No 'First Party' column found for locality 'Ahata kidara*', moving to the next locality.
Scraping data for locality 4/5: Ajmal Khan Road
Scraping data for locality 5/5: Ajmeri Gate


In [None]:
# There are some localities that don't have any data. For example, 'Ahata kidara*' has no data. So, I've skipped those localities.

# I have only used a few localities for demonstration purposes. Here's the complete list of localities:

# ['Abul Fazal Enclave*', 'Adarsh Nagar*', 'Ahata kidara*', 'Ajmal Khan Road', 'Ajmeri Gate', 'Alipur Road*', 'Alipur*', 'Amarpuri*', 'Amrit Kaur Puri', 'Anand Niketan*', 'Andrews Ganj*', 'Anna Nagar (Minto Road)', 'Anoop Nagar*', 'Ansari Nagar*', 'Arakarshana Road', 'Aram Bagh', 'Aram Nagar', 'Arjun Nagar*', 'Arya Nagar (Pahar Ganj)', 'Arya Nagar*', 'Arya Samaj Road', 'Asaf Ali Road', 'Ashok Nagar*', 'Ashok Vihar*', 'Ashoka Pahari (Manakpura )', 'Ashram*', 'Asola*', 'Badarpur*', 'Baggichi Madhodas (Jama Masjid)', 'Bagh Raoji Colony (Manakpura)', 'Bagh Raoji*', 'Bagichi Allauddin (Qadam Shariff)', 'Bagichi Allauddin*', 'Bahadur Shah Zafar Marg', 'Bali Nagar*', 'Baljeet Nagar*', 'Balmiki Basti (Minto Road)', 'Balmiki Colony (Dev Nagar)', 'Bangali Market*', 'Bapa Nagar', 'Bara Hindu Rao*', 'Baradari (Ballimaran)', 'Basai Darapur*', 'Basant Nagar*', 'Basti Harphool Singh*', 'Basti Julahan*', 'Batla House*', 'Bazar Lal Kuan*', 'Bazar Sita Ram', 'Beadon Pura', 'Bela Road*', 'Beri Wala Bagh*', 'Bhagirath Palace*', 'Bharat Nagar*', 'Bharthal*', 'Bhikaji Cama Place*', 'Bhim Nagar*', 'Bhogal*', 'Bijwasan*', 'Bindapur*', 'Birla Lines*', 'Bulbuli Khan Darya Ganj', 'Chamelian Road*', 'Chanakya Puri*', 'Chandiwalan*', 'Chandni Mahal', 'Chandrawal Road*', 'Chatta Lal Mian*', 'Chatta Lal Miya (Darya Ganj)', 'Chawri Bazar*', 'Chhatta Lal Mian*', 'Chhawla*', 'Chirag Delhi*', 'Chitla Gate Area (Darya Ganj)', 'Chitli Qabar*', 'Chuna Mandi', 'Churiwalan*', 'Civil Lines*', 'Dakshinpuri Extension*', 'Dariba Kalan*', 'Darya Ganj*', 'Daryaganj*', 'DDU Marg*', 'Defence Colony*', 'Delhi Gate Bazar', 'Delhi Gate*', 'Dev Nagar', 'Dharam Pura, Chandni Chowk', 'Dharampura*', 'Dhaula Kuan*', 'Dilshad Garden*', 'Dori Walan', 'Doriwalan*', 'Dwarka*', 'East of Kailash*', 'East Patel Nagar*', 'Faiz Bazar*', 'Faiz Road*', 'Farash Khana', 'Fateh Nagar*', 'Fatehpur Beri*', 'Feroz Shah Kotla', 'G.B. 
Road', 'Gali Garhiya', 'Gali Imam Wali', 'Gali Madarsa Abdul Aziz', 'Gali Madarsa Hussain Bux', 'Gali Masjid Lal', 'Gali Matia Mahal', 'Gali Shahtara', 'Gandhi Market Area Minto Road)', 'Gandhi Nagar*', 'Ganeshpura*', 'Ganj Mir Khan*', 'Gau Shala (Manakpura)', 'Gaushala Marg*', 'Gautam Nagar*', 'Ghanta Ghar*', 'Gopal Nagar*', 'Govind Nagar', 'Greater Kailash*', 'Green Park Extension*', 'Green Park Main*', 'Green Park Market*', 'Gulabi Bagh*', 'Gulmohar Enclave*', 'Gulmohar Park*', 'Gurgaon Road*', 'Hardev Puri*', 'Hardhyan Singh Road', 'Hari Nagar Ashram*', 'Hari Nagar*', 'Hauz Khas Enclave*', 'Hauz Khas*', 'Hauz Qazi*', 'Hauz Quazi', 'Hauz Rani*', 'Haveli Azam Khan', 'Haveli Hissamuddin Haider*', 'Idgah Road*', 'Inderpuri*', 'Indira Nagar*', 'Indra Park*', 'Indraprasth Estate (Minto Road)', 'Issapur*', 'Jagatpur*', 'Jama Masjid', 'Jamia Nagar*', 'Janak Park*', 'Janak Puri*', 'Janakpuri*', 'Jangpura Lane*', 'Jangpura Mathura Road*', 'Jasola Village*', 'Jhandewalan', 'Jhandewalan Extn.(Manakpura)', 'Jhandewalan Road*', 'Jia Sarai*', 'Joga Bai*', 'Jogabai*', 'Jogiwara*', 'Joshi Road', 'Kailash Colony*', 'Kala Mahal', 'Kala Masjid', 'Kalinidi Colony*', 'Kalka Ji*', 'Kalkaji*', 'Kamla Market', 'Kapashera*', 'Karol Bagh', 'Kaseruwalan*', 'Kashmere Gate*', 'Katra Chhotey Lal (Darya Ganj', 'Kaushri Wallan', 'Kautilya Marg*', 'Khaira*', 'Khalsa Nagar', 'Khari Baoli*', 'Khirki Extension*', 'Kilokari*', 'Kinari Bazar*', 'Kirti Nagar*', 'Kishan Ganj*', 'Kotla Mubarak Pur*', 'Krishna Nagar', 'Krishna Park*', 'Kucha Chelan*', 'Kucha Pandit (Bazar Sita Ram)', 'Kucha Pati Ram (Pahar Ganj)', 'Kucha Sohan Lal (Pahar Ganj)', 'Kuchalal Man (Darya Ganj)', 'Kunde Walan', 'Laddu Ghati*', 'Lajpat Nagar I*', 'Lajpat Nagar*', 'Lal Kuan', 'Loha Mandi Naraina*', 'Madanpur Khadar*', 'Madipur*', 'Mahipal Pur*', 'Maidan Garhi*', 'Main Bazar Pahar Ganj*', 'Malka Ganj*', 'Malviya Nagar*', 'Man Singh Road*', 'Manak Pura', 'Mandawali*', 'Mantola*', 'Masih Garh*', 'Masjid Moth*', 'Masood Pur*', 
'Mata Rameshwari Nagar', 'Mata Sundari Rly Colony (Minto Road)', 'Mata Sundri Road*', 'Matia Mahal*', 'May Fair Garden*', 'Meena Bazar (Jama Masjid)', 'Mehrauli*', 'Militry Road', 'Minto Road', 'Mirdard Road*', 'Mithapur*', 'Model Basti (Manakpura)', 'Molarband*', 'Mori Gate*', 'Moti Bagh -1*', 'Moti Nagar*', 'Motia Bagh*', 'Motia Khan*', 'Multani Dhanda', 'Munirka*', 'Nabi Karim', 'Nai Basti*', 'Nai Sarak*', 'Nai Wala', 'Nai Wara*', 'Naiwala*', 'Nanak Pura*', 'Nangal Raya*', 'Naraina Vihar*', 'Naraina*', 'Nawab Ganj*', 'Naya Bazar*', 'Nehru Nagar*', 'Netaji Nagar*', 'New Friends Colony*', 'New Moti Nagar*', 'New Rajinder Nagar', 'New Ranjit Nagar*', 'New Rohtak Road*', 'Niti Bagh*', 'Nizamuddin West*', 'North Extn. Area, Pusa Road', 'Okhla Village*', 'Old Daryaganj Area( Pataudi House)', 'Old Rajinder Nagar', 'Old Subzi Mandi*', 'Others', 'Padam Singh Road', 'Padmini Enclave*', 'Pahar Ganj', 'Pahari Bhojla*', 'Pahari Gajaan', 'Pahari Imli', 'Pai Walan (Chandni Chowk)', 'Pamposh Enclave*', 'Pandara Road*', 'Pandav Nagar*', 'Pant Nagar*', 'Parsad Nagar*', 'Partap Nagar*', 'Paschim Puri*', 'Paschim Vihar*', 'Patel Nagar*', 'Peshwa Road*', 'Pitampura*', 'Prashad Nagar', 'Prem Nagar*', 'Press Area (Darya Ganj)', 'Prithvi Raj Road*', 'Punjabi Bagh Extension*', 'Punjabi Bagh*', 'Punjabi Basti*', 'Pusa Institute*', 'Pusa Road', 'Pyare Lal Road', 'Qamra Bangush', 'Qasab Pura*', 'Qutab Road*', 'Raigar Pura*', 'Rajender Nagar*', 'Rajendra Place', 'Rajinder Nagar', 'Rajindra Park, Pusa Road', 'Rajouri Garden*', 'Rajpur Road*', 'Rakab Ganj (Darya Ganj)', 'Rakab Ganj*', 'Ram Nagar', 'Rama Krishna Ashram Marg*', 'Ramesh Nagar*', 'Ramjas Road*', 'Rampura*', 'Rangpuri*', 'Rani Jhansi Road', 'Ravi Nagar*', 'Regar Pura*', 'Reghar Pura', 'Rishi Nagar*', 'Rodgran*', 'Rohini*', 'Roshanara Road*', 'Rouse Avenue (I.T.O.)', 'Sabzi Mandi*', 'Sadar Bazar*', 'Sadar Nala Road*', 'Sadar Thana Road*', 'Safdarjung Enclave*', 'Sainik Farm*', 'Samalka*', 'Sanjay Amar Colony (Minto Road)', 'Sanjay 
Nagar*', 'Sant Nagar*', 'Sarai Kale Khan*', 'Sardar Patel Marg*', 'Sarita Vihar*', 'Sarojini Nagar*', 'Sarvodaya Enclave*', 'Sarvpriya Vihar*', 'Sat Nagar', 'Savitri Nagar*', 'Sewa Nagar*', 'Shadi Khampur*', 'Shah Ganj (Bazar Sita Ram)', 'Shahbad Mohammad Pur*', 'Shahpur Jat*', 'Shahurpur*', 'Shakti Nagar*', 'Shakur Basti*', 'Shalimar Bagh*', 'Shastri Park(Beadan Pura)', 'Sheesh Mahal (Darya Ganj)', 'Shiv Nagar*', 'Shivaji Park*', 'Shivalik*', 'Shora Kothi (Pahar Ganj)', 'Shora Kothi*', 'Shyam Nagar*', 'Siddi Pura', 'Sidharth Basti*', 'Sita Ram Bazar*', 'South Patel Nagar*', 'Sriniwaspuri*', 'Subhash Nagar', 'Subzi Mandi*', 'Sui Walan', 'Suiwalan*', 'Sukhdev Vihar*', 'Sunder Nagar*', 'Sunder Vihar*', 'Sunlight Colony*', 'Swami Ramtirth Nagar (manakpura)', 'Tagore Garden*', 'Tagore Road*', 'Taimoor Nagar*', 'Tajpur*', 'Thomson Road*', 'Tibbia College*', 'Tilak Bridge*', 'Tilak Nagar*', 'Timarpur*', 'Tiraha Behram Khan', 'Tiraha Behram Khan*', 'Tis Hazari*', 'Turkman Gate', 'Uday Park*', 'Uttam Nagar*', 'Varinder Nagar*', 'Vasant Kunj*', 'Vijay Nagar*', 'Vikas Puri*', 'Vikram Nagar (Minto Road)', 'Vikram Nagar*', 'W.E.A. Karol Bagh', 'Wazir Nagar*', 'West Patel Nagar*', 'Yamuna Bazar*', 'Yusuf Sarai*', 'Zakhira*', 'Zakir Nagar*', 'Zamrud Pur*']

# You can directly import this list from the locality.py module