In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv

# Function to save data to CSV file
def save_to_csv(data, filename):
    """Write a list of row dictionaries to *filename* as a UTF-8 CSV file.

    The header row is the union of the keys of every row, in first-seen
    order. Rows that lack one of those keys get an empty cell for it, and a
    row that carries a key the first row does not have is still written
    instead of making ``csv.DictWriter`` raise ``ValueError``. When all rows
    share the same keys, the header is simply the first row's keys.

    Args:
        data: Sequence of dictionaries, one per CSV row.
        filename: Path of the CSV file to create or overwrite.

    If *data* is empty, no file is written and a notice is printed instead.
    """
    if not data:
        print("No data to save.")
        return

    # dict.fromkeys de-duplicates while keeping first-seen order.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Start the browser
# Point Selenium at the geckodriver binary located under /snap/bin.
service = Service(executable_path='/snap/bin/geckodriver')
# Single module-level Firefox session; the page loop further down navigates
# with it and calls quit() on it.
browser = webdriver.Firefox(service=service)

def _text_or_none(element, class_name):
    """Return the text of the first descendant with *class_name*, or None.

    Only selenium errors (element missing, stale, ...) are treated as
    "field not available"; KeyboardInterrupt and SystemExit propagate so a
    long scrape can still be stopped.
    """
    # Local import keeps this helper usable without touching the file's
    # top-level import block.
    from selenium.common.exceptions import WebDriverException

    try:
        return element.find_element(By.CLASS_NAME, class_name).text
    except WebDriverException:
        return None


def _extract_keypoints(card):
    """Return (deposit, bathrooms, facing) read from the card's key points.

    Each value is None when no key point with the matching ``title``
    attribute exists. The "listing-details" lookup is deliberately not
    guarded: a card without that list raises, and the caller skips the card.
    """
    details_ul = card.find_element(By.CLASS_NAME, "listing-details")

    deposit = None
    bathrooms = None
    facing = None
    for li in details_ul.find_elements(By.CLASS_NAME, "keypoint"):
        title = li.get_attribute("title")
        if title == "deposit":
            deposit = li.text.strip()
        elif title == "bathrooms":
            bathrooms = li.find_element(By.TAG_NAME, "span").text
        elif title == "facing":
            facing = li.text.strip()
    return deposit, bathrooms, facing


def _extract_status(card):
    """Return the status text from the "listing-highlights" table, or None.

    Any failure is reported on stdout and yields None, so a missing table
    does not discard the rest of the card.
    """
    try:
        highlights = card.find_element(By.CLASS_NAME, "listing-highlights")
        # The cell carries two classes; a CSS selector states that
        # explicitly instead of passing a compound name to By.CLASS_NAME.
        status_cell = highlights.find_element(By.CSS_SELECTOR, ".hcol.w44")
        return status_cell.find_element(By.CLASS_NAME, "val").text.strip()
    except Exception as e:
        print(f"Error extracting status: {e}")
        return None


def _extract_card(card):
    """Build the output row (a dict) for one "cardholder" element."""
    apartment_name = _text_or_none(card, "projName")
    project_name = _text_or_none(card, "seller-info")
    project_price = _text_or_none(card, "price")
    builtup_area = _text_or_none(card, "size")
    bhk = _text_or_none(card, "val")
    address = _text_or_none(card, "loclink")

    deposit, bathrooms, facing = _extract_keypoints(card)
    status = _extract_status(card)

    # Key order defines the CSV column order downstream; keep it stable.
    return {
        "broker_consultancy": project_name,
        "Rent": project_price,
        "Builtup Area": builtup_area,
        "appartment_type": bhk,
        "Deposit": deposit,
        "Bathrooms": bathrooms,
        "Facing": facing,
        "Address": address,
        "Apartment Name": apartment_name,
        "Status": status,
    }


def extract_data(browser):
    """Scrape every listing card from the page currently loaded in *browser*.

    Waits up to 20 seconds for the element with class "list-mainarea", then
    turns each "cardholder" element inside it into a dictionary.

    Args:
        browser: Selenium WebDriver positioned on a listings page.

    Returns:
        A list of row dictionaries, one per card that could be parsed.
        Errors (including the wait timing out) are printed rather than
        raised, so the list may be empty or shorter than the number of cards.
    """
    data_list = []
    try:
        parent_div = WebDriverWait(browser, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, "list-mainarea"))
        )

        project_wrappers = parent_div.find_elements(By.CLASS_NAME, "cardholder")
        for project_wrapper in project_wrappers:
            # One broken card must not abort the rest of the page.
            try:
                data_list.append(_extract_card(project_wrapper))
            except Exception as e:
                print(f"Error extracting data for a project: {e}")
    except Exception as e:
        print(f"Error scraping data: {e}")
    return data_list

# Listing URL without the page number; the number is appended per request.
base_url = 'https://www.makaan.com/listings?listingType=rent&pageType=CITY_URLS&cityName=Mumbai&cityId=18&templateId=MAKAAN_CITY_LISTING_BUY&page='
start_page = 1
end_page = 791
all_data = []

try:
    # Loop through each page
    for page in range(start_page, end_page + 1):
        url = base_url + str(page)
        browser.get(url)
        # Add a delay to ensure the page loads completely
        time.sleep(10)  # Increased delay to 10 seconds
        page_data = extract_data(browser)
        print(f"Page {page} Data:")
        for project in page_data:
            print(project)
        all_data.extend(page_data)
finally:
    # Runs on success, on any exception and on Ctrl-C: the Firefox process
    # is always released and whatever was scraped so far is written out, so
    # a failure on a late page does not discard the earlier pages.
    try:
        # Close the browser properly
        browser.quit()
    finally:
        # Write all_data to CSV file
        save_to_csv(all_data, 'mumbai.csv')

print("-- done --")


Page 1 Data:
{'broker_consultancy': 'Kasturi Developers\nBUILDER\n-', 'Rent': '20,000', 'Builtup Area': '1132', 'appartment_type': '2', 'Deposit': 'No Deposit', 'Bathrooms': '2 bathrooms', 'Facing': 'NorthEast facing', 'Address': 'Ulwe, Mumbai', 'Apartment Name': 'EV Castle', 'Status': 'Unfurnished'}
{'broker_consultancy': 'Kasturi Developers\nBUILDER\n-', 'Rent': '20,000', 'Builtup Area': '1200', 'appartment_type': '2', 'Deposit': 'No Deposit', 'Bathrooms': '2 bathrooms', 'Facing': 'NorthEast facing', 'Address': 'Ulwe, Mumbai', 'Apartment Name': None, 'Status': 'Unfurnished'}
{'broker_consultancy': 'Kasturi Developers\nBUILDER\n-', 'Rent': '30,000', 'Builtup Area': '1700', 'appartment_type': '3', 'Deposit': 'No Deposit', 'Bathrooms': '3 bathrooms', 'Facing': 'NorthEast facing', 'Address': 'Ulwe, Mumbai', 'Apartment Name': None, 'Status': 'Semi-Furnished'}
{'broker_consultancy': 'Seller\nVERIFIED OWNER', 'Rent': '28,500', 'Builtup Area': '750', 'appartment_type': '2', 'Deposit': 'No De