In [5]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import csv


# Function to save data to CSV file
def save_to_csv(data, filename):
    if data:
        keys = data[0].keys()
        with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=keys)
            writer.writeheader()
            writer.writerows(data)
    else:
        print("No data to save.")


# Start the browser
service = Service(executable_path='/snap/bin/geckodriver')
browser = webdriver.Firefox(service=service)


def extract_data(browser):
    data_list = []
    try:
        parent_div = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "list-mainarea")))
        project_wrappers = parent_div.find_elements(By.CLASS_NAME, "cardholder")
        for project_wrapper in project_wrappers:
            try:
                project_name = project_wrapper.find_element(By.CLASS_NAME, "seller-info").text
                project_price = project_wrapper.find_element(By.CLASS_NAME, "price").text
                builtup_area = project_wrapper.find_element(By.CLASS_NAME, "size").text
                bhk = project_wrapper.find_element(By.CLASS_NAME, "val").text


                # Extracting additional details
                details_ul = project_wrapper.find_element(By.CLASS_NAME, "listing-details")
                details_li = details_ul.find_elements(By.CLASS_NAME, "keypoint")

                deposit = None
                bathrooms = None
                facing = None

                for li in details_li:
                    title = li.get_attribute("title")
                    if title == "deposit":
                        deposit = li.text.strip()
                    elif title == "bathrooms":
                        bathrooms = li.find_element(By.TAG_NAME, "span").text
                    elif title == "facing":
                        facing = li.text.strip()

                # Adding all details to the data list
                data_list.append({
                    "Project Name": project_name,
                    "Project Price": project_price,
                    "Builtup Area": builtup_area,
                    "BHK": bhk,
                    "Deposit": deposit,
                    "Bathrooms": bathrooms,
                    "Facing": facing
                })
            except Exception as e:
                print(f"Error extracting data for a project: {e}")
    except Exception as e:
        print(f"Error scraping data: {e}")
    return data_list


base_url = 'https://www.makaan.com/listings?listingType=rent&pageType=CITY_URLS&cityName=Mumbai&cityId=18&templateId=MAKAAN_CITY_LISTING_BUY&page='
page_count = 1
all_data = []

# Loop through each page
for page in range(1, page_count + 1):
    url = base_url + str(page)
    browser.get(url)
    # Add a delay to ensure page loads completely
    time.sleep(5)
    page_data = extract_data(browser)
    print(f"Page {page} Data:")
    for project in page_data:
        print(project)
    all_data.extend(page_data)

# Close the browser properly
browser.quit()

# Write all_data to CSV file
save_to_csv(all_data, 'data.csv')

print("-- done --")


Page 1 Data:
{'Project Name': 'Kasturi Developers\nBUILDER\n-', 'Project Price': '35,000', 'Builtup Area': '1700', 'BHK': '3', 'Deposit': 'No Deposit', 'Bathrooms': '3 bathrooms', 'Facing': 'NorthEast facing'}
{'Project Name': 'Kasturi Developers\nBUILDER\n-', 'Project Price': '20,000', 'Builtup Area': '1200', 'BHK': '2', 'Deposit': 'No Deposit', 'Bathrooms': '2 bathrooms', 'Facing': 'NorthEast facing'}
{'Project Name': 'Kasturi Developers\nBUILDER\n-', 'Project Price': '30,000', 'Builtup Area': '1700', 'BHK': '3', 'Deposit': 'No Deposit', 'Bathrooms': '3 bathrooms', 'Facing': 'NorthEast facing'}
{'Project Name': 'Seller\nVERIFIED OWNER', 'Project Price': '28,500', 'Builtup Area': '750', 'BHK': '2', 'Deposit': 'No Deposit', 'Bathrooms': '2 bathrooms', 'Facing': None}
{'Project Name': 'Seller\nVERIFIED OWNER', 'Project Price': '50,000', 'Builtup Area': '1400', 'BHK': '3', 'Deposit': 'No Deposit', 'Bathrooms': '3 bathrooms', 'Facing': 'East facing'}
{'Project Name': 'Seller\nVERIFIED OWN

In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import time

# Function to save data to CSV file
def save_to_csv(data, filename):
    if data:
        keys = data[0].keys()
        with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=keys)
            writer.writeheader()
            writer.writerows(data)
    else:
        print("No data to save.")

# Start the browser
service = Service(executable_path='/snap/bin/geckodriver')
browser = webdriver.Firefox(service=service)

# URL of the webpage to scrape
url = "https://www.nobroker.in/property/rent/mumbai/Mumbai%20Central?searchParam=W3sibGF0IjoxOC45NjkwMjQ3LCJsb24iOjcyLjgyMDUyOTIsInBsYWNlSWQiOiJDaElKTjNHeG9XN081enNSNF9YTE83R09HZjQiLCJwbGFjZU5hbWUiOiJNdW1iYWkgQ2VudHJhbCJ9XQ==&radius=2.0&sharedAccomodation=0&city=mumbai&locality=Mumbai%20Central"

# Open the webpage
browser.get(url)

# Wait for the property listings to load
wait = WebDriverWait(browser, 10)
property_listings = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'card')))

# List to hold the scraped data
data = []

# Extract information for each property
for property in property_listings:
    try:
        rent = property.find_element(By.CLASS_NAME, 'heading-6').text.strip()
        location = property.find_element(By.CLASS_NAME, 'component__locality--fNwDM').text.strip()
        apartment_type = property.find_element(By.XPATH, ".//span[contains(@class, 'heading-6')]").text.strip()
        area = property.find_element(By.XPATH, ".//div[contains(@class, 'font-semi-bold') and contains(@class, 'margin-5')]").text.strip()
        deposit = property.find_element(By.XPATH, ".//div[contains(@class, 'font-semi-bold') and contains(@class, 'margin-5')]").text.strip()
        furnishing = property.find_element(By.XPATH, ".//div[contains(@class, 'font-semi-bold') and not(contains(@class, 'margin-5'))]").text.strip()
        preferred_tenant = property.find_element(By.XPATH, ".//div[contains(@class, 'font-semi-bold') and not(contains(@class, 'margin-5'))]").text.strip()
        age_of_building = property.find_element(By.XPATH, ".//div[contains(@class, 'font-semi-bold') and not(contains(@class, 'margin-5'))]").text.strip()

        # Append the data to the list
        data.append({
            "Rent": rent,
            "Location": location,
            "Apartment Type": apartment_type,
            "Area": area,
            "Deposit": deposit,
            "Furnishing": furnishing,
            "Preferred Tenant": preferred_tenant,
            "Age of Building": age_of_building
        })
    except Exception as e:
        print(f"An error occurred: {e}")
        continue

# Save the data to CSV
save_to_csv(data, 'rental_properties.csv')

# Close the browser
browser.quit()


TimeoutException: Message: 


In [2]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv

# Function to save data to CSV file
def save_to_csv(data, filename):
    if data:
        keys = data[0].keys()
        with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=keys)
            writer.writeheader()
            writer.writerows(data)
    else:
        print("No data to save.")

# Start the browser
service = Service(executable_path='/snap/bin/geckodriver')
browser = webdriver.Firefox(service=service)

# URL of the webpage to scrape
url = "https://www.nobroker.in/property/rent/mumbai/Mumbai%20Central?searchParam=W3sibGF0IjoxOC45NjkwMjQ3LCJsb24iOjcyLjgyMDUyOTIsInBsYWNlSWQiOiJDaElKTjNHeG9XN081enNSNF9YTE83R09HZjQiLCJwbGFjZU5hbWUiOiJNdW1iYWkgQ2VudHJhbCJ9XQ==&radius=2.0&sharedAccomodation=0&city=mumbai&locality=Mumbai%20Central"

# Open the webpage
browser.get(url)

# Wait for the property listings to load
wait = WebDriverWait(browser, 20)  # Increasing timeout to 20 seconds
try:
    property_listings = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'nb__2JHKO')))

    # List to hold the scraped data
    data = []

    # Extract information for each property
    for property in property_listings:
        try:
            rent = property.find_element(By.CLASS_NAME, 'nb__3CnI6').text.strip()
            location = property.find_element(By.CLASS_NAME, 'nb__35Ol7').text.strip()
            apartment_type = property.find_element(By.CLASS_NAME, 'nb__2xbus').text.strip()
            area = property.find_element(By.CLASS_NAME, 'nb__3oNyC').text.strip()
            deposit = property.find_element(By.CLASS_NAME, 'nb__2NPHR').text.strip()
            furnishing = property.find_element(By.CLASS_NAME, 'nb__2IMzv').text.strip()
            preferred_tenant = property.find_element(By.CLASS_NAME, 'nb__2dW7A').text.strip()
            age_of_building = property.find_element(By.CLASS_NAME, 'nb__2rRJH').text.strip()

            # Append the data to the list
            data.append({
                "Rent": rent,
                "Location": location,
                "Apartment Type": apartment_type,
                "Area": area,
                "Deposit": deposit,
                "Furnishing": furnishing,
                "Preferred Tenant": preferred_tenant,
                "Age of Building": age_of_building
            })
        except Exception as e:
            print(f"An error occurred: {e}")
            continue

    # Save the data to CSV
    save_to_csv(data, 'rental_properties.csv')

except TimeoutException:
    print("Timeout occurred while waiting for property listings to load")

finally:
    # Close the browser
    browser.quit()


NameError: name 'TimeoutException' is not defined

In [None]:
from selenium import webdriver
import pandas as pd
import time
import random
import progressbar


def scrape_nobroker_rentals(url_pattern, max_pages):
    titles = []
    addresses = []
    rents = []
    sizes = []
    deposits = []
    furnishings = []
    property_ages = []
    available_fors = []
    immediate_possessions = []

    bar = progressbar.ProgressBar(maxval=max_pages)
    bar.start()

    driver = webdriver.Chrome()  # Assuming you are using Chrome WebDriver, adjust if necessary
    driver.maximize_window()

    for page in range(1, max_pages + 1):
        bar.update(page)
        link = url_pattern.format(page)
        driver.get(link)

        # Wait for the page to load (adjust waiting time according to your network speed)
        time.sleep(random.uniform(2, 4))

        # Extract data from the current page
        house_containers = driver.find_elements_by_class_name("card")

        if not house_containers:
            break

        for container in house_containers:
            try:
                rent = container.find_element_by_xpath('.//h3[3]/span').text.replace(',', '')
                rents.append(int(rent))
            except:
                rents.append('-')

            try:
                size = int(container.find_element_by_xpath('.//h3[1]/span').text.replace(',', ''))
                sizes.append(size)
            except:
                sizes.append('-')

            try:
                deposit = int(container.find_element_by_xpath('.//h3[2]/span').text.replace(',', ''))
                deposits.append(deposit)
            except:
                deposits.append('-')

            titles.append(container.find_element_by_class_name('card-header-title').find_element_by_tag_name('h2').text.strip())
            addresses.append(container.find_element_by_class_name('card-header-title').find_element_by_tag_name('h5').text.strip())
            furnishing = container.find_element_by_class_name('detail-summary').find_elements_by_tag_name('h5')[0].text.strip()
            furnishings.append(furnishing)
            property_age = container.find_element_by_class_name('detail-summary').find_elements_by_tag_name('h5')[1].text.strip()
            property_ages.append(property_age)
            available_for = container.find_element_by_class_name('detail-summary').find_elements_by_tag_name('h5')[2].text.strip()
            available_fors.append(available_for)
            immediate_possession = container.find_element_by_class_name('detail-summary').find_elements_by_tag_name('h5')[3].text.strip()
            immediate_possessions.append(immediate_possession)

        time.sleep(random.uniform(1, 2))

    bar.finish()
    print("Successfully scraped {} pages containing {} properties.".format(page, len(titles)))

    driver.quit()

    return pd.DataFrame({
        'Title': titles,
        'Address': addresses,
        'Rent(Rs)': rents,
        'Deposit(Rs)': deposits,
        'Size(Acres)': sizes,
        'Furnishing': furnishings,
        'Property age': property_ages,
        'Available for': available_fors,
        'Immediate possession': immediate_possessions
    })


# Define the URL pattern and maximum number of pages to scrape
url_pattern = "https://www.nobroker.in/property/rent/chennai/Chennai/?searchParam=W3sibGF0IjoxMy4wNDM3NjEyODI5MTkyLCJsb24iOjgwLjIwMDA2ODUxNjk2


In [3]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import random
import progressbar

def scrape_nobroker_rentals(url, max_pages):
    titles = []
    addresses = []
    rents = []
    sizes = []
    deposits = []
    furnishings = []
    property_ages = []
    available_fors = []
    immediate_possessions = []

    bar = progressbar.ProgressBar(maxval=max_pages)
    bar.start()

    service = Service(executable_path='/snap/bin/geckodriver')  # Adjust geckodriver path as per your system
    browser = webdriver.Firefox(service=service)

    for page in range(1, max_pages + 1):
        bar.update(page)
        link = f"{url}&pageNo={page}"
        browser.get(link)

        time.sleep(random.uniform(2, 4))  # Add some random delay to simulate human-like behavior

        # Extract data from the current page
        house_containers = browser.find_elements(By.CLASS_NAME, "card")

        if not house_containers:
            break

        for container in house_containers:
            try:
                rent = container.find_element(By.XPATH, './/h3[3]/span').text.replace(',', '')
                rents.append(int(rent))
            except:
                rents.append('-')

            try:
                size = int(container.find_element(By.XPATH, './/h3[1]/span').text.replace(',', ''))
                sizes.append(size)
            except:
                sizes.append('-')

            try:
                deposit = int(container.find_element(By.XPATH, './/h3[2]/span').text.replace(',', ''))
                deposits.append(deposit)
            except:
                deposits.append('-')

            titles.append(container.find_element(By.CLASS_NAME, 'card-header-title').find_element(By.TAG_NAME, 'h2').text.strip())
            addresses.append(container.find_element(By.CLASS_NAME, 'card-header-title').find_element(By.TAG_NAME, 'h5').text.strip())
            furnishing = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[0].text.strip()
            furnishings.append(furnishing)
            property_age = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[1].text.strip()
            property_ages.append(property_age)
            available_for = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[2].text.strip()
            available_fors.append(available_for)
            immediate_possession = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[3].text.strip()
            immediate_possessions.append(immediate_possession)

        time.sleep(random.uniform(1, 2))

    bar.finish()
    print("Successfully scraped {} pages containing {} properties.".format(page, len(titles)))

    browser.quit()

    return pd.DataFrame({
        'Title': titles,
        'Address': addresses,
        'Rent(Rs)': rents,
        'Deposit(Rs)': deposits,
        'Size(Acres)': sizes,
        'Furnishing': furnishings,
        'Property age': property_ages,
        'Available for': available_fors,
        'Immediate possession': immediate_possessions
    })


# Define the URL and maximum number of pages to scrape
url = "https://www.nobroker.in/property/rent/mumbai/Mumbai%20Central?searchParam=W3sibGF0IjoxOC45NjkwMjQ3LCJsb24iOjcyLjgyMDUyOTIsInBsYWNlSWQiOiJDaElKTjNHeG9XN081enNSNF9YTE83R09HZjQiLCJwbGFjZU5hbWUiOiJNdW1iYWkgQ2VudHJhbCJ9XQ==&radius=2.0&sharedAccomodation=0&city=mumbai&locality=Mumbai%20Central"
max_pages = 1000

# Scraping rental properties
mumbai_rentals = scrape_nobroker_rentals(url, max_pages)

# Save data to CSV
mumbai_rentals.to_csv('mumbai_rent.csv', index=False)


ModuleNotFoundError: No module named 'progressbar'

In [4]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
import time
import random

def scrape_nobroker_rentals(url, max_pages):
    titles = []
    addresses = []
    rents = []
    sizes = []
    deposits = []
    furnishings = []
    property_ages = []
    available_fors = []
    immediate_possessions = []

    service = Service(executable_path='/snap/bin/geckodriver')
    browser = webdriver.Firefox(service=service)

    for page in range(1, max_pages + 1):
        link = f"{url}&pageNo={page}"
        browser.get(link)

        time.sleep(random.uniform(2, 4))

        # Extract data from the current page
        house_containers = browser.find_elements(By.CLASS_NAME, "card")

        if not house_containers:
            break

        for container in house_containers:
            try:
                rent = container.find_element(By.XPATH, './/h3[3]/span').text.replace(',', '')
                rents.append(int(rent))
            except:
                rents.append('-')

            try:
                size = int(container.find_element(By.XPATH, './/h3[1]/span').text.replace(',', ''))
                sizes.append(size)
            except:
                sizes.append('-')

            try:
                deposit = int(container.find_element(By.XPATH, './/h3[2]/span').text.replace(',', ''))
                deposits.append(deposit)
            except:
                deposits.append('-')

            titles.append(container.find_element(By.CLASS_NAME, 'card-header-title').find_element(By.TAG_NAME, 'h2').text.strip())
            addresses.append(container.find_element(By.CLASS_NAME, 'card-header-title').find_element(By.TAG_NAME, 'h5').text.strip())
            furnishing = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[0].text.strip()
            furnishings.append(furnishing)
            property_age = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[1].text.strip()
            property_ages.append(property_age)
            available_for = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[2].text.strip()
            available_fors.append(available_for)
            immediate_possession = container.find_element(By.CLASS_NAME, 'detail-summary').find_elements(By.TAG_NAME, 'h5')[3].text.strip()
            immediate_possessions.append(immediate_possession)

        time.sleep(random.uniform(1, 2))

    browser.quit()

    return pd.DataFrame({
        'Title': titles,
        'Address': addresses,
        'Rent(Rs)': rents,
        'Deposit(Rs)': deposits,
        'Size(Acres)': sizes,
        'Furnishing': furnishings,
        'Property age': property_ages,
        'Available for': available_fors,
        'Immediate possession': immediate_possessions
    })


# Define the URL and maximum number of pages to scrape
url = "https://www.nobroker.in/property/rent/mumbai/Mumbai%20Central?searchParam=W3sibGF0IjoxOC45NjkwMjQ3LCJsb24iOjcyLjgyMDUyOTIsInBsYWNlSWQiOiJDaElKTjNHeG9XN081enNSNF9YTE83R09HZjQiLCJwbGFjZU5hbWUiOiJNdW1iYWkgQ2VudHJhbCJ9XQ==&radius=2.0&sharedAccomodation=0&city=mumbai&locality=Mumbai%20Central"
max_pages = 1000

# Scraping rental properties
mumbai_rentals = scrape_nobroker_rentals(url, max_pages)

# Save data to CSV
mumbai_rentals.to_csv('mumbai_rent.csv', index=False)


In [6]:

from bs4 import BeautifulSoup
import requests
import pandas as pd
import time

# Creating time string to give fie name
timestr = time.strftime("%Y%m%d-%H%M%S")

# Creating empty list
BHK = []
Area = []
Latitude = []
Longitude = []
Size = []
Deposit = []
Rent = []
Type = []
Age = []
For = []
Possesion = []
Link = []


# Function to scrape
def scrape_NoBroker(n):
    print(f'Exporting {n} rows!!!')

    try:
        for page in range(int(n / 10)):

            try:
                print(f'{(page + 1) * 10} rows added!!!')

                # Requesting URL
                url = requests.get(
                    'https://www.nobroker.in/property/rent/bangalore/Bangalore/?searchParam=W3sibGF0IjoxMi45NzE1OTg3LCJsb24iOjc3LjU5NDU2MjcsInBsYWNlSWQiOiJDaElKYlU2MHlYQVdyanNSNEU5LVVlakQzX2ciLCJwbGFjZU5hbWUiOiJCYW5nYWxvcmUifV0=&sharedAccomodation=0&orderBy=nbRank,desc&radius=2&traffic=true&travelTime=30&propertyType=rent&pageNo=' + str(
                        page)).text

                # Converting from HTML tag to BeautifulSoup object
                soup = BeautifulSoup(url, 'lxml')

                # Finding all the div tag wich contains all the info
                houses = soup.find_all('div', class_='card')

                # Looping through each div tag to get individual content
                for house in houses:
                    BHK.append(house.find('a', class_='card-link-detail')['title'][:1])
                    Area_raw = house.find('a', class_='card-link-detail')['title']
                    if ',' in Area_raw:
                        Area.append(Area_raw.split(',')[-1])
                    else:
                        Area.append(Area_raw.split('in', 1)[-1])
                    Latitude.append(house.find('meta', itemprop='latitude')['content'])
                    Longitude.append(house.find('meta', itemprop='longitude')['content'])
                    Size.append(house.find_all('meta', itemprop='value')[0]['content'])
                    Deposit.append(house.find_all('meta', itemprop='value')[1]['content'])
                    Rent.append(house.find_all('meta', itemprop='value')[2]['content'])
                    Type.append(house.find_all('h5', class_="semi-bold")[0].text)
                    Age.append(house.find_all('h5', class_="semi-bold")[1].text)
                    For.append(house.find_all('h5', class_="semi-bold")[2].text.replace('\n', ''))
                    Possesion.append(house.find_all('h5', class_="semi-bold")[3].text.replace('\n', ''))
                    Link.append(house.find('a', class_='card-link-detail')['href'])
            except:
                print(f'Row number {(page + 1) * 10} failed. Trying next one!!!')
    except:
        pass

    # Creating DataFrame and storing data
    df = pd.DataFrame(list(zip(BHK, Area, Latitude, Longitude, Size, Deposit, Rent, Type, Age, For, Possesion, Link)),
                      columns=['BHK', 'Address', 'Latitude', 'Longitude', 'Size(Acres)', 'Deposit(Rs)', 'Rent(Rs)',
                               'Furnishing', 'Property Age', 'Available For', ' Immediate Possesion', 'Link'])

    # Exporting DataFrame in form of CSV file
    File_name = "House_Data_" + timestr + ".csv"
    df.to_csv(File_name, index=False)
    print("File Exported Sucessfully!!!!")

# Calling fuction to export 10000 rows
scrape_NoBroker(10)



Exporting 10 rows!!!
10 rows added!!!
Row number 10 failed. Trying next one!!!
File Exported Sucessfully!!!!


In [7]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Creating time string to give file name
timestr = time.strftime("%Y%m%d-%H%M%S")

# Function to scrape
def scrape_NoBroker(n):
    print(f'Exporting {n} rows!!!')

    BHK = []
    Area = []
    Latitude = []
    Longitude = []
    Size = []
    Deposit = []
    Rent = []
    Type = []
    Age = []
    For = []
    Possesion = []
    Link = []

    # Start the browser
    service = Service(executable_path='/snap/bin/geckodriver')
    browser = webdriver.Firefox(service=service)

    try:
        for page in range(int(n / 10)):

            try:
                print(f'{(page + 1) * 10} rows added!!!')

                # Requesting URL
                url = f'https://www.nobroker.in/property/rent/bangalore/Bangalore/?searchParam=W3sibGF0IjoxMi45NzE1OTg3LCJsb24iOjc3LjU5NDU2MjcsInBsYWNlSWQiOiJDaElKYlU2MHlYQVdyanNSNEU5LVVlakQzX2ciLCJwbGFjZU5hbWUiOiJCYW5nYWxvcmUifV0=&sharedAccomodation=0&orderBy=nbRank,desc&radius=2&traffic=true&travelTime=30&propertyType=rent&pageNo={page}'
                browser.get(url)

                # Extract data from the current page
                house_containers = browser.find_elements(By.CLASS_NAME, "card")

                for house in house_containers:
                    BHK.append(house.find_element(By.CLASS_NAME, 'card-link-detail').get_attribute('title')[:1])
                    Area_raw = house.find_element(By.CLASS_NAME, 'card-link-detail').get_attribute('title')
                    if ',' in Area_raw:
                        Area.append(Area_raw.split(',')[-1])
                    else:
                        Area.append(Area_raw.split('in', 1)[-1])
                    Latitude.append(house.find_element(By.CSS_SELECTOR, 'meta[itemprop="latitude"]').get_attribute('content'))
                    Longitude.append(house.find_element(By.CSS_SELECTOR, 'meta[itemprop="longitude"]').get_attribute('content'))
                    Size.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[0].get_attribute('content'))
                    Deposit.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[1].get_attribute('content'))
                    Rent.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[2].get_attribute('content'))
                    Type.append(house.find_elements(By.CLASS_NAME, "semi-bold")[0].text)
                    Age.append(house.find_elements(By.CLASS_NAME, "semi-bold")[1].text)
                    For.append(house.find_elements(By.CLASS_NAME, "semi-bold")[2].text.replace('\n', ''))
                    Possesion.append(house.find_elements(By.CLASS_NAME, "semi-bold")[3].text.replace('\n', ''))
                    Link.append(house.find_element(By.CLASS_NAME, 'card-link-detail').get_attribute('href'))
            except Exception as e:
                print(f'Row number {(page + 1) * 10} failed. Trying next one!!!')
                print(e)
    except Exception as e:
        print(f"Error: {e}")

    # Creating DataFrame and storing data
    df = pd.DataFrame(list(zip(BHK, Area, Latitude, Longitude, Size, Deposit, Rent, Type, Age, For, Possesion, Link)),
                      columns=['BHK', 'Address', 'Latitude', 'Longitude', 'Size(Acres)', 'Deposit(Rs)', 'Rent(Rs)',
                               'Furnishing', 'Property Age', 'Available For', ' Immediate Possesion', 'Link'])

    # Exporting DataFrame in form of CSV file
    File_name = "House_Data_" + timestr + ".csv"
    df.to_csv(File_name, index=False)
    print("File Exported Successfully!!!!")

    # Close the browser
    browser.quit()

# Calling function to export 10000 rows
scrape_NoBroker(10)


Exporting 10 rows!!!
10 rows added!!!
File Exported Successfully!!!!


In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Creating time string to give file name
timestr = time.strftime("%Y%m%d-%H%M%S")

# Function to scrape
def scrape_NoBroker(url):
    print(f'Starting scraping from URL: {url}')

    BHK = []
    Area = []
    Latitude = []
    Longitude = []
    Size = []
    Deposit = []
    Rent = []
    Type = []
    Age = []
    For = []
    Possesion = []
    Link = []

    # Start the browser
    driver = webdriver.Firefox()

    try:
        driver.get(url)
        time.sleep(5)  # Let the page load

        # Extract data from the page
        houses = driver.find_elements(By.CLASS_NAME, "card")

        for house in houses:
            BHK.append(house.find_element(By.CLASS_NAME, 'card-title').text[0])
            Area.append(house.find_element(By.CLASS_NAME, 'card-title').text.split(',')[-1])
            Latitude.append(house.find_element(By.CSS_SELECTOR, 'meta[itemprop="latitude"]').get_attribute('content'))
            Longitude.append(house.find_element(By.CSS_SELECTOR, 'meta[itemprop="longitude"]').get_attribute('content'))
            Size.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[0].get_attribute('content'))
            Deposit.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[1].get_attribute('content'))
            Rent.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[2].get_attribute('content'))
            Type.append(house.find_elements(By.CLASS_NAME, "detail-summary")[0].text)
            Age.append(house.find_elements(By.CLASS_NAME, "detail-summary")[1].text)
            For.append(house.find_elements(By.CLASS_NAME, "detail-summary")[2].text)
            Possesion.append(house.find_elements(By.CLASS_NAME, "detail-summary")[3].text)
            Link.append(house.find_element(By.CLASS_NAME, 'card-title').get_attribute('href'))
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Creating DataFrame and storing data
        df = pd.DataFrame(list(zip(BHK, Area, Latitude, Longitude, Size, Deposit, Rent, Type, Age, For, Possesion, Link)),
                          columns=['BHK', 'Address', 'Latitude', 'Longitude', 'Size(Acres)', 'Deposit(Rs)', 'Rent(Rs)',
                                   'Furnishing', 'Property Age', 'Available For', ' Immediate Possesion', 'Link'])

        # Exporting DataFrame in form of CSV file
        File_name = "House_Data_" + timestr + ".csv"
        df.to_csv(File_name, index=False)
        print("File Exported Successfully!!!!")

        # Close the browser
        driver.quit()

# URL to scrape
url = "https://www.nobroker.in/flats-for-rent-in-pune_pune"

# Call the function to scrape data
scrape_NoBroker(url)


Starting scraping from URL: https://www.nobroker.in/flats-for-rent-in-pune_pune


InvalidArgumentException: Message: binary is not a Firefox executable


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Creating time string to give file name
timestr = time.strftime("%Y%m%d-%H%M%S")

# Function to scrape
def scrape_NoBroker(url):
    print(f'Starting scraping from URL: {url}')

    BHK = []
    Area = []
    Latitude = []
    Longitude = []
    Size = []
    Deposit = []
    Rent = []
    Type = []
    Age = []
    For = []
    Possesion = []
    Link = []

    # Start the browser
    driver = webdriver.Firefox(executable_path='/path/to/geckodriver')

    try:
        driver.get(url)
        time.sleep(5)  # Let the page load

        # Extract data from the page
        houses = driver.find_elements(By.CLASS_NAME, "card")

        for house in houses:
            BHK.append(house.find_element(By.CLASS_NAME, 'card-title').text[0])
            Area.append(house.find_element(By.CLASS_NAME, 'card-title').text.split(',')[-1])
            Latitude.append(house.find_element(By.CSS_SELECTOR, 'meta[itemprop="latitude"]').get_attribute('content'))
            Longitude.append(house.find_element(By.CSS_SELECTOR, 'meta[itemprop="longitude"]').get_attribute('content'))
            Size.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[0].get_attribute('content'))
            Deposit.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[1].get_attribute('content'))
            Rent.append(house.find_elements(By.CSS_SELECTOR, 'meta[itemprop="value"]')[2].get_attribute('content'))
            Type.append(house.find_elements(By.CLASS_NAME, "detail-summary")[0].text)
            Age.append(house.find_elements(By.CLASS_NAME, "detail-summary")[1].text)
            For.append(house.find_elements(By.CLASS_NAME, "detail-summary")[2].text)
            Possesion.append(house.find_elements(By.CLASS_NAME, "detail-summary")[3].text)
            Link.append(house.find_element(By.CLASS_NAME, 'card-title').get_attribute('href'))
    except Exception as e:
        print(f"Error: {e}")
    finally:
        # Creating DataFrame and storing data
        df = pd.DataFrame(list(zip(BHK, Area, Latitude, Longitude, Size, Deposit, Rent, Type, Age, For, Possesion, Link)),
                          columns=['BHK', 'Address', 'Latitude', 'Longitude', 'Size(Acres)', 'Deposit(Rs)', 'Rent(Rs)',
                                   'Furnishing', 'Property Age', 'Available For', ' Immediate Possesion', 'Link'])

        # Exporting DataFrame in form of CSV file
        File_name = "House_Data_" + timestr + ".csv"
        df.to_csv(File_name, index=False)
        print("File Exported Successfully!!!!")

        # Close the browser
        driver.quit()

# URL to scrape
url = "https://www.nobroker.in/flats-for-rent-in-pune_pune"

# Call the function to scrape data
scrape_NoBroker(url)


Starting scraping from URL: https://www.nobroker.in/flats-for-rent-in-pune_pune


TypeError: WebDriver.__init__() got an unexpected keyword argument 'executable_path'

Starting scraping from URL: https://www.nobroker.in/flats-for-rent-in-pune_pune


NoSuchDriverException: Message: Unable to obtain driver for firefox; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location
