In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [25]:
# Module-level list, created empty; nothing in the visible cells reads or appends to it.
properties = list()

def get_address(soup):
    """Split a listing's map-title address into its component fields.

    The address is read from the first ``<p>`` element carrying the class
    ``styles-module_map__title__M2mBC`` and is expected to be a comma-separated
    string such as
    ``"Standpoint Tower 1, Standpoint Towers, Downtown Dubai, Dubai"``.

    Segments are interpreted from both ends so that extra segments do not
    shift the city out of place:

    * first segment            -> ``property_name``
    * last segment (2+ parts)  -> ``property_city``
    * second-to-last (3+)      -> ``location``
    * everything in between    -> ``property_locality`` (joined with ", ")

    Empty segments (e.g. from a trailing or doubled comma) are ignored.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        dict with the keys ``property_name``, ``property_locality``,
        ``location`` and ``property_city``. Fields that cannot be determined
        are ``""``, except ``property_city`` which defaults to ``"Dubai"``.
    """
    # Single source of truth for the fallback values.
    property_data = {
        "property_name": "",
        "property_locality": "",
        "location": "",
        "property_city": "Dubai"  # Default
    }

    try:
        address = soup.find('p', attrs={'class': 'styles-module_map__title__M2mBC'})
        address_string = address.text.strip()
    except AttributeError:
        # find() returned None (element missing) or soup is not searchable.
        return property_data

    # Drop blank segments so "A, B, Dubai," is treated like "A, B, Dubai".
    parts = [part.strip() for part in address_string.split(',')]
    parts = [part for part in parts if part]
    if not parts:
        return property_data

    property_data["property_name"] = parts[0]
    if len(parts) >= 2:
        property_data["property_city"] = parts[-1]
    if len(parts) >= 3:
        property_data["location"] = parts[-2]
    if len(parts) >= 4:
        # With exactly four parts this is just parts[1]; longer addresses keep
        # every intermediate segment instead of silently discarding them.
        property_data["property_locality"] = ", ".join(parts[1:-2])

    return property_data

def get_price(soup):
    """Return the listing's price text with surrounding whitespace removed.

    Looks up the first ``<span>`` whose ``data-testid`` is
    ``property-price-value`` and whose class is
    ``styles_desktop_price__value__JLKWF``. Returns ``""`` when the lookup
    raises ``AttributeError`` (for example because ``find`` returned ``None``).
    """
    wanted = {
        'data-testid': 'property-price-value',
        'class': 'styles_desktop_price__value__JLKWF',
    }
    try:
        node = soup.find('span', attrs=wanted)
        return node.text.strip()
    except AttributeError:
        return ""

def get_feature(soup):
    """Return the listing headline text with surrounding whitespace removed.

    The headline is the first ``<h1>`` with class
    ``styles_desktop_title__j0uNx``. Returns ``""`` when the lookup raises
    ``AttributeError`` (for example because ``find`` returned ``None``).
    """
    wanted = {'class': 'styles_desktop_title__j0uNx'}
    try:
        node = soup.find('h1', attrs=wanted)
        return node.text.strip()
    except AttributeError:
        return ""

def get_property_type(soup):
    """Return the listing's property-type text with whitespace stripped.

    Reads the first ``<p>`` whose ``data-testid`` is ``property-details-type``
    and whose class is ``styles_desktop_list__value__uIdMl``. Returns ``""``
    when the lookup raises ``AttributeError``.
    """
    wanted = {
        'data-testid': 'property-details-type',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        node = soup.find('p', attrs=wanted)
        return node.text.strip()
    except AttributeError:
        return ""

def get_bedrooms(soup):
    """Return the listing's bedrooms text with whitespace stripped.

    Reads the first ``<p>`` whose ``data-testid`` is
    ``property-details-bedrooms`` and whose class is
    ``styles_desktop_list__value__uIdMl``. The value is returned as text, not
    converted to a number. Returns ``""`` when the lookup raises
    ``AttributeError``.
    """
    wanted = {
        'data-testid': 'property-details-bedrooms',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        node = soup.find('p', attrs=wanted)
        return node.text.strip()
    except AttributeError:
        return ""

def get_bathrooms(soup):
    """Return the listing's bathrooms text with whitespace stripped.

    Reads the first ``<p>`` whose ``data-testid`` is
    ``property-details-bathrooms`` and whose class is
    ``styles_desktop_list__value__uIdMl``. The value is returned as text, not
    converted to a number. Returns ``""`` when the lookup raises
    ``AttributeError``.
    """
    wanted = {
        'data-testid': 'property-details-bathrooms',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        node = soup.find('p', attrs=wanted)
        return node.text.strip()
    except AttributeError:
        return ""

def get_property_size(soup):
    """Return the listing's size text, unparsed, with whitespace stripped.

    Reads the first ``<p>`` whose ``data-testid`` is ``property-details-size``
    and whose class is ``styles_desktop_list__value__uIdMl``. Returns ``""``
    when the lookup raises ``AttributeError``.
    """
    wanted = {
        'data-testid': 'property-details-size',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        node = soup.find('p', attrs=wanted)
        return node.text.strip()
    except AttributeError:
        return ""


In [26]:
if __name__ == '__main__':
    # Scrape the first search-results page: collect the property links, visit
    # each one, extract its fields and write everything to a CSV file.

    # Browser-like request headers sent with every request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    # Seconds to wait for a response. Without a timeout, requests.get can
    # block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    # Webpage URL
    URL = "https://www.propertyfinder.ae/en/search?l=1&c=1&fu=0&ob=mr"

    # HTTP Request; fail loudly if the search page itself cannot be fetched,
    # since there is nothing to scrape without it.
    webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    webpage.raise_for_status()

    # Soup Object containing all data
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Fetch links as List of Tag Objects
    links = soup.find_all("a", attrs={'class':'property-card-module_property-card__link__L6AKb'})

    # Store the links as absolute URLs
    links_list = []
    for link in links:
        href = link.get('href')
        if not href:
            # Anchor without an href: nothing to request.
            continue
        if href.startswith('/'):
            # Site-relative link; requests needs a full URL with a scheme.
            href = f"https://www.propertyfinder.ae{href}"
        links_list.append(href)

    # Initialize dictionary for data (one list per output column)
    d = {
        "property_name": [],
        "property_locality": [],
        "location": [],
        "property_city": [],
        "price": [],
        "feature": [],
        "property_type": [],
        "property_size": [],
        "bedrooms": [],
        "bathrooms": []
    }

    # Loop for extracting product details from each link
    for link in links_list:
        try:
            new_webpage = requests.get(link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            # Skip HTTP error pages instead of recording them as blank rows.
            new_webpage.raise_for_status()
        except requests.RequestException as e:
            # One bad listing should not abort the whole run.
            print(f"Error fetching property page {link}: {e}")
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Get address fields
        address_data = get_address(new_soup)

        # Append data to dictionary
        d['property_name'].append(address_data['property_name'])
        d['property_locality'].append(address_data['property_locality'])
        d['location'].append(address_data['location'])
        d['property_city'].append(address_data['property_city'])
        d['price'].append(get_price(new_soup))
        d['feature'].append(get_feature(new_soup))
        d['property_type'].append(get_property_type(new_soup))
        d['bedrooms'].append(get_bedrooms(new_soup))
        d['bathrooms'].append(get_bathrooms(new_soup))
        d['property_size'].append(get_property_size(new_soup))

    # Create DataFrame and save to CSV
    property_df = pd.DataFrame.from_dict(d)
    property_df.to_csv("propertyfinder_data.csv", header=True, index=False)
    print("Data saved to propertyfinder_data.csv")

Data saved to propertyfinder_data.csv


In [28]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time
import re

def get_address(soup):
    """Split a listing's map-title address into its component fields.

    The address is read from the first ``<p>`` element carrying the class
    ``styles-module_map__title__M2mBC`` and is expected to be a comma-separated
    string such as
    ``"Standpoint Tower 1, Standpoint Towers, Downtown Dubai, Dubai"``.

    Segments are interpreted from both ends so that extra segments do not
    shift the city out of place:

    * first segment            -> ``property_name``
    * last segment (2+ parts)  -> ``property_city``
    * second-to-last (3+)      -> ``location``
    * everything in between    -> ``property_locality`` (joined with ", ")

    Empty segments (e.g. from a trailing or doubled comma) are ignored.

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        dict with the keys ``property_name``, ``property_locality``,
        ``location`` and ``property_city``. Fields that cannot be determined
        are ``""``, except ``property_city`` which defaults to ``"Dubai"``.
    """
    # Single source of truth for the fallback values.
    property_data = {
        "property_name": "",
        "property_locality": "",
        "location": "",
        "property_city": "Dubai"  # Default
    }

    try:
        address = soup.find('p', attrs={'class': 'styles-module_map__title__M2mBC'})
        address_string = address.text.strip()
    except AttributeError:
        # find() returned None (element missing) or soup is not searchable.
        return property_data

    # Drop blank segments so "A, B, Dubai," is treated like "A, B, Dubai".
    parts = [part.strip() for part in address_string.split(',')]
    parts = [part for part in parts if part]
    if not parts:
        return property_data

    property_data["property_name"] = parts[0]
    if len(parts) >= 2:
        property_data["property_city"] = parts[-1]
    if len(parts) >= 3:
        property_data["location"] = parts[-2]
    if len(parts) >= 4:
        # With exactly four parts this is just parts[1]; longer addresses keep
        # every intermediate segment instead of silently discarding them.
        property_data["property_locality"] = ", ".join(parts[1:-2])

    return property_data

def get_price(soup):
    """Extract the price shown on a property page as stripped text.

    Searches for the first ``<span>`` matching both
    ``data-testid="property-price-value"`` and class
    ``styles_desktop_price__value__JLKWF``. An ``AttributeError`` during the
    lookup (e.g. no matching element) yields ``""``.
    """
    criteria = {
        'data-testid': 'property-price-value',
        'class': 'styles_desktop_price__value__JLKWF',
    }
    try:
        element = soup.find('span', attrs=criteria)
        text = element.text.strip()
    except AttributeError:
        text = ""
    return text

def get_feature(soup):
    """Extract the page headline as stripped text.

    Searches for the first ``<h1>`` with class ``styles_desktop_title__j0uNx``.
    An ``AttributeError`` during the lookup (e.g. no matching element) yields
    ``""``.
    """
    criteria = {'class': 'styles_desktop_title__j0uNx'}
    try:
        element = soup.find('h1', attrs=criteria)
        text = element.text.strip()
    except AttributeError:
        text = ""
    return text

def get_property_type(soup):
    """Extract the property type as stripped text.

    Searches for the first ``<p>`` matching both
    ``data-testid="property-details-type"`` and class
    ``styles_desktop_list__value__uIdMl``. An ``AttributeError`` during the
    lookup (e.g. no matching element) yields ``""``.
    """
    criteria = {
        'data-testid': 'property-details-type',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        element = soup.find('p', attrs=criteria)
        text = element.text.strip()
    except AttributeError:
        text = ""
    return text

def get_bedrooms(soup):
    """Extract the bedrooms value as stripped text (no numeric conversion).

    Searches for the first ``<p>`` matching both
    ``data-testid="property-details-bedrooms"`` and class
    ``styles_desktop_list__value__uIdMl``. An ``AttributeError`` during the
    lookup (e.g. no matching element) yields ``""``.
    """
    criteria = {
        'data-testid': 'property-details-bedrooms',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        element = soup.find('p', attrs=criteria)
        text = element.text.strip()
    except AttributeError:
        text = ""
    return text

def get_bathrooms(soup):
    """Extract the bathrooms value as stripped text (no numeric conversion).

    Searches for the first ``<p>`` matching both
    ``data-testid="property-details-bathrooms"`` and class
    ``styles_desktop_list__value__uIdMl``. An ``AttributeError`` during the
    lookup (e.g. no matching element) yields ``""``.
    """
    criteria = {
        'data-testid': 'property-details-bathrooms',
        'class': 'styles_desktop_list__value__uIdMl',
    }
    try:
        element = soup.find('p', attrs=criteria)
        text = element.text.strip()
    except AttributeError:
        text = ""
    return text

def get_property_size(soup):
    """Parse the listing's size text into separate sqft and sqm strings.

    The size is read from the first ``<p>`` whose ``data-testid`` is
    ``property-details-size`` and whose class is
    ``styles_desktop_list__value__uIdMl``. The text is split on ``/``: the
    first number found before the slash is taken as square feet and the first
    number found in the segment after it as square metres, e.g.
    ``"1,200 sqft / 111 sqm"``. When there is no ``/``, only sqft is filled.

    Thousands separators are removed and a decimal fraction, if present, is
    kept (``"1,234.5"`` -> ``"1234.5"``).

    Args:
        soup: Object exposing a BeautifulSoup-style ``find(name, attrs=...)``.

    Returns:
        dict with keys ``property_size_in_sqft`` and ``property_size_in_sqm``;
        each value is a digit string or ``""`` when no number was found or the
        element is missing.
    """
    def _first_number(text):
        # Must start on a digit so a stray leading comma is not mistaken for a
        # number; the optional group keeps decimals instead of truncating them.
        match = re.search(r'\d[\d,]*(?:\.\d+)?', text)
        return match.group().replace(',', '') if match else ""

    try:
        property_size = soup.find('p', attrs={'data-testid': 'property-details-size', 'class':'styles_desktop_list__value__uIdMl'}).text.strip()
    except AttributeError:
        # Element not found (find() returned None) or soup is not searchable.
        return {"property_size_in_sqft": "", "property_size_in_sqm": ""}

    parts = property_size.split('/')
    sqft = _first_number(parts[0])
    # Only the segment directly after the first '/' is considered for sqm.
    sqm = _first_number(parts[1]) if len(parts) > 1 else ""

    return {"property_size_in_sqft": sqft, "property_size_in_sqm": sqm}

if __name__ == '__main__':
    # Scrape search-result pages 1-10: for each page collect the property
    # links, visit each one, extract its fields, and finally write all rows to
    # a CSV file. Failed requests are reported and skipped.

    # Browser-like request headers sent with every request.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }

    # Seconds to wait for a response. Without a timeout, requests.get can
    # block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    # Initialize dictionary for data (one list per output column)
    d = {
        "property_name": [],
        "property_locality": [],
        "location": [],
        "property_city": [],
        "price": [],
        "feature": [],
        "property_type": [],
        "property_size_in_sqft": [],
        "property_size_in_sqm": [],
        "bedrooms": [],
        "bathrooms": []
    }

    # Loop through all 10 pages
    for page in range(1, 11):  # Pages 1 to 10
        # Webpage URL with page parameter
        URL = f"https://www.propertyfinder.ae/en/search?l=1&c=1&fu=0&ob=mr&page={page}"
        print(f"Scraping page {page}...")

        try:
            # HTTP Request
            webpage = requests.get(URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            webpage.raise_for_status()  # Check for request errors

            # Soup Object containing all data
            soup = BeautifulSoup(webpage.content, "html.parser")

            # Fetch links as List of Tag Objects
            links = soup.find_all("a", attrs={'class':'property-card-module_property-card__link__L6AKb'})

            # Store the links as absolute URLs
            links_list = []
            for link in links:
                href = link.get('href')
                if not href:
                    # Anchor without an href: nothing to request, and calling
                    # .startswith on None would raise.
                    continue
                # Ensure the link is complete
                if href.startswith('/'):
                    href = f"https://www.propertyfinder.ae{href}"
                links_list.append(href)

            # Loop for extracting product details from each link
            for link in links_list:
                try:
                    new_webpage = requests.get(link, headers=HEADERS, timeout=REQUEST_TIMEOUT)
                    new_webpage.raise_for_status()
                    new_soup = BeautifulSoup(new_webpage.content, "html.parser")

                    # Get address fields
                    address_data = get_address(new_soup)

                    # Get property size fields
                    size_data = get_property_size(new_soup)

                    # Build the complete row first so that every column
                    # receives exactly one value per property; appending
                    # piecemeal could leave ragged columns if an extractor
                    # raised part-way through.
                    row = {
                        'property_name': address_data['property_name'],
                        'property_locality': address_data['property_locality'],
                        'location': address_data['location'],
                        'property_city': address_data['property_city'],
                        'price': get_price(new_soup),
                        'feature': get_feature(new_soup),
                        'property_type': get_property_type(new_soup),
                        'property_size_in_sqft': size_data['property_size_in_sqft'],
                        'property_size_in_sqm': size_data['property_size_in_sqm'],
                        'bedrooms': get_bedrooms(new_soup),
                        'bathrooms': get_bathrooms(new_soup),
                    }
                    for column, value in row.items():
                        d[column].append(value)

                except requests.RequestException as e:
                    print(f"Error fetching property page {link}: {e}")

                # Polite delay between property requests. Deliberately outside
                # the try block so it also runs after a failed request rather
                # than immediately hitting a server that is already erroring.
                time.sleep(1)

        except requests.RequestException as e:
            print(f"Error fetching page {page}: {e}")

        # Polite delay between page requests; applied after failures too.
        time.sleep(2)

    # Create DataFrame and save to CSV
    property_df = pd.DataFrame.from_dict(d)
    property_df.to_csv("propertyfinder_data_size.csv", header=True, index=False)
    print("Data saved to propertyfinder_data_size.csv")

Scraping page 1...
Error fetching property page https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-jumeirah-village-circle-five-at-jumeirah-village-circle-13932767.html: 500 Server Error: Internal Server Error for url: https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-jumeirah-village-circle-five-at-jumeirah-village-circle-13932767.html
Scraping page 2...
Error fetching property page https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-marina-damac-heights-13906019.html: 500 Server Error: Internal Server Error for url: https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-dubai-marina-damac-heights-13906019.html
Error fetching property page https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-jumeirah-village-circle-tower-108-13896575.html: 500 Server Error: Internal Server Error for url: https://www.propertyfinder.ae/en/plp/buy/apartment-for-sale-dubai-jumeirah-village-circle-tower-108-13896575.html
Error fetch