In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

from tqdm.notebook import tqdm

In [2]:
def get_property_data(url):
    try:
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        text = response.text
        soup = BeautifulSoup(text, 'lxml')

        price_list = soup.find_all("span", class_='f343d9ce')
        add_small_list = soup.find_all('div', class_='_7afabd84')
        type_list = soup.find_all('div', class_='_9a4e3964')
        desc_list = soup.find_all('h2', class_='_7f17f34f')
        beds_list = soup.find_all('span', class_='b6a29bc0') # 72 instances
        href_list = soup.find_all('a', class_='_287661cb')

        prices = []
        for price_s in price_list:
            price = float(str(price_s).split(' ')[2].replace('class="f343d9ce">', '').replace('</span>', '').replace(',', ''))
            prices.append(price)

        blocks = []
        areas = []
        districts = []

        for loc in add_small_list:
            block, area, district = str(loc).split('>')[1].replace('</div', '').strip().split(',')
            block = block.strip()
            area = area.strip()
            district = district.strip()

            blocks.append(block)
            areas.append(area)
            districts.append(district)

        types = []

        for type in type_list:
            type = str(type).split('>')[1].replace('</div', '').strip()

            types.append(type)

        descriptions = []

        for desc in desc_list:
            desc = str(desc).split('>')[1].replace('</h2', '').strip()

            descriptions.append(desc)

        hrefs = []

        for href in href_list:
            link = str(href).split('"')[5]
            hrefs.append(link)

        i = 0
        beds = []
        while i < len(beds_list):
            bed = str(beds_list[i]).replace('<span aria-label="Beds" class="b6a29bc0">', '').replace('</span>', '')
            beds.append(bed)

            i += 3

        i = 1
        baths = []
        while i < len(beds_list):
            bath = str(beds_list[i]).replace('<span aria-label="Baths" class="b6a29bc0">', '').replace('</span>', '')
            baths.append(bath)

            i += 3

        i = 2
        dims = []
        while i < len(beds_list):
            dim = str(beds_list[i]).replace('<span aria-label="Area" class="b6a29bc0">', '').replace('</span>', '').replace('<span>', '')
            dims.append(dim)

            i += 3


        df = pd.DataFrame(list(zip(dims, beds, baths, types, districts, areas, 
                                   blocks, descriptions, hrefs, prices)), 
                          columns=['dimensions', 'num_beds', 'num_baths', 'house_type', 'city', 
                                   'location', 'area_block', 'description', 'url', 'price'])

        return df
    except:
        print("Error: ", url)

In [3]:
dfs = []

for i in tqdm(range(45)):
    if i == 0:
        continue
    if i == 1:
        url = 'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-dhanmondi/?occupancy_status=vacant'
    else:
        url = f'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-dhanmondi/page-{i}/?occupancy_status=vacant'
    
    dfs.append(get_property_data(url))

  0%|          | 0/45 [00:00<?, ?it/s]

Error:  https://www.bproperty.com/en/dhaka/apartments-for-rent-in-dhanmondi/page-37/?occupancy_status=vacant


In [4]:
df = pd.concat(dfs)

In [5]:
df.head()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
0,"1,900 sqft",3,3,Apartment,Dhaka,Dhanmondi,Road No 8,1900 Sq Ft Flat Is Up For Rent With All Facili...,/en/property/details-3011148.html,50000.0
1,800 sqft,2,2,Apartment,Dhaka,Dhanmondi,West Dhanmondi and Shangkar,Check This 800 Sq. Ft Apartment Up For Rent Ve...,/en/property/details-5387388.html,16000.0
2,800 sqft,2,2,Apartment,Dhaka,Dhanmondi,West Dhanmondi and Shangkar,Very Close To West Dhanmondi Yousuf High Schoo...,/en/property/details-5387397.html,16000.0
3,800 sqft,2,2,Apartment,Dhaka,Dhanmondi,West Dhanmondi and Shangkar,Be the occupant of this 800 SQ FT residential ...,/en/property/details-5387390.html,16000.0
4,800 sqft,2,2,Apartment,Dhaka,Dhanmondi,West Dhanmondi and Shangkar,Grab This 800 Square Feet Decent House Up For ...,/en/property/details-5387392.html,16000.0


In [6]:
df.tail()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
16,"1,907 sqft",3,3,Apartment,Dhaka,Dhanmondi,Road No 4,Sophisticated 1907 Sq Ft Flat Is Available For...,/en/property/details-1722843.html,60000.0
17,"1,480 sqft",3,3,Apartment,Dhaka,Dhanmondi,Road No 8,A rightly planned 1480 SQ FT residence is foun...,/en/property/details-1719553.html,30000.0
18,"1,850 sqft",3,3,Apartment,Dhaka,Dhanmondi,Satmasjid Road,"Picture yourself, residing in this well constr...",/en/property/details-1716479.html,45000.0
19,"1,200 sqft",3,3,Apartment,Dhaka,Dhanmondi,Road No 8A,A structurally well set commercial space of 12...,/en/property/details-1713231.html,26000.0
20,"1,400 sqft",3,4,Apartment,Dhaka,Dhanmondi,Road No 8A,"Comfortable, convenient and well constructed 1...",/en/property/details-1712904.html,26000.0


In [7]:
df.shape

(981, 10)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 981 entries, 0 to 20
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   dimensions   981 non-null    object 
 1   num_beds     981 non-null    object 
 2   num_baths    981 non-null    object 
 3   house_type   981 non-null    object 
 4   city         981 non-null    object 
 5   location     981 non-null    object 
 6   area_block   981 non-null    object 
 7   description  981 non-null    object 
 8   url          981 non-null    object 
 9   price        981 non-null    float64
dtypes: float64(1), object(9)
memory usage: 84.3+ KB


In [10]:
# df.to_csv('bproperty_dhanmondi.csv', index=False)