In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

from tqdm.notebook import tqdm

In [2]:
def get_property_data(url):
    try:
        response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
        text = response.text
        soup = BeautifulSoup(text, 'lxml')

        price_list = soup.find_all("span", class_='f343d9ce')
        add_small_list = soup.find_all('div', class_='_7afabd84')
        type_list = soup.find_all('div', class_='_9a4e3964')
        desc_list = soup.find_all('h2', class_='_7f17f34f')
        beds_list = soup.find_all('span', class_='b6a29bc0') # 72 instances
        href_list = soup.find_all('a', class_='_287661cb')

        prices = []
        for price_s in price_list:
            price = float(str(price_s).split(' ')[2].replace('class="f343d9ce">', '').replace('</span>', '').replace(',', ''))
            prices.append(price)

        blocks = []
        areas = []
        districts = []

        for loc in add_small_list:
            block, area, district = str(loc).split('>')[1].replace('</div', '').strip().split(',')
            block = block.strip()
            area = area.strip()
            district = district.strip()

            blocks.append(block)
            areas.append(area)
            districts.append(district)

        types = []

        for type in type_list:
            type = str(type).split('>')[1].replace('</div', '').strip()

            types.append(type)

        descriptions = []

        for desc in desc_list:
            desc = str(desc).split('>')[1].replace('</h2', '').strip()

            descriptions.append(desc)

        hrefs = []

        for href in href_list:
            link = str(href).split('"')[5]
            hrefs.append(link)

        i = 0
        beds = []
        while i < len(beds_list):
            bed = str(beds_list[i]).replace('<span aria-label="Beds" class="b6a29bc0">', '').replace('</span>', '')
            beds.append(bed)

            i += 3

        i = 1
        baths = []
        while i < len(beds_list):
            bath = str(beds_list[i]).replace('<span aria-label="Baths" class="b6a29bc0">', '').replace('</span>', '')
            baths.append(bath)

            i += 3

        i = 2
        dims = []
        while i < len(beds_list):
            dim = str(beds_list[i]).replace('<span aria-label="Area" class="b6a29bc0">', '').replace('</span>', '').replace('<span>', '')
            dims.append(dim)

            i += 3


        df = pd.DataFrame(list(zip(dims, beds, baths, types, districts, areas, 
                                   blocks, descriptions, hrefs, prices)), 
                          columns=['dimensions', 'num_beds', 'num_baths', 'house_type', 'city', 
                                   'location', 'area_block', 'description', 'url', 'price'])

        return df
    except:
        print("Error: ", url)

In [3]:
dfs = []

for i in tqdm(range(335)):
    if i == 0:
        continue
    if i == 1:
        url = 'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-mirpur-2/?occupancy_status=vacant'
    else:
        url = f'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-mirpur-2/page-{i}/?occupancy_status=vacant'
    
    dfs.append(get_property_data(url))

  0%|          | 0/335 [00:00<?, ?it/s]

Error:  https://www.bproperty.com/en/dhaka/apartments-for-rent-in-mirpur-2/page-282/?occupancy_status=vacant


In [4]:
df = pd.concat(dfs)

In [5]:
df.head()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
0,500 sqft,2,1,Apartment,Dhaka,Mirpur,Middle Paikpara,"500 Sq Ft Flat For Rent In Mirpur, Middle Paik...",/en/property/details-4551653.html,6500.0
1,600 sqft,2,1,Apartment,Dhaka,Mirpur,West Kazipara,Feel The Satisfactory Environment Of This Area...,/en/property/details-4807464.html,9000.0
2,650 sqft,2,1,Apartment,Dhaka,Mirpur,West Kazipara,Live With Pleasure In This 650 Sq Ft Nice Flat...,/en/property/details-4807467.html,10000.0
3,700 sqft,2,2,Apartment,Dhaka,Mirpur,Pirerbag,Accomplish Your Goal Of Renting A Cozy 700 Sq ...,/en/property/details-4958768.html,10000.0
4,900 sqft,2,2,Apartment,Dhaka,Mirpur,West Kazipara,"Relish Your Days In This Cozy 900 Sq Ft Flat, ...",/en/property/details-5316841.html,15000.0


In [6]:
df.tail()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
10,650 sqft,2,2,Apartment,Dhaka,Mirpur,Section 6,Sophisticated 650 SQ FT flat is available for ...,/en/property/details-1706925.html,10000.0
11,700 sqft,2,2,Apartment,Dhaka,Mirpur,Section 11,A Noteworthy And Well Planned 700 Sq Ft Reside...,/en/property/details-1700683.html,12000.0
12,800 sqft,2,2,Apartment,Dhaka,Mirpur,Rupnagar R/A,A 800 Sq Ft Household Is Available At Rupnagar...,/en/property/details-1694573.html,18000.0
13,650 sqft,2,2,Apartment,Dhaka,Mirpur,Section 12,"In Mirpur, Section 12 a 650 SQ FT residential ...",/en/property/details-1689394.html,11000.0
14,800 sqft,2,2,Apartment,Dhaka,Mirpur,West Shewrapara,800 Sq Ft Residential Apartment Is Set For Ren...,/en/property/details-1681183.html,10000.0


In [7]:
df.shape

(7935, 10)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7935 entries, 0 to 14
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   dimensions   7935 non-null   object 
 1   num_beds     7935 non-null   object 
 2   num_baths    7935 non-null   object 
 3   house_type   7935 non-null   object 
 4   city         7935 non-null   object 
 5   location     7935 non-null   object 
 6   area_block   7935 non-null   object 
 7   description  7935 non-null   object 
 8   url          7935 non-null   object 
 9   price        7935 non-null   float64
dtypes: float64(1), object(9)
memory usage: 681.9+ KB


In [10]:
df.to_csv('bproperty_mirpur.csv', index=False)