In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

from tqdm.notebook import tqdm

In [2]:
def get_property_data(url):
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    text = response.text
    soup = BeautifulSoup(text, 'lxml')

    price_list = soup.find_all("span", class_='f343d9ce')
    add_small_list = soup.find_all('div', class_='_7afabd84')
    type_list = soup.find_all('div', class_='_9a4e3964')
    desc_list = soup.find_all('h2', class_='_7f17f34f')
    beds_list = soup.find_all('span', class_='b6a29bc0') # 72 instances
    href_list = soup.find_all('a', class_='_287661cb')

    prices = []
    for price_s in price_list:
        price = float(str(price_s).split(' ')[2].replace('class="f343d9ce">', '').replace('</span>', '').replace(',', ''))
        prices.append(price)

    blocks = []
    areas = []
    districts = []

    for loc in add_small_list:
        block, area, district = str(loc).split('>')[1].replace('</div', '').strip().split(',')
        block = block.strip()
        area = area.strip()
        district = district.strip()

        blocks.append(block)
        areas.append(area)
        districts.append(district)

    types = []

    for type in type_list:
        type = str(type).split('>')[1].replace('</div', '').strip()

        types.append(type)

    descriptions = []

    for desc in desc_list:
        desc = str(desc).split('>')[1].replace('</h2', '').strip()

        descriptions.append(desc)

    hrefs = []

    for href in href_list:
        link = str(href).split('"')[5]
        hrefs.append(link)

    i = 0
    beds = []
    while i < len(beds_list):
        bed = str(beds_list[i]).replace('<span aria-label="Beds" class="b6a29bc0">', '').replace('</span>', '')
        beds.append(bed)

        i += 3

    i = 1
    baths = []
    while i < len(beds_list):
        bath = str(beds_list[i]).replace('<span aria-label="Baths" class="b6a29bc0">', '').replace('</span>', '')
        baths.append(bath)

        i += 3

    i = 2
    dims = []
    while i < len(beds_list):
        dim = str(beds_list[i]).replace('<span aria-label="Area" class="b6a29bc0">', '').replace('</span>', '').replace('<span>', '')
        dims.append(dim)

        i += 3


    df = pd.DataFrame(list(zip(dims, beds, baths, types, districts, areas, 
                               blocks, descriptions, hrefs, prices)), 
                      columns=['dimensions', 'num_beds', 'num_baths', 'house_type', 'city', 
                               'location', 'area_block', 'description', 'url', 'price'])
    
    return df

In [9]:
dfs = []

for i in tqdm(range(70)):
    if i == 0:
        continue
    if i == 1:
        url = 'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-bashundhara-r-a/?occupancy_status=vacant'
    else:
        url = f'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-bashundhara-r-a/page-{i}/?occupancy_status=vacant'
    
    dfs.append(get_property_data(url))

  0%|          | 0/70 [00:00<?, ?it/s]

In [10]:
df = pd.concat(dfs)

In [11]:
df.head()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
0,"2,055 sqft",4,4,Apartment,Dhaka,Bashundhara R-A,Block D,2055 Sq Ft Cozy Flat For Rent In Bashundhara R...,/en/property/details-5237946.html,35000.0
1,"1,911 sqft",3,3,Apartment,Dhaka,Bashundhara R-A,Block A,Emanate Your Knack For Gardening By Renting Th...,/en/property/details-3875341.html,45000.0
2,"1,200 sqft",3,3,Apartment,Dhaka,Bashundhara R-A,Block K,"To Secure Your Better State Of Living, Conside...",/en/property/details-5182996.html,20000.0
3,"1,910 sqft",3,3,Apartment,Dhaka,Bashundhara R-A,Block A,For Rental Purpose 1910 Sq Ft Commendable Desi...,/en/property/details-3875358.html,45000.0
4,800 sqft,2,2,Apartment,Dhaka,Bashundhara R-A,Block G,Bright And Cozy Apartment Featuring 800 Sq Ft ...,/en/property/details-3651253.html,18000.0


In [12]:
df.shape

(1534, 10)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1534 entries, 0 to 22
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   dimensions   1534 non-null   object 
 1   num_beds     1534 non-null   object 
 2   num_baths    1534 non-null   object 
 3   house_type   1534 non-null   object 
 4   city         1534 non-null   object 
 5   location     1534 non-null   object 
 6   area_block   1534 non-null   object 
 7   description  1534 non-null   object 
 8   url          1534 non-null   object 
 9   price        1534 non-null   float64
dtypes: float64(1), object(9)
memory usage: 131.8+ KB


In [8]:
# df.to_csv('bproperty_bashundhara_RA.csv', index=False)