In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

from tqdm.notebook import tqdm

In [2]:
def get_property_data(url):
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
    text = response.text
    soup = BeautifulSoup(text, 'lxml')

    price_list = soup.find_all("span", class_='f343d9ce')
    add_small_list = soup.find_all('div', class_='_7afabd84')
    type_list = soup.find_all('div', class_='_9a4e3964')
    desc_list = soup.find_all('h2', class_='_7f17f34f')
    beds_list = soup.find_all('span', class_='b6a29bc0') # 72 instances
    href_list = soup.find_all('a', class_='_287661cb')

    prices = []
    for price_s in price_list:
        price = float(str(price_s).split(' ')[2].replace('class="f343d9ce">', '').replace('</span>', '').replace(',', ''))
        prices.append(price)

    blocks = []
    areas = []
    districts = []

    for loc in add_small_list:
        block, area, district = str(loc).split('>')[1].replace('</div', '').strip().split(',')
        block = block.strip()
        area = area.strip()
        district = district.strip()

        blocks.append(block)
        areas.append(area)
        districts.append(district)

    types = []

    for type in type_list:
        type = str(type).split('>')[1].replace('</div', '').strip()

        types.append(type)

    descriptions = []

    for desc in desc_list:
        desc = str(desc).split('>')[1].replace('</h2', '').strip()

        descriptions.append(desc)

    hrefs = []

    for href in href_list:
        link = str(href).split('"')[5]
        hrefs.append(link)

    i = 0
    beds = []
    while i < len(beds_list):
        bed = str(beds_list[i]).replace('<span aria-label="Beds" class="b6a29bc0">', '').replace('</span>', '')
        beds.append(bed)

        i += 3

    i = 1
    baths = []
    while i < len(beds_list):
        bath = str(beds_list[i]).replace('<span aria-label="Baths" class="b6a29bc0">', '').replace('</span>', '')
        baths.append(bath)

        i += 3

    i = 2
    dims = []
    while i < len(beds_list):
        dim = str(beds_list[i]).replace('<span aria-label="Area" class="b6a29bc0">', '').replace('</span>', '').replace('<span>', '')
        dims.append(dim)

        i += 3


    df = pd.DataFrame(list(zip(dims, beds, baths, types, districts, areas, 
                               blocks, descriptions, hrefs, prices)), 
                      columns=['dimensions', 'num_beds', 'num_baths', 'house_type', 'city', 
                               'location', 'area_block', 'description', 'url', 'price'])
    
    return df

In [3]:
dfs = []

for i in tqdm(range(115)):
    if i == 0:
        continue
    if i == 1:
        url = 'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-uttara/?occupancy_status=vacant'
    else:
        url = f'https://www.bproperty.com/en/dhaka/apartments-for-rent-in-uttara/page-{i}/?occupancy_status=vacant'
    
    dfs.append(get_property_data(url))

  0%|          | 0/115 [00:00<?, ?it/s]

In [4]:
df = pd.concat(dfs)

In [5]:
df.head()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
0,"1,654 sqft",3,4,Apartment,Dhaka,Uttara,Sector 18,Grab The Deal Of Renting This 1654 Sq Feet Res...,/en/property/details-4782719.html,15000.0
1,"1,654 sqft",3,4,Apartment,Dhaka,Uttara,Sector 18,Hurry! Make This 3 Bedroom Apartment Your Next...,/en/property/details-4782658.html,15000.0
2,"1,654 sqft",3,4,Apartment,Dhaka,Uttara,Sector 18,Bright And Cozy Apartment Featuring 1654 Sq Ft...,/en/property/details-4782580.html,15000.0
3,"1,654 sqft",3,4,Apartment,Dhaka,Uttara,Sector 18,Good-Looking Flat Is Vacant For Rent In Rajuk-...,/en/property/details-4606011.html,15000.0
4,800 sqft,2,1,Apartment,Dhaka,Uttara,Sector 11,Ready convenient flat of 800 SQ FT is up for r...,/en/property/details-5387352.html,18000.0


In [9]:
df.tail()

Unnamed: 0,dimensions,num_beds,num_baths,house_type,city,location,area_block,description,url,price
14,"2,500 sqft",3,4,Apartment,Dhaka,Uttara,Sector 4,For You 2500 Sq Ft Flat Is Now For Rent Near T...,/en/property/details-1701632.html,50000.0
15,"1,600 sqft",3,3,Apartment,Dhaka,Uttara,Sector 6,A noteworthy Residence up for rent in Uttara a...,/en/property/details-1701287.html,25000.0
16,900 sqft,2,2,Apartment,Dhaka,Uttara,Sector 14,A 900 SQ FT vacant apartment is ready to be re...,/en/property/details-1667106.html,16000.0
17,"1,200 sqft",3,3,Apartment,Dhaka,Uttara,Sector 5,Residential Flat Of 1200 Sq Ft Is Available Fo...,/en/property/details-1665314.html,25000.0
18,"2,900 sqft",4,4,Apartment,Dhaka,Uttara,Sector 6,"Close To Rajuk Uttara Model College, A 2900 Sq...",/en/property/details-1664303.html,60000.0


In [6]:
df.shape

(2731, 10)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2731 entries, 0 to 18
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   dimensions   2731 non-null   object 
 1   num_beds     2731 non-null   object 
 2   num_baths    2731 non-null   object 
 3   house_type   2731 non-null   object 
 4   city         2731 non-null   object 
 5   location     2731 non-null   object 
 6   area_block   2731 non-null   object 
 7   description  2731 non-null   object 
 8   url          2731 non-null   object 
 9   price        2731 non-null   float64
dtypes: float64(1), object(9)
memory usage: 234.7+ KB


In [10]:
# df.to_csv('bproperty_uttara.csv', index=False)