In [1]:
# Import all dependencies:
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
matplotlib.rcParams["figure.figsize"] = (20,10)

In [3]:
# Read in the dataframe:
df1 = pd.read_csv('dataframe_bangladesh.csv')
df1.head()

Unnamed: 0,title,beds,bath,area,adress,type,purpose,flooPlan,url,lastUpdated,price
0,Eminent Apartment Of 2200 Sq Ft Is Vacant For ...,3,4,"2,200 sqft","Block A, Bashundhara R-A, Dhaka",Apartment,For Rent,https://images-cdn.bproperty.com/thumbnails/10...,https://www.bproperty.com/en/property/details-...,"August 13, 2022",50 Thousand
1,"Apartment Ready To Rent In South Khulshi, Near...",3,4,"1,400 sqft","South Khulshi, Khulshi, Chattogram",Apartment,For Rent,https://images-cdn.bproperty.com/thumbnails/44...,https://www.bproperty.com/en/property/details-...,"January 25, 2022",30 Thousand
2,"Smartly priced 1950 SQ FT apartment, that you ...",3,4,"1,950 sqft","Block F, Bashundhara R-A, Dhaka",Apartment,For Rent,https://images-cdn.bproperty.com/thumbnails/11...,https://www.bproperty.com/en/property/details-...,"February 22, 2023",30 Thousand
3,2000 Sq Ft Residential Apartment Is Up For Ren...,3,3,"2,000 sqft","Sector 9, Uttara, Dhaka",Apartment,For Rent,https://images-cdn.bproperty.com/thumbnails/14...,https://www.bproperty.com/en/property/details-...,"October 28, 2021",35 Thousand
4,Strongly Structured This 1650 Sq. ft Apartment...,3,4,"1,650 sqft","Block I, Bashundhara R-A, Dhaka",Apartment,For Rent,https://images-cdn.bproperty.com/thumbnails/10...,https://www.bproperty.com/en/property/details-...,"February 19, 2023",25 Thousand


In [5]:
# Check the total number of rows and columns:
df1.shape

(7557, 11)

In [7]:
# Check Type of residence:
df1.groupby('type')['type'].agg('count')

type
Apartment    7489
Building       21
Duplex         47
Name: type, dtype: int64

In [14]:
# Clean the data:
# Drop the unnecessary columns:
df2 = df1.drop(['title','purpose','flooPlan','url','lastUpdated'],axis='columns')
df2.head()

Unnamed: 0,beds,bath,area,adress,type,price
0,3,4,"2,200 sqft","Block A, Bashundhara R-A, Dhaka",Apartment,50 Thousand
1,3,4,"1,400 sqft","South Khulshi, Khulshi, Chattogram",Apartment,30 Thousand
2,3,4,"1,950 sqft","Block F, Bashundhara R-A, Dhaka",Apartment,30 Thousand
3,3,3,"2,000 sqft","Sector 9, Uttara, Dhaka",Apartment,35 Thousand
4,3,4,"1,650 sqft","Block I, Bashundhara R-A, Dhaka",Apartment,25 Thousand


In [12]:
# check if null or not:
df2.isnull().sum()
# if null then drop:
df3 = df2.dropna()

In [13]:
df3.shape

(7557, 7)

In [15]:
# check total bedrooms:
df3['beds'].unique()

array(['3 ', '5 ', '4 ', '1 Bed', '2 ', '17 ', '6 ', '21 ', '7 ', '20 ',
       '32 ', '22 ', '36 '], dtype=object)

In [22]:
# create new column free of anomaly:
df3['bedroom'] = df3['beds'].apply(lambda x: int(x.split(' ')[0]))
df3.drop(columns='beds', inplace=True)
df3.head()

Unnamed: 0,title,bath,area,adress,type,price,bedroom
0,Eminent Apartment Of 2200 Sq Ft Is Vacant For ...,4,"2,200 sqft","Block A, Bashundhara R-A, Dhaka",Apartment,50 Thousand,3
1,"Apartment Ready To Rent In South Khulshi, Near...",4,"1,400 sqft","South Khulshi, Khulshi, Chattogram",Apartment,30 Thousand,3
2,"Smartly priced 1950 SQ FT apartment, that you ...",4,"1,950 sqft","Block F, Bashundhara R-A, Dhaka",Apartment,30 Thousand,3
3,2000 Sq Ft Residential Apartment Is Up For Ren...,3,"2,000 sqft","Sector 9, Uttara, Dhaka",Apartment,35 Thousand,3
4,Strongly Structured This 1650 Sq. ft Apartment...,4,"1,650 sqft","Block I, Bashundhara R-A, Dhaka",Apartment,25 Thousand,3


In [24]:
# convert object datatype to int:
df3['bedroom'].astype(str).astype(int)

0       3
1       3
2       3
3       3
4       3
       ..
7552    4
7553    3
7554    2
7555    3
7556    4
Name: bedroom, Length: 7557, dtype: int32

In [25]:
# check if they are valid:
df3[df3.bedroom>15]

Unnamed: 0,title,bath,area,adress,type,price,bedroom
181,9000 Sq Ft Full Building For Rent In Bashundha...,10,"9,000 sqft","Block H, Bashundhara R-A, Dhaka",Building,3.5 Lakh,17
372,16100 Sq Ft Full Building Is For Rent In Uttara-6,10,"16,100 sqft","Road No 2, Sector 6, Uttara, Dhaka",Building,10 Lakh,21
872,Full Building Is Up For Rent At Uttara-11.,10,"14,000 sqft","Sector 11, Uttara, Dhaka",Building,5 Lakh,20
2548,18000 Sq Ft Full Residential Building Is Avail...,10,"18,000 sqft","Block D, Bashundhara R-A, Dhaka",Building,5 Lakh,32
3966,10000 Square Feet Full Building Is For Rent In...,10,"10,000 sqft","Sector 14, Uttara, Dhaka",Building,4 Lakh,22
5615,Visit This 19360 Sq Ft Full Building Which Is ...,10,"19,360 sqft","Block K, Baridhara, Dhaka",Building,16.6 Lakh,21
6936,"Now You Can Afford To Dwell Well, Check This 3...",10,"31,500 sqft","Block K, Baridhara, Dhaka",Building,20 Lakh,36
7118,This Residential Building Of 15000 Sq Ft Is Re...,10,"15,000 sqft","Block K, Baridhara, Dhaka",Building,10 Lakh,20


In [27]:
df3['bedroom'].unique()

array([ 3,  5,  4,  1,  2, 17,  6, 21,  7, 20, 32, 22, 36], dtype=int64)

In [28]:
# check the area:
df3.area.unique()

array(['2,200 sqft', '1,400 sqft', '1,950 sqft', '2,000 sqft',
       '1,650 sqft', '3,400 sqft', '1,600 sqft', '1,250 sqft',
       '2,150 sqft', '1,580 sqft', '3,200 sqft', '3,000 sqft',
       '1,800 sqft', '1,750 sqft', '1,310 sqft', '1,700 sqft',
       '2,750 sqft', '2,500 sqft', '550 sqft', '1,050 sqft', '1,350 sqft',
       '3,590 sqft', '400 sqft', '2,400 sqft', '500 sqft', '2,300 sqft',
       '4,350 sqft', '1,200 sqft', '800 sqft', '2,146 sqft', '1,315 sqft',
       '1,500 sqft', '700 sqft', '600 sqft', '960 sqft', '4,200 sqft',
       '2,250 sqft', '1,517 sqft', '6,300 sqft', '1,407 sqft',
       '1,850 sqft', '1,150 sqft', '900 sqft', '1,975 sqft', '1,300 sqft',
       '1,450 sqft', '950 sqft', '1,100 sqft', '2,600 sqft', '750 sqft',
       '2,115 sqft', '3,600 sqft', '3,150 sqft', '2,100 sqft',
       '1,319 sqft', '1,676 sqft', '1,825 sqft', '2,977 sqft', '350 sqft',
       '1,550 sqft', '450 sqft', '1,190 sqft', '2,230 sqft', '2,280 sqft',
       '2,197 sqft', '1,393 sq

In [29]:
def is_float(x):
    try:
        float(x)
    except:
        return False
    return True


In [30]:
df3[df3['area'].apply(is_float)]

Unnamed: 0,title,bath,area,adress,type,price,bedroom


In [36]:
df3.area

0       2,200 sqft
1       1,400 sqft
2       1,950 sqft
3       2,000 sqft
4       1,650 sqft
           ...    
7552    3,600 sqft
7553      900 sqft
7554    1,000 sqft
7555    3,600 sqft
7556    2,600 sqft
Name: area, Length: 7557, dtype: object

In [37]:
df3['area']

0       2,200 sqft
1       1,400 sqft
2       1,950 sqft
3       2,000 sqft
4       1,650 sqft
           ...    
7552    3,600 sqft
7553      900 sqft
7554    1,000 sqft
7555    3,600 sqft
7556    2,600 sqft
Name: area, Length: 7557, dtype: object

In [38]:
df3.area[0] 

'2,200 sqft'

In [43]:
# remove the extra words from the object:
df3['area'] = df3['area'].str.replace('sqft','')
df3.head()

Unnamed: 0,title,bath,area,adress,type,price,bedroom
0,Eminent Apartment Of 2200 Sq Ft Is Vacant For ...,4,2200,"Block A, Bashundhara R-A, Dhaka",Apartment,50 Thousand,3
1,"Apartment Ready To Rent In South Khulshi, Near...",4,1400,"South Khulshi, Khulshi, Chattogram",Apartment,30 Thousand,3
2,"Smartly priced 1950 SQ FT apartment, that you ...",4,1950,"Block F, Bashundhara R-A, Dhaka",Apartment,30 Thousand,3
3,2000 Sq Ft Residential Apartment Is Up For Ren...,3,2000,"Sector 9, Uttara, Dhaka",Apartment,35 Thousand,3
4,Strongly Structured This 1650 Sq. ft Apartment...,4,1650,"Block I, Bashundhara R-A, Dhaka",Apartment,25 Thousand,3


In [45]:
# remove the comma from the object:
df3['area'] = df3['area'].str.replace(',','')
df3

Unnamed: 0,title,bath,area,adress,type,price,bedroom
0,Eminent Apartment Of 2200 Sq Ft Is Vacant For ...,4,2200,"Block A, Bashundhara R-A, Dhaka",Apartment,50 Thousand,3
1,"Apartment Ready To Rent In South Khulshi, Near...",4,1400,"South Khulshi, Khulshi, Chattogram",Apartment,30 Thousand,3
2,"Smartly priced 1950 SQ FT apartment, that you ...",4,1950,"Block F, Bashundhara R-A, Dhaka",Apartment,30 Thousand,3
3,2000 Sq Ft Residential Apartment Is Up For Ren...,3,2000,"Sector 9, Uttara, Dhaka",Apartment,35 Thousand,3
4,Strongly Structured This 1650 Sq. ft Apartment...,4,1650,"Block I, Bashundhara R-A, Dhaka",Apartment,25 Thousand,3
...,...,...,...,...,...,...,...
7552,"Picture Yourself, Residing In This Well Constr...",4,3600,"Sector 3, Uttara, Dhaka",Duplex,80 Thousand,4
7553,Flat For Rent In Uttara Sector 13 Near Lubana ...,2,900,"Sector 13, Uttara, Dhaka",Apartment,19 Thousand,3
7554,1000 SQ FT flat for rent in Uttara Sector 13 n...,2,1000,"Sector 13, Uttara, Dhaka",Apartment,22 Thousand,2
7555,Well Planned Apartment For Rent In Gulshan 1 N...,4,3600,"Gulshan 1, Gulshan, Dhaka",Apartment,1.75 Lakh,3


In [47]:
# convert the object to int:
df3['area'] = df3['area'].astype(str).astype(int)
df3

Unnamed: 0,title,bath,area,adress,type,price,bedroom
0,Eminent Apartment Of 2200 Sq Ft Is Vacant For ...,4,2200,"Block A, Bashundhara R-A, Dhaka",Apartment,50 Thousand,3
1,"Apartment Ready To Rent In South Khulshi, Near...",4,1400,"South Khulshi, Khulshi, Chattogram",Apartment,30 Thousand,3
2,"Smartly priced 1950 SQ FT apartment, that you ...",4,1950,"Block F, Bashundhara R-A, Dhaka",Apartment,30 Thousand,3
3,2000 Sq Ft Residential Apartment Is Up For Ren...,3,2000,"Sector 9, Uttara, Dhaka",Apartment,35 Thousand,3
4,Strongly Structured This 1650 Sq. ft Apartment...,4,1650,"Block I, Bashundhara R-A, Dhaka",Apartment,25 Thousand,3
...,...,...,...,...,...,...,...
7552,"Picture Yourself, Residing In This Well Constr...",4,3600,"Sector 3, Uttara, Dhaka",Duplex,80 Thousand,4
7553,Flat For Rent In Uttara Sector 13 Near Lubana ...,2,900,"Sector 13, Uttara, Dhaka",Apartment,19 Thousand,3
7554,1000 SQ FT flat for rent in Uttara Sector 13 n...,2,1000,"Sector 13, Uttara, Dhaka",Apartment,22 Thousand,2
7555,Well Planned Apartment For Rent In Gulshan 1 N...,4,3600,"Gulshan 1, Gulshan, Dhaka",Apartment,1.75 Lakh,3


In [48]:
df3['area']

0       2200
1       1400
2       1950
3       2000
4       1650
        ... 
7552    3600
7553     900
7554    1000
7555    3600
7556    2600
Name: area, Length: 7557, dtype: int32

In [49]:
df4 = df3.copy()