## Load the dataset

In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [29]:
# Read the raw rental listings from the CSV file into a DataFrame.
df = pd.read_csv('houserentdhaka.csv')

# Preview the first five rows to check what was loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Location,Area,Bed,Bath,Price
0,0,"Block H, Bashundhara R-A, Dhaka","1,600 sqft",3,3,20 Thousand
1,1,"Farmgate, Tejgaon, Dhaka",900 sqft,2,2,20 Thousand
2,2,"Block B, Nobodoy Housing Society, Mohammadpur,...","1,250 sqft",3,3,18 Thousand
3,3,"Gulshan 1, Gulshan, Dhaka","2,200 sqft",3,4,75 Thousand
4,4,"Baridhara, Dhaka","2,200 sqft",3,3,75 Thousand


**We need to drop the `Unnamed: 0` column because it has no impact on our prediction.**

In [30]:
# Remove the 'Unnamed: 0' column from the frame.
# errors='ignore' keeps this cell safe to re-run: once the column is gone,
# a plain drop would raise KeyError on the second execution.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.head()

Unnamed: 0,Location,Area,Bed,Bath,Price
0,"Block H, Bashundhara R-A, Dhaka","1,600 sqft",3,3,20 Thousand
1,"Farmgate, Tejgaon, Dhaka",900 sqft,2,2,20 Thousand
2,"Block B, Nobodoy Housing Society, Mohammadpur,...","1,250 sqft",3,3,18 Thousand
3,"Gulshan 1, Gulshan, Dhaka","2,200 sqft",3,4,75 Thousand
4,"Baridhara, Dhaka","2,200 sqft",3,3,75 Thousand


**Our dataset has two columns, `Price` and `Area`, where we need only the numeric value, not unit strings such as "Thousand" or "sqft". So now we clean them.**

In [31]:
# Unit word -> numeric factor used to expand a price label into a plain number.
multiplier = {
    'Thousand': 1000,
    'Lakh': 100000,
    'Million': 1000000,
    'Crore': 10000000
}

def clean_price(price):
    """Convert a price label such as ``'20 Thousand'`` into a float.

    Parameters
    ----------
    price : any
        Value to convert. It is passed through ``str()`` first, so values
        that are already numeric are accepted as well.

    Returns
    -------
    float
        For a two-token label (``'<number> <unit>'``) the number multiplied
        by the factor registered for the unit in ``multiplier``; a unit that
        is not registered leaves the number unscaled. For any other
        non-empty label, the first token as a float. ``nan`` when the label
        is empty or whitespace-only.

    Raises
    ------
    ValueError
        If the numeric token cannot be parsed as a float.
    """
    parts = str(price).split()

    # A blank cell has no tokens; report it as missing instead of failing
    # with IndexError on parts[0].
    if not parts:
        return float('nan')

    # Strip thousands separators ('1,200' -> '1200') so float() accepts them.
    number = float(parts[0].replace(',', ''))

    if len(parts) == 2:
        # Keys in `multiplier` are capitalized, so normalise the unit's case
        # before the lookup ('thousand' / 'THOUSAND' -> 'Thousand').
        unit = parts[1].capitalize()
        return number * multiplier.get(unit, 1)

    return number

# Replace every textual price label with the float produced by clean_price.
df['Price'] = df['Price'].map(clean_price)

In [32]:
# Check the first five rows now that Price holds numeric values.
df.head(n=5)

Unnamed: 0,Location,Area,Bed,Bath,Price
0,"Block H, Bashundhara R-A, Dhaka","1,600 sqft",3,3,20000.0
1,"Farmgate, Tejgaon, Dhaka",900 sqft,2,2,20000.0
2,"Block B, Nobodoy Housing Society, Mohammadpur,...","1,250 sqft",3,3,18000.0
3,"Gulshan 1, Gulshan, Dhaka","2,200 sqft",3,4,75000.0
4,"Baridhara, Dhaka","2,200 sqft",3,3,75000.0


In [34]:
# Turn labels such as '1,600 sqft' into integers.
# astype(str) first makes the cell safe to re-run: on the second execution the
# column is already integer and the .str accessor would otherwise raise
# AttributeError. regex=False makes both replacements literal regardless of
# the pandas version's default, and strip() removes the space left behind
# once 'sqft' is gone.
df['Area'] = (
    df['Area']
    .astype(str)
    .str.replace(',', '', regex=False)
    .str.replace('sqft', '', regex=False)
    .str.strip()
    .astype(int)
)

In [35]:
# Check the first five rows now that Area holds integer values.
df.head(n=5)

Unnamed: 0,Location,Area,Bed,Bath,Price
0,"Block H, Bashundhara R-A, Dhaka",1600,3,3,20000.0
1,"Farmgate, Tejgaon, Dhaka",900,2,2,20000.0
2,"Block B, Nobodoy Housing Society, Mohammadpur,...",1250,3,3,18000.0
3,"Gulshan 1, Gulshan, Dhaka",2200,3,4,75000.0
4,"Baridhara, Dhaka",2200,3,3,75000.0
