In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset

In [2]:
# Read the rent listings into a DataFrame; the path is relative, so the CSV
# must sit in the notebook's working directory.
dataset = pd.read_csv(filepath_or_buffer="House_rent.csv")

In [3]:
# Preview the loaded frame; pandas abbreviates long frames to the first and last rows.
dataset

Unnamed: 0,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom
0,2,10000,1100,Ground out of 2,Super Area,Bandel,Kolkata,Unfurnished,Bachelors/Family,2
1,2,20000,800,1 out of 3,Super Area,"Phool Bagan, Kankurgachi",Kolkata,Semi-Furnished,Bachelors/Family,1
2,2,17000,1000,1 out of 3,Super Area,Salt Lake City Sector 2,Kolkata,Semi-Furnished,Bachelors/Family,1
3,2,10000,800,1 out of 2,Super Area,Dumdum Park,Kolkata,Unfurnished,Bachelors/Family,1
4,2,7500,850,1 out of 2,Carpet Area,South Dum Dum,Kolkata,Unfurnished,Bachelors,1
...,...,...,...,...,...,...,...,...,...,...
4741,2,15000,1000,3 out of 5,Carpet Area,Bandam Kommu,Hyderabad,Semi-Furnished,Bachelors/Family,2
4742,3,29000,2000,1 out of 4,Super Area,"Manikonda, Hyderabad",Hyderabad,Semi-Furnished,Bachelors/Family,3
4743,3,35000,1750,3 out of 5,Carpet Area,"Himayath Nagar, NH 7",Hyderabad,Semi-Furnished,Bachelors/Family,3
4744,3,45000,1500,23 out of 34,Carpet Area,Gachibowli,Hyderabad,Semi-Furnished,Family,2


# Getting Basic Information about the Dataset

In [4]:
# (row count, column count) of the loaded frame.
dataset.shape

(4746, 10)

In [5]:
# Column labels, to confirm their exact spelling (several names contain spaces).
dataset.columns

Index(['BHK', 'Rent', 'Size', 'Floor', 'Area Type', 'Area Locality', 'City',
       'Furnishing Status', 'Tenant Preferred', 'Bathroom'],
      dtype='object')

In [6]:
# Per-column dtype and non-null count, plus the frame's memory usage.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4746 entries, 0 to 4745
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   BHK                4746 non-null   int64 
 1   Rent               4746 non-null   int64 
 2   Size               4746 non-null   int64 
 3   Floor              4746 non-null   object
 4   Area Type          4746 non-null   object
 5   Area Locality      4746 non-null   object
 6   City               4746 non-null   object
 7   Furnishing Status  4746 non-null   object
 8   Tenant Preferred   4746 non-null   object
 9   Bathroom           4746 non-null   int64 
dtypes: int64(4), object(6)
memory usage: 370.9+ KB


In [7]:
# Summary statistics; with default arguments only the numeric columns are summarised here.
# NOTE(review): the recorded output shows Rent max (3,500,000) far above its 75th
# percentile (33,000) and a Size minimum of 10 — worth checking for outliers before modelling.
dataset.describe()

Unnamed: 0,BHK,Rent,Size,Bathroom
count,4746.0,4746.0,4746.0,4746.0
mean,2.08386,34993.45,967.490729,1.965866
std,0.832256,78106.41,634.202328,0.884532
min,1.0,1200.0,10.0,1.0
25%,2.0,10000.0,550.0,1.0
50%,2.0,16000.0,850.0,2.0
75%,3.0,33000.0,1200.0,2.0
max,6.0,3500000.0,8000.0,10.0


In [8]:
# Peek at the top five rows to sanity-check the parsed values.
dataset.head(n=5)

Unnamed: 0,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom
0,2,10000,1100,Ground out of 2,Super Area,Bandel,Kolkata,Unfurnished,Bachelors/Family,2
1,2,20000,800,1 out of 3,Super Area,"Phool Bagan, Kankurgachi",Kolkata,Semi-Furnished,Bachelors/Family,1
2,2,17000,1000,1 out of 3,Super Area,Salt Lake City Sector 2,Kolkata,Semi-Furnished,Bachelors/Family,1
3,2,10000,800,1 out of 2,Super Area,Dumdum Park,Kolkata,Unfurnished,Bachelors/Family,1
4,2,7500,850,1 out of 2,Carpet Area,South Dum Dum,Kolkata,Unfurnished,Bachelors,1


In [9]:
# Count missing entries per column (isna is the canonical name for isnull).
dataset.isna().sum()


BHK                  0
Rent                 0
Size                 0
Floor                0
Area Type            0
Area Locality        0
City                 0
Furnishing Status    0
Tenant Preferred     0
Bathroom             0
dtype: int64

# Convert categorical columns to numerical columns

In [10]:
# Nominal categorical columns -> one-hot encoding
# Ordinal categorical columns -> label encoding

In [11]:
# One-hot encode every object-dtype column in a single pass (Floor, Area Type,
# Area Locality, City, Furnishing Status, Tenant Preferred). drop_first=True
# omits the first level of each encoded column, and dtype=int stores the
# indicators as 0/1 integers instead of booleans.
# NOTE(review): this also expands the free-text 'Floor' and 'Area Locality'
# columns into one indicator per distinct string (visible as the Floor_* and
# Area Locality_* columns when the frame is displayed afterwards). Consider
# parsing 'Floor' into numeric fields and treating 'Area Locality' separately;
# confirm against the cells that consume this frame before changing it.
dataset = pd.get_dummies(
    data=dataset,
    drop_first=True,
    dtype=int,
)

In [12]:
# Inspect the encoded frame; the object columns now appear as 0/1 indicator columns.
dataset

Unnamed: 0,BHK,Rent,Size,Bathroom,Floor_1 out of 1,Floor_1 out of 10,Floor_1 out of 11,Floor_1 out of 12,Floor_1 out of 13,Floor_1 out of 14,...,Area Locality_whitefield,City_Chennai,City_Delhi,City_Hyderabad,City_Kolkata,City_Mumbai,Furnishing Status_Semi-Furnished,Furnishing Status_Unfurnished,Tenant Preferred_Bachelors/Family,Tenant Preferred_Family
0,2,10000,1100,2,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,1,0
1,2,20000,800,1,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,1,0
2,2,17000,1000,1,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,1,0
3,2,10000,800,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,1,0
4,2,7500,850,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4741,2,15000,1000,2,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,1,0
4742,3,29000,2000,3,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,1,0
4743,3,35000,1750,3,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,1,0
4744,3,45000,1500,2,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,1
