# Rental Price prediction(Delhi) using Linear Regression

In [46]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [3]:
df = pd.read_csv("/content/drive/MyDrive/Data_sets/Rental_price/Indian_housing_Delhi_data.csv")

In [4]:
df.head()

Unnamed: 0,house_type,house_size,location,city,latitude,longitude,price,currency,numBathrooms,numBalconies,isNegotiable,priceSqFt,verificationDate,description,SecurityDeposit,Status
0,1 RK Studio Apartment,400 sq ft,Kalkaji,Delhi,28.545561,77.254349,22000,INR,1.0,,,,Posted a day ago,"Fully furnished, loaded with amenities & gadge...",No Deposit,Furnished
1,1 RK Studio Apartment,400 sq ft,Mansarover Garden,Delhi,28.643259,77.132828,20000,INR,1.0,,,,Posted 9 days ago,Here is an excellent 1 BHK Independent Floor a...,No Deposit,Furnished
2,2 BHK Independent Floor,500 sq ft,Uttam Nagar,Delhi,28.618677,77.053352,8500,INR,1.0,,,,Posted 12 days ago,"Zero Brokerage.\n\n2 Room set, Govt bijali Met...",No Deposit,Semi-Furnished
3,3 BHK Independent House,"1,020 sq ft",Model Town,Delhi,28.712898,77.18,48000,INR,3.0,,,,Posted a year ago,Itâs a 3 bhk independent house situated in M...,No Deposit,Furnished
4,2 BHK Apartment,810 sq ft,Sector 13 Rohini,Delhi,28.723539,77.131424,20000,INR,2.0,,,,Posted a year ago,Well designed 2 bhk multistorey apartment is a...,No Deposit,Unfurnished


In [7]:
df.shape

(5000, 16)

# Dropping unwanted columns

In [21]:
df1 = df.drop(['isNegotiable', 'verificationDate', 'description', 'SecurityDeposit', 'Status', 'priceSqFt', 'currency' ], axis=1)
df1.head()

Unnamed: 0,house_type,house_size,location,city,latitude,longitude,price,numBathrooms,numBalconies
0,1 RK Studio Apartment,400 sq ft,Kalkaji,Delhi,28.545561,77.254349,22000,1.0,
1,1 RK Studio Apartment,400 sq ft,Mansarover Garden,Delhi,28.643259,77.132828,20000,1.0,
2,2 BHK Independent Floor,500 sq ft,Uttam Nagar,Delhi,28.618677,77.053352,8500,1.0,
3,3 BHK Independent House,"1,020 sq ft",Model Town,Delhi,28.712898,77.18,48000,3.0,
4,2 BHK Apartment,810 sq ft,Sector 13 Rohini,Delhi,28.723539,77.131424,20000,2.0,


# **DATA CLEANING**

#Checking null values

In [22]:
df1.isna().sum()

Unnamed: 0,0
house_type,0
house_size,0
location,0
city,0
latitude,0
longitude,0
price,0
numBathrooms,25
numBalconies,2263


# Replacing Null Values with the column's mean

In [23]:
df1['numBathrooms'].fillna(np.mean(df1['numBathrooms']), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df1['numBathrooms'].fillna(np.mean(df1['numBathrooms']), inplace=True)


In [24]:
df1[['numBathrooms']].isna().sum()

Unnamed: 0,0
numBathrooms,0


In [25]:
df1['numBalconies'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df1['numBalconies'].fillna(0, inplace=True)


In [26]:
df1[['numBalconies']].isna().sum()

Unnamed: 0,0
numBalconies,0


In [27]:
df1.house_type.unique()

array(['1 RK Studio Apartment ', '2 BHK Independent Floor ',
       '3 BHK Independent House ', '2 BHK Apartment ', '3 BHK Apartment ',
       '3 BHK Independent Floor ', '4 BHK Independent Floor ',
       '1 BHK Independent Floor ', '1 BHK Apartment ',
       '8 BHK Independent Floor ', '4 BHK Apartment ',
       '6 BHK Independent Floor ', '2 BHK Independent House ',
       '1 BHK Independent House ', '5 BHK Independent Floor ',
       '4 BHK Independent House ', '5 BHK Villa ',
       '5 BHK Independent House ', '7 BHK Independent Floor ',
       '8 BHK Independent House ', '10 BHK Independent House ',
       '7 BHK Independent House ', '9 BHK Independent House ',
       '8 BHK Villa ', '4 BHK Villa ', '5 BHK Apartment ',
       '6 BHK penthouse ', '12 BHK Independent House '], dtype=object)

In [28]:
df1['Number_of_BHK'] = df1['house_type'].str.extract('(\d+)').astype(int)
df1.head()

Unnamed: 0,house_type,house_size,location,city,latitude,longitude,price,numBathrooms,numBalconies,Number_of_BHK
0,1 RK Studio Apartment,400 sq ft,Kalkaji,Delhi,28.545561,77.254349,22000,1.0,0.0,1
1,1 RK Studio Apartment,400 sq ft,Mansarover Garden,Delhi,28.643259,77.132828,20000,1.0,0.0,1
2,2 BHK Independent Floor,500 sq ft,Uttam Nagar,Delhi,28.618677,77.053352,8500,1.0,0.0,2
3,3 BHK Independent House,"1,020 sq ft",Model Town,Delhi,28.712898,77.18,48000,3.0,0.0,3
4,2 BHK Apartment,810 sq ft,Sector 13 Rohini,Delhi,28.723539,77.131424,20000,2.0,0.0,2


In [29]:
df1['Number_of_BHK'].unique()

array([ 1,  2,  3,  4,  8,  6,  5,  7, 10,  9, 12])

In [32]:
df1['size'] = df1['house_size'].str.extract('(\d+)').astype(int)
df1.head()

Unnamed: 0,house_size,location,latitude,longitude,price,numBathrooms,numBalconies,Number_of_BHK,size
0,400 sq ft,Kalkaji,28.545561,77.254349,22000,1.0,0.0,1,400
1,400 sq ft,Mansarover Garden,28.643259,77.132828,20000,1.0,0.0,1,400
2,500 sq ft,Uttam Nagar,28.618677,77.053352,8500,1.0,0.0,2,500
3,"1,020 sq ft",Model Town,28.712898,77.18,48000,3.0,0.0,3,1
4,810 sq ft,Sector 13 Rohini,28.723539,77.131424,20000,2.0,0.0,2,810


In [30]:
df1.city.unique()

array(['Delhi'], dtype=object)

In [None]:
df1.drop(['house_type', 'city'], axis=1, inplace=True)
df1.head()

In [34]:
df1.drop(['house_size'], axis=1, inplace=True)
df1.head()

Unnamed: 0,location,latitude,longitude,price,numBathrooms,numBalconies,Number_of_BHK,size
0,Kalkaji,28.545561,77.254349,22000,1.0,0.0,1,400
1,Mansarover Garden,28.643259,77.132828,20000,1.0,0.0,1,400
2,Uttam Nagar,28.618677,77.053352,8500,1.0,0.0,2,500
3,Model Town,28.712898,77.18,48000,3.0,0.0,3,1
4,Sector 13 Rohini,28.723539,77.131424,20000,2.0,0.0,2,810


In [39]:
df1['location'].unique()

array(['Kalkaji', 'Mansarover Garden', 'Uttam Nagar', 'Model Town',
       'Sector 13 Rohini', 'DLF Farms', 'laxmi nagar', 'Swasthya Vihar',
       'Janakpuri', 'Pitampura', 'Gagan Vihar', 'Dabri',
       'Govindpuri Extension', 'Paschim Vihar', 'Vijay Nagar',
       'Vasant Kunj', 'Safdarjung Enclave', 'Hauz Khas', 'Bali Nagar',
       'Rajouri Garden', 'Shalimar Bagh', 'Green Park',
       'Dr Mukherji Nagar', 'Subhash Nagar', 'DLF Phase 5', 'Patel Nagar',
       'Jasola', 'Dwarka Mor', 'Kaushambi', 'Surajmal Vihar',
       'Sector 4 Dwarka', 'Sector 6 Dwarka', 'Sector 14 Dwarka',
       'Sarvodaya Enclave', 'Chattarpur', 'Ramesh Nagar',
       'Mayur Vihar II', 'Naraina', 'Greater Kailash',
       'Chittaranjan Park', 'Sector 19 Dwarka', 'Sector 23 Dwarka',
       'Lajpat Nagar III', 'South Extension 2', 'Sector-18 Dwarka',
       'Mansa Ram Park', 'Gautam Nagar', 'Sector 22 Dwarka',
       'Sheikh Sarai', 'Govindpuri', 'Sector 13 Dwarka', 'Shanti Niketan',
       'Defence Colony', 

In [40]:
df1.location.nunique()

288

In [42]:
label_encoder = preprocessing.LabelEncoder()
df1['location']= label_encoder.fit_transform(df['location'])

df1['location'].unique()

array([ 88, 124, 259, 133, 201,  38, 284, 251,  80, 162,  55,  40,  63,
       159, 268, 264, 184,  74,  18, 173, 238,  67,  46, 247,  39, 160,
        83,  48,  96, 250, 218, 220, 202, 191,  32, 177, 130, 139,  64,
        36, 207, 211, 109, 246, 227, 123,  57, 209, 241,  62, 200, 239,
        44, 120, 212, 101,  17, 107, 228, 213, 287, 182, 197,  93, 136,
       117,  66, 158, 199, 222,  21,   3, 244,  43, 122, 224, 219,  90,
       229,  68, 183, 157, 106, 240,  85,  61, 267,   7,   6,  50,  70,
       279, 171,  89, 252, 187, 208,  58,   8,  10,  45,  69, 195, 204,
       156, 269, 125, 231, 185,  91,   2, 174,  81,  65,  56, 152,   9,
       193,  31, 146, 215, 225, 134, 186,  94, 192, 258,  87,  59, 245,
       285, 132, 236, 178, 126,  98, 198, 223, 100, 282, 168, 286, 176,
        53,  78, 175, 141, 272, 232, 161,  77, 170,   0, 172, 104, 151,
       235, 248, 196, 205,  71, 181, 257, 116,  54, 135, 147, 243, 233,
       140, 194, 254, 153, 105,  72, 145,  82, 143, 249, 131, 27

In [43]:
df1.shape

(5000, 8)

# Splitting the data into Training and Testing

In [45]:
X = df1.drop(['price'], axis=1)
y = df1['price']

In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Model Building

In [48]:
lr = LinearRegression()

In [49]:
lr.fit(X,y)

In [51]:
lr.score(X_test, y_test)

0.4724086274243924

In [52]:
df1.head()

Unnamed: 0,location,latitude,longitude,price,numBathrooms,numBalconies,Number_of_BHK,size
0,88,28.545561,77.254349,22000,1.0,0.0,1,400
1,124,28.643259,77.132828,20000,1.0,0.0,1,400
2,259,28.618677,77.053352,8500,1.0,0.0,2,500
3,133,28.712898,77.18,48000,3.0,0.0,3,1
4,201,28.723539,77.131424,20000,2.0,0.0,2,810


In [54]:
prediction = lr.predict([[89, 28.545561, 77.254349, 1.0, 2, 1, 400]])
print(prediction)

[59010.39063942]


