# Data Science Regression Project: Predicting Home Prices in Delhi

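Dataset: this notebook reads a local file, `Delhi.csv`, containing Delhi NCR property listings. Note that the link originally cited here, https://www.kaggle.com/amitabhajoy/bengaluru-house-price-data, points to the Bengaluru house-price dataset rather than to the Delhi data (TODO: replace it with the actual source of `Delhi.csv`).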

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib
matplotlib.rcParams["figure.figsize"]=(20,10)

### Data Load: Loading Delhi home prices into a dataframe

In [3]:
# Read the listings from the CSV in the working directory, decoding it as
# Latin-1, then display the frame (pandas truncates the preview).
df = pd.read_csv(
    'Delhi.csv',
    encoding='latin1',
)
df

Unnamed: 0.1,Unnamed: 0,price,Address,area,latitude,longitude,Bedrooms,Bathrooms,Balcony,Status,neworold,parking,Furnished_status,Lift,Landmarks,type_of_building,desc,Price_sqft
0,0,5600000,"Noida Extension, Noida, Delhi NCR",1350,28.608850,77.460560,3,3,,Under Construction,New Property,,,2.0,,Flat,\n\n\n Welcome ...,4148.148148
1,1,8800000,"Sector 79, Gurgaon, Delhi NCR",1490,28.374236,76.952416,3,3,,Ready to Move,New Property,,Semi-Furnished,2.0,,Flat,\n\n\n Mapsko M...,5906.040268
2,2,16500000,"Vaishali, Ghaziabad, Delhi NCR",2385,28.645769,77.385110,4,5,,Ready to Move,New Property,1.0,Unfurnished,,,Flat,\n\n\n This pro...,6918.238994
3,3,3810000,"Link Road, F Block, Sector 50, Noida, Uttar Pr...",1050,28.566914,77.436434,2,2,3.0,,New Property,1.0,Unfurnished,2.0,near Gaur Mulberry Mansion,Flat,\n\n\n AIG Roya...,3628.571429
4,4,6200000,"Jaypee Pavilion Court Sector 128, Noida, Secto...",1350,28.520732,77.356491,2,2,3.0,Ready to Move,Resale,1.0,,3.0,,Flat,\n\n\n The prop...,4592.592593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7733,7733,7900000,"Indirapuram, Ghaziabad, Delhi NCR",1095,28.635272,77.370395,2,2,,Ready to Move,Resale,,,,,Flat,\n \n \n...,7214.611872
7734,7734,4510000,"Greater Noida, Sector 2, Greater Noida, Delhi NCR",1060,28.581431,77.452819,2,2,3.0,,Resale,,Semi-Furnished,,ek murti chowk,Flat,\n \n \n...,4254.716981
7735,7735,7000000,"Crossings Republik, Ghaziabad, Delhi NCR",1898,28.625850,77.435336,4,3,5.0,Ready to Move,Resale,,,,,Flat,\n \n \n...,3688.092729
7736,7736,6500000,"Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...",1400,28.701622,77.430153,3,3,2.0,Ready to Move,Resale,1.0,,3.0,vvip mall,Flat,\n \n \n...,4642.857143


In [4]:
# (rows, columns) of the freshly loaded frame.
df.shape

(7738, 18)

Drop features that are not required to build our model

In [5]:
# Remove the columns that are not used for modelling: the leftover CSV index,
# coordinates, free-text fields and the sparsely populated amenity columns.
# NOTE(review): Price_sqft is kept and equals price / area in the rows shown
# (e.g. 5600000 / 1350 = 4148.148); if it is later used as a feature it would
# leak the target -- TODO confirm how it is handled downstream.
df = df.drop(
    columns=[
        'Unnamed: 0',
        'latitude',
        'longitude',
        'Status',
        'desc',
        'Landmarks',
        'neworold',
        'Balcony',
        'parking',
        'Lift',
    ]
)
df

Unnamed: 0,price,Address,area,Bedrooms,Bathrooms,Furnished_status,type_of_building,Price_sqft
0,5600000,"Noida Extension, Noida, Delhi NCR",1350,3,3,,Flat,4148.148148
1,8800000,"Sector 79, Gurgaon, Delhi NCR",1490,3,3,Semi-Furnished,Flat,5906.040268
2,16500000,"Vaishali, Ghaziabad, Delhi NCR",2385,4,5,Unfurnished,Flat,6918.238994
3,3810000,"Link Road, F Block, Sector 50, Noida, Uttar Pr...",1050,2,2,Unfurnished,Flat,3628.571429
4,6200000,"Jaypee Pavilion Court Sector 128, Noida, Secto...",1350,2,2,,Flat,4592.592593
...,...,...,...,...,...,...,...,...
7733,7900000,"Indirapuram, Ghaziabad, Delhi NCR",1095,2,2,,Flat,7214.611872
7734,4510000,"Greater Noida, Sector 2, Greater Noida, Delhi NCR",1060,2,2,Semi-Furnished,Flat,4254.716981
7735,7000000,"Crossings Republik, Ghaziabad, Delhi NCR",1898,4,3,,Flat,3688.092729
7736,6500000,"Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...",1400,3,3,,Flat,4642.857143


In [6]:
# Number of listings per building type.
df.groupby('type_of_building')['type_of_building'].count()

type_of_building
Flat                6226
Individual House    1512
Name: type_of_building, dtype: int64

### Data Cleaning: Handle NA values

In [7]:
# Missing-value count for each remaining column.
df.isna().sum()

price                  0
Address                0
area                   0
Bedrooms               0
Bathrooms              0
Furnished_status    3614
type_of_building       0
Price_sqft             0
dtype: int64

In [8]:
# Furnished_status is the only column with missing values (3614 rows); give
# those rows an explicit 'Unknown' category instead of dropping them.
# The fill is restricted to that column on purpose: a blanket
# df.fillna('Unknown') would also write the string into any numeric column
# that happened to contain NaN, silently converting it to object dtype.
df = df.fillna({'Furnished_status': 'Unknown'})
df

Unnamed: 0,price,Address,area,Bedrooms,Bathrooms,Furnished_status,type_of_building,Price_sqft
0,5600000,"Noida Extension, Noida, Delhi NCR",1350,3,3,Unknown,Flat,4148.148148
1,8800000,"Sector 79, Gurgaon, Delhi NCR",1490,3,3,Semi-Furnished,Flat,5906.040268
2,16500000,"Vaishali, Ghaziabad, Delhi NCR",2385,4,5,Unfurnished,Flat,6918.238994
3,3810000,"Link Road, F Block, Sector 50, Noida, Uttar Pr...",1050,2,2,Unfurnished,Flat,3628.571429
4,6200000,"Jaypee Pavilion Court Sector 128, Noida, Secto...",1350,2,2,Unknown,Flat,4592.592593
...,...,...,...,...,...,...,...,...
7733,7900000,"Indirapuram, Ghaziabad, Delhi NCR",1095,2,2,Unknown,Flat,7214.611872
7734,4510000,"Greater Noida, Sector 2, Greater Noida, Delhi NCR",1060,2,2,Semi-Furnished,Flat,4254.716981
7735,7000000,"Crossings Republik, Ghaziabad, Delhi NCR",1898,4,3,Unknown,Flat,3688.092729
7736,6500000,"Raj Nagar Extension, Ghaziabad, Raj Nagar Exte...",1400,3,3,Unknown,Flat,4642.857143


In [9]:
# Re-check after filling: every column should now report zero missing values.
df.isna().sum()

price               0
Address             0
area                0
Bedrooms            0
Bathrooms           0
Furnished_status    0
type_of_building    0
Price_sqft          0
dtype: int64

In [10]:
# Column dtypes after cleaning; object columns hold text values.
df.dtypes

price                 int64
Address              object
area                  int64
Bedrooms              int64
Bathrooms             int64
Furnished_status     object
type_of_building     object
Price_sqft          float64
dtype: object