#  Load Essential libraries

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns


# Load Data

In [2]:
df=pd.read_csv("F:\\price.csv")
df.shape

(13320, 9)

In [3]:
pd.set_option("display.max_columns",None)

In [4]:
df.head()

Unnamed: 0,area_type,availability,location,size,society,total_sqft,bath,balcony,price
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,Coomee,1056,2.0,1.0,39.07
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,Theanmp,2600,5.0,3.0,120.0
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,,1440,2.0,3.0,62.0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,Soiewre,1521,3.0,1.0,95.0
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,,1200,2.0,1.0,51.0


#  Data Analysing

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13320 entries, 0 to 13319
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   area_type     13320 non-null  object 
 1   availability  13320 non-null  object 
 2   location      13319 non-null  object 
 3   size          13304 non-null  object 
 4   society       7818 non-null   object 
 5   total_sqft    13320 non-null  object 
 6   bath          13247 non-null  float64
 7   balcony       12711 non-null  float64
 8   price         13320 non-null  float64
dtypes: float64(3), object(6)
memory usage: 936.7+ KB


In [6]:
df.isnull().sum()

area_type          0
availability       0
location           1
size              16
society         5502
total_sqft         0
bath              73
balcony          609
price              0
dtype: int64

In [7]:
df.describe()

Unnamed: 0,bath,balcony,price
count,13247.0,12711.0,13320.0
mean,2.69261,1.584376,112.565627
std,1.341458,0.817263,148.971674
min,1.0,0.0,8.0
25%,2.0,1.0,50.0
50%,2.0,2.0,72.0
75%,3.0,2.0,120.0
max,40.0,3.0,3600.0


In [8]:
df.sample()

Unnamed: 0,area_type,availability,location,size,society,total_sqft,bath,balcony,price
11047,Super built-up Area,Ready To Move,Hennur Road,3 BHK,ChionCo,1748,3.0,,103.0


#  Data preprocessing

In [9]:
columns=df.isnull().sum()/df.shape[0]*100
columns

area_type        0.000000
availability     0.000000
location         0.007508
size             0.120120
society         41.306306
total_sqft       0.000000
bath             0.548048
balcony          4.572072
price            0.000000
dtype: float64

In [10]:
drop_columns=columns[columns>5].keys()

In [11]:
df1=df.drop(columns=drop_columns)
df1.shape

(13320, 8)

In [12]:
cols=df.isnull().sum()[df.isnull().sum()>5].keys()
cols

Index(['size', 'society', 'bath', 'balcony'], dtype='object')

In [13]:
df1.isnull().sum()

area_type         0
availability      0
location          1
size             16
total_sqft        0
bath             73
balcony         609
price             0
dtype: int64

## Imputation of Numerical Values By Scikit Learn

In [14]:
num_var=df1.select_dtypes(include=["int64","float64"]).keys()
num_var

Index(['bath', 'balcony', 'price'], dtype='object')

In [15]:
from sklearn.impute import SimpleImputer

In [16]:
si= SimpleImputer(
    strategy='mean',
    fill_value=None,
    verbose=0,
    copy=True,
    add_indicator=False)

In [17]:
si.fit(df1[num_var])

SimpleImputer()

In [18]:
df1[num_var]=si.transform(df1[num_var])

In [19]:
df1[num_var].isnull().sum()

bath       0
balcony    0
price      0
dtype: int64

In [20]:
df1.isnull().sum()

area_type        0
availability     0
location         1
size            16
total_sqft       0
bath             0
balcony          0
price            0
dtype: int64

## Imputation of Categorical Values By scikit Learn 

In [21]:
cat_var=df1.select_dtypes(include=["object"]).keys()
cat_var

Index(['area_type', 'availability', 'location', 'size', 'total_sqft'], dtype='object')

In [22]:
si_cat=SimpleImputer(strategy='most_frequent',
    fill_value=None,
    verbose=0,
    copy=True,
    add_indicator=False,
)

In [23]:
si_cat.fit(df1[cat_var])

SimpleImputer(strategy='most_frequent')

In [24]:
df1[cat_var]=si_cat.transform(df1[cat_var])

In [25]:
df1.isnull().sum()

area_type       0
availability    0
location        0
size            0
total_sqft      0
bath            0
balcony         0
price           0
dtype: int64

In [26]:
df1.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.0,120.0
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0


#  Feature Engineering

In [27]:
# Convert Total_sqft into numerical 

##  conversion of total_sqft

In [28]:
total_sqft_int=[]
for str_val in df1["total_sqft"]:
    try:
        total_sqft_int.append(float(str_val))
    except:
        try:
            temp=[]
            temp=str_val.split("_")
            total_sqft_int.append((float(temp[0]))+(float(temp[1]))/2)
            
        except:
             total_sqft_int.append(np.nan)

In [29]:
df2=df1.reset_index(drop=True)

In [30]:
df3=df2.join(pd.DataFrame({"total_sqft_int":total_sqft_int}))

In [31]:
df3.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07,1056.0
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.0,120.0,2600.0
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0,1440.0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0,1521.0
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0,1200.0


###  Now size has More Counts

In [32]:
bhk_int=[]
for str_val in df3["size"]:
        temp=[]
        temp=str_val.split(" ")
        try:
            bhk_int.append(int(temp[0]))
        except:
             bhk_int.append(str)

In [33]:
df4=df3.reset_index(drop=True)

In [34]:
df5=df4.join(pd.DataFrame({"bhk":bhk_int}))

In [35]:
df5.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07,1056.0,2
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.0,120.0,2600.0,4
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0,1440.0,3
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0,1521.0,3
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0,1200.0,2


In [36]:
df5.isnull().sum()

area_type           0
availability        0
location            0
size                0
total_sqft          0
bath                0
balcony             0
price               0
total_sqft_int    247
bhk                 0
dtype: int64

In [37]:
df6=df5.dropna()

In [38]:
df6.isnull().sum()

area_type         0
availability      0
location          0
size              0
total_sqft        0
bath              0
balcony           0
price             0
total_sqft_int    0
bhk               0
dtype: int64

In [39]:
df6.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07,1056.0,2
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.0,120.0,2600.0,4
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0,1440.0,3
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0,1521.0,3
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0,1200.0,2


#  Finding Outliers

In [40]:
df6["price_per_sqft"]=df6["price"]*100000/df6["total_sqft_int"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df6["price_per_sqft"]=df6["price"]*100000/df6["total_sqft_int"]


In [41]:
df6.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07,1056.0,2,3699.810606
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.0,120.0,2600.0,4,4615.384615
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0,1440.0,3,4305.555556
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0,1521.0,3,6245.890861
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0,1200.0,2,4250.0


In [42]:
df6["price_per_sqft"].describe()

count    1.307300e+04
mean     7.949600e+03
std      1.072440e+05
min      2.678298e+02
25%      4.265734e+03
50%      5.454545e+03
75%      7.338057e+03
max      1.200000e+07
Name: price_per_sqft, dtype: float64

#  Outlier Detection

In [43]:
df6[df6["total_sqft_int"]/df6["bhk"]<400]

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
9,Plot Area,Ready To Move,Gandhi Bazar,6 Bedroom,1020,6.0,1.584376,370.0,1020.0,6,36274.509804
16,Super built-up Area,Ready To Move,Bisuvanahalli,3 BHK,1180,3.0,2.000000,48.0,1180.0,3,4067.796610
26,Super built-up Area,Ready To Move,Electronic City,2 BHK,660,1.0,1.000000,23.1,660.0,2,3500.000000
29,Super built-up Area,Ready To Move,Electronic City,3 BHK,1025,2.0,1.000000,47.0,1025.0,3,4585.365854
31,Super built-up Area,Ready To Move,Bisuvanahalli,3 BHK,1075,2.0,1.000000,35.0,1075.0,3,3255.813953
...,...,...,...,...,...,...,...,...,...,...,...
13281,Plot Area,Ready To Move,Margondanahalli,5 Bedroom,1375,5.0,1.000000,125.0,1375.0,5,9090.909091
13300,Plot Area,Ready To Move,Hosakerehalli,5 Bedroom,1500,6.0,2.000000,145.0,1500.0,5,9666.666667
13303,Plot Area,Ready To Move,Vidyaranyapura,5 Bedroom,774,5.0,3.000000,70.0,774.0,5,9043.927649
13306,Plot Area,Ready To Move,Rajarajeshwari Nagara,4 Bedroom,1200,5.0,1.584376,325.0,1200.0,4,27083.333333


In [44]:
df7=df6[df6["total_sqft_int"]/df6["bhk"]>400]

In [45]:
df7.shape

(11334, 11)

#  Outlier Detection By Standard deviation Method

In [46]:
mean=df7.price_per_sqft.mean()
mean

6140.340219359961

In [47]:
stdd=df7.price_per_sqft.std()
stdd

3972.0605191517147

In [48]:
lower_limit=df7[df7.price_per_sqft<(mean-stdd)]
lower_limit

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
132,Super built-up Area,Ready To Move,Electronic City,2 BHK,880,1.0,1.0,16.5,880.0,2,1875.0
514,Plot Area,Ready To Move,Banashankari Stage III,4 Bedroom,8500,4.0,2.0,145.0,8500.0,4,1705.882353
674,Built-up Area,Ready To Move,Yelahanka,3 BHK,35000,3.0,3.0,130.0,35000.0,3,371.428571
767,Plot Area,Ready To Move,Sarjapur,5 Bedroom,4360,4.0,1.0,90.0,4360.0,5,2064.220183
810,Plot Area,18-Apr,4 Bedroom Farm House in Bagalur,4 Bedroom,10961,4.0,1.0,80.0,10961.0,4,729.860414
996,Plot Area,Ready To Move,Chikkabanavar,1 Bedroom,1200,1.0,0.0,20.0,1200.0,1,1666.666667
1894,Plot Area,Ready To Move,Nelamangala,3 Bedroom,52272,2.0,1.0,140.0,52272.0,3,267.829813
2404,Super built-up Area,Ready To Move,Yelahanka New Town,1 BHK,960,2.0,1.0,18.0,960.0,1,1875.0
2421,Plot Area,Ready To Move,Basavanagara,4 Bedroom,2000,3.0,2.0,25.0,2000.0,4,1250.0
3363,Super built-up Area,Ready To Move,Kaval Byrasandra,3 BHK,2400,2.0,0.0,50.0,2400.0,3,2083.333333


In [49]:
upper_limit=df7[df7.price_per_sqft>(mean+stdd)]
upper_limit

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
7,Super built-up Area,Ready To Move,Rajaji Nagar,4 BHK,3300,4.0,1.584376,600.0,3300.0,4,18181.818182
11,Plot Area,Ready To Move,Whitefield,4 Bedroom,2785,5.0,3.000000,295.0,2785.0,4,10592.459605
18,Super built-up Area,Ready To Move,Ramakrishnappa Layout,3 BHK,2770,4.0,2.000000,290.0,2770.0,3,10469.314079
22,Plot Area,Ready To Move,Thanisandra,4 Bedroom,2800,5.0,2.000000,380.0,2800.0,4,13571.428571
57,Super built-up Area,Ready To Move,Ramakrishnappa Layout,2 BHK,1500,2.0,2.000000,185.0,1500.0,2,12333.333333
...,...,...,...,...,...,...,...,...,...,...,...
13290,Super built-up Area,Ready To Move,Sarjapur Road,4 BHK,4050,2.0,1.000000,450.0,4050.0,4,11111.111111
13296,Super built-up Area,Ready To Move,Cox Town,2 BHK,1200,2.0,2.000000,140.0,1200.0,2,11666.666667
13305,Carpet Area,Ready To Move,Hulimavu,1 BHK,500,1.0,3.000000,220.0,500.0,1,44000.000000
13316,Super built-up Area,Ready To Move,Richards Town,4 BHK,3600,5.0,1.584376,400.0,3600.0,4,11111.111111


In [50]:
df8=df7[(df7.price_per_sqft<(mean+stdd))& (df7.price_per_sqft>(mean-stdd))]

In [51]:
df8.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07,1056.0,2,3699.810606
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.0,120.0,2600.0,4,4615.384615
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0,1440.0,3,4305.555556
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0,1521.0,3,6245.890861
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0,1200.0,2,4250.0


In [52]:
df8.shape

(10375, 11)

## bath

In [53]:
df8.bath.unique()

array([ 2.        ,  5.        ,  3.        ,  4.        ,  1.        ,
        6.        ,  7.        ,  8.        ,  2.69260965,  9.        ,
       12.        , 16.        , 10.        , 13.        ])

In [54]:
df8[ df8.bath > df8.bhk+1 ]

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
84,Super built-up Area,Ready To Move,EPIP Zone,3 BHK,1499,5.00000,2.000000,102.00,1499.0,3,6804.536358
337,Super built-up Area,Ready To Move,Thigalarapalya,4 BHK,3122,6.00000,2.000000,230.00,3122.0,4,7367.072389
344,Super built-up Area,21-Dec,Kanakpura Road,1 BHK,525,2.69261,1.584376,21.53,525.0,1,4100.952381
490,Super built-up Area,Ready To Move,Old Madras Road,5 BHK,4500,7.00000,3.000000,337.00,4500.0,5,7488.888889
524,Super built-up Area,17-Dec,Jakkur,4 BHK,5230,6.00000,1.000000,465.00,5230.0,4,8891.013384
...,...,...,...,...,...,...,...,...,...,...,...
12103,Super built-up Area,Ready To Move,Thanisandra,3 BHK,1806,6.00000,2.000000,116.00,1806.0,3,6423.034330
12192,Super built-up Area,Ready To Move,Thigalarapalya,4 BHK,3122,6.00000,2.000000,250.00,3122.0,4,8007.687380
12366,Plot Area,Ready To Move,Dodsworth Layout,3 Bedroom,5656,5.00000,0.000000,499.00,5656.0,3,8822.489392
13095,Super built-up Area,Ready To Move,Sathya Sai Layout,4 BHK,6652,6.00000,1.000000,660.00,6652.0,4,9921.828022


In [55]:
df8[ df8.bath >df8.bhk ]

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,2600,5.0,3.000000,120.00,2600.0,4,4615.384615
84,Super built-up Area,Ready To Move,EPIP Zone,3 BHK,1499,5.0,2.000000,102.00,1499.0,3,6804.536358
85,Built-up Area,Ready To Move,Hegde Nagar,6 Bedroom,3000,7.0,2.000000,210.00,3000.0,6,7000.000000
150,Super built-up Area,19-Apr,Mysore Road,3 BHK,1710,4.0,2.000000,91.31,1710.0,3,5339.766082
153,Super built-up Area,Ready To Move,Sanjeevini Nagar,3 BHK,2795,4.0,1.584376,235.00,2795.0,3,8407.871199
...,...,...,...,...,...,...,...,...,...,...,...
13208,Super built-up Area,Ready To Move,Hebbal,4 BHK,4000,6.0,1.000000,370.00,4000.0,4,9250.000000
13229,Built-up Area,Ready To Move,Ambedkar Nagar,3 BHK,2395,4.0,2.000000,150.00,2395.0,3,6263.048017
13231,Super built-up Area,Ready To Move,Thigalarapalya,3 BHK,2215,4.0,2.000000,152.00,2215.0,3,6862.302483
13268,Super built-up Area,18-Apr,EPIP Zone,4 BHK,3360,5.0,2.000000,221.00,3360.0,4,6577.380952


In [56]:
df9=df8[ df8.bath <= df8.bhk ]

In [57]:
df9.shape

(9855, 11)

In [58]:
df9["bhk"].unique()

array([ 2,  3,  4,  1,  6,  8,  5,  7, 11,  9, 16, 13, 10], dtype=int64)

In [59]:
df10=df9[df9["bhk"]<9]

In [60]:
df10["balcony"].unique()

array([1.        , 3.        , 1.58437574, 2.        , 0.        ])

In [61]:
df11=df10[df10["balcony"]>=1]

In [62]:
df11.shape

(9337, 11)

In [63]:
df11.head()

Unnamed: 0,area_type,availability,location,size,total_sqft,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,1056,2.0,1.0,39.07,1056.0,2,3699.810606
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,1440,2.0,3.0,62.0,1440.0,3,4305.555556
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,1521,3.0,1.0,95.0,1521.0,3,6245.890861
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,1200,2.0,1.0,51.0,1200.0,2,4250.0
5,Super built-up Area,Ready To Move,Whitefield,2 BHK,1170,2.0,1.0,38.0,1170.0,2,3247.863248


# Categorical Encoding

In [64]:
df12=df11.drop(columns=["size","total_sqft"])

In [65]:
df12

Unnamed: 0,area_type,availability,location,bath,balcony,price,total_sqft_int,bhk,price_per_sqft
0,Super built-up Area,19-Dec,Electronic City Phase II,2.0,1.0,39.07,1056.0,2,3699.810606
2,Built-up Area,Ready To Move,Uttarahalli,2.0,3.0,62.00,1440.0,3,4305.555556
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3.0,1.0,95.00,1521.0,3,6245.890861
4,Super built-up Area,Ready To Move,Kothanur,2.0,1.0,51.00,1200.0,2,4250.000000
5,Super built-up Area,Ready To Move,Whitefield,2.0,1.0,38.00,1170.0,2,3247.863248
...,...,...,...,...,...,...,...,...,...
13312,Super built-up Area,Ready To Move,Bellandur,2.0,2.0,47.00,1262.0,2,3724.247227
13313,Super built-up Area,Ready To Move,Uttarahalli,2.0,1.0,57.00,1345.0,3,4237.918216
13314,Super built-up Area,Ready To Move,Green Glen Layout,3.0,3.0,112.00,1715.0,3,6530.612245
13317,Built-up Area,Ready To Move,Raja Rajeshwari Nagar,2.0,1.0,60.00,1141.0,2,5258.545136


#  working on area_type feature

In [66]:
df12["area_type"].value_counts()

Super built-up  Area    7280
Built-up  Area          1673
Plot  Area               331
Carpet  Area              53
Name: area_type, dtype: int64

In [67]:
df15=df12.copy()
for cat_var in ["Super built-up  Area","Built-up  Area","Plot  Area " ]:
    df15["area_type"+cat_var]=np.where(df15["area_type"]==cat_var , 1,0)

In [68]:
df15.shape

(9337, 12)

In [69]:
df15.head()

Unnamed: 0,area_type,availability,location,bath,balcony,price,total_sqft_int,bhk,price_per_sqft,area_typeSuper built-up Area,area_typeBuilt-up Area,area_typePlot Area
0,Super built-up Area,19-Dec,Electronic City Phase II,2.0,1.0,39.07,1056.0,2,3699.810606,1,0,0
2,Built-up Area,Ready To Move,Uttarahalli,2.0,3.0,62.0,1440.0,3,4305.555556,0,1,0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3.0,1.0,95.0,1521.0,3,6245.890861,1,0,0
4,Super built-up Area,Ready To Move,Kothanur,2.0,1.0,51.0,1200.0,2,4250.0,1,0,0
5,Super built-up Area,Ready To Move,Whitefield,2.0,1.0,38.0,1170.0,2,3247.863248,1,0,0


#  working on availability

In [70]:
df15["availability"].value_counts()

Ready To Move    7257
18-Dec            231
18-May            230
18-Apr            211
18-Aug            177
                 ... 
14-Nov              1
17-Feb              1
15-Aug              1
20-Feb              1
14-Jul              1
Name: availability, Length: 75, dtype: int64

In [71]:
df15["availability_Ready To Move"]=np.where(df15["availability"]=="Ready To Move",1,0)

In [72]:
df15.head()

Unnamed: 0,area_type,availability,location,bath,balcony,price,total_sqft_int,bhk,price_per_sqft,area_typeSuper built-up Area,area_typeBuilt-up Area,area_typePlot Area,availability_Ready To Move
0,Super built-up Area,19-Dec,Electronic City Phase II,2.0,1.0,39.07,1056.0,2,3699.810606,1,0,0,0
2,Built-up Area,Ready To Move,Uttarahalli,2.0,3.0,62.0,1440.0,3,4305.555556,0,1,0,1
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3.0,1.0,95.0,1521.0,3,6245.890861,1,0,0,1
4,Super built-up Area,Ready To Move,Kothanur,2.0,1.0,51.0,1200.0,2,4250.0,1,0,0,1
5,Super built-up Area,Ready To Move,Whitefield,2.0,1.0,38.0,1170.0,2,3247.863248,1,0,0,1


#  Working on Location

In [73]:
location_value_count=df15["location"].value_counts()
location_value_count

Whitefield                                      410
Sarjapur  Road                                  301
Electronic City                                 248
Kanakpura Road                                  214
Thanisandra                                     196
                                               ... 
8th Block Jayanagar                               1
Lakshminarayanapura, Electronic City Phase 2      1
Cottonpet                                         1
Gokaula Extension                                 1
Duvasapalya                                       1
Name: location, Length: 953, dtype: int64

In [74]:
location_get_20=location_value_count[location_value_count>=20].index

In [75]:
location_get_20

Index(['Whitefield', 'Sarjapur  Road', 'Electronic City', 'Kanakpura Road',
       'Thanisandra', 'Yelahanka', 'Uttarahalli', 'Raja Rajeshwari Nagar',
       'Marathahalli', 'Hennur Road',
       ...
       'Babusapalaya', 'Choodasandra', 'Kammasandra', 'Kathriguppe',
       'Singasandra', 'Thubarahalli', 'Sonnenahalli', 'Anekal', 'Gunjur',
       'Amruthahalli'],
      dtype='object', length=106)

In [76]:
df16=df15.copy()
for cat_var in location_get_20:
    df16["location"+cat_var]=np.where(df16["location"]==cat_var,1,0)

In [77]:
df16.head()

Unnamed: 0,area_type,availability,location,bath,balcony,price,total_sqft_int,bhk,price_per_sqft,area_typeSuper built-up Area,area_typeBuilt-up Area,area_typePlot Area,availability_Ready To Move,locationWhitefield,locationSarjapur Road,locationElectronic City,locationKanakpura Road,locationThanisandra,locationYelahanka,locationUttarahalli,locationRaja Rajeshwari Nagar,locationMarathahalli,locationHennur Road,locationHaralur Road,locationHebbal,location7th Phase JP Nagar,locationBannerghatta Road,locationElectronic City Phase II,locationBellandur,locationBegur Road,locationElectronics City Phase 1,locationHarlur,locationHoodi,locationKasavanhalli,locationYeshwanthpur,locationChandapura,locationVarthur,locationHosa Road,locationKR Puram,locationKothanur,locationKaggadasapura,locationJakkur,locationBanashankari,locationHormavu,locationSarjapur,locationRachenahalli,locationOld Madras Road,locationHennur,locationPanathur,locationAkshaya Nagar,locationJP Nagar,locationHSR Layout,locationBudigere,locationRamagondanahalli,locationBalagere,locationJigani,locationBhoganhalli,locationGottigere,locationHosur Road,location8th Phase JP Nagar,locationVittasandra,locationKengeri,locationKanakapura,locationKundalahalli,locationSubramanyapura,locationRamamurthy Nagar,locationGreen Glen Layout,locationJalahalli,locationBrookefield,locationHulimavu,locationMysore Road,locationCV Raman Nagar,locationHoramavu Agara,locationLakshminarayana Pura,locationKudlu Gate,locationTalaghattapura,locationBommasandra,locationKoramangala,location5th Phase JP Nagar,locationMahadevpura,locationChannasandra,location9th Phase JP Nagar,locationKalena Agrahara,locationKadugodi,locationOld Airport Road,locationDoddathoguru,locationKudlu,locationNagarbhavi,locationBommanahalli,locationYelahanka New Town,locationSomasundara Palya,locationChikkalasandra,locationSahakara Nagar,locationHegde Nagar,locationTumkur Road,locationAmbalipura,locationDevanahalli,locationDodda Nekkundi,locationAnanth Nagar,locationThigalarapalya,locationGubbalala,locationArdendale,locationBommasandra Industrial Area,locationKodichikkanahalli,locationAttibele,locationHoramavu Banaswadi,locationVijayanagar,locationKengeri Satellite Town,locationLingadheeranahalli,locationBabusapalaya,locationChoodasandra,locationKammasandra,locationKathriguppe,locationSingasandra,locationThubarahalli,locationSonnenahalli,locationAnekal,locationGunjur,locationAmruthahalli
0,Super built-up Area,19-Dec,Electronic City Phase II,2.0,1.0,39.07,1056.0,2,3699.810606,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Built-up Area,Ready To Move,Uttarahalli,2.0,3.0,62.0,1440.0,3,4305.555556,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3.0,1.0,95.0,1521.0,3,6245.890861,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,Super built-up Area,Ready To Move,Kothanur,2.0,1.0,51.0,1200.0,2,4250.0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Super built-up Area,Ready To Move,Whitefield,2.0,1.0,38.0,1170.0,2,3247.863248,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#  Drop Categorical_value

In [78]:
df17=df16.drop(columns=["availability","area_type","location"],axis=1)

In [79]:
df17.head()

Unnamed: 0,bath,balcony,price,total_sqft_int,bhk,price_per_sqft,area_typeSuper built-up Area,area_typeBuilt-up Area,area_typePlot Area,availability_Ready To Move,locationWhitefield,locationSarjapur Road,locationElectronic City,locationKanakpura Road,locationThanisandra,locationYelahanka,locationUttarahalli,locationRaja Rajeshwari Nagar,locationMarathahalli,locationHennur Road,locationHaralur Road,locationHebbal,location7th Phase JP Nagar,locationBannerghatta Road,locationElectronic City Phase II,locationBellandur,locationBegur Road,locationElectronics City Phase 1,locationHarlur,locationHoodi,locationKasavanhalli,locationYeshwanthpur,locationChandapura,locationVarthur,locationHosa Road,locationKR Puram,locationKothanur,locationKaggadasapura,locationJakkur,locationBanashankari,locationHormavu,locationSarjapur,locationRachenahalli,locationOld Madras Road,locationHennur,locationPanathur,locationAkshaya Nagar,locationJP Nagar,locationHSR Layout,locationBudigere,locationRamagondanahalli,locationBalagere,locationJigani,locationBhoganhalli,locationGottigere,locationHosur Road,location8th Phase JP Nagar,locationVittasandra,locationKengeri,locationKanakapura,locationKundalahalli,locationSubramanyapura,locationRamamurthy Nagar,locationGreen Glen Layout,locationJalahalli,locationBrookefield,locationHulimavu,locationMysore Road,locationCV Raman Nagar,locationHoramavu Agara,locationLakshminarayana Pura,locationKudlu Gate,locationTalaghattapura,locationBommasandra,locationKoramangala,location5th Phase JP Nagar,locationMahadevpura,locationChannasandra,location9th Phase JP Nagar,locationKalena Agrahara,locationKadugodi,locationOld Airport Road,locationDoddathoguru,locationKudlu,locationNagarbhavi,locationBommanahalli,locationYelahanka New Town,locationSomasundara Palya,locationChikkalasandra,locationSahakara Nagar,locationHegde Nagar,locationTumkur Road,locationAmbalipura,locationDevanahalli,locationDodda Nekkundi,locationAnanth Nagar,locationThigalarapalya,locationGubbalala,locationArdendale,locationBommasandra Industrial Area,locationKodichikkanahalli,locationAttibele,locationHoramavu Banaswadi,locationVijayanagar,locationKengeri Satellite Town,locationLingadheeranahalli,locationBabusapalaya,locationChoodasandra,locationKammasandra,locationKathriguppe,locationSingasandra,locationThubarahalli,locationSonnenahalli,locationAnekal,locationGunjur,locationAmruthahalli
0,2.0,1.0,39.07,1056.0,2,3699.810606,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2.0,3.0,62.0,1440.0,3,4305.555556,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3.0,1.0,95.0,1521.0,3,6245.890861,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,2.0,1.0,51.0,1200.0,2,4250.0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,2.0,1.0,38.0,1170.0,2,3247.863248,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [80]:
df17.shape

(9337, 116)

In [81]:
df17.isnull().sum()

bath                    0
balcony                 0
price                   0
total_sqft_int          0
bhk                     0
                       ..
locationThubarahalli    0
locationSonnenahalli    0
locationAnekal          0
locationGunjur          0
locationAmruthahalli    0
Length: 116, dtype: int64

In [82]:
df17

Unnamed: 0,bath,balcony,price,total_sqft_int,bhk,price_per_sqft,area_typeSuper built-up Area,area_typeBuilt-up Area,area_typePlot Area,availability_Ready To Move,locationWhitefield,locationSarjapur Road,locationElectronic City,locationKanakpura Road,locationThanisandra,locationYelahanka,locationUttarahalli,locationRaja Rajeshwari Nagar,locationMarathahalli,locationHennur Road,locationHaralur Road,locationHebbal,location7th Phase JP Nagar,locationBannerghatta Road,locationElectronic City Phase II,locationBellandur,locationBegur Road,locationElectronics City Phase 1,locationHarlur,locationHoodi,locationKasavanhalli,locationYeshwanthpur,locationChandapura,locationVarthur,locationHosa Road,locationKR Puram,locationKothanur,locationKaggadasapura,locationJakkur,locationBanashankari,locationHormavu,locationSarjapur,locationRachenahalli,locationOld Madras Road,locationHennur,locationPanathur,locationAkshaya Nagar,locationJP Nagar,locationHSR Layout,locationBudigere,locationRamagondanahalli,locationBalagere,locationJigani,locationBhoganhalli,locationGottigere,locationHosur Road,location8th Phase JP Nagar,locationVittasandra,locationKengeri,locationKanakapura,locationKundalahalli,locationSubramanyapura,locationRamamurthy Nagar,locationGreen Glen Layout,locationJalahalli,locationBrookefield,locationHulimavu,locationMysore Road,locationCV Raman Nagar,locationHoramavu Agara,locationLakshminarayana Pura,locationKudlu Gate,locationTalaghattapura,locationBommasandra,locationKoramangala,location5th Phase JP Nagar,locationMahadevpura,locationChannasandra,location9th Phase JP Nagar,locationKalena Agrahara,locationKadugodi,locationOld Airport Road,locationDoddathoguru,locationKudlu,locationNagarbhavi,locationBommanahalli,locationYelahanka New Town,locationSomasundara Palya,locationChikkalasandra,locationSahakara Nagar,locationHegde Nagar,locationTumkur Road,locationAmbalipura,locationDevanahalli,locationDodda Nekkundi,locationAnanth Nagar,locationThigalarapalya,locationGubbalala,locationArdendale,locationBommasandra Industrial Area,locationKodichikkanahalli,locationAttibele,locationHoramavu Banaswadi,locationVijayanagar,locationKengeri Satellite Town,locationLingadheeranahalli,locationBabusapalaya,locationChoodasandra,locationKammasandra,locationKathriguppe,locationSingasandra,locationThubarahalli,locationSonnenahalli,locationAnekal,locationGunjur,locationAmruthahalli
0,2.0,1.0,39.07,1056.0,2,3699.810606,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2.0,3.0,62.00,1440.0,3,4305.555556,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3.0,1.0,95.00,1521.0,3,6245.890861,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
4,2.0,1.0,51.00,1200.0,2,4250.000000,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,2.0,1.0,38.00,1170.0,2,3247.863248,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13312,2.0,2.0,47.00,1262.0,2,3724.247227,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13313,2.0,1.0,57.00,1345.0,3,4237.918216,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13314,3.0,3.0,112.00,1715.0,3,6530.612245,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
13317,2.0,1.0,60.00,1141.0,2,5258.545136,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


#  Split data

In [83]:
from sklearn.model_selection import train_test_split

In [84]:
x=df17.drop(["price"],axis=1)
y=df17["price"]
print("shape of x is : ",x.shape)
print("shape of y is : ",y.shape)

shape of x is :  (9337, 115)
shape of y is :  (9337,)


In [85]:
x_train,x_test,y_train,y_test=train_test_split(x,y, test_size=0.2, random_state=51)
print("shape of x_train is : ", x_train.shape)
print("shape of x_test is : ",  x_test.shape)
print("shape of y_train is : ", y_train.shape)
print("shape of y_test is : ",  y_test.shape)

shape of x_train is :  (7469, 115)
shape of x_test is :  (1868, 115)
shape of y_train is :  (7469,)
shape of y_test is :  (1868,)


# Feature Scaling

In [86]:
from sklearn.preprocessing import StandardScaler

In [87]:
sc=StandardScaler()
sc.fit(x_train)
sc.transform(x_train)
x_train_sc=sc.transform(x_train)
x_test_sc=sc.transform(x_test)

#  Ml model building Algorithms

#  Linear Regression

In [88]:
from sklearn.metrics import accuracy_score

In [89]:
from sklearn.linear_model import LinearRegression

In [90]:
lr=LinearRegression()
lr.fit(x_train,y_train)
lr.score(x_test,y_test)

0.9493585376690146

In [91]:
lr.predict(x_test)

array([ 39.8784055 ,  78.9774651 ,  68.70230355, ..., 130.1802216 ,
        78.43837774,  51.23493119])

In [92]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [93]:
rid=Ridge()
rid.fit(x_train,y_train)
rid.score(x_test,y_test)

0.9493561970163737

In [94]:
rid.predict(x_test)

array([ 39.92685092,  78.99396172,  68.70336255, ..., 130.19042777,
        78.43702257,  51.27078125])

In [95]:
ls=Lasso()
ls.fit(x_train,y_train)
ls.score(x_test,y_test)

0.9446543992912736

In [96]:
from sklearn.metrics import mean_squared_error

In [97]:
y_pred=lr.predict(x_test)
mse=mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mse)
print(mse)
print(rmse)

132.7490625328177
11.521677939120574


# Support Vector Regression

In [98]:
from sklearn.svm import SVR
svcr=SVR()
svcr.fit(x_train,y_train)
svcr.score(x_test,y_test)


0.8309618512595291

In [99]:
from sklearn.metrics import mean_squared_error

In [100]:
y_pred_svcr=svcr.predict(x_test)
mse=mean_squared_error(y_test,y_pred_svcr)
rmse=np.sqrt(mse)
print(mse)
print(rmse)

443.1083690063705
21.050139405865476


## With Scaled Data

In [101]:
from sklearn.svm import SVR
svcer=SVR()
svcer.fit(x_train_sc,y_train)
svcer.score(x_test_sc,y_test)


0.513543623641996

In [102]:
y_pred_svcer=svcer.predict(x_test_sc)
mse=mean_squared_error(y_test,y_pred_svcer)
rmse=np.sqrt(mse)
print(mse)
print(rmse)

1275.1730489647568
35.709565230687936


# Random Forest Regressor

In [103]:
from sklearn.ensemble import RandomForestRegressor

In [104]:
rfr=RandomForestRegressor()
rfr.fit(x_train,y_train)
rfr.score(x_test,y_test)

0.9636235267001648

In [105]:
y_pred_rfr=rfr.predict(x_test)
mse=mean_squared_error(y_test,y_pred_rfr)
rmse=np.sqrt(mse)
print(rmse)

9.765014856905418


#  Decision Tree Regression

In [106]:
from sklearn.tree import DecisionTreeRegressor

In [107]:
dtr=DecisionTreeRegressor()
dtr.fit(x_train,y_train)
dtr.score(x_test,y_test)

0.9604559306016742

In [108]:
y_pred_dtr=dtr.predict(x_test)
mse=mean_squared_error(y_test,y_pred_dtr)
rmse=np.sqrt(mse)
print(rmse)

10.181301354434378


#  XGBoost Regression

In [109]:
import xgboost as xgb

In [110]:
from xgboost import XGBRegressor

In [111]:
xgb=XGBRegressor()
xgb.fit(x_train,y_train)
xgb.score(x_test,y_test)

0.9626628901206404

In [112]:
y_pred_xgb=xgb.predict(x_test)
mse=mean_squared_error(y_test,y_pred_xgb)
rmse=np.sqrt(mse)
print(rmse)

9.893112810996305


#  Hyper Parameter Tuning using RandomizedSearch

In [113]:
from sklearn.model_selection import RandomizedSearchCV

In [114]:
params={"learning_rate"    : [0.1,0.03,0.05,0.20,0.07],
       "max_depth"         : [4,6,8],
       "min_child_weight"  : [1,3,5] ,
       "gamma"             : [0.0,0.1,0.2,0.001],
       "colsample_bytree"  : [0.7,1,1.5],
       "subsample"         : [0.7,1,1.5],
       "n_estimators "     : [100,300,500],
       "objective"         : ["reg:linear"]}

In [116]:
rscv=RandomizedSearchCV(xgb,param_distributions=params,n_jobs=-1,verbose=True)

In [117]:
rscv.fit(x_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   39.2s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:   47.7s finished


Parameters: { n_estimators  } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




RandomizedSearchCV(estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                          colsample_bylevel=1,
                                          colsample_bynode=1,
                                          colsample_bytree=1, gamma=0,
                                          gpu_id=-1, importance_type='gain',
                                          interaction_constraints='',
                                          learning_rate=0.300000012,
                                          max_delta_step=0, max_depth=6,
                                          min_child_weight=1, missing=nan,
                                          monotone_constraints='()',
                                          n_estimators=100, n_jobs=0,
                                          num_parallel...
                                          scale_pos_weight=1, subsample=1,
                                          tree_method='exact',
                             

In [118]:
rscv.best_params_

{'subsample': 1,
 'objective': 'reg:linear',
 'n_estimators ': 500,
 'min_child_weight': 1,
 'max_depth': 4,
 'learning_rate': 0.2,
 'gamma': 0.2,
 'colsample_bytree': 1}

In [119]:
rscv.best_estimator_

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0.2, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.2, max_delta_step=0, max_depth=4,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_estimators =500, n_jobs=0, num_parallel_tree=1,
             objective='reg:linear', random_state=0, reg_alpha=0, reg_lambda=1,
             scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [122]:
xgb1=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0.2, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.2, max_delta_step=0, max_depth=4,
             min_child_weight=1, monotone_constraints='()',
             n_estimators=100, n_jobs=0, num_parallel_tree=1,
             objective='reg:linear', random_state=0, reg_alpha=0, reg_lambda=1,
             scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)
xgb1.fit(x_train,y_train)
xgb1.score(x_test,y_test)



0.9705248290398264

In [123]:
y_pred1=xgb1.predict(x_test)
mse=mean_squared_error(y_test,y_pred1)
rmse=np.sqrt(mse)
print(rmse)

8.790038530927397


# Save model

In [124]:
import joblib

In [126]:
joblib.dump(xgb1,"House Price Prediction Model.pkl")
House_Price_Prediction_Model=joblib.load("House Price Prediction Model.pkl")

