In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
%matplotlib inline

In [2]:
df=pd.read_csv("Real_Estate.csv",nrows=5000)
df.head()

Unnamed: 0,area_type,availability,location,size,society,total_sqft,bath,balcony,price
0,Super built-up Area,19-Dec,Electronic City Phase II,2 BHK,Coomee,1056,2.0,1.0,39.07
1,Plot Area,Ready To Move,Chikka Tirupathi,4 Bedroom,Theanmp,2600,5.0,3.0,120.0
2,Built-up Area,Ready To Move,Uttarahalli,3 BHK,,1440,2.0,3.0,62.0
3,Super built-up Area,Ready To Move,Lingadheeranahalli,3 BHK,Soiewre,1521,3.0,1.0,95.0
4,Super built-up Area,Ready To Move,Kothanur,2 BHK,,1200,2.0,1.0,51.0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   area_type     5000 non-null   object 
 1   availability  5000 non-null   object 
 2   location      4999 non-null   object 
 3   size          4995 non-null   object 
 4   society       2922 non-null   object 
 5   total_sqft    5000 non-null   object 
 6   bath          4968 non-null   float64
 7   balcony       4749 non-null   float64
 8   price         5000 non-null   float64
dtypes: float64(3), object(6)
memory usage: 351.7+ KB


In [4]:
df.columns

Index(['area_type', 'availability', 'location', 'size', 'society',
       'total_sqft', 'bath', 'balcony', 'price'],
      dtype='object')

In [5]:
df.groupby('area_type')['area_type'].count()

area_type
Built-up  Area           902
Carpet  Area              40
Plot  Area               778
Super built-up  Area    3280
Name: area_type, dtype: int64

In [6]:
#df.groupby('area_type')['area_type'].agg('count')

In [7]:
df.shape

(5000, 9)

In [8]:
df1=df.drop(['area_type', 'availability','society','balcony'],axis='columns')

In [9]:
df1.head()

Unnamed: 0,location,size,total_sqft,bath,price
0,Electronic City Phase II,2 BHK,1056,2.0,39.07
1,Chikka Tirupathi,4 Bedroom,2600,5.0,120.0
2,Uttarahalli,3 BHK,1440,2.0,62.0
3,Lingadheeranahalli,3 BHK,1521,3.0,95.0
4,Kothanur,2 BHK,1200,2.0,51.0


In [10]:
#data cleaning process starts with na values

In [11]:
df1.isnull().sum()

location       1
size           5
total_sqft     0
bath          32
price          0
dtype: int64

In [12]:
df2=df1.dropna()
df2.head()

Unnamed: 0,location,size,total_sqft,bath,price
0,Electronic City Phase II,2 BHK,1056,2.0,39.07
1,Chikka Tirupathi,4 Bedroom,2600,5.0,120.0
2,Uttarahalli,3 BHK,1440,2.0,62.0
3,Lingadheeranahalli,3 BHK,1521,3.0,95.0
4,Kothanur,2 BHK,1200,2.0,51.0


In [13]:
df2.shape

(4967, 5)

In [14]:
df2['size'].unique()

array(['2 BHK', '4 Bedroom', '3 BHK', '4 BHK', '6 Bedroom', '3 Bedroom',
       '1 BHK', '1 RK', '1 Bedroom', '8 Bedroom', '2 Bedroom',
       '7 Bedroom', '5 BHK', '7 BHK', '6 BHK', '5 Bedroom', '11 BHK',
       '9 BHK', '9 Bedroom', '27 BHK', '10 Bedroom', '11 Bedroom',
       '10 BHK', '19 BHK', '16 BHK', '43 Bedroom', '14 BHK'], dtype=object)

In [15]:
df3=df2.copy()

In [16]:
df3['bhk']=df3['size'].apply(lambda x: int(x.split(' ')[0]))

In [17]:
df3.head(2)

Unnamed: 0,location,size,total_sqft,bath,price,bhk
0,Electronic City Phase II,2 BHK,1056,2.0,39.07,2
1,Chikka Tirupathi,4 Bedroom,2600,5.0,120.0,4


In [18]:
df3['bhk'].unique()

array([ 2,  4,  3,  6,  1,  8,  7,  5, 11,  9, 27, 10, 19, 16, 43, 14],
      dtype=int64)

In [19]:
df3[df3['bhk']>20]

Unnamed: 0,location,size,total_sqft,bath,price,bhk
1718,2Electronic City Phase II,27 BHK,8000,27.0,230.0,27
4684,Munnekollal,43 Bedroom,2400,40.0,660.0,43


In [20]:
df3['total_sqft'].unique()

array(['1056', '2600', '1440', ..., '2493', '992', '3580'], dtype=object)

In [21]:
df3.groupby('total_sqft')['total_sqft'].count()

total_sqft
1                 1
1000             61
1000 - 1285       1
10000             2
1000Sq. Meter     1
                 ..
993               2
995               3
996               1
997               1
999               1
Name: total_sqft, Length: 1311, dtype: int64

In [22]:
def is_float(x):
    try:
        float(x)
    except:
        return False
    return True

In [23]:
df3[df3['total_sqft'].apply(is_float)].head(4)

Unnamed: 0,location,size,total_sqft,bath,price,bhk
0,Electronic City Phase II,2 BHK,1056,2.0,39.07,2
1,Chikka Tirupathi,4 Bedroom,2600,5.0,120.0,4
2,Uttarahalli,3 BHK,1440,2.0,62.0,3
3,Lingadheeranahalli,3 BHK,1521,3.0,95.0,3


In [24]:
df3[~df3['total_sqft'].apply(is_float)].head(4)

Unnamed: 0,location,size,total_sqft,bath,price,bhk
30,Yelahanka,4 BHK,2100 - 2850,4.0,186.0,4
122,Hebbal,4 BHK,3067 - 8156,4.0,477.0,4
137,8th Phase JP Nagar,2 BHK,1042 - 1105,2.0,54.005,2
165,Sarjapur,2 BHK,1145 - 1340,2.0,43.49,2


In [25]:
def to_num(x):
    tokens=x.split("-")
    if len(tokens)==2:
        return (float(tokens[0])+float(tokens[1]))/2
    try:
        return float(x)
    except:
        return None
        

In [26]:
to_num('67')

67.0

In [27]:
to_num('23-45')

34.0

In [28]:
to_num("12st")

In [29]:
df4=df3.copy()

In [30]:
df4['total_sqft']=df4['total_sqft'].apply(to_num)

In [31]:
df4.head()

Unnamed: 0,location,size,total_sqft,bath,price,bhk
0,Electronic City Phase II,2 BHK,1056.0,2.0,39.07,2
1,Chikka Tirupathi,4 Bedroom,2600.0,5.0,120.0,4
2,Uttarahalli,3 BHK,1440.0,2.0,62.0,3
3,Lingadheeranahalli,3 BHK,1521.0,3.0,95.0,3
4,Kothanur,2 BHK,1200.0,2.0,51.0,2


In [32]:
df4.iloc[30]

location      Yelahanka
size              4 BHK
total_sqft       2475.0
bath                4.0
price             186.0
bhk                   4
Name: 30, dtype: object

In [33]:
df4.loc[30]

location      Yelahanka
size              4 BHK
total_sqft       2475.0
bath                4.0
price             186.0
bhk                   4
Name: 30, dtype: object

In [34]:
df4=df4.drop("size",axis='columns')

In [35]:
df4.head()

Unnamed: 0,location,total_sqft,bath,price,bhk
0,Electronic City Phase II,1056.0,2.0,39.07,2
1,Chikka Tirupathi,2600.0,5.0,120.0,4
2,Uttarahalli,1440.0,2.0,62.0,3
3,Lingadheeranahalli,1521.0,3.0,95.0,3
4,Kothanur,1200.0,2.0,51.0,2


# Feature Engineering &  Dimensionality reduction Techniques

In [36]:
#It helps for outlier detection and removal

In [37]:
df5=df4.copy()

In [38]:
df5['price_per_sqft']=df5['price']*100000/df5['total_sqft']

In [39]:
df5.head()

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
0,Electronic City Phase II,1056.0,2.0,39.07,2,3699.810606
1,Chikka Tirupathi,2600.0,5.0,120.0,4,4615.384615
2,Uttarahalli,1440.0,2.0,62.0,3,4305.555556
3,Lingadheeranahalli,1521.0,3.0,95.0,3,6245.890861
4,Kothanur,1200.0,2.0,51.0,2,4250.0


In [40]:
df5.location.unique()

array(['Electronic City Phase II', 'Chikka Tirupathi', 'Uttarahalli',
       'Lingadheeranahalli', 'Kothanur', 'Whitefield', 'Old Airport Road',
       'Rajaji Nagar', 'Marathahalli', 'Gandhi Bazar',
       '7th Phase JP Nagar', 'Gottigere', 'Sarjapur', 'Mysore Road',
       'Bisuvanahalli', 'Raja Rajeshwari Nagar', 'Ramakrishnappa Layout',
       'Manayata Tech Park', 'Kengeri', 'Binny Pete', 'Thanisandra',
       'Bellandur', ' Thanisandra', 'Mangammanapalya', 'Electronic City',
       'Ramagondanahalli', 'Yelahanka', 'Hebbal', 'Kasturi Nagar',
       'Kanakpura Road', 'Electronics City Phase 1', 'Kundalahalli',
       'Chikkalasandra', 'Murugeshpalya', 'Sarjapur  Road', 'Ganga Nagar',
       'HSR Layout', 'Doddathoguru', 'KR Puram', 'Himagiri Meadows',
       'Adarsh Nagar', 'Bhoganhalli', 'Lakshminarayana Pura',
       'Begur Road', 'Govindaraja Nagar Ward', 'Tharabanahalli',
       'Varthur', 'Bommanahalli', 'Gunjur', 'Devarachikkanahalli',
       'Double Road', 'Hegde Nagar', 'Ha

In [41]:
len(df5.location.unique())

891

In [42]:
df5.location=df5.location.apply(lambda x: x.strip())
df5.head()

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
0,Electronic City Phase II,1056.0,2.0,39.07,2,3699.810606
1,Chikka Tirupathi,2600.0,5.0,120.0,4,4615.384615
2,Uttarahalli,1440.0,2.0,62.0,3,4305.555556
3,Lingadheeranahalli,1521.0,3.0,95.0,3,6245.890861
4,Kothanur,1200.0,2.0,51.0,2,4250.0


In [43]:
location_stats=df5.groupby('location')['location'].count()

In [44]:
location_stats

location
1 Annasandrapalya       1
1 Giri Nagar            1
1 Ramamurthy Nagar      1
1Channasandra           1
1Hanuman Nagar          1
                       ..
sankeswari              1
sapthagiri Layout       1
singapura paradise      1
white field,kadugodi    1
whitefiled              1
Name: location, Length: 885, dtype: int64

In [45]:
#location_stats1=df5.groupby('location')['location'].agg('count')
#location_stats1

In [46]:
location_stats=df5.groupby('location')['location'].count().sort_values(ascending=False)

In [47]:
location_stats

location
Whitefield              188
Sarjapur  Road          127
Electronic City         111
Kanakpura Road          106
Marathahalli             84
                       ... 
Jyothi Nagar              1
Junnasandra               1
Jp nagar 8th Phase .      1
Jinkethimmanahalli        1
whitefiled                1
Name: location, Length: 885, dtype: int64

In [48]:
len(location_stats[location_stats<=10])

769

In [49]:
location_less_than_10=location_stats[location_stats<=10]
location_less_than_10

location
Battarahalli            10
Kambipura               10
Malleshpalya            10
Iblur Village           10
Channasandra            10
                        ..
Jyothi Nagar             1
Junnasandra              1
Jp nagar 8th Phase .     1
Jinkethimmanahalli       1
whitefiled               1
Name: location, Length: 769, dtype: int64

In [50]:
len(location_less_than_10)

769

In [51]:
len(df5.location.unique())

885

In [52]:
df5.location=df5.location.apply(lambda x: 'other' if x in location_less_than_10 else x)

In [53]:
df5.head()

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
0,Electronic City Phase II,1056.0,2.0,39.07,2,3699.810606
1,other,2600.0,5.0,120.0,4,4615.384615
2,Uttarahalli,1440.0,2.0,62.0,3,4305.555556
3,Lingadheeranahalli,1521.0,3.0,95.0,3,6245.890861
4,Kothanur,1200.0,2.0,51.0,2,4250.0


In [54]:
df5.groupby('location')['location'].count()

location
5th Phase JP Nagar      13
7th Phase JP Nagar      64
8th Phase JP Nagar      17
9th Phase JP Nagar      14
Akshaya Nagar           27
                      ... 
Yelachenahalli          11
Yelahanka               74
Yelahanka New Town      11
Yeshwanthpur            34
other                 1847
Name: location, Length: 117, dtype: int64

In [55]:
len(df5[df5['total_sqft']/df5['bhk']<300])

281

In [56]:
df5[df5['total_sqft']/df5['bhk']<300]

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
9,other,1020.0,6.0,370.0,6,3.627451e+04
45,HSR Layout,600.0,9.0,200.0,8,3.333333e+04
58,other,1407.0,4.0,150.0,6,1.066098e+04
68,other,1350.0,7.0,85.0,8,6.296296e+03
70,other,500.0,3.0,100.0,3,2.000000e+04
...,...,...,...,...,...,...
4916,other,1250.0,15.0,125.0,14,1.000000e+04
4932,Banashankari,500.0,5.0,92.0,5,1.840000e+04
4936,Anandapura,640.0,3.0,45.0,3,7.031250e+03
4957,other,540.0,3.0,60.0,3,1.111111e+04


In [57]:
df5[~(df5['total_sqft']/df5['bhk']<300)]

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
0,Electronic City Phase II,1056.0,2.0,39.07,2,3699.810606
1,other,2600.0,5.0,120.00,4,4615.384615
2,Uttarahalli,1440.0,2.0,62.00,3,4305.555556
3,Lingadheeranahalli,1521.0,3.0,95.00,3,6245.890861
4,Kothanur,1200.0,2.0,51.00,2,4250.000000
...,...,...,...,...,...,...
4994,Harlur,1174.0,2.0,75.00,2,6388.415673
4996,other,1100.0,2.0,45.00,2,4090.909091
4997,Raja Rajeshwari Nagar,1095.0,2.0,38.33,2,3500.456621
4998,Banashankari,3580.0,3.0,411.00,3,11480.446927


In [58]:
df6=df5[~(df5['total_sqft']/df5['bhk']<300)]
df6.shape

(4686, 6)

In [59]:
df6.price_per_sqft.describe()

count     4670.000000
mean      6230.173775
std       3475.398991
min        267.829813
25%       4236.636166
50%       5333.979955
75%       6947.591474
max      37500.000000
Name: price_per_sqft, dtype: float64

In [60]:
df6.head()

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
0,Electronic City Phase II,1056.0,2.0,39.07,2,3699.810606
1,other,2600.0,5.0,120.0,4,4615.384615
2,Uttarahalli,1440.0,2.0,62.0,3,4305.555556
3,Lingadheeranahalli,1521.0,3.0,95.0,3,6245.890861
4,Kothanur,1200.0,2.0,51.0,2,4250.0


In [61]:
df6['bhk'].unique()

array([ 2,  4,  3,  1,  8,  6,  5,  7, 11,  9, 10, 16], dtype=int64)

In [62]:
def bhk(b,s):
    if s/b>=30:
        return s

In [63]:
len(df6[df6['bhk']>=10])

3

In [64]:
df6[df6['bhk']>=10]

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
459,other,5000.0,9.0,360.0,11,7200.0
3096,other,12000.0,12.0,525.0,10,4375.0
3609,other,10000.0,16.0,550.0,16,5500.0


In [65]:
df7=df6.copy()

In [66]:
df7[~(df7['total_sqft']/df7['bhk']<300) & (df7['bhk']>10)]

Unnamed: 0,location,total_sqft,bath,price,bhk,price_per_sqft
459,other,5000.0,9.0,360.0,11,7200.0
3609,other,10000.0,16.0,550.0,16,5500.0


In [67]:
df8=df7[df7['bhk']<df7['bhk']+2]

In [68]:
len(df8)

4686

In [69]:
df9=df8.drop('price_per_sqft','columns')

  df9=df8.drop('price_per_sqft','columns')


In [70]:
df9.head()

Unnamed: 0,location,total_sqft,bath,price,bhk
0,Electronic City Phase II,1056.0,2.0,39.07,2
1,other,2600.0,5.0,120.0,4
2,Uttarahalli,1440.0,2.0,62.0,3
3,Lingadheeranahalli,1521.0,3.0,95.0,3
4,Kothanur,1200.0,2.0,51.0,2


In [71]:
dummies=pd.get_dummies(df9['location'],'columns',drop_first=True)
dummies.head()

Unnamed: 0,columns_7th Phase JP Nagar,columns_8th Phase JP Nagar,columns_9th Phase JP Nagar,columns_Akshaya Nagar,columns_Ambalipura,columns_Ambedkar Nagar,columns_Anandapura,columns_Anekal,columns_Ardendale,columns_Attibele,...,columns_Varthur,columns_Vidyaranyapura,columns_Vijayanagar,columns_Vittasandra,columns_Whitefield,columns_Yelachenahalli,columns_Yelahanka,columns_Yelahanka New Town,columns_Yeshwanthpur,columns_other
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [72]:
dummies.columns

Index(['columns_7th Phase JP Nagar', 'columns_8th Phase JP Nagar',
       'columns_9th Phase JP Nagar', 'columns_Akshaya Nagar',
       'columns_Ambalipura', 'columns_Ambedkar Nagar', 'columns_Anandapura',
       'columns_Anekal', 'columns_Ardendale', 'columns_Attibele',
       ...
       'columns_Varthur', 'columns_Vidyaranyapura', 'columns_Vijayanagar',
       'columns_Vittasandra', 'columns_Whitefield', 'columns_Yelachenahalli',
       'columns_Yelahanka', 'columns_Yelahanka New Town',
       'columns_Yeshwanthpur', 'columns_other'],
      dtype='object', length=116)

In [73]:
df10=pd.concat([df9,dummies],'columns')

  df10=pd.concat([df9,dummies],'columns')


In [74]:
df10.head()

Unnamed: 0,location,total_sqft,bath,price,bhk,columns_7th Phase JP Nagar,columns_8th Phase JP Nagar,columns_9th Phase JP Nagar,columns_Akshaya Nagar,columns_Ambalipura,...,columns_Varthur,columns_Vidyaranyapura,columns_Vijayanagar,columns_Vittasandra,columns_Whitefield,columns_Yelachenahalli,columns_Yelahanka,columns_Yelahanka New Town,columns_Yeshwanthpur,columns_other
0,Electronic City Phase II,1056.0,2.0,39.07,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,other,2600.0,5.0,120.0,4,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,Uttarahalli,1440.0,2.0,62.0,3,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Lingadheeranahalli,1521.0,3.0,95.0,3,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Kothanur,1200.0,2.0,51.0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [75]:
df10=df10.drop('location','columns')

  df10=df10.drop('location','columns')


In [76]:
df10.head()

Unnamed: 0,total_sqft,bath,price,bhk,columns_7th Phase JP Nagar,columns_8th Phase JP Nagar,columns_9th Phase JP Nagar,columns_Akshaya Nagar,columns_Ambalipura,columns_Ambedkar Nagar,...,columns_Varthur,columns_Vidyaranyapura,columns_Vijayanagar,columns_Vittasandra,columns_Whitefield,columns_Yelachenahalli,columns_Yelahanka,columns_Yelahanka New Town,columns_Yeshwanthpur,columns_other
0,1056.0,2.0,39.07,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2600.0,5.0,120.0,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1440.0,2.0,62.0,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1521.0,3.0,95.0,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1200.0,2.0,51.0,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [77]:
df10.shape

(4686, 120)

In [78]:
df10.isnull().sum()

total_sqft                    16
bath                           0
price                          0
bhk                            0
columns_7th Phase JP Nagar     0
                              ..
columns_Yelachenahalli         0
columns_Yelahanka              0
columns_Yelahanka New Town     0
columns_Yeshwanthpur           0
columns_other                  0
Length: 120, dtype: int64

In [79]:
df11=df10.dropna()
df11.head()

Unnamed: 0,total_sqft,bath,price,bhk,columns_7th Phase JP Nagar,columns_8th Phase JP Nagar,columns_9th Phase JP Nagar,columns_Akshaya Nagar,columns_Ambalipura,columns_Ambedkar Nagar,...,columns_Varthur,columns_Vidyaranyapura,columns_Vijayanagar,columns_Vittasandra,columns_Whitefield,columns_Yelachenahalli,columns_Yelahanka,columns_Yelahanka New Town,columns_Yeshwanthpur,columns_other
0,1056.0,2.0,39.07,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2600.0,5.0,120.0,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1440.0,2.0,62.0,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1521.0,3.0,95.0,3,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1200.0,2.0,51.0,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [80]:
df11.isna().sum()

total_sqft                    0
bath                          0
price                         0
bhk                           0
columns_7th Phase JP Nagar    0
                             ..
columns_Yelachenahalli        0
columns_Yelahanka             0
columns_Yelahanka New Town    0
columns_Yeshwanthpur          0
columns_other                 0
Length: 120, dtype: int64

In [81]:
X=df11.drop('price','columns')
X.ndim

  X=df11.drop('price','columns')


2

In [82]:
X.head()

Unnamed: 0,total_sqft,bath,bhk,columns_7th Phase JP Nagar,columns_8th Phase JP Nagar,columns_9th Phase JP Nagar,columns_Akshaya Nagar,columns_Ambalipura,columns_Ambedkar Nagar,columns_Anandapura,...,columns_Varthur,columns_Vidyaranyapura,columns_Vijayanagar,columns_Vittasandra,columns_Whitefield,columns_Yelachenahalli,columns_Yelahanka,columns_Yelahanka New Town,columns_Yeshwanthpur,columns_other
0,1056.0,2.0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2600.0,5.0,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,1440.0,2.0,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1521.0,3.0,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1200.0,2.0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [83]:
y=df11.price
y.head()

0     39.07
1    120.00
2     62.00
3     95.00
4     51.00
Name: price, dtype: float64

In [84]:
y.ndim

1

In [85]:
X_train, X_test, y_train, y_test= train_test_split(X, y, test_size=0.2, random_state=10)

In [86]:
X_train[:4]

Unnamed: 0,total_sqft,bath,bhk,columns_7th Phase JP Nagar,columns_8th Phase JP Nagar,columns_9th Phase JP Nagar,columns_Akshaya Nagar,columns_Ambalipura,columns_Ambedkar Nagar,columns_Anandapura,...,columns_Varthur,columns_Vidyaranyapura,columns_Vijayanagar,columns_Vittasandra,columns_Whitefield,columns_Yelachenahalli,columns_Yelahanka,columns_Yelahanka New Town,columns_Yeshwanthpur,columns_other
3765,1090.0,2.0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4604,416.0,1.0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2013,1980.0,3.0,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3568,3259.0,3.0,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [87]:
scaler_X=StandardScaler()
scaler_y=StandardScaler()

In [88]:
X_train=scaler_X.fit_transform(X_train)
X_test=scaler_X.transform(X_test)

In [89]:
y_train=y_train.values.reshape(len(y_train),1)
y_test=y_test.values.reshape(len(y_test),1)

In [90]:
y_train=scaler_y.fit_transform(y_train)

In [91]:
y_test=scaler_y.transform(y_test)

In [92]:
X_train[:3]

array([[-0.44811069, -0.51542582, -0.66234426, -0.11764307, -0.06133041,
        -0.05180582, -0.07518483, -0.04632411, -0.0543417 , -0.0401071 ,
        -0.04632411, -0.05909152, -0.05676567, -0.04914072, -0.05909152,
        -0.0714958 , -0.10270878, -0.05676567, -0.08689787, -0.07518483,
        -0.05676567, -0.04632411, -0.07696449, -0.04632411, -0.05909152,
        -0.05909152, -0.06133041, -0.04914072, -0.09583436, -0.05180582,
        -0.05180582, -0.0543417 , -0.05676567, -0.05676567,  6.32889762,
        -0.0914717 , -0.07870484, -0.0543417 , -0.06558258, -0.05180582,
        -0.05180582, -0.11287418, -0.09294788, -0.12883571, -0.05180582,
        -0.0543417 , -0.04632411, -0.10662921, -0.08371431, -0.04914072,
        -0.05180582, -0.06761007, -0.0714958 , -0.04914072, -0.06349153,
        -0.06957954, -0.05180582, -0.06349153, -0.06957954, -0.05676567,
        -0.06761007, -0.07336301, -0.0543417 , -0.07336301, -0.05180582,
        -0.04914072, -0.0543417 , -0.0714958 , -0.1

In [93]:
model_params={
    'Linear Regression':{
        'model':LinearRegression(),
        'params':{
            'n_jobs':[1,5,10]
        }
    },
    
    'Decision Tree':{
        'model':DecisionTreeRegressor(),
        'params':{
            'criterion':['squared_error','friedman_mse','absolute_error','poisson'],
            'splitter':['best','random']  
        }
    },
    'Random Forest':{
        'model': RandomForestRegressor(),
        'params':{
            'n_estimators':[10,50,100],
            'criterion':['squared_error', 'absolute_error','poisson'],
            'max_features':['sqrt' ,'log2']
        }
    },
    'SVR':{
        'model':SVR(),
        'params':{
            'kernel':['linear','poly', 'rbf', 'sigmoid'],
            'gamma':['scale','auto'],
            'C':[1,10,20]
        }
    }
}

In [94]:
scores=[]
for model_name,model_para in model_params.items():
    regressor=GridSearchCV(model_para['model'],model_para['params'],cv=5,return_train_score=False)
    regressor.fit(X_train,y_train)
    scores.append({
        'model_name':model_name,
        'best_parameter':regressor.best_params_,
        'best_score':regressor.best_score_
    })

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 1252, in fit
    super().fit(
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 351, in fit
    criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
KeyError: 'squared_error'

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 1252, in fit
    super().fit(
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 351, in fit
    criterion = CRITERIA_REG[self.criterion](self.n_outputs_,
KeyError: 'squared_erro

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backen

  estimator.fit(X_train, y_train, **fit_params)
Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\ana

  estimator.fit(X_train, y_train, **fit_params)
Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\ana

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backen

  estimator.fit(X_train, y_train, **fit_params)
Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\ana

  estimator.fit(X_train, y_train, **fit_params)
Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\ana

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 598, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Admin\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 1043, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\parallel.py", line 779, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\Admin\anaconda3\lib\site-packages\joblib\_parallel_backen

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

        nan        nan        nan        nan        nan        nan
 0.06404483 0.03208547 0.03798003 0.03227207 0.01881326 0.02067837]
  self.best_estimator_.fit(X, y, **fit_params)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  retu

  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)
  return f(*args, **kwargs)


In [95]:
print(scores)

[{'model_name': 'Linear Regression', 'best_parameter': {'n_jobs': 1}, 'best_score': 0.3682920205595125}, {'model_name': 'Decision Tree', 'best_parameter': {'criterion': 'friedman_mse', 'splitter': 'random'}, 'best_score': 0.3990688694729413}, {'model_name': 'Random Forest', 'best_parameter': {'criterion': 'poisson', 'max_features': 'sqrt', 'n_estimators': 10}, 'best_score': 0.06404482629634112}, {'model_name': 'SVR', 'best_parameter': {'C': 20, 'gamma': 'auto', 'kernel': 'rbf'}, 'best_score': 0.6182505779909684}]


In [96]:
data_fr=pd.DataFrame(scores,columns=['model_name','best_parameter','best_score'])
data_fr

Unnamed: 0,model_name,best_parameter,best_score
0,Linear Regression,{'n_jobs': 1},0.368292
1,Decision Tree,"{'criterion': 'friedman_mse', 'splitter': 'ran...",0.399069
2,Random Forest,"{'criterion': 'poisson', 'max_features': 'sqrt...",0.064045
3,SVR,"{'C': 20, 'gamma': 'auto', 'kernel': 'rbf'}",0.618251


In [97]:
'''
from sklearn.linear_model import LinearRegression
lin_model=LinearRegression()
lin_model.fit(X_train, y_train)
lin_model.score(X_test,y_test)'''

'\nfrom sklearn.linear_model import LinearRegression\nlin_model=LinearRegression()\nlin_model.fit(X_train, y_train)\nlin_model.score(X_test,y_test)'

In [98]:
'''from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

cv=ShuffleSplit(n_splits=5, random_state=0, test_size=0.2)
cross_val_score(LinearRegression(), X, y, cv=cv)'''

'from sklearn.model_selection import ShuffleSplit\nfrom sklearn.model_selection import cross_val_score\n\ncv=ShuffleSplit(n_splits=5, random_state=0, test_size=0.2)\ncross_val_score(LinearRegression(), X, y, cv=cv)'