### Import Data

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
os.chdir('C:/Users/ASUS/Documents/Datasets')

In [4]:
df = pd.read_csv('training_set.csv')
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


### Missing Value

In [5]:
df.isna().sum()

Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64

In [6]:
m = df.isna().sum()
m[m>0]

LotFrontage      259
Alley           1369
MasVnrType       872
MasVnrArea         8
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinType2      38
Electrical         1
FireplaceQu      690
GarageType        81
GarageYrBlt       81
GarageFinish      81
GarageQual        81
GarageCond        81
PoolQC          1453
Fence           1179
MiscFeature     1406
dtype: int64

### Define Target 

In [7]:
# independent variable
x = df.drop(['SalePrice'], axis = 1)

# dependent variable 
y = df['SalePrice']

In [8]:
x.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,2,2008,WD,Normal
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,5,2007,WD,Normal
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,9,2008,WD,Normal
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,2,2006,WD,Abnorml
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,12,2008,WD,Normal


In [9]:
y.head()

0    208500
1    181500
2    223500
3    140000
4    250000
Name: SalePrice, dtype: int64

### Separate Data in Continuous and Categorical

In [10]:
cat=[]
con=[]

for i in x.columns:
    if x[i].dtypes=='object':
        cat.append(i)
    else:
        con.append(i)

In [11]:
cat

['MSZoning',
 'Street',
 'Alley',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'PoolQC',
 'Fence',
 'MiscFeature',
 'SaleType',
 'SaleCondition']

In [12]:
con

['Id',
 'MSSubClass',
 'LotFrontage',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageYrBlt',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'MiscVal',
 'MoSold',
 'YrSold']

# Preprocessing

In [13]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.compose import ColumnTransformer

### Numerical Pipeline

In [14]:
num_pipe=Pipeline(steps=[('impute',SimpleImputer(strategy='mean')),('scaler',StandardScaler())])
num_pipe

### Categorical Pipeline

In [15]:
cat_pipe=Pipeline(steps=[('impute',SimpleImputer(strategy='constant',fill_value='not_available')),('encode',OrdinalEncoder())])
cat_pipe

### Column Transformer

In [16]:
pre=ColumnTransformer([('num_pipe',num_pipe,con),('cat_pipe',cat_pipe,cat)])
pre

### Fit Transform

In [17]:
x1=pd.DataFrame(pre.fit_transform(x),columns=pre.get_feature_names_out())
x1.head()

Unnamed: 0,num_pipe__Id,num_pipe__MSSubClass,num_pipe__LotFrontage,num_pipe__LotArea,num_pipe__OverallQual,num_pipe__OverallCond,num_pipe__YearBuilt,num_pipe__YearRemodAdd,num_pipe__MasVnrArea,num_pipe__BsmtFinSF1,...,cat_pipe__GarageType,cat_pipe__GarageFinish,cat_pipe__GarageQual,cat_pipe__GarageCond,cat_pipe__PavedDrive,cat_pipe__PoolQC,cat_pipe__Fence,cat_pipe__MiscFeature,cat_pipe__SaleType,cat_pipe__SaleCondition
0,-1.730865,0.073375,-0.229372,-0.207142,0.651479,-0.5172,1.050994,0.878668,0.511418,0.575425,...,1.0,1.0,4.0,4.0,2.0,3.0,4.0,4.0,8.0,4.0
1,-1.728492,-0.872563,0.451936,-0.091886,-0.071836,2.179628,0.156734,-0.429577,-0.57441,1.171992,...,1.0,1.0,4.0,4.0,2.0,3.0,4.0,4.0,8.0,4.0
2,-1.72612,0.073375,-0.09311,0.07348,0.651479,-0.5172,0.984752,0.830215,0.32306,0.092907,...,1.0,1.0,4.0,4.0,2.0,3.0,4.0,4.0,8.0,4.0
3,-1.723747,0.309859,-0.456474,-0.096897,0.651479,-0.5172,-1.863632,-0.720298,-0.57441,-0.499274,...,5.0,2.0,4.0,4.0,2.0,3.0,4.0,4.0,8.0,0.0
4,-1.721374,0.073375,0.633618,0.375148,1.374795,-0.5172,0.951632,0.733308,1.36457,0.463568,...,1.0,1.0,4.0,4.0,2.0,3.0,4.0,4.0,8.0,4.0


## Feature Selection

In [18]:
from sklearn.feature_selection import SequentialFeatureSelector 
from sklearn.linear_model import LinearRegression

In [19]:
lr = LinearRegression()

In [20]:
sfs = SequentialFeatureSelector(estimator=lr, n_features_to_select = 40)
sfs.fit(x1, y)

In [21]:
cols = sfs.get_feature_names_out()
cols

array(['num_pipe__Id', 'num_pipe__MSSubClass', 'num_pipe__LotArea',
       'num_pipe__OverallQual', 'num_pipe__OverallCond',
       'num_pipe__YearBuilt', 'num_pipe__MasVnrArea',
       'num_pipe__BsmtFinSF1', 'num_pipe__LowQualFinSF',
       'num_pipe__GrLivArea', 'num_pipe__BsmtFullBath',
       'num_pipe__KitchenAbvGr', 'num_pipe__TotRmsAbvGrd',
       'num_pipe__Fireplaces', 'num_pipe__GarageCars',
       'num_pipe__WoodDeckSF', 'num_pipe__ScreenPorch',
       'num_pipe__PoolArea', 'num_pipe__YrSold', 'cat_pipe__Alley',
       'cat_pipe__LandContour', 'cat_pipe__Utilities',
       'cat_pipe__Neighborhood', 'cat_pipe__BldgType',
       'cat_pipe__HouseStyle', 'cat_pipe__RoofStyle',
       'cat_pipe__RoofMatl', 'cat_pipe__Exterior1st',
       'cat_pipe__MasVnrType', 'cat_pipe__ExterQual',
       'cat_pipe__BsmtQual', 'cat_pipe__BsmtCond',
       'cat_pipe__BsmtExposure', 'cat_pipe__HeatingQC',
       'cat_pipe__KitchenQual', 'cat_pipe__Functional',
       'cat_pipe__FireplaceQu', 'ca

In [22]:
x2 = pd.DataFrame(sfs.fit_transform(x1, y), columns=cols)
x2

Unnamed: 0,num_pipe__Id,num_pipe__MSSubClass,num_pipe__LotArea,num_pipe__OverallQual,num_pipe__OverallCond,num_pipe__YearBuilt,num_pipe__MasVnrArea,num_pipe__BsmtFinSF1,num_pipe__LowQualFinSF,num_pipe__GrLivArea,...,cat_pipe__BsmtQual,cat_pipe__BsmtCond,cat_pipe__BsmtExposure,cat_pipe__HeatingQC,cat_pipe__KitchenQual,cat_pipe__Functional,cat_pipe__FireplaceQu,cat_pipe__GarageCond,cat_pipe__Fence,cat_pipe__MiscFeature
0,-1.730865,0.073375,-0.207142,0.651479,-0.517200,1.050994,0.511418,0.575425,-0.120242,0.370333,...,2.0,3.0,3.0,0.0,2.0,6.0,5.0,4.0,4.0,4.0
1,-1.728492,-0.872563,-0.091886,-0.071836,2.179628,0.156734,-0.574410,1.171992,-0.120242,-0.482512,...,2.0,3.0,1.0,0.0,3.0,6.0,4.0,4.0,4.0,4.0
2,-1.726120,0.073375,0.073480,0.651479,-0.517200,0.984752,0.323060,0.092907,-0.120242,0.515013,...,2.0,3.0,2.0,0.0,2.0,6.0,4.0,4.0,4.0,4.0
3,-1.723747,0.309859,-0.096897,0.651479,-0.517200,-1.863632,-0.574410,-0.499274,-0.120242,0.383659,...,3.0,1.0,3.0,2.0,2.0,6.0,2.0,4.0,4.0,4.0
4,-1.721374,0.073375,0.375148,1.374795,-0.517200,0.951632,1.364570,0.463568,-0.120242,1.299326,...,2.0,3.0,0.0,0.0,2.0,6.0,4.0,4.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1.721374,0.073375,-0.260560,-0.071836,-0.517200,0.918511,-0.574410,-0.973018,-0.120242,0.250402,...,2.0,3.0,3.0,0.0,3.0,6.0,4.0,4.0,4.0,4.0
1456,1.723747,-0.872563,0.266407,-0.071836,0.381743,0.222975,0.084843,0.759659,-0.120242,1.061367,...,2.0,3.0,3.0,4.0,3.0,2.0,4.0,4.0,2.0,4.0
1457,1.726120,0.309859,-0.147810,0.651479,3.078570,-1.002492,-0.574410,-0.369871,-0.120242,1.569647,...,3.0,1.0,3.0,0.0,2.0,6.0,2.0,4.0,0.0,2.0
1458,1.728492,-0.872563,-0.080160,-0.795151,0.381743,-0.704406,-0.574410,-0.865548,-0.120242,-0.832788,...,3.0,3.0,2.0,2.0,2.0,6.0,5.0,4.0,4.0,4.0


### Modifying Column names

In [23]:
imp_cols=[]

for i in cols:
    s=i.split('__')[1]
    imp_cols.append(s)

In [24]:
'num_pipe__Id'.split('__')[1]

'Id'

In [25]:
imp_cols

['Id',
 'MSSubClass',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'MasVnrArea',
 'BsmtFinSF1',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageCars',
 'WoodDeckSF',
 'ScreenPorch',
 'PoolArea',
 'YrSold',
 'Alley',
 'LandContour',
 'Utilities',
 'Neighborhood',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'MasVnrType',
 'ExterQual',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'HeatingQC',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageCond',
 'Fence',
 'MiscFeature']

In [26]:
x3 = pd.DataFrame(sfs.fit_transform(x1, y), columns=imp_cols)
x3

Unnamed: 0,Id,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,MasVnrArea,BsmtFinSF1,LowQualFinSF,GrLivArea,...,BsmtQual,BsmtCond,BsmtExposure,HeatingQC,KitchenQual,Functional,FireplaceQu,GarageCond,Fence,MiscFeature
0,-1.730865,0.073375,-0.207142,0.651479,-0.517200,1.050994,0.511418,0.575425,-0.120242,0.370333,...,2.0,3.0,3.0,0.0,2.0,6.0,5.0,4.0,4.0,4.0
1,-1.728492,-0.872563,-0.091886,-0.071836,2.179628,0.156734,-0.574410,1.171992,-0.120242,-0.482512,...,2.0,3.0,1.0,0.0,3.0,6.0,4.0,4.0,4.0,4.0
2,-1.726120,0.073375,0.073480,0.651479,-0.517200,0.984752,0.323060,0.092907,-0.120242,0.515013,...,2.0,3.0,2.0,0.0,2.0,6.0,4.0,4.0,4.0,4.0
3,-1.723747,0.309859,-0.096897,0.651479,-0.517200,-1.863632,-0.574410,-0.499274,-0.120242,0.383659,...,3.0,1.0,3.0,2.0,2.0,6.0,2.0,4.0,4.0,4.0
4,-1.721374,0.073375,0.375148,1.374795,-0.517200,0.951632,1.364570,0.463568,-0.120242,1.299326,...,2.0,3.0,0.0,0.0,2.0,6.0,4.0,4.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1.721374,0.073375,-0.260560,-0.071836,-0.517200,0.918511,-0.574410,-0.973018,-0.120242,0.250402,...,2.0,3.0,3.0,0.0,3.0,6.0,4.0,4.0,4.0,4.0
1456,1.723747,-0.872563,0.266407,-0.071836,0.381743,0.222975,0.084843,0.759659,-0.120242,1.061367,...,2.0,3.0,3.0,4.0,3.0,2.0,4.0,4.0,2.0,4.0
1457,1.726120,0.309859,-0.147810,0.651479,3.078570,-1.002492,-0.574410,-0.369871,-0.120242,1.569647,...,3.0,1.0,3.0,0.0,2.0,6.0,2.0,4.0,0.0,2.0
1458,1.728492,-0.872563,-0.080160,-0.795151,0.381743,-0.704406,-0.574410,-0.865548,-0.120242,-0.832788,...,3.0,3.0,2.0,2.0,2.0,6.0,5.0,4.0,4.0,4.0


## Split Data

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
x_train, x_test, y_train, y_test = train_test_split(x3, y, test_size=0.2, random_state=42)

In [29]:
x_train

Unnamed: 0,Id,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,MasVnrArea,BsmtFinSF1,LowQualFinSF,GrLivArea,...,BsmtQual,BsmtCond,BsmtExposure,HeatingQC,KitchenQual,Functional,FireplaceQu,GarageCond,Fence,MiscFeature
254,-1.128206,-0.872563,-0.212153,-0.795151,0.381743,-0.472560,-0.574410,1.049169,-0.120242,-0.383521,...,3.0,3.0,3.0,4.0,3.0,6.0,5.0,4.0,4.0,4.0
1066,0.798404,0.073375,-0.268578,-0.071836,1.280685,0.719786,-0.574410,-0.973018,-0.120242,0.105723,...,2.0,3.0,3.0,2.0,3.0,6.0,4.0,4.0,4.0,4.0
638,-0.217100,-0.636078,-0.174369,-0.795151,1.280685,-2.029235,-0.574410,-0.973018,-0.120242,-1.369623,...,1.0,3.0,3.0,2.0,3.0,6.0,5.0,5.0,2.0,4.0
799,0.164901,-0.163109,-0.332419,-0.795151,1.280685,-1.134975,0.821655,0.274948,-0.120242,0.480746,...,2.0,3.0,3.0,0.0,2.0,6.0,4.0,4.0,2.0,4.0
380,-0.829249,-0.163109,-0.552908,-0.795151,0.381743,-1.565545,-0.574410,-0.494887,-0.120242,0.334164,...,3.0,3.0,3.0,4.0,2.0,6.0,2.0,4.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,0.867212,-0.872563,-0.120249,-0.071836,-0.517200,1.150356,-0.574410,-0.920380,-0.120242,-0.383521,...,2.0,3.0,3.0,2.0,2.0,6.0,2.0,4.0,4.0,4.0
1130,0.950255,-0.163109,-0.271885,-1.518467,-2.315085,-1.433062,-0.574410,0.391191,-0.120242,0.886229,...,3.0,3.0,3.0,4.0,2.0,3.0,4.0,4.0,2.0,4.0
1294,1.339374,-0.872563,-0.235003,-0.795151,1.280685,-0.538802,-0.574410,-0.606743,-0.120242,-1.240174,...,3.0,3.0,3.0,4.0,3.0,6.0,5.0,4.0,4.0,4.0
860,0.309634,-0.163109,-0.288121,0.651479,2.179628,-1.764269,-0.574410,-0.973018,-0.120242,-0.170310,...,3.0,3.0,3.0,2.0,2.0,6.0,2.0,4.0,0.0,4.0


In [30]:
y_train

254     145000
1066    178000
638      85000
799     175000
380     127000
         ...  
1095    176432
1130    135000
1294    115000
860     189950
1126    174000
Name: SalePrice, Length: 1168, dtype: int64

## Model Building

In [31]:
from sklearn.linear_model import Lasso,Ridge

In [32]:
la = Lasso()
ri = Ridge()

## Evaluation

In [33]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

## Lasso

In [34]:
la.fit(x_train, y_train)

### Training Data

In [35]:
y_pred_train = la.predict(x_train)

In [36]:
mse=mean_squared_error(y_pred_train,y_train)
mae=mean_absolute_error(y_pred_train,y_train)
r2_score1=r2_score(y_pred_train,y_train)
rmse=mse**0.5

In [37]:
print("Mean_Squared_Error",mse)
print("Mean Absolute Error",mae)
print("Root Mean Squared Error",rmse)
print("R2 Score",r2_score1)

Mean_Squared_Error 957061178.9735227
Mean Absolute Error 18351.662800392143
Root Mean Squared Error 30936.40539838982
R2 Score 0.8088574676459821


### Testing Data

In [38]:
y_pred_test = la.predict(x_test)

In [39]:
mse=mean_squared_error(y_pred_test,y_test)
mae=mean_absolute_error(y_pred_test,y_test)
r2_score1=r2_score(y_pred_test,y_test)
rmse=mse**0.5

In [40]:
print("Mean_Squared_Error",mse)
print("Mean Absolute Error",mae)
print("Root Mean Squared Error",rmse)
print("R2 Score",r2_score1)

Mean_Squared_Error 1102237831.7543747
Mean Absolute Error 20985.585775712523
Root Mean Squared Error 33199.96734568236
R2 Score 0.802399681764362


### Ridge

In [41]:
ri.fit(x_train, y_train)

### Training Data

In [42]:
y_pred_train = ri.predict(x_train)

In [43]:
mse=mean_squared_error(y_pred_train,y_train)
mae=mean_absolute_error(y_pred_train,y_train)
r2_score1=r2_score(y_pred_train,y_train)
rmse=mse**0.5

In [44]:
print("Mean_Squared_Error",mse)
print("Mean Absolute Error",mae)
print("Root Mean Squared Error",rmse)
print("R2 Score",r2_score1)

Mean_Squared_Error 957619142.4758961
Mean Absolute Error 18368.69785132433
Root Mean Squared Error 30945.421995440556
R2 Score 0.8086035487167696


### Testing Data

In [45]:
y_pred_test = ri.predict(x_test)

In [46]:
mse=mean_squared_error(y_pred_test,y_test)
mae=mean_absolute_error(y_pred_test,y_test)
rmse=mse**0.5
r2_score1=r2_score(y_pred_test,y_test)

In [47]:
print("Mean_Squared_Error",mse)
print("Mean Absolute Error",mae)
print("Root Mean Squared Error",rmse)
print("R2 Score",r2_score1)

Mean_Squared_Error 1102730327.6445007
Mean Absolute Error 20993.248731735814
Root Mean Squared Error 33207.38363142301
R2 Score 0.8022850403360963


## Hyperparameter Tuning

In [48]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [49]:
grid={'alpha': np.arange(0,2,0.1)}
grid

{'alpha': array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. , 1.1, 1.2,
        1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9])}

### GridSearchCV

In [50]:
gs1 = GridSearchCV(la, param_grid = grid, cv=3)

In [51]:
gs1.fit(x_train, y_train)

In [52]:
gs1.best_params_

{'alpha': 1.9000000000000001}

In [53]:
gs1.best_estimator_

### Training Data

In [54]:
y_pred_train = gs1.predict(x_train)

In [55]:
mse=mean_squared_error(y_pred_train,y_train)
mae=mean_absolute_error(y_pred_train,y_train)
rmse=mse**0.5
r2_score_train=r2_score(y_pred_train,y_train)

In [56]:
print("Mean Squared Error=",mse)
print("Mean Absolute Error=",mae)
print("Root Mean Squared Error=",rmse)
print("R2_Score=",r2_score_train)

Mean Squared Error= 957064495.5056213
Mean Absolute Error= 18351.983405386207
Root Mean Squared Error= 30936.459000758656
R2_Score= 0.8088422419622824


### Testing data

In [57]:
y_pred_test = gs1.predict(x_test)

In [58]:
mse=mean_squared_error(y_pred_test,y_test)
mae=mean_absolute_error(y_pred_test,y_test)
rmse=mse**0.5
r2_score_train=r2_score(y_pred_test,y_test)

In [59]:
print("Mean Squared Error=",mse)
print("Mean Absolute Error=",mae)
print("Root Mean Squared Error=",rmse)
print("R2_Score=",r2_score_train)

Mean Squared Error= 1102286603.8299327
Mean Absolute Error= 20985.189311028113
Root Mean Squared Error= 33200.70185748989
R2_Score= 0.8023785136886261


In [60]:
gs2 = GridSearchCV(ri, param_grid = grid, cv=3)

In [61]:
gs2.fit(x_train, y_train)

In [62]:
gs2.best_params_

{'alpha': 1.9000000000000001}

In [63]:
gs2.best_estimator_

### Training data

In [64]:
y_pred_train = gs2.predict(x_train)

In [65]:
mse=mean_squared_error(y_pred_train,y_train)
mae=mean_absolute_error(y_pred_train,y_train)
rmse=mse**0.5
r2_score_train=r2_score(y_pred_train,y_train)

In [66]:
print("Mean Squared Error=",mse)
print("Mean Absolute Error=",mae)
print("Root Mean Squared Error=",rmse)
print("R2_Score=",r2_score_train)

Mean Squared Error= 958009435.6804216
Mean Absolute Error= 18373.083224246533
Root Mean Squared Error= 30951.727507207437
R2_Score= 0.8084279667524148


### Testing Data

In [67]:
y_pred_test = gs2.predict(x_test)

In [68]:
mse=mean_squared_error(y_pred_test,y_test)
mae=mean_absolute_error(y_pred_test,y_test)
rmse=mse**0.5
r2_score_train=r2_score(y_pred_test,y_test)

In [69]:
print("Mean Squared Error=",mse)
print("Mean Absolute Error=",mae)
print("Root Mean Squared Error=",rmse)
print("R2_Score=",r2_score_train)

Mean Squared Error= 1103065999.3502295
Mean Absolute Error= 20994.570292540386
Root Mean Squared Error= 33212.437419590715
R2_Score= 0.802152908500331
