Problem Statement
It is your job to predict the sales price for each house. For each Id in the test set, you must predict the value of the SalePrice variable.



Importing all the necessary libraries

In [1]:
# To avoid non-harmful warnings
from warnings import filterwarnings

filterwarnings("ignore")

# Read the file, numpy library
import pandas as pd
import numpy as np

# Data Preprocessing and Data Cleaning
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Feature Selection
from sklearn.feature_selection import SequentialFeatureSelector

# Train Test Split
from sklearn.model_selection import train_test_split, GridSearchCV

# Models required
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# Model evaluation
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error


Read the Dataset

In [2]:
path = r"https://raw.githubusercontent.com/Sindhura-tr/Datasets/refs/heads/main/training_set.csv"
df = pd.read_csv(path)
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


Perform the basic data quality checks

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [4]:
df.shape

(1460, 81)

In [5]:
# Missing values
s = df.isna().sum()
s[s > 0]

LotFrontage      259
Alley           1369
MasVnrType       872
MasVnrArea         8
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinType2      38
Electrical         1
FireplaceQu      690
GarageType        81
GarageYrBlt       81
GarageFinish      81
GarageQual        81
GarageCond        81
PoolQC          1453
Fence           1179
MiscFeature     1406
dtype: int64

In [6]:
1406 / 1460

0.963013698630137

In [7]:
cnt = df["Id"].count()

In [8]:
# find the columns/features which have missing data greater than 50%
b = (s / cnt) * 100
b[b > 50]

Alley          93.767123
MasVnrType     59.726027
PoolQC         99.520548
Fence          80.753425
MiscFeature    96.301370
dtype: float64

There are 5features who have missing data more then 50%. Dropping the features would be better way of handling this

In [9]:
b[b > 50].index

Index(['Alley', 'MasVnrType', 'PoolQC', 'Fence', 'MiscFeature'], dtype='object')

In [10]:
# convert the above index details into a list
features_tobe_removed = list(b[b > 50].index)
features_tobe_removed

['Alley', 'MasVnrType', 'PoolQC', 'Fence', 'MiscFeature']

In [11]:
df = df.drop(columns=features_tobe_removed)
df.shape

(1460, 76)

In [12]:
# Check for duplicated data
df.duplicated().sum()

np.int64(0)

Separate X and Y features
Y: SalePrice

In [13]:
df.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
       'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle',
       'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrArea', 'ExterQual',
       'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure',
       'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF',
       'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical',
       '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath',
       'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',
       'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',
       'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish',
       'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive',
       'WoodDeckSF', 'OpenP

In [14]:
X = df.drop(columns=["Id", "SalePrice"])
Y = df[["SalePrice"]]

In [15]:
X.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,...,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,60,RL,65.0,8450,Pave,Reg,Lvl,AllPub,Inside,Gtl,...,61,0,0,0,0,0,2,2008,WD,Normal
1,20,RL,80.0,9600,Pave,Reg,Lvl,AllPub,FR2,Gtl,...,0,0,0,0,0,0,5,2007,WD,Normal
2,60,RL,68.0,11250,Pave,IR1,Lvl,AllPub,Inside,Gtl,...,42,0,0,0,0,0,9,2008,WD,Normal
3,70,RL,60.0,9550,Pave,IR1,Lvl,AllPub,Corner,Gtl,...,35,272,0,0,0,0,2,2006,WD,Abnorml
4,60,RL,84.0,14260,Pave,IR1,Lvl,AllPub,FR2,Gtl,...,84,0,0,0,0,0,12,2008,WD,Normal


In [16]:
Y.head()

Unnamed: 0,SalePrice
0,208500
1,181500
2,223500
3,140000
4,250000


Feature Engineering

Feature Selection

Data Preprocessing
Data Cleaning

In [17]:
cat = list(X.columns[X.dtypes == "object"])
con = list(X.columns[X.dtypes != "object"])

In [18]:
print(cat)

['MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'SaleType', 'SaleCondition']


In [19]:
print(con)

['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold']


In [20]:
cat_pipe = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="Not Available"), OrdinalEncoder()
)

In [22]:
con_pipe = make_pipeline(SimpleImputer(strategy="mean"), StandardScaler())

In [23]:

pre = ColumnTransformer([("cat", cat_pipe, cat), ("con", con_pipe, con)]).set_output(
    transform="pandas"
)

In [24]:
pre

In [25]:
X_pre = pre.fit_transform(X)
X_pre.head()

Unnamed: 0,cat__MSZoning,cat__Street,cat__LotShape,cat__LandContour,cat__Utilities,cat__LotConfig,cat__LandSlope,cat__Neighborhood,cat__Condition1,cat__Condition2,...,con__GarageArea,con__WoodDeckSF,con__OpenPorchSF,con__EnclosedPorch,con__3SsnPorch,con__ScreenPorch,con__PoolArea,con__MiscVal,con__MoSold,con__YrSold
0,3.0,1.0,3.0,3.0,0.0,4.0,0.0,5.0,2.0,2.0,...,0.351,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,0.138777
1,3.0,1.0,3.0,3.0,0.0,2.0,0.0,24.0,1.0,2.0,...,-0.060731,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-0.48911,-0.614439
2,3.0,1.0,0.0,3.0,0.0,4.0,0.0,5.0,2.0,2.0,...,0.631726,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,0.990891,0.138777
3,3.0,1.0,0.0,3.0,0.0,0.0,0.0,6.0,2.0,2.0,...,0.790804,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,-1.367655
4,3.0,1.0,0.0,3.0,0.0,2.0,0.0,15.0,2.0,2.0,...,1.698485,0.780197,0.56376,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,2.100892,0.138777


In [26]:
model = LinearRegression()
sel = SequentialFeatureSelector(model, direction="forward", n_features_to_select="auto")
sel.fit(X_pre, Y)


In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

In [27]:
imp_cols = sel.get_feature_names_out()
imp_cols

array(['cat__Street', 'cat__LandContour', 'cat__Utilities',
       'cat__Neighborhood', 'cat__BldgType', 'cat__HouseStyle',
       'cat__RoofStyle', 'cat__RoofMatl', 'cat__Exterior1st',
       'cat__ExterQual', 'cat__BsmtQual', 'cat__BsmtCond',
       'cat__BsmtExposure', 'cat__HeatingQC', 'cat__Electrical',
       'cat__KitchenQual', 'cat__Functional', 'cat__FireplaceQu',
       'cat__GarageCond', 'cat__PavedDrive', 'con__MSSubClass',
       'con__LotArea', 'con__OverallQual', 'con__OverallCond',
       'con__YearBuilt', 'con__MasVnrArea', 'con__BsmtFinSF1',
       'con__GrLivArea', 'con__BsmtFullBath', 'con__KitchenAbvGr',
       'con__TotRmsAbvGrd', 'con__Fireplaces', 'con__GarageCars',
       'con__WoodDeckSF', 'con__ScreenPorch', 'con__PoolArea',
       'con__YrSold'], dtype=object)

In [28]:
imp_cols[0].split("_")[-1]

'Street'

In [29]:
sel_cols = [col.split("_")[-1] for col in imp_cols]
sel_cols

['Street',
 'LandContour',
 'Utilities',
 'Neighborhood',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'ExterQual',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'HeatingQC',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageCond',
 'PavedDrive',
 'MSSubClass',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'MasVnrArea',
 'BsmtFinSF1',
 'GrLivArea',
 'BsmtFullBath',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageCars',
 'WoodDeckSF',
 'ScreenPorch',
 'PoolArea',
 'YrSold']

In [30]:
X_sel = X[sel_cols]
X_sel.head()

Unnamed: 0,Street,LandContour,Utilities,Neighborhood,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,ExterQual,...,GrLivArea,BsmtFullBath,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageCars,WoodDeckSF,ScreenPorch,PoolArea,YrSold
0,Pave,Lvl,AllPub,CollgCr,1Fam,2Story,Gable,CompShg,VinylSd,Gd,...,1710,1,1,8,0,2,0,0,0,2008
1,Pave,Lvl,AllPub,Veenker,1Fam,1Story,Gable,CompShg,MetalSd,TA,...,1262,0,1,6,1,2,298,0,0,2007
2,Pave,Lvl,AllPub,CollgCr,1Fam,2Story,Gable,CompShg,VinylSd,Gd,...,1786,1,1,6,1,2,0,0,0,2008
3,Pave,Lvl,AllPub,Crawfor,1Fam,2Story,Gable,CompShg,Wd Sdng,TA,...,1717,1,1,7,1,3,0,0,0,2006
4,Pave,Lvl,AllPub,NoRidge,1Fam,2Story,Gable,CompShg,VinylSd,Gd,...,2198,1,1,9,1,3,192,0,0,2008


In [31]:
X.shape


(1460, 74)

In [32]:


X_sel.shape

(1460, 37)

Data Preprocessing and Data Cleaning for final model building


In [33]:
cat_sel = list(X_sel.columns[X_sel.dtypes == "object"])
con_sel = list(X_sel.columns[X_sel.dtypes != "object"])

In [34]:
print(cat_sel)

['Street', 'LandContour', 'Utilities', 'Neighborhood', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'ExterQual', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'HeatingQC', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageCond', 'PavedDrive']


In [35]:
print(con_sel)

['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'MasVnrArea', 'BsmtFinSF1', 'GrLivArea', 'BsmtFullBath', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars', 'WoodDeckSF', 'ScreenPorch', 'PoolArea', 'YrSold']


In [36]:
cat_pipe1 = make_pipeline(
    SimpleImputer(strategy="most_frequent"),
    OneHotEncoder(handle_unknown="ignore", sparse_output=False),
)

In [37]:
con_pipe1 = make_pipeline(SimpleImputer(strategy="median"), StandardScaler())

In [38]:
pre1 = ColumnTransformer(
    [("cat", cat_pipe1, cat_sel), ("con", con_pipe1, con_sel)]
).set_output(transform="pandas")


In [39]:

pre1

In [40]:

X_pre1 = pre1.fit_transform(X_sel)
X_pre1.head()

Unnamed: 0,cat__Street_Grvl,cat__Street_Pave,cat__LandContour_Bnk,cat__LandContour_HLS,cat__LandContour_Low,cat__LandContour_Lvl,cat__Utilities_AllPub,cat__Utilities_NoSeWa,cat__Neighborhood_Blmngtn,cat__Neighborhood_Blueste,...,con__GrLivArea,con__BsmtFullBath,con__KitchenAbvGr,con__TotRmsAbvGrd,con__Fireplaces,con__GarageCars,con__WoodDeckSF,con__ScreenPorch,con__PoolArea,con__YrSold
0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.370333,1.10781,-0.211454,0.91221,-0.951226,0.311725,-0.752176,-0.270208,-0.068692,0.138777
1,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-0.482512,-0.819964,-0.211454,-0.318683,0.600495,0.311725,1.626195,-0.270208,-0.068692,-0.614439
2,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.515013,1.10781,-0.211454,-0.318683,0.600495,0.311725,-0.752176,-0.270208,-0.068692,0.138777
3,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.383659,1.10781,-0.211454,0.296763,0.600495,1.650307,-0.752176,-0.270208,-0.068692,-1.367655
4,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,1.299326,1.10781,-0.211454,1.527656,0.600495,1.650307,0.780197,-0.270208,-0.068692,0.138777


Split the data into training and testing

In [41]:
xtrain, xtest, ytrain, ytest = train_test_split(
    X_pre1, Y, train_size=0.80, random_state=21
)

In [42]:
xtrain.head()

Unnamed: 0,cat__Street_Grvl,cat__Street_Pave,cat__LandContour_Bnk,cat__LandContour_HLS,cat__LandContour_Low,cat__LandContour_Lvl,cat__Utilities_AllPub,cat__Utilities_NoSeWa,cat__Neighborhood_Blmngtn,cat__Neighborhood_Blueste,...,con__GrLivArea,con__BsmtFullBath,con__KitchenAbvGr,con__TotRmsAbvGrd,con__Fireplaces,con__GarageCars,con__WoodDeckSF,con__ScreenPorch,con__PoolArea,con__YrSold
710,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-1.497169,-0.819964,-0.211454,-0.93413,-0.951226,-2.36544,-0.752176,-0.270208,-0.068692,0.138777
1098,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-0.364484,-0.819964,-0.211454,-0.318683,-0.951226,-1.026858,-0.752176,-0.270208,-0.068692,0.891994
1286,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-0.35687,1.10781,-0.211454,-0.318683,2.152216,0.311725,-0.752176,-0.270208,-0.068692,1.64521
992,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.638751,-0.819964,-0.211454,0.296763,0.600495,0.311725,1.155309,-0.270208,-0.068692,-0.614439
631,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.073361,-0.819964,-0.211454,-0.318683,0.600495,0.311725,0.492877,-0.270208,-0.068692,-0.614439


In [43]:
xtest.head()

Unnamed: 0,cat__Street_Grvl,cat__Street_Pave,cat__LandContour_Bnk,cat__LandContour_HLS,cat__LandContour_Low,cat__LandContour_Lvl,cat__Utilities_AllPub,cat__Utilities_NoSeWa,cat__Neighborhood_Blmngtn,cat__Neighborhood_Blueste,...,con__GrLivArea,con__BsmtFullBath,con__KitchenAbvGr,con__TotRmsAbvGrd,con__Fireplaces,con__GarageCars,con__WoodDeckSF,con__ScreenPorch,con__PoolArea,con__YrSold
880,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-0.809944,1.10781,-0.211454,-0.93413,-0.951226,0.311725,-0.752176,-0.270208,-0.068692,-0.614439
605,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.895747,-0.819964,-0.211454,0.296763,3.703938,0.311725,-0.752176,3.120637,-0.068692,0.891994
1166,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,0.339875,-0.819964,-0.211454,0.296763,-0.951226,1.650307,0.524802,-0.270208,-0.068692,1.64521
216,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-0.151273,1.10781,-0.211454,0.91221,-0.951226,0.311725,0.357198,-0.270208,-0.068692,0.138777
970,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,...,-0.615769,-0.819964,-0.211454,-0.318683,-0.951226,-2.36544,-0.752176,-0.270208,-0.068692,-1.367655


In [44]:
ytrain.head()

Unnamed: 0,SalePrice
710,52000
1098,128000
1286,143000
992,187000
631,209500


In [45]:
ytest.head()

Unnamed: 0,SalePrice
880,157000
605,205000
1166,245350
216,210000
970,135000


Model Building

In [46]:
model.fit(xtrain, ytrain)

In [47]:
model.score(xtrain, ytrain)

0.9229140155337772

In [48]:
model.score(xtest, ytest)

0.8149161822579338

In [49]:
model2 = Ridge(alpha=1)
model2.fit(xtrain, ytrain)


In [50]:
model2.score(xtrain, ytrain)

0.9108054258375854

In [51]:
model2.score(xtest, ytest)

0.8405089168884217

Hyperparamter tuning

In [52]:
params = {"alpha": np.arange(start=0.1, stop=150, step=0.1)}

In [53]:
params["alpha"]

array([1.000e-01, 2.000e-01, 3.000e-01, ..., 1.497e+02, 1.498e+02,
       1.499e+02], shape=(1499,))

In [54]:
base_model = Ridge()
gscv = GridSearchCV(estimator=base_model, param_grid=params, scoring="r2", cv=5)
gscv.fit(xtrain, ytrain)

In [55]:
gscv.best_params_

{'alpha': np.float64(19.200000000000003)}

In [56]:
gscv.best_score_

np.float64(0.8460998658179234)

In [58]:
best_ridge = gscv.best_estimator_

In [59]:
best_ridge.fit(xtrain, ytrain)

In [60]:
best_ridge.score(xtrain, ytrain)

0.8836797627706074

In [61]:
best_ridge.score(xtest, ytest)

0.8344409874290946

Lasso

In [62]:
base_model2 = Lasso()
gscv2 = GridSearchCV(estimator=base_model2, param_grid=params, scoring="r2", cv=5)
gscv2.fit(xtrain, ytrain)

In [63]:
gscv2.best_params_

{'alpha': np.float64(102.1)}

In [64]:
gscv2.best_score_

np.float64(0.855159159264763)

In [66]:
best_lasso = gscv2.best_estimator_

In [67]:
best_lasso.fit(xtrain, ytrain)

In [68]:
best_lasso.score(xtrain, ytrain)

0.9142526842085674

In [69]:
best_lasso.score(xtest, ytest)

0.8265856009335546