In [1805]:
import pandas as pd
from pandas_profiling import ProfileReport
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MaxAbsScaler
from matplotlib.pyplot import figure
from datetime import datetime
today = datetime.today()
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

In [1806]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [1807]:
X = pd.DataFrame(train.loc[:, train.columns != 'SalePrice'])
X = pd.concat([X,test], axis = 0)
y = pd.DataFrame(train['SalePrice'])

In [1784]:
np.log(y.SalePrice).skew()

0.12133506220520406

#### Data Cleaning

In [1808]:
X.drop('Id', axis = 1, inplace= True)
X[['GarageCars', 'BsmtHalfBath', 'BsmtFullBath', 'MSSubClass', 'MoSold', 'YrSold', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces']] = X[['GarageCars', 'BsmtHalfBath', 'BsmtFullBath', 'MSSubClass', 'MoSold', 'YrSold', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces']].astype(str)
X['MSZoning'] = X[['MSZoning','Neighborhood']].apply(lambda x: 'RL' if (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Blmngtn') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'ClearCr') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'CollgCr') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Crawfor') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Edwards') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Gilbert') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Mitchel') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'NAmes') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'NPkVill') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'NWAmes') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'NoRidge') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'NridgHt') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'SWISU') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Sawyer') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'SawyerW') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'StoneBr') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Timber') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Veenker') else 'RM' if (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Blueste') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'BrDale') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'BrkSide') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'IDOTRR') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'MeadowV') or (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'OldTown') else 'FV' if (pd.isnull(x['MSZoning']) and x['Neighborhood'] == 'Somerst') else x['MSZoning'], axis = 1)
X['LotFrontage'] = X[['LotFrontage','LotArea']].apply(lambda x: 50 if pd.isnull(x['LotFrontage']) and x['LotArea'] <= 7478 else 68 if pd.isnull(x['LotFrontage']) and x['LotArea'] > 7498 and x['LotArea'] <= 9453 else 76 if pd.isnull(x['LotFrontage']) and x['LotArea'] > 9453 and x['LotArea'] <= 11570 else 85 if pd.isnull(x['LotFrontage']) else x['LotFrontage'], axis = 1)
X['Alley'].fillna('NoAlley', inplace= True)
X.Utilities.fillna('AllPub', inplace = True)
X['Exterior1st'].fillna('VinylSd', inplace = True)
X['Exterior2nd'].fillna('VinylSd', inplace = True)
X['MasVnrType'] = X[['MSSubClass','MasVnrType']].apply(lambda x: 'BrkFace' if x['MSSubClass'] == '120' or x['MSSubClass'] == '80' else 'None', axis = 1)
X['MasVnrArea'] = X[['MasVnrType','MasVnrArea']].apply(lambda x: 50 if x['MasVnrType'] == 'BrkFace' and pd.isnull(x['MasVnrArea']) else 0 if x['MasVnrType'] == 'None' and pd.isnull(x['MasVnrArea']) else x['MasVnrArea'], axis = 1)
X['BsmtQual'].fillna('NoBasement', inplace = True)
X['BsmtCond'].fillna('NoBasement', inplace = True)
X['BsmtExposure'].fillna('NoBasement', inplace = True)
X['BsmtFinType1'].fillna('NoBasement', inplace = True)
X['BsmtFinType2'].fillna('NoBasement', inplace = True)
X['BsmtFinSF1'].fillna(0, inplace = True)
X['BsmtFinSF2'].fillna(0, inplace = True)
X['BsmtUnfSF'].fillna(0, inplace = True)
X['TotalBsmtSF'].fillna(0, inplace = True)
X['Electrical'].fillna('SBrkr', inplace = True)
X['BsmtFullBath'].fillna(0, inplace = True)
X['BsmtHalfBath'].fillna(0, inplace = True)
X['KitchenQual'].fillna('TA', inplace = True)
X['Functional'].fillna('Typ', inplace = True)
X['FireplaceQu'].fillna('NoFireplace', inplace = True)
X['GarageType'].fillna('NoGarage', inplace = True)
X['GarageYrBlt'] = X[['GarageYrBlt','GarageType']].apply(lambda x: 1968 if (x['GarageType'] == '2Types' and pd.isnull(x['GarageYrBlt'])) else 1990 if (x['GarageType'] == 'Attchd' and pd.isnull(x['GarageYrBlt'])) else 1958.5 if (x['GarageType'] == 'Basment' and pd.isnull(x['GarageYrBlt'])) else 2003 if (x['GarageType'] == 'BuiltIn' and pd.isnull(x['GarageYrBlt'])) else 1963 if (x['GarageType'] == 'CarPort' and pd.isnull(x['GarageYrBlt'])) else 1962 if (x['GarageType'] == 'Detchd' and pd.isnull(x['GarageYrBlt'])) else 0 if (x['GarageType'] == 'NoGarage' and pd.isnull(x['GarageYrBlt'])) else x['GarageYrBlt'], axis = 1)
X['GarageFinish'].fillna('NoGarage', inplace = True)
X['GarageCars'].fillna(2, inplace= True)
X['GarageArea'].fillna(480, inplace= True)
X['GarageQual'].fillna('NoGarage', inplace = True)
X['GarageCond'].fillna('NoGarage', inplace = True)
X.drop(['MiscFeature','MiscVal'], axis = 1, inplace = True)
X['PoolQC'].fillna('NoPool', inplace = True)
X['Fence'].fillna('NoFence', inplace = True)
X['SaleType'].fillna('WD', inplace = True)
# X.drop(columns=['PoolArea','ScreenPorch','3SsnPorch','LowQualFinSF','BsmtFinSF2'], axis = 1, inplace = True)
X['GarageYrBlt'] = X['GarageYrBlt'].map(lambda x: 2007 if x == 2207 else x)
X['ExterQual'] = X['ExterQual'].map(lambda x: 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['ExterCond'] = X['ExterCond'].map(lambda x: 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['BsmtCond'] = X['BsmtCond'].map(lambda x: 0 if x == 'NoBasement' else 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['BsmtQual'] = X['BsmtQual'].map(lambda x: 0 if x == 'NoBasement' else 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['BsmtFinType1'] = X['BsmtFinType1'].map(lambda x: 0 if x == 'NoBasement' else 1 if x == 'Unf' else 2 if x == 'LwQ' else 3 if x == 'Rec' else 4 if x == 'BLQ' else 5 if x == 'ALQ' else 6 if x == 'GLQ' else x)
X['BsmtFinType2'] = X['BsmtFinType2'].map(lambda x: 0 if x == 'NoBasement' else 1 if x == 'Unf' else 2 if x == 'LwQ' else 3 if x == 'Rec' else 4 if x == 'BLQ' else 5 if x == 'ALQ' else 6 if x == 'GLQ' else x)
X['HeatingQC'] = X['HeatingQC'].map(lambda x: 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['KitchenQual'] = X['KitchenQual'].map(lambda x: 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['FireplaceQu'] = X['FireplaceQu'].map(lambda x: 0 if x == 'NoFireplace' else 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['GarageQual'] = X['GarageQual'].map(lambda x: 0 if x == 'NoGarage' else 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['GarageCond'] = X['GarageCond'].map(lambda x: 0 if x == 'NoGarage' else 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['PoolQC'] = X['PoolQC'].map(lambda x: 0 if x == 'NoPool' else 1 if x == 'Po' else 2 if x == 'Fa' else 3 if x == 'TA' else 4 if x == 'Gd' else 5 if x == 'Ex' else x)
X['Fence'] = X['Fence'].map(lambda x: 0 if x == 'NoFence' else 1 if x == 'MnWw' else 2 if x == 'GdWo' else 3 if x == 'MnPrv' else 4 if x == 'GdPrv' else x)
X['PoolArea'] = X['PoolArea'].map(lambda x: 0 if x == 0 else 1)
X['ScreenPorch'] = X['ScreenPorch'].map(lambda x: 0 if x == 0 else 1)
X['3SsnPorch'] = X['3SsnPorch'].map(lambda x: 0 if x == 0 else 1)
X['LowQualFin'] = X['LowQualFinSF'].map(lambda x: 0 if x == 0 else 1)
X['BsmtFin2'] = X['BsmtFinSF2'].map(lambda x: 0 if x == 0 else 1)

In [1786]:
X

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MoSold,YrSold,SaleType,SaleCondition,LowQualFin,BsmtFin2,HouseQual
0,60,RL,65.0,8450,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,0,0,0,2,2008,WD,Normal,0,0,12
1,20,RL,80.0,9600,Pave,NoAlley,Reg,Lvl,AllPub,FR2,...,0,0,0,5,2007,WD,Normal,0,0,14
2,60,RL,68.0,11250,Pave,NoAlley,IR1,Lvl,AllPub,Inside,...,0,0,0,9,2008,WD,Normal,0,0,12
3,70,RL,60.0,9550,Pave,NoAlley,IR1,Lvl,AllPub,Corner,...,0,0,0,2,2006,WD,Abnorml,0,0,12
4,60,RL,84.0,14260,Pave,NoAlley,IR1,Lvl,AllPub,FR2,...,0,0,0,12,2008,WD,Normal,0,0,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,160,RM,21.0,1936,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,0,0,0,6,2006,WD,Normal,0,0,11
1455,160,RM,21.0,1894,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,0,0,0,4,2006,WD,Abnorml,0,0,9
1456,20,RL,160.0,20000,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,0,0,0,9,2006,WD,Abnorml,0,0,12
1457,85,RL,62.0,10441,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,0,0,3,7,2006,WD,Normal,0,0,10


#### Feature Engineering

In [1809]:
X['HouseAge'] = X['YearBuilt'].map(lambda x: int(today.year) - x if x else x)
X.drop('YearBuilt', axis = 1, inplace = True)
X['Remodeled'] = X['YearRemodAdd'].map(lambda x: 0 if x == 0 else 1)
X.drop('YearRemodAdd', axis = 1, inplace = True)
X['GarageAge'] = X['GarageYrBlt'].map(lambda x: int(today.year) - x if x > 0 else x)
X.drop('GarageYrBlt', axis = 1, inplace = True)


In [1788]:
X

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,MoSold,YrSold,SaleType,SaleCondition,LowQualFin,BsmtFin2,HouseQual,HouseAge,Remodeled,GarageAge
0,60,RL,65.0,8450,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,2,2008,WD,Normal,0,0,12,19,1,19.0
1,20,RL,80.0,9600,Pave,NoAlley,Reg,Lvl,AllPub,FR2,...,5,2007,WD,Normal,0,0,14,46,1,46.0
2,60,RL,68.0,11250,Pave,NoAlley,IR1,Lvl,AllPub,Inside,...,9,2008,WD,Normal,0,0,12,21,1,21.0
3,70,RL,60.0,9550,Pave,NoAlley,IR1,Lvl,AllPub,Corner,...,2,2006,WD,Abnorml,0,0,12,107,1,24.0
4,60,RL,84.0,14260,Pave,NoAlley,IR1,Lvl,AllPub,FR2,...,12,2008,WD,Normal,0,0,13,22,1,22.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,160,RM,21.0,1936,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,6,2006,WD,Normal,0,0,11,52,1,0.0
1455,160,RM,21.0,1894,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,4,2006,WD,Abnorml,0,0,9,52,1,52.0
1456,20,RL,160.0,20000,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,9,2006,WD,Abnorml,0,0,12,62,1,62.0
1457,85,RL,62.0,10441,Pave,NoAlley,Reg,Lvl,AllPub,Inside,...,7,2006,WD,Normal,0,0,10,30,1,0.0


#### Encoding

In [1810]:
categorical_features = ['MSSubClass', 'MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle','RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'Foundation', 'Heating', 'CentralAir', 'Electrical', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'GarageType', 'GarageFinish', 'GarageCars', 'PavedDrive', 'SaleType', 'SaleCondition', 'MoSold', 'YrSold', 'BsmtExposure']
scaler_features = ['LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', 'HouseAge', 'GarageAge', 'ExterQual', 'ExterCond', 'BsmtCond', 'BsmtQual', 'BsmtFinType1', 'HeatingQC', 'KitchenQual', 'FireplaceQu', 'GarageQual', 'GarageCond', 'PoolQC', 'Fence']

In [1811]:
X = pd.get_dummies(data = X, columns = categorical_features, drop_first=True)
#X.drop(columns= categorical_features, axis = 1, inplace = True)
#X

In [1791]:
X

Unnamed: 0,LotFrontage,LotArea,OverallQual,OverallCond,MasVnrArea,ExterQual,ExterCond,BsmtQual,BsmtCond,BsmtFinType1,...,MoSold_8,MoSold_9,YrSold_2007,YrSold_2008,YrSold_2009,YrSold_2010,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_NoBasement
0,65.0,8450,7,5,196.0,4,3,4,3,6,...,0,0,0,1,0,0,0,0,1,0
1,80.0,9600,6,8,0.0,3,3,4,3,5,...,0,0,1,0,0,0,1,0,0,0
2,68.0,11250,7,5,162.0,4,3,4,3,6,...,0,1,0,1,0,0,0,1,0,0
3,60.0,9550,7,5,0.0,3,3,3,4,5,...,0,0,0,0,0,0,0,0,1,0
4,84.0,14260,8,5,350.0,4,3,4,3,6,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,21.0,1936,4,7,0.0,3,3,3,3,1,...,0,0,0,0,0,0,0,0,1,0
1455,21.0,1894,4,5,0.0,3,3,3,3,3,...,0,0,0,0,0,0,0,0,1,0
1456,160.0,20000,5,7,0.0,3,3,3,3,5,...,0,1,0,0,0,0,0,0,1,0
1457,62.0,10441,5,5,0.0,3,3,4,3,6,...,0,0,0,0,0,0,0,0,0,0


#### Data Transformation

In [1812]:
scaler_features

['LotFrontage',
 'LotArea',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'GrLivArea',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 'HouseAge',
 'GarageAge',
 'ExterQual',
 'ExterCond',
 'BsmtCond',
 'BsmtQual',
 'BsmtFinType1',
 'HeatingQC',
 'KitchenQual',
 'FireplaceQu',
 'GarageQual',
 'GarageCond',
 'PoolQC',
 'Fence',
 'HouseQual']

In [1813]:
X[scaler_features].skew()

LotFrontage       1.425916
LotArea          12.829025
MasVnrArea        2.615200
BsmtFinSF1        1.425963
BsmtUnfSF         0.919812
TotalBsmtSF       1.157489
1stFlrSF          1.470360
2ndFlrSF          0.862118
GrLivArea         1.270010
GarageArea        0.241308
WoodDeckSF        1.843380
OpenPorchSF       2.536417
EnclosedPorch     4.005950
HouseAge          0.600114
GarageAge         0.562338
ExterQual         0.786786
ExterCond         1.316590
BsmtCond         -3.605964
BsmtQual         -1.269195
BsmtFinType1     -0.149621
HeatingQC        -0.550192
KitchenQual       0.438761
FireplaceQu       0.172269
GarageQual       -3.265354
GarageCond       -3.384860
PoolQC           18.418603
Fence             1.755731
HouseQual        -0.564413
dtype: float64

In [1794]:
X['LotFrontage'] = np.sqrt(X.LotFrontage)
X['LotArea'] = np.log(X.LotArea)
X['MasVnrArea'] = np.sqrt(X.MasVnrArea)
X['BsmtFinSF1'] = np.sqrt(X.BsmtFinSF1)
# X['BsmtFinSF2'] = np.log1p(X.BsmtFinSF2) # 2.98 skew
X['BsmtUnfSF'] = np.sqrt(X.BsmtUnfSF)
X['TotalBsmtSF'] = X.TotalBsmtSF
X['1stFlrSF'] = np.log(X['1stFlrSF'])
X['2ndFlrSF'] = np.sqrt(X['2ndFlrSF'])
# X['LowQualFinSF'] = X['LowQualFinSF'] # too skewed
X['GrLivArea'] = np.log(X['GrLivArea'])
X['GarageArea'] = X['GarageArea'] # not skewed
X['WoodDeckSF'] = np.log1p(X['WoodDeckSF'])
X['OpenPorchSF'] = np.log1p(X['OpenPorchSF'])
X['EnclosedPorch'] = np.log1p(X['EnclosedPorch'])
# X['3SsnPorch'] = X['3SsnPorch'] # too skewed
# X['ScreenPorch'] = np.log1p(X['ScreenPorch']) # 2.94 skew
#X['PoolArea'] = X['PoolArea'] # too skewed
X['HouseAge'] = np.sqrt(X['HouseAge'])
X['GarageAge'] = np.sqrt(X['GarageAge'])

In [1795]:
X['GarageAge'].describe()

count    2919.000000
mean        6.013549
std         2.333404
min         0.000000
25%         4.358899
50%         6.164414
75%         7.778108
max        11.269428
Name: GarageAge, dtype: float64

In [1818]:
scipy.special.boxcox(X['HouseQual'], 0).skew()

-1.9842122441276702

#### Scaling

In [1797]:
#from sklearn.preprocessing import RobustScaler # Robust
#scaler = RobustScaler()
#robust = scaler.fit_transform(X[scaler_features]) 
#scaled = pd.DataFrame(robust, columns= scaler_features)
#X.drop(columns=scaler_features, axis = 1, inplace = True)
#X = X.merge(scaled, left_index= True, right_index= True)

In [1798]:
#from sklearn.preprocessing import MinMaxScaler # MinMax
#scaler = MinMaxScaler()
#minmax = scaler.fit_transform(X[scaler_features])
#scaled = pd.DataFrame(minmax, columns= scaler_features)
#X.drop(columns=scaler_features, axis = 1, inplace = True)
#X = X.merge(scaled, left_index= True, right_index= True)

In [1799]:
from sklearn.preprocessing import StandardScaler ## Standard
scaler = StandardScaler()
std_scaler = scaler.fit_transform(X[scaler_features])
scaled = pd.DataFrame(std_scaler, columns= scaler_features)
X.drop(columns=scaler_features, axis = 1, inplace = True)
encoded_house = X.merge(scaled, left_index= True, right_index= True)

In [1800]:
#from sklearn.preprocessing import MaxAbsScaler # MaxAbs
#scaler = MaxAbsScaler()
#MaxAbs = scaler.fit_transform(X[scaler_features])
#scaled = pd.DataFrame(MaxAbs, columns= scaler_features)
#X.drop(columns=scaler_features, axis = 1, inplace = True)
#X = X.merge(scaled, left_index= True, right_index= True)

#### Unconcatenate

In [1801]:
j = X
a = test.shape[0]
X = j[:train.shape[0]]
test = j[train.shape[0]:test.shape[0]+(a+1)]

#### Model

In [1802]:
model = Lasso(alpha= 0.000018)
x = X
y = np.log(y.SalePrice)
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size = .30, random_state = 1)
model.fit(x_train,y_train)
predictions = model.predict(x_test)
np.sqrt(mean_squared_error(np.exp(y_test),np.exp(predictions)))

29004.059813966076

In [1803]:
predictions = model.predict(test)
test = pd.read_csv('test.csv')
solution = pd.DataFrame({"id": test.Id, "SalePrice": np.exp(predictions)})
solution.to_csv("predictions.csv", index = False)