# House Price

In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import Ridge, Lasso
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    root_mean_squared_error,
    mean_absolute_percentage_error,
)
import joblib

In [41]:
from warnings import filterwarnings

filterwarnings("ignore")

In [42]:
df = pd.read_csv("training_set.csv")
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [43]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallC

In [44]:
m = df.isna().sum()
m[m > 0]

LotFrontage      259
Alley           1369
MasVnrType       872
MasVnrArea         8
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinType2      38
Electrical         1
FireplaceQu      690
GarageType        81
GarageYrBlt       81
GarageFinish      81
GarageQual        81
GarageCond        81
PoolQC          1453
Fence           1179
MiscFeature     1406
dtype: int64

In [45]:
df.duplicated().sum()

np.int64(0)

In [46]:
X = df.drop(columns=["Id", "SalePrice"])
Y = df[["SalePrice"]]

In [47]:
X.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,0,,,,0,2,2008,WD,Normal
1,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,0,,,,0,5,2007,WD,Normal
2,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,9,2008,WD,Normal
3,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,,0,2,2006,WD,Abnorml
4,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,,0,12,2008,WD,Normal


In [48]:
Y.head()

Unnamed: 0,SalePrice
0,208500
1,181500
2,223500
3,140000
4,250000


In [49]:
cat = list(X.columns[X.dtypes == "object"])
con = list(X.columns[X.dtypes != "object"])

In [50]:
cat

['MSZoning',
 'Street',
 'Alley',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood',
 'Condition1',
 'Condition2',
 'BldgType',
 'HouseStyle',
 'RoofStyle',
 'RoofMatl',
 'Exterior1st',
 'Exterior2nd',
 'MasVnrType',
 'ExterQual',
 'ExterCond',
 'Foundation',
 'BsmtQual',
 'BsmtCond',
 'BsmtExposure',
 'BsmtFinType1',
 'BsmtFinType2',
 'Heating',
 'HeatingQC',
 'CentralAir',
 'Electrical',
 'KitchenQual',
 'Functional',
 'FireplaceQu',
 'GarageType',
 'GarageFinish',
 'GarageQual',
 'GarageCond',
 'PavedDrive',
 'PoolQC',
 'Fence',
 'MiscFeature',
 'SaleType',
 'SaleCondition']

In [51]:
con

['MSSubClass',
 'LotFrontage',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageYrBlt',
 'GarageCars',
 'GarageArea',
 'WoodDeckSF',
 'OpenPorchSF',
 'EnclosedPorch',
 '3SsnPorch',
 'ScreenPorch',
 'PoolArea',
 'MiscVal',
 'MoSold',
 'YrSold']

In [52]:
num_pipe = make_pipeline(SimpleImputer(strategy="mean"), StandardScaler())

In [53]:
cat_pipe = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="Not Available"),
    OneHotEncoder(handle_unknown="ignore", drop="first", sparse_output=False),
)

In [54]:
pre = ColumnTransformer(
    [
        ("num", num_pipe, con),
        ("cat", cat_pipe, cat),
    ]
).set_output(transform="pandas")

In [55]:
X_pre = pre.fit_transform(X)
X_pre.head()

Unnamed: 0,num__MSSubClass,num__LotFrontage,num__LotArea,num__OverallQual,num__OverallCond,num__YearBuilt,num__YearRemodAdd,num__MasVnrArea,num__BsmtFinSF1,num__BsmtFinSF2,...,cat__SaleType_ConLI,cat__SaleType_ConLw,cat__SaleType_New,cat__SaleType_Oth,cat__SaleType_WD,cat__SaleCondition_AdjLand,cat__SaleCondition_Alloca,cat__SaleCondition_Family,cat__SaleCondition_Normal,cat__SaleCondition_Partial
0,0.073375,-0.229372,-0.207142,0.651479,-0.5172,1.050994,0.878668,0.511418,0.575425,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,-0.872563,0.451936,-0.091886,-0.071836,2.179628,0.156734,-0.429577,-0.57441,1.171992,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,0.073375,-0.09311,0.07348,0.651479,-0.5172,0.984752,0.830215,0.32306,0.092907,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.309859,-0.456474,-0.096897,0.651479,-0.5172,-1.863632,-0.720298,-0.57441,-0.499274,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.073375,0.633618,0.375148,1.374795,-0.5172,0.951632,0.733308,1.36457,0.463568,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [56]:
xtrain, xtest, ytrain, ytest = train_test_split(
    X_pre, Y, test_size=0.2, random_state=21
)

In [57]:
xtrain.head()

Unnamed: 0,num__MSSubClass,num__LotFrontage,num__LotArea,num__OverallQual,num__OverallCond,num__YearBuilt,num__YearRemodAdd,num__MasVnrArea,num__BsmtFinSF1,num__BsmtFinSF2,...,cat__SaleType_ConLI,cat__SaleType_ConLw,cat__SaleType_New,cat__SaleType_Oth,cat__SaleType_WD,cat__SaleCondition_AdjLand,cat__SaleCondition_Alloca,cat__SaleCondition_Family,cat__SaleCondition_Normal,cat__SaleCondition_Partial
710,-0.636078,-0.6381565,-0.640101,-2.241782,0.381743,-1.201217,0.878668,-0.57441,-0.973018,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1098,-0.163109,-0.9106796,-0.452686,-1.518467,0.381743,-1.168096,-1.689368,-0.57441,0.500854,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1286,-0.872563,6.454645e-16,-0.072844,-0.071836,-0.5172,-0.273836,-1.059473,1.924104,0.274948,0.213629,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
992,0.073375,0.4519361,-0.075851,-0.071836,2.179628,-0.240715,0.394133,1.30917,0.20257,0.436865,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
631,1.492282,-1.637408,-0.593999,1.374795,-0.5172,1.150356,1.024029,0.023903,-0.92038,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [58]:
ytrain.head()

Unnamed: 0,SalePrice
710,52000
1098,128000
1286,143000
992,187000
631,209500


In [59]:
xtest.head()

Unnamed: 0,num__MSSubClass,num__LotFrontage,num__LotArea,num__OverallQual,num__OverallCond,num__YearBuilt,num__YearRemodAdd,num__MasVnrArea,num__BsmtFinSF1,num__BsmtFinSF2,...,cat__SaleType_ConLI,cat__SaleType_ConLw,cat__SaleType_New,cat__SaleType_Oth,cat__SaleType_WD,cat__SaleCondition_AdjLand,cat__SaleCondition_Alloca,cat__SaleCondition_Family,cat__SaleCondition_Normal,cat__SaleCondition_Partial
880,-0.872563,-0.456474,-0.350058,-0.795151,-0.5172,1.117235,1.024029,-0.57441,1.176379,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
605,0.073375,0.679039,0.309002,0.651479,0.381743,-0.207594,0.248772,0.40062,0.022723,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1166,-0.872563,-0.274792,-0.004192,1.374795,-0.5172,1.216598,1.120936,-0.175535,-0.973018,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
216,-0.872563,-0.229372,-0.207142,0.651479,-0.5172,1.084115,0.927122,0.899214,1.101808,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
970,-0.163109,-0.456474,0.02838,-1.518467,-1.416142,-0.737526,-1.689368,-0.57441,-0.973018,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [60]:
ytest.head()

Unnamed: 0,SalePrice
880,157000
605,205000
1166,245350
216,210000
970,135000


In [61]:
xtrain.shape, xtest.shape

((1168, 260), (292, 260))

### Linear

In [62]:
model0 = LinearRegression()
model0.fit(xtrain, ytrain)

In [63]:
model0.intercept_

array([-852482.14479399])

In [64]:
model0.coef_

array([[-8.26098851e+02,  1.55217680e+03,  7.83382075e+03,
         7.75903279e+03,  5.96795036e+03,  1.10395544e+04,
         1.90452097e+03,  3.26151662e+03,  9.81839409e+03,
         1.84486167e+03,  1.58836682e+02,  1.10460187e+04,
         6.12493450e+03,  1.71799307e+04, -6.38698817e+02,
         1.87186766e+04,  7.21113606e+01, -1.34608377e+03,
         3.67755707e+02,  6.55998781e+02, -4.02589527e+03,
        -2.30424947e+03,  3.13482874e+03,  1.57829542e+03,
         8.33552708e+02,  1.97095995e+03,  2.35771604e+03,
         1.51913512e+03,  5.35156645e+02, -2.83805766e+02,
         9.82549541e+02,  1.98488215e+03,  2.46548834e+04,
         1.10760420e+03, -2.95673293e+02, -1.97825831e+02,
         4.36167575e+04,  3.15164926e+04,  3.05094651e+04,
         2.50812351e+04,  4.42673532e+04, -2.71915616e+03,
        -4.99414470e+03,  2.54451039e+03, -1.85557914e+02,
         3.17985471e+02,  1.38338023e+03, -1.17960806e+04,
         3.60894550e+02, -2.92732051e+04,  1.17088482e+0

In [65]:
model0.score(xtrain, ytrain)

0.9441736843743175

In [66]:
model0.score(xtest, ytest)

0.7880392178196642

### Ridge

In [67]:
model1 = Ridge(alpha=1.0)
model1.fit(xtrain, ytrain)

In [68]:
model1.score(xtrain, ytrain)

0.9198323589199456

In [69]:
model1.score(xtest, ytest)

0.835927910263397

In [70]:
cv_scores = cross_val_score(model1, xtrain, ytrain, cv=5, scoring="r2", n_jobs=-1)
cv_scores

array([0.51169846, 0.8290882 , 0.89295681, 0.85322192, 0.90283894])

In [71]:
cv_scores.mean()

np.float64(0.7979608665942706)

In [72]:
alphas1 = {"alpha": np.arange(start=1, stop=100, step=1)}
print(alphas1)

{'alpha': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])}


In [73]:
base_ridge = Ridge()
gscv_ridge = GridSearchCV(
    estimator=base_ridge,
    param_grid=alphas1,
    scoring="r2",
    cv=5,
    n_jobs=-1,
)
gscv_ridge.fit(xtrain, ytrain)

In [74]:
gscv_ridge.best_params_

{'alpha': np.int64(41)}

In [75]:
gscv_ridge.best_score_

np.float64(0.82028105190524)

In [76]:
best_ridge = gscv_ridge.best_estimator_
best_ridge

In [77]:
best_ridge.score(xtrain, ytrain)

0.8862227184887695

In [78]:
best_ridge.score(xtest, ytest)

0.8233934113005316

### Lasso

In [79]:
model2 = Lasso(alpha=0.6)
model2.fit(xtrain, ytrain)

In [80]:
model2.score(xtrain, ytrain)

0.9440833861171414

In [81]:
model2.score(xtest, ytest)

0.8105748315565902

In [82]:
cv_scores1 = cross_val_score(model2, xtrain, ytrain, cv=5, scoring="r2", n_jobs=-1)
cv_scores1

array([0.46885771, 0.73464283, 0.91345859, 0.81640736, 0.02798145])

In [83]:
cv_scores1.mean()

np.float64(0.5922695887281282)

In [84]:
alphas2 = {"alpha": np.arange(start=1, stop=100, step=1)}
print(alphas2)

{'alpha': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])}


In [85]:
base_lasso = Lasso()
gscv_lasso = GridSearchCV(
    estimator=base_lasso,
    param_grid=alphas2,
    scoring="r2",
    cv=5,
    n_jobs=-1,
)
gscv_lasso.fit(xtrain, ytrain)

In [86]:
gscv_lasso.best_params_

{'alpha': np.int64(99)}

In [87]:
gscv_lasso.best_score_

np.float64(0.8146354045004374)

In [88]:
best_lasso = gscv_lasso.best_estimator_
best_lasso

In [89]:
best_lasso.score(xtrain, ytrain)

0.9043172634338377

In [90]:
best_lasso.score(xtest, ytest)

0.8405928259002237

## Scores

### Linear

In [91]:
model0.score(xtrain, ytrain)

0.9441736843743175

In [92]:
model0.score(xtest, ytest)

0.7880392178196642

### Ridge

In [93]:
best_ridge.score(xtrain, ytrain)

0.8862227184887695

In [94]:
best_ridge.score(xtest, ytest)

0.8233934113005316

### Lasso

In [95]:
best_lasso.score(xtrain, ytrain)

0.9043172634338377

In [96]:
best_lasso.score(xtest, ytest)

0.8405928259002237

# One Comparing All 3 Models (Linear, Ridge and Lasso), Lasso is better

In [97]:
best_lasso

In [98]:
best_lasso.score(xtrain, ytrain)

0.9043172634338377

In [99]:
best_lasso.score(xtest, ytest)

0.8405928259002237

In [100]:
def evaluate_model(model, x, y):
    ypred = model.predict(x)

    mae = mean_absolute_error(y, ypred)
    rmse = root_mean_squared_error(y, ypred)
    r2 = r2_score(y, ypred)
    mape = mean_absolute_percentage_error(y, ypred)

    # Print the evaluation metrics
    print(f"MAE: {mae:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print(f"MAPE {mape:.2%}")
    print(f"R2: {r2:.2%}")

In [101]:
print("Evaluation on Training Set:")
evaluate_model(best_lasso, xtrain, ytrain)

Evaluation on Training Set:
MAE: 15396.98
RMSE: 24334.61
MAPE 9.09%
R2: 90.43%


In [102]:
print("Evaluation on Testing Set:")
evaluate_model(best_lasso, xtest, ytest)

Evaluation on Testing Set:
MAE: 17467.92
RMSE: 32868.71
MAPE 10.16%
R2: 84.06%


In [103]:
xnew = pd.read_csv("sample_set.csv")
xnew.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal


In [104]:
pre

In [105]:
xnew_pre = pre.transform(xnew)
xnew_pre

Unnamed: 0,num__MSSubClass,num__LotFrontage,num__LotArea,num__OverallQual,num__OverallCond,num__YearBuilt,num__YearRemodAdd,num__MasVnrArea,num__BsmtFinSF1,num__BsmtFinSF2,...,cat__SaleType_ConLI,cat__SaleType_ConLw,cat__SaleType_New,cat__SaleType_Oth,cat__SaleType_WD,cat__SaleCondition_AdjLand,cat__SaleCondition_Alloca,cat__SaleCondition_Family,cat__SaleCondition_Normal,cat__SaleCondition_Partial
0,-0.872563,0.451936,0.110763,-0.795151,0.381743,-0.340077,-1.156380,-0.574410,0.053428,0.604293,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1,-0.872563,0.497357,0.375850,-0.071836,0.381743,-0.439440,-1.301740,0.023903,1.051363,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2,0.073375,0.179413,0.332053,-0.795151,-0.517200,0.852269,0.636400,-0.574410,0.761852,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
3,0.073375,0.361095,-0.054002,-0.071836,0.381743,0.885390,0.636400,-0.463612,0.347326,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,1.492282,-1.228623,-0.552407,1.374795,-0.517200,0.686666,0.345679,-0.574410,-0.396190,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2.438219,-2.227875,-0.859988,-1.518467,1.280685,-0.041991,-0.720298,-0.574410,-0.973018,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
1455,2.438219,-2.227875,-0.864197,-1.518467,-0.517200,-0.041991,-0.720298,-0.574410,-0.420316,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1456,-0.872563,4.085578,0.950423,-0.795151,1.280685,-0.373198,0.539493,-0.574410,1.711535,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1457,0.664586,-0.365633,-0.007600,-0.795151,-0.517200,0.686666,0.345679,-0.574410,-0.233889,-0.288653,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0


In [106]:
predictions = best_lasso.predict(xnew_pre)
predictions

array([101207.48287035, 135276.99976035, 176088.4621453 , ...,
       162724.64893715, 104595.44263737, 231855.54469996], shape=(1459,))

In [107]:
xnew["SalePrice_pred"] = predictions.round(2)
xnew

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice_pred
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,0,,MnPrv,,0,6,2010,WD,Normal,101207.48
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,,,Gar2,12500,6,2010,WD,Normal,135277.00
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,,MnPrv,,0,3,2010,WD,Normal,176088.46
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,,,,0,6,2010,WD,Normal,189276.81
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,0,,,,0,1,2010,WD,Normal,196730.85
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2915,160,RM,21.0,1936,Pave,,Reg,Lvl,AllPub,...,0,,,,0,6,2006,WD,Normal,80765.32
1455,2916,160,RM,21.0,1894,Pave,,Reg,Lvl,AllPub,...,0,,,,0,4,2006,WD,Abnorml,78561.80
1456,2917,20,RL,160.0,20000,Pave,,Reg,Lvl,AllPub,...,0,,,,0,9,2006,WD,Abnorml,162724.65
1457,2918,85,RL,62.0,10441,Pave,,Reg,Lvl,AllPub,...,0,,MnPrv,Shed,700,7,2006,WD,Normal,104595.44


In [108]:
xnew1 = xnew[["Id", "SalePrice_pred"]]

In [109]:
xnew1

Unnamed: 0,Id,SalePrice_pred
0,1461,101207.48
1,1462,135277.00
2,1463,176088.46
3,1464,189276.81
4,1465,196730.85
...,...,...
1454,2915,80765.32
1455,2916,78561.80
1456,2917,162724.65
1457,2918,104595.44


In [115]:
xnew1.to_csv("LassoResults.csv", index=False)

# Saving

In [116]:
pre

In [117]:
best_lasso

In [118]:
joblib.dump(pre, "pre.joblib")
joblib.dump(best_lasso, "best_lasso.joblib")

['best_lasso.joblib']

# Loading

In [119]:
p = joblib.load("pre.joblib")
m = joblib.load("best_lasso.joblib")
f = pd.read_csv("LassoResults.csv")

In [120]:
p

In [121]:
m

In [122]:
f

Unnamed: 0,Id,SalePrice_pred
0,1461,101207.48
1,1462,135277.00
2,1463,176088.46
3,1464,189276.81
4,1465,196730.85
...,...,...
1454,2915,80765.32
1455,2916,78561.80
1456,2917,162724.65
1457,2918,104595.44
