In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler, PolynomialFeatures,OneHotEncoder,LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet,RidgeCV,LassoCV,ElasticNetCV
from sklearn import metrics


## Import Data

In [2]:
# Load the cleaned and preprocessed training data and preview the first rows
train = pd.read_csv('../datasets/train_cleaned_preprocessed.csv')
train.head()

Unnamed: 0,lot_frontage,lot_area,overall_qual,overall_cond,year_built,year_remod/add,mas_vnr_area,bsmtfin_sf_1,bsmtfin_sf_2,bsmt_unf_sf,...,garage_cars garage_area,garage_cars exter_qual_TA,garage_cars foundation_PConc,garage_cars kitchen_qual_TA,garage_area exter_qual_TA,garage_area foundation_PConc,garage_area kitchen_qual_TA,exter_qual_TA foundation_PConc,exter_qual_TA kitchen_qual_TA,foundation_PConc kitchen_qual_TA
0,69.0552,13517,6,8,1976,2005,289.0,533.0,0.0,192.0,...,950.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,43.0,11492,7,5,1996,1997,132.0,637.0,0.0,276.0,...,1118.0,0.0,2.0,0.0,0.0,559.0,0.0,0.0,0.0,0.0
2,68.0,7922,5,7,1953,2007,0.0,731.0,0.0,326.0,...,246.0,1.0,0.0,0.0,246.0,0.0,0.0,0.0,0.0,0.0
3,73.0,9802,5,5,2006,2007,0.0,0.0,0.0,384.0,...,800.0,2.0,2.0,2.0,400.0,400.0,400.0,1.0,1.0,1.0
4,82.0,14235,6,8,1900,1993,0.0,0.0,0.0,676.0,...,968.0,2.0,2.0,2.0,484.0,484.0,484.0,1.0,1.0,1.0


In [3]:
# Load the cleaned and preprocessed test data and preview the first rows
test = pd.read_csv('../datasets/test_cleaned_preprocessed.csv')
test.head()

Unnamed: 0,id,lot_frontage,lot_area,overall_qual,overall_cond,year_built,year_remod/add,mas_vnr_area,bsmtfin_sf_1,bsmtfin_sf_2,...,garage_cars garage_area,garage_cars exter_qual_TA,garage_cars foundation_PConc,garage_cars kitchen_qual_TA,garage_area exter_qual_TA,garage_area foundation_PConc,garage_area kitchen_qual_TA,exter_qual_TA foundation_PConc,exter_qual_TA kitchen_qual_TA,foundation_PConc kitchen_qual_TA
0,2658,69.0,9142,6,8,1910,1950,0.0,0,0,...,440.0,1.0,0.0,0.0,440.0,0.0,0.0,0.0,0.0,0.0
1,2718,69.0552,9662,5,4,1977,1977,0.0,0,0,...,1160.0,2.0,0.0,2.0,580.0,0.0,580.0,0.0,1.0,0.0
2,2414,58.0,17104,7,5,2006,2006,0.0,554,0,...,852.0,0.0,2.0,0.0,0.0,426.0,0.0,0.0,0.0,0.0
3,1989,60.0,8520,5,6,1923,2006,0.0,0,0,...,960.0,0.0,0.0,2.0,0.0,0.0,480.0,0.0,0.0,0.0
4,625,69.0552,9500,6,5,1963,1963,247.0,609,0,...,1028.0,2.0,0.0,2.0,514.0,0.0,514.0,0.0,1.0,0.0


## 0: Baseline model (predict the mean sale price)

In [4]:
# Baseline submission: keep the test ids (renamed to 'Id') and predict the
# mean training sale price for every row
baseline_price = train['saleprice'].mean()
submit = test[['id']].rename(columns={'id':'Id'}).assign(SalePrice=baseline_price)
submit.head()

Unnamed: 0,Id,SalePrice
0,2658,181358.728117
1,2718,181358.728117
2,2414,181358.728117
3,1989,181358.728117
4,625,181358.728117


In [5]:
# Save the baseline submission as CSV, without the DataFrame index
submit.to_csv('../Model/0_baseline_model.csv',index=False)

## 1: All features model

In [6]:
# Target is the sale price; every other column of train is used as a predictor
y = train['saleprice']
features1 = [name for name in train.columns if name != 'saleprice']
X = train[features1]


In [7]:
# Fit the scaler on the training features and standardize them;
# the fitted scaler is kept so the test set can be transformed the same way
ss = StandardScaler().fit(X)
X_scale = ss.transform(X)

In [8]:
# Ordinary least squares on all standardized features; the displayed value is the training R2
lr = LinearRegression().fit(X_scale, y)
lr.score(X_scale, y)

0.9568021339524264

In [9]:
# Training RMSE of the linear model
train_predictions = lr.predict(X_scale)
np.sqrt(metrics.mean_squared_error(y, train_predictions))

16474.905601558727

In [10]:
# Mean R2 over the cross-validation folds (very bad performance on unseen data)
lr_r2_per_fold = cross_val_score(lr, X_scale, y)
lr_cv_score = lr_r2_per_fold.mean()
lr_cv_score

-1.0802777287766038e+24

In [11]:
# The scorer returns negated RMSE per fold; take absolute values before averaging
lr_neg_rmse_per_fold = cross_val_score(lr, X_scale, y, scoring='neg_root_mean_squared_error')
lr_cv_rmse = np.abs(lr_neg_rmse_per_fold).mean()
lr_cv_rmse

5.2644784812583176e+16

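This model is overfit: it scores well on the training data but fails badly under cross-validation. Next, try the regularized models Ridge, Lasso, and ElasticNet.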

**Ridge**

In [12]:
# Tune the Ridge penalty over 200 log-spaced alphas between 1e0 and 1e5
ridge_alphas = np.logspace(0, 5, 200)
ridgecv = RidgeCV(alphas=ridge_alphas).fit(X_scale, y)
ridgecv.score(X_scale, y)

0.9527469404136027

In [13]:
ridgecv.alpha_

136.67163564620074

In [14]:
# Refit a plain Ridge at the alpha selected by RidgeCV; display its training R2
ridge = Ridge(alpha=ridgecv.alpha_).fit(X_scale, y)
ridge.score(X_scale, y)

0.9527469404136014

In [15]:
# Mean cross-validated R2 for Ridge (much better performance)
ridge_r2_per_fold = cross_val_score(ridge, X_scale, y)
ridge_cv_score = ridge_r2_per_fold.mean()
ridge_cv_score

0.9306425358684954

In [16]:
# Mean cross-validated RMSE for Ridge (the scorer returns negated values)
ridge_neg_rmse_per_fold = cross_val_score(ridge, X_scale, y, scoring='neg_root_mean_squared_error')
ridge_cv_rmse = np.abs(ridge_neg_rmse_per_fold).mean()
ridge_cv_rmse

20784.317912777904

**Lasso**

In [17]:
# Tune the Lasso penalty by cross-validation over 200 candidate alphas
lassocv = LassoCV(n_alphas=200).fit(X_scale, y)
lassocv.score(X_scale, y)

0.9519326195536462

In [18]:
# Alpha selected by LassoCV; reused below to build the final Lasso model
lassocv_optimal_alpha = lassocv.alpha_
lassocv_optimal_alpha

182.9101588799186

In [19]:
# Refit a plain Lasso at the alpha selected by LassoCV; display its training R2
lasso = Lasso(alpha=lassocv_optimal_alpha).fit(X_scale, y)
lasso.score(X_scale, y)

0.9519326195536462

In [20]:
# Mean cross-validated R2 for Lasso
lasso_r2_per_fold = cross_val_score(lasso, X_scale, y)
lasso_cv_score = lasso_r2_per_fold.mean()
lasso_cv_score

0.9330282369758436

In [21]:
# Mean cross-validated RMSE for Lasso (the scorer returns negated values)
lasso_neg_rmse_per_fold = cross_val_score(lasso, X_scale, y, scoring='neg_root_mean_squared_error')
lasso_cv_rmse = np.abs(lasso_neg_rmse_per_fold).mean()
lasso_cv_rmse

20411.074262132075

**ElasticNet**

In [22]:
# Tune ElasticNet over 100 alphas and 20 l1_ratio values evenly spaced in [0.1, 1]
l1_ratio_grid = np.linspace(0.1, 1, 20)
encv = ElasticNetCV(n_alphas=100, l1_ratio=l1_ratio_grid).fit(X_scale, y)
encv.score(X_scale, y)

0.951916131871083

In [23]:
# Alpha selected by ElasticNetCV; reused below to build the final ElasticNet model
encv_optimal_alpha = encv.alpha_
encv_optimal_alpha

183.81023845540972

In [24]:
# l1_ratio selected by ElasticNetCV; a value of 1.0 is a pure L1 penalty, i.e. the same as Lasso
encv_optimal_l1_ratio = encv.l1_ratio_
encv_optimal_l1_ratio

1.0

In [25]:
# Refit a plain ElasticNet at the alpha and l1_ratio selected by ElasticNetCV; display its training R2
en = ElasticNet(alpha=encv_optimal_alpha, l1_ratio=encv_optimal_l1_ratio).fit(X_scale, y)
en.score(X_scale, y)

0.951916131871083

In [26]:
# Mean cross-validated R2 for ElasticNet
en_r2_per_fold = cross_val_score(en, X_scale, y)
elasticnet_cv_score = en_r2_per_fold.mean()
elasticnet_cv_score

0.9330300778469194

In [27]:
# Mean cross-validated RMSE for ElasticNet (the scorer returns negated values)
en_neg_rmse_per_fold = cross_val_score(en, X_scale, y, scoring='neg_root_mean_squared_error')
elasticnet_cv_rmse = np.abs(en_neg_rmse_per_fold).mean()
elasticnet_cv_rmse

20410.86462984643

### Report Results

In [28]:
# Summarize the cross-validated R2 and RMSE of each model (rounded to 4 decimals),
# plus the l1_ratio chosen by ElasticNetCV
print(f"""     
     LinearRegression
     Estimated R2 Score on unseen data: {round(lr_cv_score,4)}
     Estimated RMSE on unseen data: {round(lr_cv_rmse,4)}
          
     Ridge
     Estimated R2 Score on unseen data: {round(ridge_cv_score,4)}
     Estimated RMSE on unseen data: {round(ridge_cv_rmse,4)}

     Lasso
     Estimated R2 Score on unseen data: {round(lasso_cv_score,4)}
     Estimated RMSE on unseen data: {round(lasso_cv_rmse,4)}   

     ElasticNet
     Opimal l1 ration: {encv_optimal_l1_ratio}
     Estimated R2 Score on unseen data: {round(elasticnet_cv_score,4)}
     Estimated RMSE on unseen data: {round(elasticnet_cv_rmse,4)}         
        """)

     
     LinearRegression
     Estimated R2 Score on unseen data: -1.0802777287766039e+24
     Estimated RMSE on unseen data: 5.2644784812583176e+16
          
     Ridge
     Estimated R2 Score on unseen data: 0.9306
     Estimated RMSE on unseen data: 20784.3179

     Lasso
     Estimated R2 Score on unseen data: 0.933
     Estimated RMSE on unseen data: 20411.0743   

     ElasticNet
     Opimal l1 ration: 1.0
     Estimated R2 Score on unseen data: 0.933
     Estimated RMSE on unseen data: 20410.8646         
        


## Apply to test data

In [29]:
# Select from the test set the same feature columns the model was trained on
X_test = test[features1]
# Standardize with the scaler already fitted on the training features (no refit on test)
X_test_scale = ss.transform(X_test)
# Predict with the fitted ElasticNet and overwrite the SalePrice column of the submission
submit['SalePrice'] = en.predict(X_test_scale)

In [30]:
submit.head()

Unnamed: 0,Id,SalePrice
0,2658,134869.973162
1,2718,149719.09089
2,2414,212586.726413
3,1989,100628.404473
4,625,169036.056921


In [31]:
# Write the submission to a CSV file, without the DataFrame index
submit.to_csv('../Model/1_all_features_elasticnet.csv',index=False)

## Write a function that fits all models and reports the results

In [32]:
def all_model_fit_report(feature):
    '''Cross-validate four linear models on a feature subset and name the best one.

    The models are LinearRegression, Ridge, Lasso and ElasticNet. The Ridge,
    Lasso and ElasticNet hyper-parameters are first tuned with RidgeCV,
    LassoCV and ElasticNetCV on the standardized features, then each model
    is scored with cross_val_score (R2 and RMSE).

    Reads the notebook-level ``train`` DataFrame (predictors plus the
    'saleprice' target). Prints a text report and, as a side effect, sets
    the global pandas float display format to four decimals.

    NOTE(review): the scaler and the tuned hyper-parameters are fitted on the
    full training data before cross_val_score runs, so the reported scores
    may be somewhat optimistic.

    Parameters
    ----------
    feature : list of str
        Column names of ``train`` to use as predictors.

    Returns
    -------
    str
        'LinearRegression', 'Ridge', 'Lasso' or 'ElasticNet', whichever has
        the lowest mean cross-validated RMSE.
    '''
    # Create X,y and standardize the features
    X = train[feature]
    y = train['saleprice']
    X_scale = StandardScaler().fit_transform(X)

    def _cv_r2_and_rmse(estimator):
        '''Return (mean CV R2, mean CV RMSE) for an estimator on X_scale, y.'''
        # cross_val_score clones and refits the estimator on every fold,
        # so the estimator can be passed in unfitted.
        r2 = cross_val_score(estimator, X_scale, y).mean()
        # The scorer returns negated RMSE per fold, hence the absolute value.
        rmse = np.abs(cross_val_score(estimator, X_scale, y,
                                      scoring='neg_root_mean_squared_error')).mean()
        return r2, rmse

    # LinearRegression
    lr_cv_score, lr_cv_rmse = _cv_r2_and_rmse(LinearRegression())

    # Ridge, using alpha_ from RidgeCV
    ridgecv = RidgeCV(alphas=np.logspace(0,5,200)).fit(X_scale,y)
    ridge_cv_score, ridge_cv_rmse = _cv_r2_and_rmse(Ridge(alpha=ridgecv.alpha_))

    # Lasso, using alpha_ from LassoCV
    lassocv = LassoCV(n_alphas=200).fit(X_scale,y)
    lasso_cv_score, lasso_cv_rmse = _cv_r2_and_rmse(Lasso(alpha=lassocv.alpha_))

    # ElasticNet, using alpha_ and l1_ratio_ from ElasticNetCV
    encv = ElasticNetCV(n_alphas=100,l1_ratio=np.linspace(0.1,1,20)).fit(X_scale,y)
    elasticnet_cv_score, elasticnet_cv_rmse = _cv_r2_and_rmse(
        ElasticNet(alpha=encv.alpha_,l1_ratio=encv.l1_ratio_))

    #Print Result
    # The l1 ratio comes from the ElasticNetCV fitted in this call, not from a
    # notebook-level variable left over from another feature set.
    print(f"""     
         LinearRegression
         Estimated R2 Score on unseen data: {round(lr_cv_score,4)}
         Estimated RMSE on unseen data: {round(lr_cv_rmse,4)}

         Ridge
         Estimated R2 Score on unseen data: {round(ridge_cv_score,4)}
         Estimated RMSE on unseen data: {round(ridge_cv_rmse,4)}

         Lasso
         Estimated R2 Score on unseen data: {round(lasso_cv_score,4)}
         Estimated RMSE on unseen data: {round(lasso_cv_rmse,4)}   

         ElasticNet
         Opimal l1 ration: {encv.l1_ratio_}
         Estimated R2 Score on unseen data: {round(elasticnet_cv_score,4)}
         Estimated RMSE on unseen data: {round(elasticnet_cv_rmse,4)}         
        """)
    
    all_model = { "Model":['LinearRegression','Ridge','Lasso','ElasticNet'],
                 "cv_rmse":[lr_cv_rmse,ridge_cv_rmse,lasso_cv_rmse,elasticnet_cv_rmse],
                 "cv_score" :[lr_cv_score,ridge_cv_score,lasso_cv_score,elasticnet_cv_score]
                }
    # Global display side effect kept from the original: later DataFrame
    # outputs in the notebook are rendered with four decimals.
    pd.options.display.float_format = '{:.4f}'.format
    df = pd.DataFrame(all_model)
    # Lowest cross-validated RMSE wins
    best_model = df.sort_values('cv_rmse')['Model'].iloc[0]
    return best_model

In [33]:
#test on function
all_model_fit_report(features1)

     
         LinearRegression
         Estimated R2 Score on unseen data: -1.0802777287766039e+24
         Estimated RMSE on unseen data: 5.2644784812583176e+16

         Ridge
         Estimated R2 Score on unseen data: 0.9306
         Estimated RMSE on unseen data: 20784.3179

         Lasso
         Estimated R2 Score on unseen data: 0.933
         Estimated RMSE on unseen data: 20411.0743   

         ElasticNet
         Opimal l1 ration: 1.0
         Estimated R2 Score on unseen data: 0.933
         Estimated RMSE on unseen data: 20410.8646         
        


'ElasticNet'

## 2: All numeric model

In [36]:
# Load the cleaned (not yet preprocessed) training data to recover the original column names
data = pd.read_csv('../datasets/train_cleaned.csv')
data.columns

Index(['ms_subclass', 'ms_zoning', 'lot_frontage', 'lot_area', 'street',
       'alley', 'lot_shape', 'land_contour', 'utilities', 'lot_config',
       'land_slope', 'neighborhood', 'condition_1', 'condition_2', 'bldg_type',
       'house_style', 'overall_qual', 'overall_cond', 'year_built',
       'year_remod/add', 'roof_style', 'roof_matl', 'exterior_1st',
       'exterior_2nd', 'mas_vnr_type', 'mas_vnr_area', 'exter_qual',
       'exter_cond', 'foundation', 'bsmt_qual', 'bsmt_cond', 'bsmt_exposure',
       'bsmtfin_type_1', 'bsmtfin_sf_1', 'bsmtfin_type_2', 'bsmtfin_sf_2',
       'bsmt_unf_sf', 'total_bsmt_sf', 'heating', 'heating_qc', 'central_air',
       'electrical', '1st_flr_sf', '2nd_flr_sf', 'low_qual_fin_sf',
       'gr_liv_area', 'bsmt_full_bath', 'bsmt_half_bath', 'full_bath',
       'half_bath', 'bedroom_abvgr', 'kitchen_abvgr', 'kitchen_qual',
       'totrms_abvgrd', 'functional', 'fireplaces', 'fireplace_qu',
       'garage_type', 'garage_finish', 'garage_cars', 'garage

In [51]:
# Categorical features: every object-dtype column, plus 'ms_subclass',
# a non-object column that is nonetheless treated as categorical here
object_dtype_mask = data.dtypes == 'object'
categorical_columns = list(data.dtypes[object_dtype_mask].index) + ['ms_subclass']

In [52]:
categorical_columns

['ms_zoning',
 'street',
 'alley',
 'lot_shape',
 'land_contour',
 'utilities',
 'lot_config',
 'land_slope',
 'neighborhood',
 'condition_1',
 'condition_2',
 'bldg_type',
 'house_style',
 'roof_style',
 'roof_matl',
 'exterior_1st',
 'exterior_2nd',
 'mas_vnr_type',
 'exter_qual',
 'exter_cond',
 'foundation',
 'bsmt_qual',
 'bsmt_cond',
 'bsmt_exposure',
 'bsmtfin_type_1',
 'bsmtfin_type_2',
 'heating',
 'heating_qc',
 'central_air',
 'electrical',
 'kitchen_qual',
 'functional',
 'fireplace_qu',
 'garage_type',
 'garage_finish',
 'garage_qual',
 'garage_cond',
 'paved_drive',
 'pool_qc',
 'fence',
 'misc_feature',
 'sale_type',
 'ms_subclass']

In [55]:
# Numeric features: everything that is neither categorical nor the target
excluded_columns = set(categorical_columns) | {'saleprice'}
numerical_columns = [name for name in data.columns if name not in excluded_columns]
numerical_columns

['lot_frontage',
 'lot_area',
 'overall_qual',
 'overall_cond',
 'year_built',
 'year_remod/add',
 'mas_vnr_area',
 'bsmtfin_sf_1',
 'bsmtfin_sf_2',
 'bsmt_unf_sf',
 'total_bsmt_sf',
 '1st_flr_sf',
 '2nd_flr_sf',
 'low_qual_fin_sf',
 'gr_liv_area',
 'bsmt_full_bath',
 'bsmt_half_bath',
 'full_bath',
 'half_bath',
 'bedroom_abvgr',
 'kitchen_abvgr',
 'totrms_abvgrd',
 'fireplaces',
 'garage_cars',
 'garage_area',
 'wood_deck_sf',
 'open_porch_sf',
 'enclosed_porch',
 '3ssn_porch',
 'screen_porch',
 'pool_area',
 'misc_val',
 'mo_sold',
 'yr_sold',
 'age_garage']

In [58]:
# Feature set 2: the numeric columns only; compare all four models on it
features2 = numerical_columns
all_model_fit_report(features2)

     
         LinearRegression
         Estimated R2 Score on unseen data: 0.8696
         Estimated RMSE on unseen data: 28552.85

         Ridge
         Estimated R2 Score on unseen data: 0.8698
         Estimated RMSE on unseen data: 28525.4217

         Lasso
         Estimated R2 Score on unseen data: 0.8703
         Estimated RMSE on unseen data: 28472.2809   

         ElasticNet
         Opimal l1 ration: 1.0
         Estimated R2 Score on unseen data: 0.8703
         Estimated RMSE on unseen data: 28472.3848         
        


'Lasso'

In [59]:
# Target and design matrix for the numeric-only feature set
y = train['saleprice']
X = train[features2]
# Fit the scaler on these training features, then standardize them
ss = StandardScaler().fit(X)
X_scale = ss.transform(X)

In [61]:
# Pick the Lasso penalty by cross-validation over 200 candidate alphas
lassocv = LassoCV(n_alphas=200).fit(X_scale, y)
# Refit a plain Lasso at the selected alpha and estimate its out-of-sample R2 and RMSE
lasso = Lasso(alpha=lassocv.alpha_).fit(X_scale, y)
lasso_r2_per_fold = cross_val_score(lasso, X_scale, y)
lasso_cv_score = lasso_r2_per_fold.mean()
lasso_neg_rmse_per_fold = cross_val_score(lasso, X_scale, y, scoring='neg_root_mean_squared_error')
lasso_cv_rmse = np.abs(lasso_neg_rmse_per_fold).mean()

In [62]:
lasso_cv_rmse

28472.280888586283

In [64]:
# Select from the test set the same numeric feature columns the model was trained on
X_test = test[features2]
# Standardize with the scaler already fitted on the training features (no refit on test)
X_test_scale = ss.transform(X_test)
# Predict with the fitted Lasso and overwrite the SalePrice column of the submission
submit['SalePrice'] = lasso.predict(X_test_scale)

In [65]:
submit.head()

Unnamed: 0,Id,SalePrice
0,2658,143138.6916
1,2718,161881.2238
2,2414,206265.7776
3,1989,108688.5092
4,625,189744.0121


In [66]:
# Write the submission to a CSV file, without the DataFrame index
submit.to_csv('../Model/2_all_numerical_lasso.csv',index=False)

## 3: Subset selection using forward stepwise

In [70]:
#Forward Stepwise
# Greedy forward selection. At step i, every feature not yet selected is tried
# on top of the current set and the one giving the highest training R2 is kept.
# The winning set of each step is then scored once with cross-validation, and
# that value is stored under 'r2 CV' (the original referenced an undefined
# CV_score here). Only the per-step winner is cross-validated to keep the
# already expensive search affordable.
y = train['saleprice']
result = {}  # step number -> {'feature': selected columns, 'r2 CV': mean CV R2}
base_feature_list = []  # features selected so far
for i in range(1, len(features1) + 1):  # +1 so the last remaining feature can be added too
    best_train_r2 = -np.inf
    best_feature_list = None
    remaining = [f for f in features1 if f not in base_feature_list]
    for feature in remaining:
        feature_list = base_feature_list + [feature]
        # Local names are used so the notebook-level X / ss / X_scale / lr are not overwritten
        candidate_scaled = StandardScaler().fit_transform(train[feature_list])
        train_r2 = LinearRegression().fit(candidate_scaled, y).score(candidate_scaled, y)
        if train_r2 > best_train_r2:
            best_train_r2 = train_r2
            best_feature_list = feature_list
    if best_feature_list is None:
        # No candidate produced a comparable score; stop instead of reusing a stale result
        break
    best_scaled = StandardScaler().fit_transform(train[best_feature_list])
    cv_score = cross_val_score(LinearRegression(), best_scaled, y).mean()
    result[i] = {'feature': best_feature_list, 'r2 CV': cv_score}
    base_feature_list = best_feature_list

result

KeyboardInterrupt: 

In [None]:
# One row per forward-stepwise step; show the five steps with the highest 'r2 CV'
pd.DataFrame(result).T.sort_values('r2 CV',ascending=False).head()

## 4: Subset selection using backward elimination

In [76]:
import statsmodels.api as sm

In [80]:
# Backward elimination: fit OLS on the current columns, drop the feature with
# the largest p-value, and repeat until every remaining p-value is at or below
# the threshold.
P_VALUE_THRESHOLD = 0.05
y = train['saleprice']  # set explicitly instead of relying on an earlier cell's y
cols = list(features1)  # copy, so that removing columns does not shrink features1
while cols:
    X_1 = sm.add_constant(train[cols])
    model = sm.OLS(y, X_1).fit()
    # Align p-values by label and leave out the intercept; this stays correct
    # even if add_constant did not add a 'const' column.
    p = model.pvalues.drop('const', errors='ignore')
    # Series.max and Series.idxmax both skip NaN, so they refer to the same feature
    pmax = p.max()
    features_with_p_max = p.idxmax()
    if pmax > P_VALUE_THRESHOLD:
        cols.remove(features_with_p_max)
    else:
        break
features4 = cols

In [95]:
len(features4)

116

In [82]:
all_model_fit_report(features4)

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


     
         LinearRegression
         Estimated R2 Score on unseen data: -1.9767161753301674e+23
         Estimated RMSE on unseen data: 1.5787514309143082e+16

         Ridge
         Estimated R2 Score on unseen data: 0.9413
         Estimated RMSE on unseen data: 19124.9643

         Lasso
         Estimated R2 Score on unseen data: 0.9397
         Estimated RMSE on unseen data: 19401.5607   

         ElasticNet
         Opimal l1 ration: 1.0
         Estimated R2 Score on unseen data: 0.9397
         Estimated RMSE on unseen data: 19401.5607         
        


  model = cd_fast.enet_coordinate_descent(


'Ridge'

In [83]:
# Target and design matrix for the backward-elimination feature set
y = train['saleprice']
X = train[features4]
# Fit the scaler on these training features, then standardize them
ss = StandardScaler().fit(X)
X_scale = ss.transform(X)

In [84]:
# Pick the Ridge penalty from 200 log-spaced alphas between 1e0 and 1e5
ridge_alphas = np.logspace(0, 5, 200)
ridgecv = RidgeCV(alphas=ridge_alphas).fit(X_scale, y)
# Refit a plain Ridge at the selected alpha and estimate its out-of-sample R2 and RMSE
ridge = Ridge(alpha=ridgecv.alpha_).fit(X_scale, y)
ridge_r2_per_fold = cross_val_score(ridge, X_scale, y)
ridge_cv_score = ridge_r2_per_fold.mean()
ridge_neg_rmse_per_fold = cross_val_score(ridge, X_scale, y, scoring='neg_root_mean_squared_error')
ridge_cv_rmse = np.abs(ridge_neg_rmse_per_fold).mean()

In [85]:
ridge_cv_rmse

19124.9642866445

In [87]:
# Select from the test set the same backward-elimination features the model was trained on
X_test = test[features4]
# Standardize with the scaler already fitted on the training features (no refit on test)
X_test_scale = ss.transform(X_test)
# Predict with the fitted Ridge and overwrite the SalePrice column of the submission
submit['SalePrice'] = ridge.predict(X_test_scale)

In [88]:
submit.head()

Unnamed: 0,Id,SalePrice
0,2658,96232.1883
1,2718,146992.2354
2,2414,211402.6808
3,1989,160284.3521
4,625,169958.6091


In [89]:
# Write the submission to a CSV file, without the DataFrame index
submit.to_csv('../Model/4_backward_elimination_ridge.csv',index=False)

In [90]:
# Pick the Lasso penalty by cross-validation over 200 candidate alphas
lassocv = LassoCV(n_alphas=200).fit(X_scale, y)
# Refit a plain Lasso at the selected alpha and estimate its out-of-sample R2 and RMSE
lasso = Lasso(alpha=lassocv.alpha_).fit(X_scale, y)
lasso_r2_per_fold = cross_val_score(lasso, X_scale, y)
lasso_cv_score = lasso_r2_per_fold.mean()
lasso_neg_rmse_per_fold = cross_val_score(lasso, X_scale, y, scoring='neg_root_mean_squared_error')
lasso_cv_rmse = np.abs(lasso_neg_rmse_per_fold).mean()

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [91]:
lasso_cv_rmse

19401.560682282616

In [92]:
# Select from the test set the same backward-elimination features the model was trained on
X_test = test[features4]
# Standardize with the scaler already fitted on the training features (no refit on test)
X_test_scale = ss.transform(X_test)
# Predict with the fitted Lasso and overwrite the SalePrice column of the submission
submit['SalePrice'] = lasso.predict(X_test_scale)

In [93]:
submit.head()

Unnamed: 0,Id,SalePrice
0,2658,111051.4707
1,2718,146884.8605
2,2414,213152.3634
3,1989,103309.8885
4,625,169350.5233


In [94]:
# Write the submission to a CSV file, without the DataFrame index
submit.to_csv('../Model/4_backward_elimination_lasso.csv',index=False)