In [101]:
# Keep only sales priced at 50,000 or more.
rs = rs.loc[rs['saleprice'] >= 50000]

In [102]:
# Keep only sales priced strictly below 500,000.
rs = rs.loc[rs['saleprice'] < 500000]

In [103]:
# Keep rows with lot_frontage of at most 200 (a later cell tightens this to < 160).
# Rows whose lot_frontage is NaN fail the comparison and are dropped as well.
rs = rs.loc[rs['lot_frontage'] <= 200]

In [104]:
# Keep rows with gr_liv_area of at most 4000.
rs = rs.loc[rs['gr_liv_area'] <= 4000]

In [105]:
# Keep rows with total_bsmt_sf strictly below 3000.
rs = rs.loc[rs['total_bsmt_sf'] < 3000]

In [106]:
# Keep rows with lot_area strictly below 60000.
rs = rs.loc[rs['lot_area'] < 60000]

In [107]:
# Keep rows with lot_frontage strictly below 160.
# Rows whose lot_frontage is NaN fail the comparison and are dropped as well.
rs = rs.loc[rs['lot_frontage'] < 160]

In [108]:
# Keep rows with mas_vnr_area strictly below 1000.
# Rows whose mas_vnr_area is NaN fail the comparison and are dropped as well.
rs = rs.loc[rs['mas_vnr_area'] < 1000]

In [109]:
# Display (rows, columns) of rs after the filters applied so far.
rs.shape

(2006, 86)

In [28]:
def Modeling(X_train, X_test, y_train, y_test,features_list,ls_alpha=100, lasso_iter=1000, ri_alpha=(0.1,10,100)):
    '''
    Scale the data, fit linear / ridge / lasso regressions and report their scores.

    The features are standardised with a StandardScaler fitted on X_train only.
    LinearRegression, RidgeCV and LassoCV are then fitted on the scaled training
    data, and for each model the train, test and 5-fold cross-validated R^2 and
    RMSE, plus the train and test adjusted R^2, are reported through
    ``internal_print``. Finally the lasso coefficients are drawn as a horizontal
    bar chart and the number of coefficients that are exactly zero is printed.

    Parameters
    ----------
    X_train, X_test : 2-D array-like
        Training and test feature matrices with the same columns.
    y_train, y_test : array-like
        Training and test targets.
    features_list : sequence
        Labels for the bar chart, one per feature, in the column order of X_train.
    ls_alpha : int
        Passed to LassoCV as ``n_alphas``.
    lasso_iter : int
        Passed to LassoCV as ``max_iter``.
    ri_alpha : sequence of float
        Passed to RidgeCV as ``alphas``.

    Returns
    -------
    numpy.ndarray
        ``lasso_cv.coef_``: the fitted lasso coefficients, one per feature, in
        the column order of X_train. This is a plain array, not a DataFrame;
        pair it with ``features_list`` if labels are needed.

    Notes
    -----
    ``internal_print`` and ``r2_adj`` are helpers defined elsewhere in the file;
    they are called exactly as before (presumably a row printer and an
    adjusted-R^2 calculator -- confirm against their definitions).
    '''
    # Imported locally so this block does not depend on the file's import cell.
    from sklearn.model_selection import cross_validate

    # 1. Standardise: fit the scaler on the training data only, then reuse it on test.
    ss = StandardScaler()
    X_train_scaled = ss.fit_transform(X_train)
    X_test_scaled = ss.transform(X_test)

    # 2. Instantiate and fit the models (listed in reporting order).
    lr = LinearRegression()
    ridge_cv = RidgeCV(alphas=ri_alpha)
    lasso_cv = LassoCV(n_alphas=ls_alpha, max_iter=lasso_iter)
    models = (('LR', lr), ('Ridge', ridge_cv), ('Lasso', lasso_cv))
    for _, model in models:
        model.fit(X_train_scaled, y_train)

    # 3. 5-fold cross-validation. A single cross_validate call yields both R^2
    #    and MSE per fold, so each (nested) CV model is refitted once per fold
    #    instead of twice. cross_validate clones the estimator, so the models
    #    fitted above are left untouched.
    # NOTE(review): the scaler was fitted on all of X_train, so each CV fold has
    # seen scaling statistics from its own validation rows. Wrap scaler + model
    # in a Pipeline if strictly leak-free CV scores are required.
    cv_results = {
        label: cross_validate(
            model, X_train_scaled, y_train, cv=5,
            scoring=('r2', 'neg_mean_squared_error'),
        )
        for label, model in models
    }

    # 4. Report train / test / CV metrics for every model.
    # Taken from the scaled matrix so X_train / X_test need not be DataFrames.
    n_features = X_train_scaled.shape[1]
    for label, model in models:
        train_pred = model.predict(X_train_scaled)
        test_pred = model.predict(X_test_scaled)
        cv_r2 = cv_results[label]['test_r2']
        # The scorer returns negated MSE (higher is better); flip the sign back.
        cv_mse = -cv_results[label]['test_neg_mean_squared_error']

        internal_print(label, 'R^2', model.score(X_train_scaled, y_train),
                       model.score(X_test_scaled, y_test), cv_r2.mean(), n_title=True)
        internal_print(label, 'Adj-R^2', r2_adj(y_train, train_pred, n_features),
                       r2_adj(y_test, test_pred, n_features))
        internal_print(label, 'RMSE', np.sqrt(metrics.mean_squared_error(y_train, train_pred)),
                       np.sqrt(metrics.mean_squared_error(y_test, test_pred)),
                       np.sqrt(cv_mse.mean()))

    # 5. Plot the lasso coefficients and count the ones shrunk exactly to zero.
    pd.Series(lasso_cv.coef_, index=features_list).plot.barh(figsize=(15,20))
    n_redundant = int(np.count_nonzero(lasso_cv.coef_ == 0))
    print(f'Number of redundant features to drop: {n_redundant}')

    return lasso_cv.coef_

