In [204]:
#imports

import pandas as pd
import numpy as np
import matplotlib
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.compose import TransformedTargetRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor



In [205]:
# Load the Ames housing CSV, discard the 'Order', 'PID' and 'Neighborhood'
# columns, and replace every missing value (in every column) with 0.
# Note: for text columns this creates a literal 0 category, which later shows
# up as dummy columns such as 'Garage Type_0'.
data = (
    pd.read_csv("AmesHousing.csv")
    .drop(columns=['Order', 'PID', 'Neighborhood'])
    .fillna(0)
)

# Quick look at what was loaded.
print(data.columns)
print(data.head())

Index(['MS SubClass', 'MS Zoning', 'Lot Frontage', 'Lot Area', 'Street',
       'Alley', 'Lot Shape', 'Land Contour', 'Utilities', 'Lot Config',
       'Land Slope', 'Condition 1', 'Condition 2', 'Bldg Type', 'House Style',
       'Overall Qual', 'Overall Cond', 'Year Built', 'Year Remod/Add',
       'Roof Style', 'Roof Matl', 'Exterior 1st', 'Exterior 2nd',
       'Mas Vnr Type', 'Mas Vnr Area', 'Exter Qual', 'Exter Cond',
       'Foundation', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure',
       'BsmtFin Type 1', 'BsmtFin SF 1', 'BsmtFin Type 2', 'BsmtFin SF 2',
       'Bsmt Unf SF', 'Total Bsmt SF', 'Heating', 'Heating QC', 'Central Air',
       'Electrical', '1st Flr SF', '2nd Flr SF', 'Low Qual Fin SF',
       'Gr Liv Area', 'Bsmt Full Bath', 'Bsmt Half Bath', 'Full Bath',
       'Half Bath', 'Bedroom AbvGr', 'Kitchen AbvGr', 'Kitchen Qual',
       'TotRms AbvGrd', 'Functional', 'Fireplaces', 'Fireplace Qu',
       'Garage Type', 'Garage Yr Blt', 'Garage Finish', 'Garage Cars',
      

In [206]:
# One-hot encode the categorical columns that are candidates for modelling.
categorical_features = [
    'MS Zoning',
    'Street',
    'Condition 1',
    'Roof Matl',
    'Heating QC',
    'Kitchen Qual',
    'Garage Type',
    'Paved Drive',
    'Sale Condition',
]
dummified_additional = pd.get_dummies(data[categorical_features], dtype=float)


def _dummy_columns(label):
    """Return the dummy column names whose name contains `label` (substring match)."""
    return dummified_additional.filter(like=label).columns.tolist()


# One list of dummy column names per original categorical feature, so that a
# whole feature can be added to (or left out of) a candidate column set.
street = _dummy_columns('Street')
condition_1 = _dummy_columns('Condition 1')
roof_matl = _dummy_columns('Roof Matl')
heating_qc = _dummy_columns('Heating QC')
kitchen_qual = _dummy_columns('Kitchen Qual')
garage_type = _dummy_columns('Garage Type')
paved_drive = _dummy_columns('Paved Drive')
sale_condition = _dummy_columns('Sale Condition')
zoning = _dummy_columns('MS Zoning')

# All per-feature groups collected in one place.
from_cats = [
    street,
    condition_1,
    roof_matl,
    heating_qc,
    kitchen_qual,
    garage_type,
    paved_drive,
    sale_condition,
    zoning,
]


In [207]:
# Numeric columns that are used as-is (no encoding required).
cols_to_use = [
    'Gr Liv Area',
    'Overall Qual',
    'Year Built',
    'Garage Area',
    'Total Bsmt SF',
    'Full Bath',
    'Year Remod/Add',
    'Fireplaces',
    'Lot Area',
]

# Build the modelling frame: dummy columns first, then the numeric features,
# then the target 'SalePrice' as the last column.
# NOTE: this overwrites `data`, so the raw categorical columns are no longer
# present afterwards; reload the CSV before re-running the encoding cell.
data = pd.concat(
    [dummified_additional, data[cols_to_use + ['SalePrice']]],
    axis=1,
)
print(data.columns)

Index(['MS Zoning_A (agr)', 'MS Zoning_C (all)', 'MS Zoning_FV',
       'MS Zoning_I (all)', 'MS Zoning_RH', 'MS Zoning_RL', 'MS Zoning_RM',
       'Street_Grvl', 'Street_Pave', 'Condition 1_Artery', 'Condition 1_Feedr',
       'Condition 1_Norm', 'Condition 1_PosA', 'Condition 1_PosN',
       'Condition 1_RRAe', 'Condition 1_RRAn', 'Condition 1_RRNe',
       'Condition 1_RRNn', 'Roof Matl_ClyTile', 'Roof Matl_CompShg',
       'Roof Matl_Membran', 'Roof Matl_Metal', 'Roof Matl_Roll',
       'Roof Matl_Tar&Grv', 'Roof Matl_WdShake', 'Roof Matl_WdShngl',
       'Heating QC_Ex', 'Heating QC_Fa', 'Heating QC_Gd', 'Heating QC_Po',
       'Heating QC_TA', 'Kitchen Qual_Ex', 'Kitchen Qual_Fa',
       'Kitchen Qual_Gd', 'Kitchen Qual_Po', 'Kitchen Qual_TA',
       'Garage Type_0', 'Garage Type_2Types', 'Garage Type_Attchd',
       'Garage Type_Basment', 'Garage Type_BuiltIn', 'Garage Type_CarPort',
       'Garage Type_Detchd', 'Paved Drive_N', 'Paved Drive_P', 'Paved Drive_Y',
       'Sale Con

In [208]:
# Column-selection transformer; its `columns` parameter is what the grid
# searches below vary via 'column_select__columns'.
class SelectColumns(BaseEstimator, TransformerMixin):
    """Pipeline step that keeps only a chosen set of columns.

    Parameters
    ----------
    columns : list
        Column labels used to index the input in `transform`.
    """

    def __init__(self, columns):
        # Stored unmodified under the same name as the constructor argument.
        self.columns = columns

    def fit(self, xs, ys=None, **params):
        """Do nothing and return `self`; selection needs no fitting.

        `ys` defaults to None so the transformer can also be fitted without a
        target, e.g. through `fit(xs)` or `fit_transform(xs)`.
        """
        return self

    def transform(self, xs):
        """Return `xs` indexed by the configured column labels."""
        return xs[self.columns]

# Default regressor for the linear pipeline: ordinary least squares fitted on
# the square root of the target, with predictions squared back afterwards.
regressor = TransformedTargetRegressor(
    regressor=LinearRegression(n_jobs=-1),
    func=np.sqrt,
    inverse_func=np.square,
)

In [209]:


# Linear-regression pipeline: select feature columns, then fit the regressor.
# The default column list uses the frame's real column names (with spaces);
# the previous 'GrLivArea' / 'OverallQual' labels do not exist in `data`, so
# fitting the pipeline without a grid-search override raised a KeyError.
steps_lr = [
    ('column_select', SelectColumns(['Gr Liv Area', 'Overall Qual'])),
    ('linear_regression', regressor),
]
pipe_lr = Pipeline(steps_lr)

# Feature matrix (every column except the target) and target vector.
xs = data.drop(columns=['SalePrice'])
ys = data['SalePrice']

In [210]:



# Search space for the linear-regression pipeline.
# Only one candidate feature set is active: seven numeric columns plus the
# dummy groups for condition_1, heating_qc, kitchen_qual, garage_type,
# sale_condition and zoning. The street, roof_matl and paved_drive groups are
# not included.
grid_lr = {
    'column_select__columns': [
        [
            'Gr Liv Area',
            'Overall Qual',
            'Year Built',
            'Garage Area',
            'Year Remod/Add',
            'Fireplaces',
            'Lot Area',
            *condition_1,
            *heating_qc,
            *kitchen_qual,
            *garage_type,
            *sale_condition,
            *zoning,
        ],
    ],
    # Candidate final steps: the same linear model on the raw target and on
    # three transformed versions of it (sqrt, cube root, log).
    'linear_regression': [
        LinearRegression(n_jobs=-1),
        TransformedTargetRegressor(
            LinearRegression(n_jobs=-1),
            func=np.sqrt,
            inverse_func=np.square,
        ),
        TransformedTargetRegressor(
            LinearRegression(n_jobs=-1),
            func=np.cbrt,
            inverse_func=lambda y: np.power(y, 3),
        ),
        TransformedTargetRegressor(
            LinearRegression(n_jobs=-1),
            func=np.log,
            inverse_func=np.exp,
        ),
    ],
}

In [211]:
# Cross-validated (cv=5) grid search over grid_lr, scored by R-squared and
# run on all available cores.
search_lr = GridSearchCV(
    estimator=pipe_lr,
    param_grid=grid_lr,
    scoring='r2',
    n_jobs=-1,
    cv=5,
)
search_lr.fit(xs, ys)

# Report the best mean cross-validation score and the settings that gave it.
print(f"Linear regression R-squared: {search_lr.best_score_}")
print(f"Best params: {search_lr.best_params_}")


Linear regression R-squared: 0.823372805555661
Best params: {'column_select__columns': ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Garage Area', 'Year Remod/Add', 'Fireplaces', 'Lot Area', 'Condition 1_Artery', 'Condition 1_Feedr', 'Condition 1_Norm', 'Condition 1_PosA', 'Condition 1_PosN', 'Condition 1_RRAe', 'Condition 1_RRAn', 'Condition 1_RRNe', 'Condition 1_RRNn', 'Heating QC_Ex', 'Heating QC_Fa', 'Heating QC_Gd', 'Heating QC_Po', 'Heating QC_TA', 'Kitchen Qual_Ex', 'Kitchen Qual_Fa', 'Kitchen Qual_Gd', 'Kitchen Qual_Po', 'Kitchen Qual_TA', 'Garage Type_0', 'Garage Type_2Types', 'Garage Type_Attchd', 'Garage Type_Basment', 'Garage Type_BuiltIn', 'Garage Type_CarPort', 'Garage Type_Detchd', 'Sale Condition_Abnorml', 'Sale Condition_AdjLand', 'Sale Condition_Alloca', 'Sale Condition_Family', 'Sale Condition_Normal', 'Sale Condition_Partial', 'MS Zoning_A (agr)', 'MS Zoning_C (all)', 'MS Zoning_FV', 'MS Zoning_I (all)', 'MS Zoning_RH', 'MS Zoning_RL', 'MS Zoning_RM'], 'linear_regr

In [212]:
# Decision tree: pipeline, search space and cross-validated grid search.
# The default column list uses the frame's real column names (with spaces);
# the previous 'GrLivArea' / 'OverallQual' labels do not exist in `xs`, so the
# pipeline could only be fitted when a grid search overrode them.
pipe_dt = Pipeline([
    ('column_select', SelectColumns(['Gr Liv Area', 'Overall Qual'])),
    ('regression', DecisionTreeRegressor(random_state=5)),  # fixed seed
])

grid_dt = {
    # Two candidate feature sets: the nine numeric columns alone, and the
    # same columns plus six groups of dummy columns.
    'column_select__columns': [
        ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Garage Area',
         'Total Bsmt SF', 'Full Bath', 'Year Remod/Add', 'Fireplaces',
         'Lot Area'],
        ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Garage Area',
         'Total Bsmt SF', 'Full Bath', 'Year Remod/Add', 'Fireplaces',
         'Lot Area']
        + condition_1 + heating_qc + kitchen_qual + garage_type
        + sale_condition + zoning,
    ],
    # Tree-size controls.
    'regression__max_depth': [None, 10, 20, 30],
    'regression__min_samples_split': [2, 5, 10],
    'regression__min_samples_leaf': [1, 2, 4],
}

# Cross-validated (cv=5) search scored by R-squared, on all available cores.
search_dt = GridSearchCV(pipe_dt, grid_dt, scoring='r2', n_jobs=-1, cv=5)
search_dt.fit(xs, ys)

# Report the best mean cross-validation score and the settings that gave it.
print(f"Decision Tree regression R-squared: {search_dt.best_score_}")
print(f"Best params: {search_dt.best_params_}")


Decision Tree regression R-squared: 0.8331948491788459
Best params: {'column_select__columns': ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Garage Area', 'Total Bsmt SF', 'Full Bath', 'Year Remod/Add', 'Fireplaces', 'Lot Area'], 'regression__max_depth': 10, 'regression__min_samples_leaf': 1, 'regression__min_samples_split': 10}


In [213]:
# Random forest: pipeline, search space and cross-validated grid search.
# The default column list uses the frame's real column names (with spaces);
# the previous 'GrLivArea' / 'OverallQual' labels do not exist in `xs`, so the
# pipeline could only be fitted when a grid search overrode them.
pipe_rf = Pipeline([
    ('column_select', SelectColumns(['Gr Liv Area', 'Overall Qual'])),
    ('regression', RandomForestRegressor(random_state=5)),  # fixed seed
])

grid_rf = {
    # Single active feature set: eight numeric columns (no 'Garage Area')
    # plus the heating_qc, kitchen_qual, garage_type, sale_condition and
    # zoning dummy groups (condition_1 is not included).
    'column_select__columns': [
        ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Total Bsmt SF',
         'Full Bath', 'Year Remod/Add', 'Fireplaces', 'Lot Area']
        + heating_qc + kitchen_qual + garage_type + sale_condition + zoning,
    ],
    # Forest size and tree depth; max_features is left at its default.
    'regression__n_estimators': [100, 200, 300],
    'regression__max_depth': [10, 20, 30],
}

# Cross-validated (cv=5) search scored by R-squared, on all available cores.
search_rf = GridSearchCV(pipe_rf, grid_rf, scoring='r2', n_jobs=-1, cv=5)
search_rf.fit(xs, ys)

# Report the best mean cross-validation score and the settings that gave it.
print(f"Random Forest regression R-squared: {search_rf.best_score_}")
print(f"Best params: {search_rf.best_params_}")

Random Forest regression R-squared: 0.8800251246002461
Best params: {'column_select__columns': ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Total Bsmt SF', 'Full Bath', 'Year Remod/Add', 'Fireplaces', 'Lot Area', 'Heating QC_Ex', 'Heating QC_Fa', 'Heating QC_Gd', 'Heating QC_Po', 'Heating QC_TA', 'Kitchen Qual_Ex', 'Kitchen Qual_Fa', 'Kitchen Qual_Gd', 'Kitchen Qual_Po', 'Kitchen Qual_TA', 'Garage Type_0', 'Garage Type_2Types', 'Garage Type_Attchd', 'Garage Type_Basment', 'Garage Type_BuiltIn', 'Garage Type_CarPort', 'Garage Type_Detchd', 'Sale Condition_Abnorml', 'Sale Condition_AdjLand', 'Sale Condition_Alloca', 'Sale Condition_Family', 'Sale Condition_Normal', 'Sale Condition_Partial', 'MS Zoning_A (agr)', 'MS Zoning_C (all)', 'MS Zoning_FV', 'MS Zoning_I (all)', 'MS Zoning_RH', 'MS Zoning_RL', 'MS Zoning_RM'], 'regression__max_depth': 30, 'regression__n_estimators': 300}


In [214]:
# Gradient boosting: pipeline, search space and cross-validated grid search.
# The default column list uses the frame's real column names (with spaces);
# the previous 'GrLivArea' / 'OverallQual' labels do not exist in `xs`, so the
# pipeline could only be fitted when a grid search overrode them.
pipe_gb = Pipeline([
    ('column_select', SelectColumns(['Gr Liv Area', 'Overall Qual'])),
    ('regression', GradientBoostingRegressor(random_state=5)),  # fixed seed
])

grid_gb = {
    # Single active feature set: the nine numeric columns plus the
    # condition_1, heating_qc, garage_type, sale_condition and zoning dummy
    # groups (kitchen_qual is not included).
    'column_select__columns': [
        ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Garage Area',
         'Total Bsmt SF', 'Full Bath', 'Year Remod/Add', 'Fireplaces',
         'Lot Area']
        + condition_1 + heating_qc + garage_type + sale_condition + zoning,
    ],
    # Number of boosting stages, shrinkage and per-tree depth.
    'regression__n_estimators': [100, 200, 300],
    'regression__learning_rate': [0.01, 0.1, 0.2],
    'regression__max_depth': [2, 3, 5, 7],
}

# Cross-validated (cv=5) search scored by R-squared, on all available cores.
search_gb = GridSearchCV(pipe_gb, grid_gb, scoring='r2', n_jobs=-1, cv=5)
search_gb.fit(xs, ys)

# Report the best mean cross-validation score and the settings that gave it.
print(f"Gradient Boosting regression R-squared: {search_gb.best_score_}")
print(f"Best params: {search_gb.best_params_}")

Gradient Boosting regression R-squared: 0.8969914540286263
Best params: {'column_select__columns': ['Gr Liv Area', 'Overall Qual', 'Year Built', 'Garage Area', 'Total Bsmt SF', 'Full Bath', 'Year Remod/Add', 'Fireplaces', 'Lot Area', 'Condition 1_Artery', 'Condition 1_Feedr', 'Condition 1_Norm', 'Condition 1_PosA', 'Condition 1_PosN', 'Condition 1_RRAe', 'Condition 1_RRAn', 'Condition 1_RRNe', 'Condition 1_RRNn', 'Heating QC_Ex', 'Heating QC_Fa', 'Heating QC_Gd', 'Heating QC_Po', 'Heating QC_TA', 'Garage Type_0', 'Garage Type_2Types', 'Garage Type_Attchd', 'Garage Type_Basment', 'Garage Type_BuiltIn', 'Garage Type_CarPort', 'Garage Type_Detchd', 'Sale Condition_Abnorml', 'Sale Condition_AdjLand', 'Sale Condition_Alloca', 'Sale Condition_Family', 'Sale Condition_Normal', 'Sale Condition_Partial', 'MS Zoning_A (agr)', 'MS Zoning_C (all)', 'MS Zoning_FV', 'MS Zoning_I (all)', 'MS Zoning_RH', 'MS Zoning_RL', 'MS Zoning_RM'], 'regression__learning_rate': 0.1, 'regression__max_depth': 3, 're