In [2]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import compose, pipeline
from sklearn import linear_model
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.preprocessing import PowerTransformer, QuantileTransformer
warnings.filterwarnings('ignore')

In [191]:
# Read housing_pp5.csv, using its first column as the row index.
df1 = pd.read_csv("housing_pp5.csv", index_col=0)

# Target vector: the SalePrice column.
y1 = df1["SalePrice"]

# Feature matrix: every column except the target and PID, as an independent copy.
X1 = df1.drop(columns=["SalePrice", "PID"]).copy()


In [192]:
# Read housing_pp4.csv, using its first column as the row index.
df2 = pd.read_csv("housing_pp4.csv", index_col=0)

# Target vector: the SalePrice column.
y2 = df2["SalePrice"]

# Feature matrix: every column except the target and PID, as an independent copy.
X2 = df2.drop(columns=["SalePrice", "PID"]).copy()

In [210]:
# Lasso model selection for both feature sets (X1 and X2).
#
# Each feature set gets its own preprocessing + Lasso pipeline, wrapped in a
# TransformedTargetRegressor so the model is trained on log(y) and predictions
# are mapped back with exp. Alpha is tuned by grid search over a shared
# 5-fold splitter.

# Split columns by dtype: object columns are one-hot encoded, all other
# columns are standardised.
numerical_columns_selector = selector(dtype_exclude=object)
categorical_columns_selector = selector(dtype_include=object)

numerical_columns1 = numerical_columns_selector(X1)
categorical_columns1 = categorical_columns_selector(X1)
numerical_columns2 = numerical_columns_selector(X2)
categorical_columns2 = categorical_columns_selector(X2)

# Unfitted templates. Every place that needs one takes a clone, so no two
# composite estimators ever hold the same transformer object.
categorical_preprocessor = OneHotEncoder(handle_unknown="ignore")
numerical_preprocessor = StandardScaler()

preprocessor1 = ColumnTransformer(
    [
        ("standard_scaler", clone(numerical_preprocessor), numerical_columns1),
        ("one-hot-encoder", clone(categorical_preprocessor), categorical_columns1),
    ]
)

preprocessor2 = ColumnTransformer(
    [
        ("standard_scaler", clone(numerical_preprocessor), numerical_columns2),
        ("one-hot-encoder", clone(categorical_preprocessor), categorical_columns2),
    ]
)

# One seeded, shuffled splitter shared by every grid search so that all
# candidates are scored on the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Unfitted Lasso template. Pipeline does not clone its steps, so each pipeline
# receives its own clone; fitting one pipeline directly can then never
# overwrite the coefficients held by the other.
lasso = linear_model.Lasso()

lasso_pipe1 = Pipeline(steps=[("Preprocess", preprocessor1), ("Lasso", clone(lasso))])

lasso_pipe2 = Pipeline(steps=[("Preprocess", preprocessor2), ("Lasso", clone(lasso))])

# Candidate alphas. The values match np.linspace(0.002, 0.003, 20) rounded to
# 8 decimals; they are kept as literals so reported best_params_ stay stable.
# Key path: TransformedTargetRegressor.regressor -> pipeline step "Lasso" -> alpha.
param_grid = {
    "regressor__Lasso__alpha": [
        0.002, 0.00205263, 0.00210526, 0.00215789, 0.00221053,
        0.00226316, 0.00231579, 0.00236842, 0.00242105, 0.00247368,
        0.00252632, 0.00257895, 0.00263158, 0.00268421, 0.00273684,
        0.00278947, 0.00284211, 0.00289474, 0.00294737, 0.003,
    ]
}

# Train on log(y); predictions are transformed back with exp.
lasso_regr1 = compose.TransformedTargetRegressor(
    regressor=lasso_pipe1, func=np.log, inverse_func=np.exp
)
lasso_regr2 = compose.TransformedTargetRegressor(
    regressor=lasso_pipe2, func=np.log, inverse_func=np.exp
)

# No `scoring` is passed, so GridSearchCV uses the estimator's own `score`.
# NOTE(review): warnings are silenced globally in the imports cell, so a Lasso
# ConvergenceWarning would not be visible here - worth checking once.
search1 = GridSearchCV(lasso_regr1, param_grid, n_jobs=2, cv=kf)

search2 = GridSearchCV(lasso_regr2, param_grid, n_jobs=2, cv=kf)

In [221]:
# Ridge model selection for both feature sets, mirroring the Lasso setup:
# preprocessing + Ridge pipeline, trained on log(y) through
# TransformedTargetRegressor, alpha tuned by grid search on the shared `kf`.

# Unfitted Ridge template. Pipeline does not clone its steps, so each pipeline
# receives its own clone of the estimator.
ridge = linear_model.Ridge()

# preprocessor1 / preprocessor2 are already steps of the Lasso pipelines, so
# clones are used here; fitting a Ridge pipeline directly then cannot refit
# the transformer object held by a Lasso pipeline.
ridge_pipe1 = Pipeline(steps=[("Preprocess", clone(preprocessor1)), ("Ridge", clone(ridge))])

ridge_pipe2 = Pipeline(steps=[("Preprocess", clone(preprocessor2)), ("Ridge", clone(ridge))])

# Candidate alphas. The values match np.linspace(193, 204, 20) rounded to
# 8 decimals; they are kept as literals so reported best_params_ stay stable.
# Key path: TransformedTargetRegressor.regressor -> pipeline step "Ridge" -> alpha.
param_gridR = {
    "regressor__Ridge__alpha": [
        193.0, 193.57894737, 194.15789474, 194.73684211,
        195.31578947, 195.89473684, 196.47368421, 197.05263158,
        197.63157895, 198.21052632, 198.78947368, 199.36842105,
        199.94736842, 200.52631579, 201.10526316, 201.68421053,
        202.26315789, 202.84210526, 203.42105263, 204.0,
    ]
}

# Train on log(y); predictions are transformed back with exp.
ridge_regr1 = compose.TransformedTargetRegressor(
    regressor=ridge_pipe1, func=np.log, inverse_func=np.exp
)
ridge_regr2 = compose.TransformedTargetRegressor(
    regressor=ridge_pipe2, func=np.log, inverse_func=np.exp
)

# No `scoring` is passed, so GridSearchCV uses the estimator's own `score`.
Rsearch1 = GridSearchCV(ridge_regr1, param_gridR, n_jobs=2, cv=kf)

Rsearch2 = GridSearchCV(ridge_regr2, param_gridR, n_jobs=2, cv=kf)

In [195]:
# Run the Lasso alpha search on (X1, y1), then print the best cross-validated
# score followed by the parameters that achieved it.
# NOTE(review): the output recorded under this cell reports alpha=0.001, which
# is not one of the values in param_grid - it appears to come from an earlier
# grid. Re-run after Restart Kernel -> Run All to refresh it.
search1.fit(X1, y1)
print(search1.best_score_, search1.best_params_, sep="\n")

0.8846743626007652
{'regressor__Lasso__alpha': 0.001}


In [211]:
# Run the Lasso alpha search on (X2, y2), then print the best cross-validated
# score followed by the parameters that achieved it.
search2.fit(X2, y2)
print(search2.best_score_, search2.best_params_, sep="\n")

0.9344881872898778
{'regressor__Lasso__alpha': 0.00273684}


In [None]:
# NOTE(review): unexecuted scratch cell holding two bare float literals.
# They look like scores copied from an earlier run, but nothing in the
# notebook records which model or dataset produced them - confirm their
# provenance (or move them into a markdown note) before relying on them.
0.8825164812004702
0.9308283561200676

In [197]:
# Run the Ridge alpha search on (X1, y1), then print the best cross-validated
# score followed by the parameters that achieved it.
# NOTE(review): the output recorded under this cell reports alpha=100, which
# is not one of the values in param_gridR - it appears to come from an earlier
# grid. Re-run after Restart Kernel -> Run All to refresh it.
Rsearch1.fit(X1, y1)
print(Rsearch1.best_score_, Rsearch1.best_params_, sep="\n")

0.8922082306532779
{'regressor__Ridge__alpha': 100}


In [222]:
# Run the Ridge alpha search on (X2, y2), then print the best cross-validated
# score followed by the parameters that achieved it.
Rsearch2.fit(X2, y2)
print(Rsearch2.best_score_, Rsearch2.best_params_, sep="\n")

0.9350065918339736
{'regressor__Ridge__alpha': 196.47368421}
