#### Importing libraries

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_log_error as msle
from sklearn.metrics import make_scorer
from catboost import CatBoostRegressor

import pickle
import numpy as np
import warnings
warnings.filterwarnings("ignore")



#### Loading Artifacts

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE: pickle.load executes arbitrary code; only use on trusted artifacts.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Artifacts are read in a fixed order: features, target, hold-out features,
# and the list of categorical column names.
train_data = _load_pickle('../artifacts/train_data.pkl')
train_target = _load_pickle('../artifacts/train_target.pkl')
test_data = _load_pickle('../artifacts/test_data.pkl')
cat_features = _load_pickle('../artifacts/cat_features.pkl')


# Hold out 20% of the training rows for local evaluation (seeded split).
x_train, x_test, y_train, y_test = train_test_split(
    train_data,
    train_target,
    test_size=0.2,
    random_state=289,
)

In [3]:
# Report the shape of each loaded artifact, one per line.
shape_report = (
    ("train data shape", train_data.shape),
    ("test data shape", test_data.shape),
    ("train target shape", train_target.shape),
)
# Joining with " \n" keeps the trailing space before each line break.
print(" \n".join(f"{label} {shape}" for label, shape in shape_report))

train data shape (1433, 79) 
test data shape (1445, 79) 
train target shape (1433,)


In [4]:
## Make score function for grid search CV
def score_func(y_true, y_pred, **kwargs):
    """Return the square root of MSLE computed after `target_inv_trans`.

    Both arrays are passed through `target_inv_trans` before the metric is
    evaluated; extra keyword arguments are forwarded to `msle`.
    """
    # NOTE(review): assumes y_true / y_pred are on the `target_trans` scale;
    # confirm against how train_target was produced.
    actual = target_inv_trans(y_true)
    predicted = target_inv_trans(y_pred)
    squared_log_error = msle(actual, predicted, **kwargs)
    return squared_log_error ** .5


#### Hyperparameter setting

In [5]:

def make_search(estimator, params, verbose=1):
    """Grid-search `params` for `estimator` with 5-fold CV and return the best setting.

    The search is fitted on the notebook-level `x_train` / `y_train` and scored
    with `score_func` wrapped as a loss (``greater_is_better=False``), so the
    ``mean_test_score`` values stored by scikit-learn are negated.

    Parameters
    ----------
    estimator : estimator object
        Unfitted estimator handed to ``GridSearchCV``.
    params : dict or list of dict
        Parameter grid handed to ``GridSearchCV``.
    verbose : int or bool, default 1
        When truthy, print the best candidate's mean score (sign restored),
        its standard deviation and its parameters.

    Returns
    -------
    dict
        The parameter setting ranked first by the search.

    Raises
    ------
    ValueError
        If the best-ranked candidate's mean CV score is NaN, i.e. the search
        did not yield a usable score to rank candidates by.
    """
    scorer = make_scorer(score_func, greater_is_better=False)
    # verbose=11 makes scikit-learn log the start and end of every fold.
    search = GridSearchCV(estimator, params, cv=5, scoring=scorer, verbose=11, n_jobs=-1)
    search.fit(x_train, y_train)

    # Read the winner straight from the fitted search instead of rebuilding
    # cv_results_ as a DataFrame (which needed an un-imported pandas).
    best_idx = search.best_index_
    mean = search.cv_results_['mean_test_score'][best_idx]
    std = search.cv_results_['std_test_score'][best_idx]
    best_params = search.best_params_

    # A NaN best score means the ranking is meaningless; fail loudly rather
    # than hand back an arbitrary candidate.
    if np.isnan(mean):
        raise ValueError(
            'grid search for %s produced no valid CV score (mean_test_score is NaN); '
            'check score_func and the scale of the target' % estimator.__class__.__name__
        )

    if verbose:
        print('%s: %.4f (%.4f) with params' % (estimator.__class__.__name__, -mean, std), best_params)
    return best_params

# Candidate values for the grid search: tree depths 2..6 and ensemble sizes.
depths = [*range(2, 7)]
estimators = [1800, 2000, 3000]

# Parameters found by an earlier search run (kept so the search can be skipped).
best_params = dict(
    n_estimators=3000,
    max_depth=4,
    random_state=289,
    cat_features=cat_features,
    verbose=False,
)

In [6]:
# define target variable conversions
def target_trans(price):
    """Map a price to the modelling scale: ``log1p(price)`` raised to the power 0.5."""
    return np.log1p(price) ** .5


def target_inv_trans(price):
    """Map a modelling-scale value to a price: ``expm1(price ** 2)``.

    This undoes `target_trans` for non-negative transformed values.
    """
    return np.expm1(price ** 2)

#### Hyperparameter tuning

In [7]:
# Set to False to skip the grid search and reuse the `best_params`
# already defined earlier in the notebook.
RERUN_SEARCH = True

if RERUN_SEARCH:
    search_params = {
        'n_estimators': estimators,
        'max_depth': depths,
        'random_state': [289],
        'cat_features': [cat_features],
        'verbose': [False]
    }
    best_params = make_search(CatBoostRegressor(), search_params)


# Fit the final model on the training split with the selected parameters.
model = CatBoostRegressor()
model.set_params(**best_params)
model.fit(x_train, y_train)

# Evaluate on the held-out split after applying target_inv_trans to both sides.
# The reported value is the square root of MSLE, hence the "rmsle" label.
y_true = target_inv_trans(y_test)
y_pred = target_inv_trans(model.predict(x_test))
print('rmsle = %.4f' % (msle(y_true, y_pred) ** .5))

Fitting 5 folds for each of 15 candidates, totalling 75 fits


[CV 2/5; 1/15] START cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'], max_depth=2, n_estimators=1800, random_state=289, verbose=False
[CV 1/5; 1/15] START cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual'

  result = getattr(ufunc, method)(*inputs, **kwargs)
Traceback (most recent call last):
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 982, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 253, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 350, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "/var/folders/39/x6wcjm5s50j9t7cmzcfvbb_c0000gn/T/ipykernel_20767/429481807.py", line 3, in score_func
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
  File "/opt/anaconda3/envs/develop

[CV 2/5; 1/15] END cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'], max_depth=2, n_estimators=1800, random_state=289, verbose=False;, score=nan total time=   6.6s
[CV 2/5; 3/15] START cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCo

  result = getattr(ufunc, method)(*inputs, **kwargs)
Traceback (most recent call last):
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 982, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 253, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 350, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "/var/folders/39/x6wcjm5s50j9t7cmzcfvbb_c0000gn/T/ipykernel_20767/429481807.py", line 3, in score_func
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
  File "/opt/anaconda3/envs/develop

[CV 1/5; 1/15] END cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'], max_depth=2, n_estimators=1800, random_state=289, verbose=False;, score=nan total time=   7.8s
[CV 3/5; 3/15] START cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCo

  result = getattr(ufunc, method)(*inputs, **kwargs)
Traceback (most recent call last):
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 982, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 253, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 350, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "/var/folders/39/x6wcjm5s50j9t7cmzcfvbb_c0000gn/T/ipykernel_20767/429481807.py", line 3, in score_func
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
  File "/opt/anaconda3/envs/develop

[CV 4/5; 3/15] START cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'], max_depth=2, n_estimators=3000, random_state=289, verbose=False
[CV 5/5; 1/15] END cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 

  result = getattr(ufunc, method)(*inputs, **kwargs)
Traceback (most recent call last):
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 982, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 253, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 350, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "/var/folders/39/x6wcjm5s50j9t7cmzcfvbb_c0000gn/T/ipykernel_20767/429481807.py", line 3, in score_func
  File "/opt/anaconda3/envs/development_env/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
  File "/opt/anaconda3/envs/develop

[CV 1/5; 2/15] END cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'], max_depth=2, n_estimators=2000, random_state=289, verbose=False;, score=nan total time=   8.7s
[CV 2/5; 4/15] START cat_features=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCo