# Import Packages and Settings

In [1]:
import os
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype

import matplotlib.pyplot as plt

from xgboost.sklearn import XGBRegressor

from catboost import CatBoostRegressor

%matplotlib inline

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

n_core = os.cpu_count()-3
print(n_core)

9


In [2]:
from sklearn.base import clone
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from sklearn.feature_selection import RFECV
from sklearn.model_selection import ShuffleSplit, cross_val_score, GridSearchCV

from sklearnex import patch_sklearn

patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


# Load Data and Preprocessing

In [3]:
train_data = pd.read_csv('./Data/train.csv', index_col='Id')
test_data = pd.read_csv('./Data/test.csv', index_col='Id')

feature = 'MSSubClass'
train_data[feature] = train_data[feature].apply(lambda x: str(x))
test_data[feature] = test_data[feature].apply(lambda x: str(x))

assert(not is_numeric_dtype(train_data[feature]))

feature = 'Street'
train_data[feature] = train_data[feature].map({'Pave': 0, 'Grvl':1})
test_data[feature] = test_data[feature].map({'Pave': 0, 'Grvl':1})

assert(is_numeric_dtype(train_data[feature]))

feature = 'Alley'
train_data[feature] = train_data[feature].fillna('None')
test_data[feature] = test_data[feature].fillna('None')

assert(train_data[feature].isna().sum() == 0)

feature = 'MasVnrType'
train_data[feature] = train_data[feature].fillna('None')
test_data[feature] = test_data[feature].fillna('None')

assert(train_data[feature].isna().sum() == 0)

feature = 'MasVnrArea'
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)

assert(train_data[feature].isna().sum() == 0)

quality_map = {'Po':1, 'Fa':2, 'TA':3, 'Gd':4, 'Ex':5}

feature = 'ExterQual'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)

assert(is_numeric_dtype(train_data[feature]))

feature = 'ExterCond'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)

assert(is_numeric_dtype(train_data[feature]))

feature = 'BsmtQual'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'BsmtCond'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

exposure_map = {'No':1, 'Mn':2, 'Av':3, 'Gd':4}

feature = 'BsmtExposure'
train_data[feature] = train_data[feature].map(exposure_map)
test_data[feature] = test_data[feature].map(exposure_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

type_map = {'Unf':1, 'LwQ':2, 'Rec':3, 'BLQ':4, 'ALQ':5, 'GLQ':6}

feature = 'BsmtFinType1'
train_data[feature] = train_data[feature].map(type_map)
test_data[feature] = test_data[feature].map(type_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'BsmtFinType2'
train_data[feature] = train_data[feature].map(type_map)
test_data[feature] = test_data[feature].map(type_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'HeatingQC'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)

assert(is_numeric_dtype(train_data[feature]))

feature = 'CentralAir'
train_data[feature] = train_data[feature].map({'N': 0, 'Y':1})
test_data[feature] = test_data[feature].map({'N': 0, 'Y':1})

assert(is_numeric_dtype(train_data[feature]))

feature = 'KitchenQual'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)

assert(is_numeric_dtype(train_data[feature]))

function_map = {'Sal':1, 'Sev':2, 'Maj2':3, 'Maj1':4, 'Mod':5, 'Min2':6, 'Min1':7, 'Typ':8}

feature = 'Functional'
train_data[feature] = train_data[feature].map(function_map)
test_data[feature] = test_data[feature].map(function_map)
test_data[feature] = test_data[feature].fillna(8)

assert(is_numeric_dtype(train_data[feature]))

feature = 'FireplaceQu'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'GarageType'
train_data[feature] = train_data[feature].fillna('None')
test_data[feature] = test_data[feature].fillna('None')

assert(train_data[feature].isna().sum() == 0)

feature = 'GarageYrBlt'
train_data[feature] = train_data[feature].fillna(-1)
test_data[feature] = test_data[feature].fillna(-1)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(train_data[feature].isna().sum() == 0)

exposure_map = {'Unf':1, 'RFn':2, 'Fin':3}

feature = 'GarageFinish'
train_data[feature] = train_data[feature].map(exposure_map)
test_data[feature] = test_data[feature].map(exposure_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'GarageQual'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'GarageCond'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'PoolQC'
train_data[feature] = train_data[feature].map(quality_map)
test_data[feature] = test_data[feature].map(quality_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

fence_map = {'MnWw':1, 'GdWo':2, 'MnPrv':3, 'GdPrv':4}

feature = 'Fence'
train_data[feature] = train_data[feature].map(fence_map)
test_data[feature] = test_data[feature].map(fence_map)
train_data[feature] = train_data[feature].fillna(0)
test_data[feature] = test_data[feature].fillna(0)
train_data[feature] = train_data[feature].astype(int)
test_data[feature] = test_data[feature].astype(int)

assert(is_numeric_dtype(train_data[feature]))
assert(train_data[feature].isna().sum() == 0)

feature = 'MiscFeature'
train_data[feature] = train_data[feature].fillna('None')
test_data[feature] = test_data[feature].fillna('None')

assert(train_data[feature].isna().sum() == 0)

test_data.loc[test_data['GarageCars'].isnull(), ['GarageType', 'GarageCars', 'GarageArea']] = ['None', 0, 0]
test_data.loc[test_data['BsmtFinSF1'].isnull(), ['BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath']] = [0, 0, 0, 0, 0, 0]
test_data.loc[test_data['BsmtFullBath'].isnull(), ['BsmtFullBath', 'BsmtHalfBath']] = [0, 0]

assert(test_data['GarageCars'].isna().sum() == 0)
assert(test_data['BsmtFinSF1'].isna().sum() == 0)
assert(test_data['BsmtFullBath'].isna().sum() == 0)

columns_float = train_data.select_dtypes(include=['float64']).columns
train_data[columns_float] = train_data[columns_float].astype('float32')
test_data[columns_float] = test_data[columns_float].astype('float32')

label = 'SalePrice'

id = test_data.index

In [4]:
train_data_new, test_data_new = train_data.copy(), test_data.copy().reset_index(drop=True)

train_data_new = train_data_new.drop(train_data_new[(train_data_new['OverallQual']==4) & (train_data_new['SalePrice']>200000)].index)
train_data_new = train_data_new.drop(train_data_new[(train_data_new['GrLivArea']>4000) & (train_data_new['SalePrice']<300000)].index).reset_index(drop=True)

train_data_new['TotalSF'] = train_data_new[['TotalBsmtSF', '1stFlrSF', '2ndFlrSF']].sum(axis=1)
test_data_new['TotalSF'] = test_data_new[['TotalBsmtSF', '1stFlrSF', '2ndFlrSF']].sum(axis=1)

train_data_new['TotalFinSF'] = train_data_new[['BsmtFinSF1', 'BsmtFinSF2', '1stFlrSF', '2ndFlrSF']].sum(axis=1)
test_data_new['TotalFinSF'] = test_data_new[['BsmtFinSF1', 'BsmtFinSF2', '1stFlrSF', '2ndFlrSF']].sum(axis=1)

train_data_new['YearSinceBuilt'] = train_data_new['YrSold'] - train_data_new['YearBuilt']
test_data_new['YearSinceBuilt'] = test_data_new['YrSold'] - test_data_new['YearBuilt']

train_data_new['YearSinceRemod'] = train_data_new['YrSold'] - train_data_new['YearRemodAdd']
test_data_new['YearSinceRemod'] = test_data_new['YrSold'] - test_data_new['YearRemodAdd']

train_data_new['TotalBath'] = train_data_new[['FullBath', 'BsmtFullBath']].sum(axis=1) + 0.5 * train_data_new[['HalfBath', 'BsmtHalfBath']].sum(axis=1)
test_data_new['TotalBath'] = test_data_new[['FullBath', 'BsmtFullBath']].sum(axis=1) + 0.5 * test_data_new[['HalfBath', 'BsmtHalfBath']].sum(axis=1)

train_data_new['TotalPorch'] = train_data_new[['OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'WoodDeckSF']].sum(axis=1)
test_data_new['TotalPorch'] = test_data_new[['OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'WoodDeckSF']].sum(axis=1)

train_data_new['HasPool'] = train_data_new['PoolArea'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasPool'] = test_data_new['PoolArea'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['Has2ndFloor'] = train_data_new['2ndFlrSF'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['Has2ndFloor'] = test_data_new['2ndFlrSF'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['HasBsmt'] = train_data_new['TotalBsmtSF'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasBsmt'] = test_data_new['TotalBsmtSF'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['HasFireplace'] = train_data_new['Fireplaces'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasFireplace'] = test_data_new['Fireplaces'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['HasGarage'] = train_data_new['GarageArea'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasGarage'] = test_data_new['GarageArea'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['HasFence'] = train_data_new['Fence'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasFence'] = test_data_new['Fence'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['HasPorch'] = train_data_new['TotalPorch'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasPorch'] = test_data_new['TotalPorch'].apply(lambda x: 1 if x > 0 else 0)

train_data_new['HasWoodDeck'] = train_data_new['WoodDeckSF'].apply(lambda x: 1 if x > 0 else 0)
test_data_new['HasWoodDeck'] = test_data_new['WoodDeckSF'].apply(lambda x: 1 if x > 0 else 0)

In [5]:
columns_to_log = ['LotFrontage', 'LotArea', 'MasVnrArea', 
                  'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 
                  'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 
                  'LowQualFinSF', 'GrLivArea', 'WoodDeckSF', 
                  'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 
                  'ScreenPorch', 'PoolArea', 'MiscVal']

skews = train_data_new.select_dtypes(include='number').skew().sort_values(ascending=False)
columns_to_skew = skews[columns_to_log][skews > 0.75].index

train_data_new[columns_to_skew] = np.log1p(train_data_new[columns_to_skew])
test_data_new[columns_to_skew] = np.log1p(test_data_new[columns_to_skew])

# Functions

In [7]:
def prepareDataDrop(columns_to_drop, label, train_data, test_data, islog):
    X_train = train_data.drop([label] + columns_to_drop, axis=1)
    Y_train = np.log(train_data[label]) if islog else train_data[label]
    X_test = test_data.drop(columns_to_drop, axis=1)
    return X_train, Y_train, X_test

def prepareDataSelect(columns_to_select, label, train_data, test_data, islog):
    X_train = train_data[columns_to_select]
    Y_train = np.log(train_data[label]) if islog else train_data[label]
    X_test = test_data[columns_to_select]
    return X_train, Y_train, X_test

def preprocess(X_train, X_test):
    columns_num = X_train.select_dtypes(include='number').columns
    columns_cat = X_train.select_dtypes(include='O').columns

    X_train[columns_num] = X_train[columns_num].fillna(X_train[columns_num].median())
    X_train[columns_cat] = X_train[columns_cat].fillna(X_train[columns_cat].mode().loc[0])

    X_test[columns_num] = X_test[columns_num].fillna(X_train[columns_num].median())
    X_test[columns_cat] = X_test[columns_cat].fillna(X_train[columns_cat].mode().loc[0])

    encoder_onehot = OneHotEncoder(dtype=int, sparse_output=False, handle_unknown='ignore')

    train_data_onehot = encoder_onehot.fit_transform(X_train[columns_cat])
    train_feature_name_onehot = encoder_onehot.get_feature_names_out()
    train_data_onehot = pd.DataFrame(train_data_onehot, columns=train_feature_name_onehot)

    test_data_onehot = encoder_onehot.transform(X_test[columns_cat])
    test_data_onehot = pd.DataFrame(test_data_onehot, columns=train_feature_name_onehot)

    X_train = pd.concat([X_train[columns_num], train_data_onehot], axis=1)
    X_test = pd.concat([X_test[columns_num], test_data_onehot], axis=1)

    return X_train, X_test

def tuneParameters(models, tuning_parameters, X_train, Y_train, scoring, n_core):
    model_names = [model.__class__.__name__ for model in models]
    model_scores = []
    model_params = []

    cv = ShuffleSplit(random_state=0)

    for model, params in zip(models, tuning_parameters):
        model = clone(model)
        tune_model = GridSearchCV(model, param_grid=params, scoring=scoring, cv=cv, n_jobs=n_core)
        tune_model.fit(X_train, Y_train)
        model_scores.append(-tune_model.best_score_)
        model_params.append(tune_model.best_params_)
    
    tuning_results = pd.DataFrame(columns=['name', 'best_score', 'best_params'])
    tuning_results['name'] = model_names
    tuning_results['best_score'] = model_scores
    tuning_results['best_params'] = model_params

    return tuning_results

def predict(model, X_train, Y_train, X_test, islog, id, label, file_name = 'result.csv'):
    model = clone(model)
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)

    result = pd.DataFrame({
        id.name: id,
        label: np.exp(predictions) if islog else predictions
    })
    result.to_csv('./Result/' + file_name, index=False)

# Hyperparameters Tuning

In [7]:
columns_to_drop = ['GarageArea', 'GarageYrBlt', 'GarageFinish', 
                   'GarageQual', 'GarageCond', 'BsmtCond', 
                   'BsmtFinType1', 'BsmtFinType2', 'Utilities',
                   'Street', 'PoolQC', 'PoolArea',
                   'YearBuilt', 'YearRemodAdd', 'FullBath', 
                   'BsmtFullBath', 'HalfBath', 'TotalBsmtSF',
                   '1stFlrSF', 'GrLivArea']

X_train, Y_train, X_test = prepareDataDrop(columns_to_drop, label, train_data_new, test_data_new, True)
X_train, X_test = preprocess(X_train, X_test)

## Random Forest Regressor

In [8]:
tuning_parameters = {
    'n_estimators': [50, 100, 300, 500, 1000],
    'min_samples_split': [2, 4, 6, 8, 10],
    'max_features': ['sqrt', 'log2', 1]
}

model = RandomForestRegressor(random_state=0, oob_score=True)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,RandomForestRegressor,0.124681,"{'max_features': 'sqrt', 'min_samples_split': 2, 'n_estimators': 1000}"


## Lasso

In [9]:
tuning_parameters = {
    'alpha': [0.1, 0.5, 1, 2, 5],
}

model = Lasso(max_iter=100000, random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,Lasso,0.163342,{'alpha': 0.1}


## Ridge

In [10]:
tuning_parameters = {
    'alpha': [0.1, 0.5, 1, 2, 5],
}

model = Ridge(max_iter=100000, random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,Ridge,0.109439,{'alpha': 5}


## Decision Tree

In [11]:
tuning_parameters = {
    'splitter': ['best', 'random'],
    'max_depth': [2, 4, 6, 8, 10],
    'min_samples_split': [2, 4, 6, 8, 10],
    'max_features': ['sqrt', 'log2', None]
}

model = DecisionTreeRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

  _data = np.array(data, dtype=dtype, copy=copy,


Unnamed: 0,name,best_score,best_params
0,DecisionTreeRegressor,0.175553,"{'max_depth': 8, 'max_features': None, 'min_samples_split': 10, 'splitter': 'best'}"


## Gradient Boosting Regressor

In [12]:
tuning_parameters = {
    'n_estimators': [50, 100, 300, 500, 1000],
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.115643,{'n_estimators': 500}


In [13]:
tuning_parameters = {
    'n_estimators': [500],
    'max_depth': [2, 4, 6, 8, 10],
    'min_samples_split': [2, 4, 6, 8, 10],
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.115086,"{'max_depth': 2, 'min_samples_split': 4, 'n_estimators': 500}"


In [14]:
tuning_parameters = {
    'n_estimators': [500],
    'max_depth': [2],
    'min_samples_split': [4],
    'max_features': ['sqrt', 'log2', None]
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.114108,"{'max_depth': 2, 'max_features': 'sqrt', 'min_samples_split': 4, 'n_estimators': 500}"


In [15]:
tuning_parameters = {
    'n_estimators': [500],
    'max_depth': [2],
    'min_samples_split': [4],
    'max_features': ['sqrt'],
    'learning_rate': [0.01, 0.025, 0.5, 0.75, 0.1],
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.114108,"{'learning_rate': 0.1, 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_split': 4, 'n_estimators': 500}"


## XGBoost Regressor

In [16]:
tuning_parameters = {
    'n_estimators': [50, 100, 300, 500, 1000],
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.130965,{'n_estimators': 50}


In [17]:
tuning_parameters = {
    'n_estimators': [50],
    'max_depth': [2, 4, 6, 8, 10],
    'min_child_weight': [2, 4, 6]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.121992,"{'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 50}"


In [18]:
tuning_parameters = {
    'n_estimators': [50],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0, 0.01, 0.1,]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.121459,"{'gamma': 0.01, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 50}"


In [19]:
tuning_parameters = {
    'n_estimators': [50],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0.01],
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.121104,"{'colsample_bytree': 0.7, 'gamma': 0.01, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 50, 'subsample': 0.8}"


In [20]:
tuning_parameters = {
    'n_estimators': [50],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0.01],
    'subsample':[0.8],
    'colsample_bytree':[0.7],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.120547,"{'colsample_bytree': 0.7, 'gamma': 0.01, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 50, 'reg_alpha': 0.1, 'subsample': 0.8}"


In [21]:
tuning_parameters = {
    'n_estimators': [50],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0.01],
    'subsample':[0.8],
    'colsample_bytree':[0.7],
    'reg_alpha': [0.1],
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 100]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.119214,"{'colsample_bytree': 0.7, 'gamma': 0.01, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 50, 'reg_alpha': 0.1, 'reg_lambda': 0.1, 'subsample': 0.8}"


In [22]:
tuning_parameters = {
    'n_estimators': [50],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0.01],
    'subsample':[0.8],
    'colsample_bytree':[0.7],
    'reg_alpha': [0.1],
    'reg_lambda': [0.1],
    'eta': [0.01, 0.05, 0.1, 0.2, 0.3]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.11738,"{'colsample_bytree': 0.7, 'eta': 0.2, 'gamma': 0.01, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 50, 'reg_alpha': 0.1, 'reg_lambda': 0.1, 'subsample': 0.8}"


## Catboost Regressor

In [23]:
tuning_parameters = {
    'iterations': [100, 300, 500, 1000],
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107283,{'iterations': 1000}


In [24]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [2, 4, 6, 8, 10]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107283,"{'depth': 6, 'iterations': 1000}"


In [25]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5, 1e-2, 0.1, 1, 100]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.106976,"{'depth': 6, 'iterations': 1000, 'l2_leaf_reg': 1e-05}"


In [26]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5],
    'random_strength': [0.1, 0.2, 0.5, 0.8, 1]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.106976,"{'depth': 6, 'iterations': 1000, 'l2_leaf_reg': 1e-05, 'random_strength': 1}"


In [None]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5],
    'bagging_temperature': [0, 0.01, 1, 5, 10, 100]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

In [28]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5],
    'bagging_temperature': [0, 0.01, 0.05]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.106976,"{'bagging_temperature': 0, 'depth': 6, 'iterations': 1000, 'l2_leaf_reg': 1e-05}"


In [30]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5],
    'bagging_temperature': [0],
    'grow_policy': ['SymmetricTree', 'Depthwise', 'Lossguide']
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.106829,"{'bagging_temperature': 0, 'depth': 6, 'grow_policy': 'Depthwise', 'iterations': 1000, 'l2_leaf_reg': 1e-05}"


In [31]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5],
    'bagging_temperature': [0],
    'grow_policy': ['Depthwise'],
    'min_data_in_leaf': [2, 4, 6, 8, 10]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.106127,"{'bagging_temperature': 0, 'depth': 6, 'grow_policy': 'Depthwise', 'iterations': 1000, 'l2_leaf_reg': 1e-05, 'min_data_in_leaf': 10}"


## Voting

In [32]:
models = [
    ('Lasso', Lasso(alpha=0.1)),
    ('Ridge', Ridge(alpha=5)),
    ('DecisionTree', DecisionTreeRegressor(max_depth=8, min_samples_split=10)),
    ('RandomForest', RandomForestRegressor(n_estimators=1000, min_samples_split=2, max_features='sqrt', random_state=0, oob_score=True)),
    ('GradientBoost', GradientBoostingRegressor(learning_rate=0.1, max_depth=2, max_features='sqrt', min_samples_split=4, n_estimators=500)),
    ('XGB', XGBRegressor(eta=0.2, n_estimators=50, max_depth=4, min_child_weight=2, gamma=0.01, subsample=0.8, colsample_bytree=0.7, reg_alpha=0.1, reg_lambda=0.1)),
    ('CatBoost', CatBoostRegressor(iterations=1000, depth=6, l2_leaf_reg=1e-5, bagging_temperature=0, grow_policy='Depthwise', min_data_in_leaf=10, random_state=0, verbose=0, allow_writing_files=False))
]

model = VotingRegressor(models)
predict(model, X_train, Y_train, X_test, True, id, label)

# Improve by combining data

In [5]:
all_data = pd.concat([train_data_new.drop('SalePrice', axis=1), test_data_new]).reset_index(drop=True)
print(all_data.shape)

columns_to_log = ['LotFrontage', 'LotArea', 'MasVnrArea', 
                  'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 
                  'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 
                  'LowQualFinSF', 'GrLivArea', 'WoodDeckSF', 
                  'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 
                  'ScreenPorch', 'PoolArea', 'MiscVal']

skews = all_data.select_dtypes(include='number').skew().sort_values(ascending=False)
columns_to_skew = skews[columns_to_log][skews > 0.75].index

all_data[columns_to_skew] = np.log1p(all_data[columns_to_skew])

columns_to_drop = ['GarageArea', 'GarageYrBlt', 'GarageFinish', 
                   'GarageQual', 'GarageCond', 'BsmtCond', 
                   'BsmtFinType1', 'BsmtFinType2', 'Utilities',
                   'Street', 'PoolQC', 'PoolArea',
                   'YearBuilt', 'YearRemodAdd', 'FullBath', 
                   'BsmtFullBath', 'HalfBath', 'TotalBsmtSF',
                   '1stFlrSF', 'GrLivArea']

all_data = all_data.drop(columns_to_drop, axis=1)

columns_num = all_data.select_dtypes(include='number').columns
columns_cat = all_data.select_dtypes(include='O').columns

all_data[columns_num] = all_data[columns_num].fillna(all_data[columns_num].median())
all_data[columns_cat] = all_data[columns_cat].fillna(all_data[columns_cat].mode().loc[0])

encoder_onehot = OneHotEncoder(dtype=int, sparse_output=False, handle_unknown='ignore')

all_data_onehot = encoder_onehot.fit_transform(all_data[columns_cat])
all_feature_name_onehot = encoder_onehot.get_feature_names_out()
all_data_onehot = pd.DataFrame(all_data_onehot, columns=all_feature_name_onehot)

all_data = pd.concat([all_data[columns_num], all_data_onehot], axis=1)

X_train = all_data.iloc[:len(train_data_new)]
X_test = all_data.iloc[len(train_data_new):]
Y_train = np.log(train_data_new[label])

print(X_train.shape, X_test.shape, Y_train.shape)

(2916, 93)
(1457, 238) (1459, 238) (1457,)


In [40]:
tuning_parameters = {
    'alpha': [0.1, 0.5, 1, 2, 5],
}

model = Lasso(max_iter=100000, random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,Lasso,0.163342,{'alpha': 0.1}


In [41]:
tuning_parameters = {
    'alpha': [0.1, 0.5, 1, 2, 5],
}

model = Ridge(max_iter=100000, random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,Ridge,0.109374,{'alpha': 5}


In [42]:
tuning_parameters = {
    'splitter': ['best', 'random'],
    'max_depth': [2, 4, 6, 8, 10],
    'min_samples_split': [2, 4, 6, 8, 10],
    'max_features': ['sqrt', 'log2', None]
}

model = DecisionTreeRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

  _data = np.array(data, dtype=dtype, copy=copy,


Unnamed: 0,name,best_score,best_params
0,DecisionTreeRegressor,0.173739,"{'max_depth': 8, 'max_features': None, 'min_samples_split': 10, 'splitter': 'best'}"


In [43]:
tuning_parameters = {
    'n_estimators': [50, 100, 300, 500, 1000],
    'min_samples_split': [2, 4, 6, 8, 10],
    'max_features': ['sqrt', 'log2', 1]
}

model = RandomForestRegressor(random_state=0, oob_score=True)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,RandomForestRegressor,0.124332,"{'max_features': 'sqrt', 'min_samples_split': 2, 'n_estimators': 1000}"


In [44]:
tuning_parameters = {
    'n_estimators': [50, 100, 300, 500, 1000],
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.114983,{'n_estimators': 1000}


In [45]:
tuning_parameters = {
    'n_estimators': [1000],
    'max_depth': [2, 4, 6, 8, 10],
    'min_samples_split': [2, 4, 6, 8, 10],
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.114965,"{'max_depth': 2, 'min_samples_split': 6, 'n_estimators': 1000}"


In [46]:
tuning_parameters = {
    'n_estimators': [1000],
    'max_depth': [2],
    'min_samples_split': [6],
    'max_features': ['sqrt', 'log2', None]
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.114787,"{'max_depth': 2, 'max_features': 'sqrt', 'min_samples_split': 6, 'n_estimators': 1000}"


In [47]:
tuning_parameters = {
    'n_estimators': [1000],
    'max_depth': [2],
    'min_samples_split': [6],
    'max_features': ['sqrt'],
    'learning_rate': [0.01, 0.025, 0.5, 0.75, 0.1],
}

model = GradientBoostingRegressor(random_state=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,GradientBoostingRegressor,0.11333,"{'learning_rate': 0.025, 'max_depth': 2, 'max_features': 'sqrt', 'min_samples_split': 6, 'n_estimators': 1000}"


In [48]:
tuning_parameters = {
    'n_estimators': [50, 100, 300, 500, 1000],
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.131283,{'n_estimators': 100}


In [49]:
tuning_parameters = {
    'n_estimators': [100],
    'max_depth': [2, 4, 6, 8, 10],
    'min_child_weight': [2, 4, 6]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.122583,"{'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 100}"


In [51]:
tuning_parameters = {
    'n_estimators': [100],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0, 0.01, 0.1,]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.122583,"{'gamma': 0, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 100}"


In [52]:
tuning_parameters = {
    'n_estimators': [100],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0],
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.121214,"{'colsample_bytree': 0.6, 'gamma': 0, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 100, 'subsample': 0.8}"


In [53]:
tuning_parameters = {
    'n_estimators': [100],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0],
    'subsample':[0.8],
    'colsample_bytree':[0.6],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.121214,"{'colsample_bytree': 0.6, 'gamma': 0, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 100, 'reg_alpha': 1e-05, 'subsample': 0.8}"


In [54]:
tuning_parameters = {
    'n_estimators': [100],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0],
    'subsample':[0.8],
    'colsample_bytree':[0.6],
    'reg_alpha': [1e-5],
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 100]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.12043,"{'colsample_bytree': 0.6, 'gamma': 0, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 100, 'subsample': 0.8}"


In [55]:
tuning_parameters = {
    'n_estimators': [100],
    'max_depth': [4],
    'min_child_weight': [2],
    'gamma': [0],
    'subsample':[0.8],
    'colsample_bytree':[0.6],
    'reg_alpha': [1e-5],
    'reg_lambda': [100],
    'eta': [0.01, 0.05, 0.1, 0.2, 0.3]
}

model = XGBRegressor(seed=0)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,XGBRegressor,0.119494,"{'colsample_bytree': 0.6, 'eta': 0.2, 'gamma': 0, 'max_depth': 4, 'min_child_weight': 2, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 100, 'subsample': 0.8}"


In [8]:
tuning_parameters = {
    'iterations': [100, 300, 500, 1000],
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107165,{'iterations': 1000}


In [8]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [2, 4, 6, 8, 10]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107165,"{'depth': 6, 'iterations': 1000}"


In [9]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1e-5, 1e-2, 0.1, 1, 100]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107114,"{'depth': 6, 'iterations': 1000, 'l2_leaf_reg': 1}"


In [10]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1],
    'random_strength': [0.1, 0.2, 0.5, 0.8, 1]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107114,"{'depth': 6, 'iterations': 1000, 'l2_leaf_reg': 1, 'random_strength': 1}"


In [12]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1],
    'random_strength': [1],
    'bagging_temperature': [0, 0.01, 1, 5, 10, 100]
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107114,"{'bagging_temperature': 0, 'depth': 6, 'iterations': 1000, 'l2_leaf_reg': 1, 'random_strength': 1}"


In [13]:
tuning_parameters = {
    'iterations': [1000],
    'depth': [6],
    'l2_leaf_reg': [1],
    'random_strength': [1],
    'bagging_temperature': [0],
    'grow_policy': ['SymmetricTree', 'Depthwise', 'Lossguide']
}

model = CatBoostRegressor(random_state=0, verbose=0, allow_writing_files=False)

tuneParameters([model], [tuning_parameters], X_train, Y_train, 'neg_root_mean_squared_error', n_core)

Unnamed: 0,name,best_score,best_params
0,CatBoostRegressor,0.107114,"{'bagging_temperature': 0, 'depth': 6, 'grow_policy': 'SymmetricTree', 'iterations': 1000, 'l2_leaf_reg': 1, 'random_strength': 1}"


In [14]:
models = [
    ('Lasso', Lasso(alpha=0.1)),
    ('Ridge', Ridge(alpha=5)),
    ('DecisionTree', DecisionTreeRegressor(max_depth=8, min_samples_split=10)),
    ('RandomForest', RandomForestRegressor(n_estimators=1000, min_samples_split=2, max_features='sqrt', random_state=0, oob_score=True)),
    ('GradientBoost', GradientBoostingRegressor(learning_rate=0.1, max_depth=2, max_features='sqrt', min_samples_split=4, n_estimators=500)),
    ('XGB', XGBRegressor(eta=0.2, n_estimators=100, max_depth=4, min_child_weight=2, gamma=0, subsample=0.8, colsample_bytree=0.6, reg_alpha=1e-05, reg_lambda=100)),
    ('CatBoost', CatBoostRegressor(iterations=1000, depth=6, l2_leaf_reg=1, random_strength=1, bagging_temperature=0, grow_policy='SymmetricTree', random_state=0, verbose=0, allow_writing_files=False))
]

model = VotingRegressor(models)
predict(model, X_train, Y_train, X_test, True, id, label)