In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", 300)
pd.set_option("display.max_rows", 100)
pd.set_option("max_colwidth", 200)

from sklearn import preprocessing
from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from catboost import CatBoostRegressor

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import RepeatedKFold

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the ordinal-encoded dataset; the path is relative to the notebook's working directory.
ordinal = pd.read_csv('BaseOrdinal.csv')
# Sanity check of (rows, columns) right after loading.
ordinal.shape

(2580, 83)

In [3]:
# Remove the non-feature columns ('PID' and the two 'Unnamed' columns, presumably
# index artifacts from earlier CSV round-trips -- confirm against the data source).
# errors="ignore" keeps this cell idempotent: `ordinal` is reassigned from itself,
# so re-running the cell without it raises KeyError because the columns are gone.
ordinal = ordinal.drop(columns=['Unnamed: 0', 'PID', 'Unnamed: 0.1'], errors="ignore")
ordinal.shape

(2580, 80)

In [4]:
# Target is SalePrice; every remaining column is used as a feature.
y = ordinal['SalePrice']
X = ordinal.drop(columns=['SalePrice'])

In [5]:
# gb is used as the final (meta) estimator of the stacks built in this notebook.
gb = GradientBoostingRegressor(random_state=1)
# cb is the single base estimator ('CB'); verbose=False silences CatBoost's training log.
cb = CatBoostRegressor(random_state=1, verbose=False)

### Setting n_jobs to 2, 1, or None has no impact on stacking_regressor.score

In [6]:
# Variant 1: n_jobs=2, seeded 5-fold splitter for the internal cross-validation.
stacking_regressor1 = StackingRegressor(
    estimators=[('CB', cb)],
    final_estimator=gb,
    cv=RepeatedKFold(n_splits=5, n_repeats=1, random_state=1),
    n_jobs=2,
    passthrough=True,
)

In [7]:
# Fit on the full dataset (no hold-out split is made in this notebook).
stacking_regressor1.fit(X,y)

In [8]:
# In-sample R^2: scored on the same X, y used for fitting, so this is not a generalization estimate.
stacking_regressor1.score(X,y)

0.9839039909709154

In [9]:
# Variant 2: identical to stacking_regressor1 except n_jobs=1.
stacking_regressor2 = StackingRegressor(
    estimators=[('CB', cb)],
    final_estimator=gb,
    cv=RepeatedKFold(n_splits=5, n_repeats=1, random_state=1),
    n_jobs=1,
    passthrough=True,
)

In [10]:
# Fit the n_jobs=1 variant on the full dataset.
stacking_regressor2.fit(X,y)

In [11]:
# In-sample R^2 (same data as used for fit); compare with the n_jobs=2 variant.
stacking_regressor2.score(X,y)

0.9839039909709154

In [12]:
# Variant 3: n_jobs left at its default (None); same seeded splitter as variants 1 and 2.
stacking_regressor3 = StackingRegressor(
    estimators=[('CB', cb)],
    final_estimator=gb,
    cv=RepeatedKFold(n_splits=5, n_repeats=1, random_state=1),
    passthrough=True,
)

In [13]:
# Fit the default-n_jobs variant on the full dataset.
stacking_regressor3.fit(X,y)

In [14]:
# In-sample R^2 (same data as used for fit); compare with the explicit n_jobs variants.
stacking_regressor3.score(X,y)

0.9839039909709154

### cv=5 (fitted twice) or cv=None (defaults to 5-fold) has no impact on stacking_regressor.score

In [16]:
# Variant 4: integer cv=5 instead of a seeded RepeatedKFold splitter.
stacking_regressor4 = StackingRegressor(
    estimators=[('CB', cb)],
    final_estimator=gb,
    cv=5,
    passthrough=True,
)

In [17]:
# Fit the cv=5 variant on the full dataset.
stacking_regressor4.fit(X,y)

In [18]:
# In-sample R^2: scored on the same X, y used for fitting.
stacking_regressor4.score(X,y)

0.9833014343465981

In [19]:
# Variant 5: same arguments as stacking_regressor4 (cv=5), built as a repeat run.
stacking_regressor5 = StackingRegressor(
    estimators=[('CB', cb)],
    final_estimator=gb,
    cv=5,
    passthrough=True,
)

In [20]:
# Fit the repeated cv=5 variant on the full dataset.
stacking_regressor5.fit(X,y)

In [21]:
# In-sample R^2; expected to match stacking_regressor4, which was built with the same arguments.
stacking_regressor5.score(X,y)

0.9833014343465981

In [22]:
# Variant 6: cv not specified, so StackingRegressor uses its default.
stacking_regressor6 = StackingRegressor(
    estimators=[('CB', cb)],
    final_estimator=gb,
    passthrough=True,
)

In [23]:
# Fit the default-cv variant on the full dataset.
stacking_regressor6.fit(X,y)

In [24]:
# In-sample R^2; compare with the explicit cv=5 variants.
stacking_regressor6.score(X,y)

0.9833014343465981

### scores1 and scores2 produce the same results because both use the same cv with random_state=1

In [29]:
# Outer 5-fold cross-validation of the n_jobs=2 stack; the estimator fitted on
# each fold is kept in the result via return_estimator=True.
scores1 = cross_validate(
    stacking_regressor1,
    X,
    y,
    cv=RepeatedKFold(n_splits=5, n_repeats=1, random_state=1),
    return_estimator=True,
)

In [30]:
# Show the cross_validate result dict (fit_time, score_time, the per-fold estimators, ...).
scores1

{'fit_time': array([7.88962412, 7.80040121, 7.83728385, 7.79691792, 7.83893704]),
 'score_time': array([0.00430584, 0.00422716, 0.00580502, 0.00430298, 0.00417995]),
 'estimator': [StackingRegressor(cv=RepeatedKFold(n_repeats=1, n_splits=5, random_state=1),
                    estimators=[('CB',
                                 <catboost.core.CatBoostRegressor object at 0x7fb8c02525b0>)],
                    final_estimator=GradientBoostingRegressor(random_state=1),
                    n_jobs=2, passthrough=True),
  StackingRegressor(cv=RepeatedKFold(n_repeats=1, n_splits=5, random_state=1),
                    estimators=[('CB',
                                 <catboost.core.CatBoostRegressor object at 0x7fb8c02527c0>)],
                    final_estimator=GradientBoostingRegressor(random_state=1),
                    n_jobs=2, passthrough=True),
  StackingRegressor(cv=RepeatedKFold(n_repeats=1, n_splits=5, random_state=1),
                    estimators=[('CB',
                     

In [31]:
# Outer 5-fold cross-validation of the n_jobs=1 stack, using the same seeded
# splitter as scores1 so the two runs see identical folds.
scores2 = cross_validate(
    stacking_regressor2,
    X,
    y,
    cv=RepeatedKFold(n_splits=5, n_repeats=1, random_state=1),
    return_estimator=True,
)

In [32]:
# Show the cross_validate result dict for the n_jobs=1 stack.
scores2

{'fit_time': array([10.42786717, 10.46387696, 10.48905587, 10.94856071, 10.76889205]),
 'score_time': array([0.00403214, 0.00433707, 0.00434828, 0.00423217, 0.00425816]),
 'estimator': [StackingRegressor(cv=RepeatedKFold(n_repeats=1, n_splits=5, random_state=1),
                    estimators=[('CB',
                                 <catboost.core.CatBoostRegressor object at 0x7fb8e2f1ae50>)],
                    final_estimator=GradientBoostingRegressor(random_state=1),
                    n_jobs=1, passthrough=True),
  StackingRegressor(cv=RepeatedKFold(n_repeats=1, n_splits=5, random_state=1),
                    estimators=[('CB',
                                 <catboost.core.CatBoostRegressor object at 0x7fb8e30e1a60>)],
                    final_estimator=GradientBoostingRegressor(random_state=1),
                    n_jobs=1, passthrough=True),
  StackingRegressor(cv=RepeatedKFold(n_repeats=1, n_splits=5, random_state=1),
                    estimators=[('CB',
                

In [None]:
# Use cross-validation to evaluate model performance

def evaluate_model(model, X, y, scoring=None):
    """Cross-validate ``model`` on ``(X, y)`` and return the results as a DataFrame.

    The evaluation uses ``RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)``
    (15 train/test splits) and runs the splits in parallel with ``n_jobs=-1``.

    Parameters
    ----------
    model : estimator
        Passed unchanged to ``cross_validate``.
    X, y : array-like
        Features and target passed unchanged to ``cross_validate``.
    scoring : str, callable, list, tuple or dict, optional
        Forwarded to ``cross_validate``. When omitted, a notebook-level variable
        named ``scoring`` is used if one exists (the original behaviour of this
        function); otherwise ``None`` is passed and the estimator's default
        scorer is used.

    Returns
    -------
    pandas.DataFrame
        One row per split; the columns are the keys of the dict returned by
        ``cross_validate``.
    """
    if scoring is None:
        # The original body read an undeclared global `scoring`, which raises
        # NameError when no such variable exists. Keep honouring that global
        # when present so existing calls behave the same.
        scoring = globals().get("scoring")

    # define the evaluation procedure
    cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)

    # evaluate the model and collect the results
    scores = cross_validate(model, X, y, scoring=scoring,
                            cv=cv, n_jobs=-1)

    return pd.DataFrame(scores)