In [1]:
#Packages
import pandas as pd
import numpy as np
import itertools
import catboost as cat

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, plot_confusion_matrix, f1_score, accuracy_score, matthews_corrcoef
from catboost import cv, Pool
from sklearn.model_selection import GridSearchCV, StratifiedKFold

In [2]:
#Function for performing cross-validation with CatBoost model.
def catboost_tune_class(param_comb, X, y, num_iterations, colsample, class_imbal, learning_rate, boosting_type, score_function, categorical_feat, cv_metric):
    """Run 5-fold stratified CatBoost cross-validation for every candidate tree depth.

    Parameters
    ----------
    param_comb : numpy.ndarray
        2-D array with one row per candidate; column 0 holds the tree depth.
    X : pandas.DataFrame
        Feature matrix. Columns with dtype ``object`` are passed to CatBoost
        as categorical features.
    y : array-like
        Target; the class weight below counts the values 0 and 1.
    num_iterations : int
        Upper bound on the number of boosting iterations of each CV run.
    colsample : float
        Forwarded to CatBoost as ``rsm``.
    class_imbal : bool
        If true, ``scale_pos_weight`` is (#y == 0) / (#y == 1); otherwise 1.
    learning_rate, boosting_type, score_function
        Forwarded unchanged to CatBoost.
    categorical_feat
        Not used by this function (categorical columns are detected from the
        dtypes of ``X``); kept so existing calls keep working.
    cv_metric : str
        Added to CatBoost's ``custom_loss``. ``'AUC'`` selects the iteration
        with the highest ``test-AUC-mean``; any other value selects the
        iteration with the lowest ``test-Logloss-mean``.

    Returns
    -------
    numpy.ndarray
        ``param_comb`` with two columns appended: the selected number of trees
        and the cross-validated metric value at that iteration.

    Raises
    ------
    ValueError
        If ``class_imbal`` is true and ``y`` has no observation equal to 1.
    """
    #Columns 1 and 2 are filled in below: [number of trees, CV metric value].
    param_comb = np.hstack((param_comb, np.zeros((param_comb.shape[0], 2))))

    #Creating dataset
    cat_feat = X.columns[X.dtypes == object].tolist()
    cat_dat = Pool(X, y, cat_features=cat_feat)

    #Checking if class imbalance
    if class_imbal:
        y_arr = np.asarray(y)
        n_neg = int(np.sum(y_arr == 0))
        n_pos = int(np.sum(y_arr == 1))
        if n_pos == 0:
            #Without this guard the weight would silently become infinite.
            raise ValueError("class_imbal=True requires at least one observation with y == 1")
        weight_pos = n_neg / n_pos
    else:
        weight_pos = 1

    #Settings shared by all candidates; only the tree depth varies in the loop.
    #NOTE(review): no eval_metric is set, so early stopping presumably tracks the
    #Logloss objective even when cv_metric is 'AUC' - confirm against the CatBoost docs.
    base_params = {
        'iterations' : num_iterations,
        'learning_rate' : learning_rate,
        'rsm' : colsample,
        'loss_function' : 'Logloss',
        'leaf_estimation_iterations' : 1,
        'leaf_estimation_method' : 'Newton',
        'random_seed' : 63,
        'verbose' : 10,
        'metric_period' : 1,
        'bagging_temperature' : 0.5,
        'boosting_type' : boosting_type,
        'simple_ctr' : 'BinarizedTargetMeanValue',
        'bootstrap_type' : 'Bayesian',
        'sampling_frequency' : 'PerTree',
        'scale_pos_weight' : weight_pos,
        'grow_policy' : 'SymmetricTree',
        'score_function' : score_function,
        'custom_loss' : cv_metric,
        'thread_count' : 1,
        }

    #AUC is maximised; every other choice falls back to minimising Logloss.
    if cv_metric == 'AUC':
        score_col, pick_best = 'test-AUC-mean', np.argmax
    else:
        score_col, pick_best = 'test-Logloss-mean', np.argmin

    for i in range(param_comb.shape[0]):

        cats_params = dict(base_params, depth = int(param_comb[i, 0]))

        cat_cv = cat.cv(params = cats_params,
                        dtrain = cat_dat,
                        nfold = 5,
                        stratified = True,
                        partition_random_seed = 543,
                        early_stopping_rounds = 200,
                        verbose = False)

        scores = cat_cv[score_col].to_numpy()
        best_iter = int(pick_best(scores))

        #Row position is zero-based, hence +1 for the number of trees.
        param_comb[i, 1:3] = best_iter + 1, scores[best_iter]

        print(i) #progress: index of the candidate just finished

    return param_comb

In [3]:
#Function for fitting CatBoost model.
def catboost_fit_class(param_opt, X, y, colsample, class_imbal, learning_rate, boosting_type, score_function, categorical_feat):
    """Fit a CatBoostClassifier with a given tree depth and number of trees.

    Parameters
    ----------
    param_opt : sequence
        ``param_opt[0]`` is the tree depth and ``param_opt[1]`` the number of
        boosting iterations; further entries are ignored.
    X : pandas.DataFrame
        Training features. Columns with dtype ``object`` are passed to
        CatBoost as categorical features.
    y : array-like
        Target; the class weight below counts the values 0 and 1.
    colsample : float
        Forwarded to CatBoost as ``rsm``.
    class_imbal : bool
        If true, ``scale_pos_weight`` is (#y == 0) / (#y == 1); otherwise 1.
    learning_rate, boosting_type, score_function
        Forwarded unchanged to CatBoost.
    categorical_feat
        Not used by this function (categorical columns are detected from the
        dtypes of ``X``); kept so existing calls keep working.

    Returns
    -------
    catboost.CatBoostClassifier
        The fitted classifier.

    Raises
    ------
    ValueError
        If ``class_imbal`` is true and ``y`` has no observation equal to 1.
    """
    #Checking if class imbalance
    if class_imbal:
        y_arr = np.asarray(y)
        n_neg = int(np.sum(y_arr == 0))
        n_pos = int(np.sum(y_arr == 1))
        if n_pos == 0:
            #Without this guard the weight would silently become infinite.
            raise ValueError("class_imbal=True requires at least one observation with y == 1")
        weight_pos = n_neg / n_pos
    else:
        weight_pos = 1

    #NOTE(review): catboost_tune_class cross-validates with bagging_temperature = 0.5
    #while this fit uses 1 - confirm which value is intended.
    clf_cat = cat.CatBoostClassifier(iterations = int(param_opt[1]),
                                     learning_rate = learning_rate,
                                     depth = int(param_opt[0]),
                                     rsm = colsample,
                                     loss_function = 'Logloss',
                                     leaf_estimation_iterations = 1,
                                     leaf_estimation_method = 'Newton',
                                     random_seed = 63,
                                     verbose = False,
                                     metric_period = 1,
                                     bagging_temperature = 1,
                                     boosting_type = boosting_type,
                                     simple_ctr = 'BinarizedTargetMeanValue',
                                     bootstrap_type = 'Bayesian',
                                     sampling_frequency = 'PerTree',
                                     scale_pos_weight = weight_pos,
                                     grow_policy = 'SymmetricTree',
                                     score_function = score_function)

    cat_feat = X.columns[X.dtypes == object].tolist()
    clf_cat.fit(X, y, cat_features=cat_feat)

    return clf_cat

In [4]:
#Calculating errors of models.
def errors_model(mod, X_test, y_test):
    """Score a fitted classifier on a test set.

    ``mod`` must provide ``predict`` and ``predict_proba``. Returns the list
    [accuracy, F1 score, Matthews correlation coefficient, AUC]; the first
    three use the class predictions, the AUC uses column 1 of the predicted
    probabilities.
    """
    labels_hat = mod.predict(X_test) #Class predictions
    scores_hat = mod.predict_proba(X_test)[:, 1] #Probability predictions

    return [
        accuracy_score(y_test, labels_hat),
        f1_score(y_test, labels_hat),
        matthews_corrcoef(y_test, labels_hat),
        roc_auc_score(y_test, scores_hat),
    ]

In [5]:
#Candidate tree depths 1, 2, ..., 10 as a single-column array (one row per candidate).
params_depth = np.arange(1, 11).reshape(-1, 1)

In [6]:
#No scientific notation: print NumPy floats in fixed-point form.
np.set_printoptions(suppress=True)

# AdaBoost Overlap

In [7]:
#Loading dataset and setting up data.
import pickle

file_name = 'Overlap_data.pickle'
#NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
#The with-block closes the file even if unpickling fails.
with open(file_name, 'rb') as file:
    Overlap_data = pickle.load(file)

#The pickled object is indexed by the four split names below.
X_train_over = Overlap_data['X_train']
y_train_over = Overlap_data['y_train']
X_test_over = Overlap_data['X_test']
y_test_over = Overlap_data['y_test']

In [None]:
#Ordered vs Plain mode of boosting on the Overlap data.

#--- Plain mode: cross-validate every candidate tree depth (Logloss is minimised). ---
cv_plain_over = catboost_tune_class(
    param_comb=params_depth,
    X=pd.DataFrame(X_train_over),
    y=y_train_over,
    num_iterations=10000,
    colsample=1,
    class_imbal=False,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    cv_metric='Logloss',
    score_function='Cosine',
)

#--- Plain mode: refit at the best depth / number of trees and score on the test set. ---
opt_ind_plain = np.argmin(cv_plain_over[:, 2])

catmod_plain_over = catboost_fit_class(
    param_opt=cv_plain_over[opt_ind_plain, :],
    X=pd.DataFrame(X_train_over),
    y=y_train_over,
    colsample=1,
    class_imbal=False,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    score_function='Cosine',
)

errs_plain_over = errors_model(mod=catmod_plain_over, X_test=X_test_over, y_test=y_test_over)

#--- Ordered mode: only the depth selected for Plain mode is cross-validated. ---
cv_ordered_over = catboost_tune_class(
    param_comb=cv_plain_over[opt_ind_plain, 0].reshape(-1, 1),
    X=pd.DataFrame(X_train_over),
    y=y_train_over,
    num_iterations=10000,
    colsample=1,
    class_imbal=False,
    learning_rate=0.05,
    boosting_type='Ordered',
    categorical_feat=False,
    cv_metric='Logloss',
    score_function='Cosine',
)

#--- Ordered mode: refit and score on the test set. ---
opt_ind_ordered = np.argmin(cv_ordered_over[:, 2])

catmod_ordered_over = catboost_fit_class(
    param_opt=cv_ordered_over[opt_ind_ordered, :],
    X=pd.DataFrame(X_train_over),
    y=y_train_over,
    colsample=1,
    class_imbal=False,
    learning_rate=0.05,
    boosting_type='Ordered',
    categorical_feat=False,
    score_function='Cosine',
)

errs_ordered_over = errors_model(mod=catmod_ordered_over, X_test=X_test_over, y_test=y_test_over)

In [None]:
#Test errors of the Plain and Ordered modes.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_over[opt_ind_plain, [0, 1]], errs_plain_over)
test_err_ordered = np.append(cv_ordered_over[opt_ind_ordered, [0, 1]], errs_ordered_over)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_over = np.vstack((test_err_plain, test_err_ordered))
np.round(test_err_over, 3)

In [None]:
#Fitting model with squared error loss (score_function = 'L2') on the Overlap data.

#Tuning number_of_trees values.
#Tuning now runs in Plain mode: the refit below and the Cosine baseline it is
#compared with are both Plain, so tuning in Ordered mode selected the depth and
#number of trees under a different boosting scheme than the one being fitted.
cv_l2_over = catboost_tune_class(param_comb = params_depth,
                                 X = pd.DataFrame(X_train_over),
                                 y = y_train_over,
                                 num_iterations = 10000,
                                 colsample = 1,
                                 class_imbal = False,
                                 learning_rate = 0.05,
                                 boosting_type = 'Plain',
                                 categorical_feat = False,
                                 cv_metric = 'Logloss',
                                 score_function = 'L2')

#Fitting optimal model and calculating errors.
#Logloss is minimised, hence argmin over the CV metric column.
opt_ind_l2 = np.argmin(cv_l2_over[:,2])

catmod_l2_over = catboost_fit_class(param_opt = cv_l2_over[opt_ind_l2,:],
                                    X = pd.DataFrame(X_train_over),
                                    y = y_train_over,
                                    colsample = 1,
                                    class_imbal = False,
                                    learning_rate = 0.05,
                                    boosting_type = 'Plain',
                                    categorical_feat = False,
                                    score_function = 'L2')

errs_l2_over = errors_model(mod = catmod_l2_over,
                            X_test = X_test_over,
                            y_test = y_test_over)

In [None]:
#Test errors for boosting with Cosine and squared error loss.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_over[opt_ind_plain, [0, 1]], errs_plain_over)
test_err_l2 = np.append(cv_l2_over[opt_ind_l2, [0, 1]], errs_l2_over)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_over = np.vstack((test_err_plain, test_err_l2))
np.round(test_err_over, 3)

# Phoneme

In [None]:
#Read the phoneme data and hold out a stratified 20% test set.
#NOTE(review): the path below is specific to one machine; adjust before running elsewhere.
file_path_ph = 'C:\\Users\\Matt\\Documents\\Python code thesis\\Datasets\\phoneme.csv'
df_ph = pd.read_csv(file_path_ph)

y_ph = df_ph['target'].copy()
X_ph = df_ph.drop(columns='target').copy()

X_train_ph, X_test_ph, y_train_ph, y_test_ph = train_test_split(
    X_ph,
    y_ph,
    test_size=0.2,
    stratify=y_ph,
    random_state=65,
)

In [None]:
#Ordered vs Plain mode of boosting on the Phoneme data.

#--- Plain mode: cross-validate every candidate tree depth (AUC is maximised). ---
cv_plain_ph = catboost_tune_class(
    param_comb=params_depth,
    X=X_train_ph,
    y=y_train_ph,
    num_iterations=10000,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    cv_metric='AUC',
    score_function='Cosine',
)

#--- Plain mode: refit at the best depth / number of trees and score on the test set. ---
opt_ind_plain = np.argmax(cv_plain_ph[:, 2])

catmod_plain_ph = catboost_fit_class(
    param_opt=cv_plain_ph[opt_ind_plain, :],
    X=X_train_ph,
    y=y_train_ph,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    score_function='Cosine',
)

errs_plain_ph = errors_model(mod=catmod_plain_ph, X_test=X_test_ph, y_test=y_test_ph)

#--- Ordered mode: only the depth selected for Plain mode is cross-validated. ---
cv_ordered_ph = catboost_tune_class(
    param_comb=cv_plain_ph[opt_ind_plain, 0].reshape(-1, 1),
    X=X_train_ph,
    y=y_train_ph,
    num_iterations=10000,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Ordered',
    categorical_feat=False,
    cv_metric='AUC',
    score_function='Cosine',
)

#--- Ordered mode: refit and score on the test set. ---
opt_ind_ordered = np.argmax(cv_ordered_ph[:, 2])

catmod_ordered_ph = catboost_fit_class(
    param_opt=cv_ordered_ph[opt_ind_ordered, :],
    X=X_train_ph,
    y=y_train_ph,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Ordered',
    categorical_feat=False,
    score_function='Cosine',
)

errs_ordered_ph = errors_model(mod=catmod_ordered_ph, X_test=X_test_ph, y_test=y_test_ph)

In [None]:
#Test errors of the Plain and Ordered modes.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_ph[opt_ind_plain, [0, 1]], errs_plain_ph)
test_err_ordered = np.append(cv_ordered_ph[opt_ind_ordered, [0, 1]], errs_ordered_ph)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_ph = np.vstack((test_err_plain, test_err_ordered))
np.round(test_err_ph, 3)

In [None]:
#Fitting model with squared error loss (score_function = 'L2') on the Phoneme data.

#--- Cross-validate every candidate tree depth (AUC is maximised). ---
cv_l2_ph = catboost_tune_class(
    param_comb=params_depth,
    X=pd.DataFrame(X_train_ph),
    y=y_train_ph,
    num_iterations=10000,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    cv_metric='AUC',
    score_function='L2',
)

#--- Refit at the best depth / number of trees and score on the test set. ---
opt_ind_l2 = np.argmax(cv_l2_ph[:, 2])

catmod_l2_ph = catboost_fit_class(
    param_opt=cv_l2_ph[opt_ind_l2, :],
    X=pd.DataFrame(X_train_ph),
    y=y_train_ph,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    score_function='L2',
)

errs_l2_ph = errors_model(mod=catmod_l2_ph, X_test=X_test_ph, y_test=y_test_ph)

In [None]:
#Test errors for boosting with Cosine and squared error loss.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_ph[opt_ind_plain, [0, 1]], errs_plain_ph)
test_err_l2 = np.append(cv_l2_ph[opt_ind_l2, [0, 1]], errs_l2_ph)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_ph = np.vstack((test_err_plain, test_err_l2))
np.round(test_err_ph, 3)

# Adult

In [7]:
#Importing and setting up the Adult data (two header-less CSV files stacked row-wise).
#NOTE(review): the paths below are specific to one machine; adjust before running elsewhere.
file_path = 'C:\\Users\\Matt\\Documents\\Python code thesis\\Datasets\\adult1.csv'
df_adult1 = pd.read_csv(file_path, header=None)
file_path = 'C:\\Users\\Matt\\Documents\\Python code thesis\\Datasets\\adult2.csv'
df_adult2 = pd.read_csv(file_path, header=None)

df_adult = pd.concat([df_adult1, df_adult2])
df_adult.columns = [
    "age", "workclass", "fnlwgt", "education", "education-num",
    "martial-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country",
    "target",
]

#Remove every space character from the string values.
df_adult = df_adult.replace(' ', '', regex=True)

#Drop any trailing '.' from the label, then code "<=50K" as 0 and everything else as 1.
df_adult['target'] = df_adult['target'].apply(
    lambda label: 0 if label.rstrip('.') == "<=50K" else 1
)

df_adult = df_adult.drop(columns='education')

In [8]:
#Training/Test split: stratified on the target, 20% held out for testing.
y_adult = df_adult['target'].copy()
X_adult = df_adult.drop(columns='target').copy()

X_train_adult, X_test_adult, y_train_adult, y_test_adult = train_test_split(
    X_adult,
    y_adult,
    test_size=0.2,
    stratify=y_adult,
    random_state=65,
)

In [None]:
#Ordered vs Plain mode of boosting on the Adult data.

#--- Plain mode: cross-validate every candidate tree depth (AUC is maximised). ---
cv_plain_adult = catboost_tune_class(
    param_comb=params_depth,
    X=X_train_adult,
    y=y_train_adult,
    num_iterations=10000,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=True,
    cv_metric='AUC',
    score_function='Cosine',
)

#--- Plain mode: refit at the best depth / number of trees and score on the test set. ---
opt_ind_plain = np.argmax(cv_plain_adult[:, 2])

catmod_plain_adult = catboost_fit_class(
    param_opt=cv_plain_adult[opt_ind_plain, :],
    X=X_train_adult,
    y=y_train_adult,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=True,
    score_function='Cosine',
)

errs_plain_adult = errors_model(mod=catmod_plain_adult, X_test=X_test_adult, y_test=y_test_adult)

#--- Ordered mode: only the depth selected for Plain mode is cross-validated. ---
cv_ordered_adult = catboost_tune_class(
    param_comb=cv_plain_adult[opt_ind_plain, 0].reshape(-1, 1),
    X=X_train_adult,
    y=y_train_adult,
    num_iterations=10000,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Ordered',
    categorical_feat=True,
    cv_metric='AUC',
    score_function='Cosine',
)

#--- Ordered mode: refit and score on the test set. ---
opt_ind_ordered = np.argmax(cv_ordered_adult[:, 2])

catmod_ordered_adult = catboost_fit_class(
    param_opt=cv_ordered_adult[opt_ind_ordered, :],
    X=X_train_adult,
    y=y_train_adult,
    colsample=1,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Ordered',
    categorical_feat=True,
    score_function='Cosine',
)

errs_ordered_adult = errors_model(mod=catmod_ordered_adult, X_test=X_test_adult, y_test=y_test_adult)

In [None]:
#Test errors of the Plain and Ordered modes.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_adult[opt_ind_plain, [0, 1]], errs_plain_adult)
test_err_ordered = np.append(cv_ordered_adult[opt_ind_ordered, [0, 1]], errs_ordered_adult)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_adult = np.vstack((test_err_plain, test_err_ordered))
np.round(test_err_adult, 3)

In [None]:
#Fitting model with squared error loss (score_function = 'L2') on the Adult data.

#Tuning number_of_trees values.
#class_imbal is now True, as in the Plain/Cosine run this model is compared with;
#with False the comparison mixed the score function with the class weighting.
#NOTE(review): only the depth selected for the Plain/Cosine model is cross-validated
#here, whereas the other datasets search the full params_depth grid - confirm this is intended.
cv_l2_adult = catboost_tune_class(param_comb = cv_plain_adult[opt_ind_plain,0].reshape(-1,1),
                                  X = pd.DataFrame(X_train_adult),
                                  y = y_train_adult,
                                  num_iterations = 10000,
                                  colsample = 1,
                                  class_imbal = True,
                                  learning_rate = 0.05,
                                  boosting_type = 'Plain',
                                  categorical_feat = True,
                                  cv_metric = 'AUC',
                                  score_function = 'L2')

#Fitting optimal model and calculating errors.
#AUC is maximised, hence argmax over the CV metric column.
opt_ind_l2 = np.argmax(cv_l2_adult[:,2])

catmod_l2_adult = catboost_fit_class(param_opt = cv_l2_adult[opt_ind_l2,:],
                                     X = pd.DataFrame(X_train_adult),
                                     y = y_train_adult,
                                     colsample = 1,
                                     class_imbal = True,
                                     learning_rate = 0.05,
                                     boosting_type = 'Plain',
                                     categorical_feat = True,
                                     score_function = 'L2')

errs_l2_adult = errors_model(mod = catmod_l2_adult,
                             X_test = X_test_adult,
                             y_test = y_test_adult)

In [None]:
#Test errors for boosting with Cosine and squared error loss.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_adult[opt_ind_plain, [0, 1]], errs_plain_adult)
test_err_l2 = np.append(cv_l2_adult[opt_ind_l2, [0, 1]], errs_l2_adult)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_adult = np.vstack((test_err_plain, test_err_l2))
np.round(test_err_adult, 3)

# Santander Customer Satisfaction

In [11]:
#Loading dataset.
import pickle

file_name = 'customersatformat.pickle'
#NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
#The with-block closes the file even if unpickling fails.
with open(file_name, 'rb') as file:
    df_sat = pickle.load(file)

In [12]:
#Training/Test split: stratified on TARGET, 20% held out for testing.
y_sat = df_sat['TARGET'].copy()
X_sat = df_sat.drop(columns='TARGET').copy()

X_train_sat, X_test_sat, y_train_sat, y_test_sat = train_test_split(
    X_sat,
    y_sat,
    test_size=0.2,
    stratify=y_sat,
    random_state=71,
)

In [None]:
#Ordered vs Plain mode of boosting on the Santander data.

#Tuning parameters for boosting mode = Plain.
#The metric is passed as cv_metric, the name catboost_tune_class declares; the
#previous keyword `metric` made this call fail with a TypeError.
cv_plain_sat = catboost_tune_class(param_comb = params_depth,
                                   X = X_train_sat,
                                   y = y_train_sat,
                                   num_iterations = 10000,
                                   colsample = 0.3,
                                   class_imbal = True,
                                   learning_rate = 0.05,
                                   boosting_type = 'Plain',
                                   categorical_feat = False,
                                   cv_metric = 'AUC',
                                   score_function = 'Cosine')

#Fitting optimal model for boosting mode = Plain and calculating errors of model.
#AUC is maximised, hence argmax over the CV metric column.
opt_ind_plain = np.argmax(cv_plain_sat[:,2])

catmod_plain_sat = catboost_fit_class(param_opt = cv_plain_sat[opt_ind_plain,:],
                                      X = X_train_sat,
                                      y = y_train_sat,
                                      colsample = 0.3,
                                      class_imbal = True,
                                      learning_rate = 0.05,
                                      boosting_type = 'Plain',
                                      categorical_feat = True,
                                      score_function = 'Cosine')

errs_plain_sat = errors_model(mod = catmod_plain_sat,
                              X_test = X_test_sat,
                              y_test = y_test_sat)

#Tuning parameter values for boosting mode = Ordered.
#Only the depth selected for Plain mode is cross-validated.
cv_ordered_sat = catboost_tune_class(param_comb = cv_plain_sat[opt_ind_plain,0].reshape(-1,1),
                                     X = X_train_sat,
                                     y = y_train_sat,
                                     num_iterations = 10000,
                                     colsample = 0.3,
                                     class_imbal = True,
                                     learning_rate = 0.05,
                                     boosting_type = 'Ordered',
                                     categorical_feat = True,
                                     cv_metric = 'AUC',
                                     score_function = 'Cosine')

#Fitting optimal model for boosting mode = Ordered and calculating errors of model.
opt_ind_ordered = np.argmax(cv_ordered_sat[:,2])

catmod_ordered_sat = catboost_fit_class(param_opt = cv_ordered_sat[opt_ind_ordered,:],
                                        X = X_train_sat,
                                        y = y_train_sat,
                                        colsample = 0.3,
                                        class_imbal = True,
                                        learning_rate = 0.05,
                                        boosting_type = 'Ordered',
                                        categorical_feat = True,
                                        score_function = 'Cosine')

errs_ordered_sat = errors_model(mod = catmod_ordered_sat,
                                X_test = X_test_sat,
                                y_test = y_test_sat)

In [None]:
#Test errors of the Plain and Ordered modes.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_sat[opt_ind_plain, [0, 1]], errs_plain_sat)
test_err_ordered = np.append(cv_ordered_sat[opt_ind_ordered, [0, 1]], errs_ordered_sat)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_sat = np.vstack((test_err_plain, test_err_ordered))
np.round(test_err_sat, 3)

In [None]:
#Fitting model with squared error loss (score_function = 'L2') on the Santander data.

#--- Cross-validate every candidate tree depth (AUC is maximised). ---
cv_l2_sat = catboost_tune_class(
    param_comb=params_depth,
    X=pd.DataFrame(X_train_sat),
    y=y_train_sat,
    num_iterations=10000,
    colsample=0.3,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    cv_metric='AUC',
    score_function='L2',
)

#--- Refit at the best depth / number of trees and score on the test set. ---
opt_ind_l2 = np.argmax(cv_l2_sat[:, 2])

catmod_l2_sat = catboost_fit_class(
    param_opt=cv_l2_sat[opt_ind_l2, :],
    X=pd.DataFrame(X_train_sat),
    y=y_train_sat,
    colsample=0.3,
    class_imbal=True,
    learning_rate=0.05,
    boosting_type='Plain',
    categorical_feat=False,
    score_function='L2',
)

errs_l2_sat = errors_model(mod=catmod_l2_sat, X_test=X_test_sat, y_test=y_test_sat)

In [None]:
#Test errors for boosting with Cosine and squared error loss.
#Each row: [tree depth, number of trees, accuracy, F1, MCC, AUC].
test_err_plain = np.append(cv_plain_sat[opt_ind_plain, [0, 1]], errs_plain_sat)
test_err_l2 = np.append(cv_l2_sat[opt_ind_l2, [0, 1]], errs_l2_sat)
#np.vstack replaces np.row_stack, an alias that NumPy 2.0 deprecates.
test_err_sat = np.vstack((test_err_plain, test_err_l2))
np.round(test_err_sat, 3)