In this notebook, we load the data and models used for the experiments in Section 4.2 and assess their performance on the test set. There are four triples of regressor/classifier ensembles: 
1) Models Regressor_CatBoost_1, Regressor_LightGBM_1 and Regressor_XGB_1 for the [Superconductivity dataset](https://archive.ics.uci.edu/ml/datasets/superconductivty+data). They are available along with the test set from the folder Experiment_1.

2) Models Regressor_CatBoost_2, Regressor_LightGBM_2 and Regressor_XGB_2 for the [Ailerons dataset](https://www.dcc.fc.up.pt/~ltorgo/Regression/ailerons.html). They are available along with the test set from the folder Experiment_2. (The test set was specified by the data source.)

3) Models Classifier_CatBoost_3, Classifier_LightGBM_3 and Classifier_XGB_3 for the [Online News Popularity dataset](https://archive.ics.uci.edu/ml/datasets/online+news+popularity). They are available along with the test set from the folder Experiment_3.

4) Models Classifier_CatBoost_4, Classifier_LightGBM_4 and Classifier_XGB_4 for the [Higgs dataset](https://archive.ics.uci.edu/ml/datasets/HIGGS). They are available from the folder Experiment_4. (The test set is large, and should be downloaded from the data source.) 

In [1]:
import sklearn
import pandas as pd
import numpy as np
import shap
import json 
import random 
import matplotlib.pyplot as plt
import pickle
import warnings

import catboost
import lightgbm
import xgboost

from catboost import CatBoostRegressor, CatBoostClassifier
from lightgbm import LGBMRegressor, LGBMClassifier 
from xgboost import XGBRegressor, XGBClassifier

from sklearn.metrics import explained_variance_score, r2_score, roc_auc_score

The cell below loads the models and the test set for an experiment. Only experiment_number should be declared (a number between 1 and 4).

In [2]:
#We load the first dataset and the first triple of models. 
#For the other experiments, change experiment_number accordingly. 

experiment_number=1

#Experiments 1-2 ship regressors, experiments 3-4 ship classifiers.
if experiment_number in (1,2):
    model_type='Regressor'
elif experiment_number in (3,4):
    model_type='Classifier'
else:
    raise ValueError('experiment_number should be 1,2,3 or 4.')

if experiment_number==4:
    warnings.warn('Warning: The test set for the Higgs dataset should be downloaded from the data source.')

path='./Experiment_'+str(experiment_number)+'/'

#Parse the test file once; the last column is the target, the rest are features.
test_data=pd.read_csv(path+'test.csv')
X_test=test_data.iloc[:,:-1]
y_test=test_data.iloc[:,-1]

#NOTE: pickle executes arbitrary code on load; only unpickle model files from a trusted source.
#The files are opened in `with` blocks so the handles are closed right after loading.

#The catboost model is provided as pickle here to avoid issues with loading and calling predict. 
#https://github.com/catboost/catboost/issues/696
with open(path+model_type+'_CatBoost_'+str(experiment_number),'rb') as model_file:
    model_cat=pickle.load(model_file)

#The lightgbm model is provided as pickle.
with open(path+model_type+'_LightGBM_'+str(experiment_number),'rb') as model_file:
    model_lgbm=pickle.load(model_file)

#The XGBoost is provided as a .model file.
#https://github.com/dmlc/xgboost/issues/6264
#An empty estimator of the matching kind is created first, then populated from the file.
if model_type=='Regressor':
    model_xgb=XGBRegressor()
else:
    model_xgb=XGBClassifier()
model_xgb.load_model(path+model_type+'_XGB_'+str(experiment_number)+'.model')

In [3]:
#Show the hyperparameters stored on each of the three loaded models.
#CatBoost is queried through _get_params(); LightGBM and XGBoost through get_params().
for label, fetch_params in (
    ("CatBoost", model_cat._get_params),
    ("LightGBM", model_lgbm.get_params),
    ("XGBoost", model_xgb.get_params),
):
    print("\n"+label+" model's hyperparameters:", fetch_params())


CatBoost model's hyperparameters: {'use_best_model': True, 'eval_metric': 'RMSE', 'iterations': 300, 'verbose': 50, 'logging_level': 'Verbose', 'loss_function': 'RMSE', 'depth': 8, 'random_seed': 1, 'num_trees': 300}

LightGBM model's hyperparameters: {'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 1.0, 'importance_type': 'split', 'learning_rate': 0.1, 'max_depth': -1, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 300, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': 'warn', 'subsample': 1.0, 'subsample_for_bin': 200000, 'subsample_freq': 0}

XGBoost model's hyperparameters: {'objective': 'reg:squarederror', 'base_score': 0.5, 'booster': 'gbtree', 'colsample_bylevel': 1, 'colsample_bynode': 1, 'colsample_bytree': 1, 'enable_categorical': False, 'gamma': 0, 'gpu_id': -1, 'importance_type': None, 'interaction_constraints': '', 'learning_rate': 0.300000012,

In [4]:
#Compute one prediction vector per model on X_test.
#Regressors use predict(); otherwise column 1 of predict_proba() is taken
#(presumably the positive-class score -- confirm against the label encoding).
if model_type=='Regressor':
    y_pred_cat, y_pred_lgbm, y_pred_xgb = (
        model.predict(X_test)
        for model in (model_cat, model_lgbm, model_xgb)
    )
else:
    y_pred_cat, y_pred_lgbm, y_pred_xgb = (
        model.predict_proba(X_test)[:,1]
        for model in (model_cat, model_lgbm, model_xgb)
    )


In [5]:
#Predictive power on the test set, printed in the order CatBoost, LightGBM, XGBoost
#and rounded to 4 decimals: r2 score for regressors, ROC AUC for classifiers.
#Both metrics take the ground truth first and the model output second.
score_fn = {'Regressor': r2_score, 'Classifier': roc_auc_score}.get(model_type)

if score_fn is not None:
    for model_output in (y_pred_cat, y_pred_lgbm, y_pred_xgb):
        print(round(score_fn(y_test, model_output), 4))

0.9206
0.9212
0.9166


In [6]:
#Agreement between models on the test set, measured with the r2 score.
#Each model's output is scored (as y_pred) against each other model's output (as y_true),
#giving six values: CatBoost vs the other two, then LightGBM, then XGBoost.
outputs = (y_pred_cat, y_pred_lgbm, y_pred_xgb)
for i, candidate in enumerate(outputs):
    for j, reference in enumerate(outputs):
        if i != j:
            print(round(r2_score(y_true=reference, y_pred=candidate), 4))

0.9905
0.9788
0.9904
0.9814
0.978
0.9809


In [7]:
#Agreement between models on the test set, measured with the explained variance score.
#Each model's output serves in turn as y_true and is compared with the other two
#outputs as y_pred, giving six values: CatBoost as reference, then LightGBM, then XGBoost.
outputs = (y_pred_cat, y_pred_lgbm, y_pred_xgb)
for i, reference in enumerate(outputs):
    for j, candidate in enumerate(outputs):
        if i != j:
            print(round(explained_variance_score(y_true=reference, y_pred=candidate), 4))

0.9904
0.978
0.9905
0.9809
0.9788
0.9814
