# Import packages and functions

In [1]:
import sys
# Put the parent directory on the module search path so that project packages
# living one level above this notebook (e.g. `model`, imported below) resolve.
# The entry is a relative path, so it is interpreted against the kernel's
# current working directory.
sys.path.insert(1, '../')

In [2]:
import shap
import time
import json
import pprint
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from scipy.stats import iqr
from collections import OrderedDict
from sklearn.preprocessing import LabelBinarizer
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from model.model_building import load_data, tune_hyperparam, evaluate_model, plot_eval

# Set up constants

In [3]:
# --- input data and plot output -------------------------------------------
# Excel workbook holding the processed feature table read by pd.read_excel below
PROCESSED_PATH = "../data/processed/IMT_Classification_Dataset_Full_Feature_Set_v10.xlsx"
# directory prefix (with trailing slash) and on/off switch for saved figures
SAVE_PLOT_PATH = "../plots/"
SAVE_PLOT = True

# --- reproducibility ---------------------------------------------------------
# seed handed to tune_hyperparam() for every model
TRAIN_RANDOM_SEED = 31415926
# random_state values for the repeated train/test splits (0..9)
SPLIT_RANDOM_SEEDS = np.arange(10)
# labels under which the per-split tuning results are stored (0..9);
# must stay the same length as SPLIT_RANDOM_SEEDS
EVAL_RANDOM_SEEDS = np.arange(10)

# --- cross-validation and scoring -------------------------------------------
# number of CV folds (presumably consumed by the model_building helpers -- confirm)
NUM_FOLDS = 5
# scikit-learn scorer names (presumably used when evaluating models -- confirm)
SCORING_METRICS = ["precision_weighted", "recall_weighted", "roc_auc", "f1_weighted"]

# Define some helper functions

In [4]:
def get_important_features(df_input, choice, trained_model, max_n=10):
    """Rank features by SHAP importance and draw the SHAP bar summary plot.

    Parameters
    ----------
    df_input
        Data handed unchanged to ``load_data`` together with ``choice``.
    choice
        Classification task identifier; forwarded to ``load_data`` and used as
        the prefix of the saved figure's file name.
    trained_model
        Fitted tree-based model passed to ``shap.TreeExplainer``.
    max_n : int, default 10
        Number of features shown in the plot and returned in the table.

    Returns
    -------
    pandas.DataFrame
        Output of ``get_important_df``: the ``max_n`` features with the largest
        mean absolute SHAP value, most important first.

    Notes
    -----
    The figure is written to disk only when the notebook-level ``SAVE_PLOT``
    flag is true, into the ``SAVE_PLOT_PATH`` directory prefix. The file name
    reflects ``max_n`` (``<choice>_top_<max_n>_features_full.pdf``), so the
    default call still produces ``<choice>_top_10_features_full.pdf``.
    """
    # load_data returns a pair; only the first element (the feature table) is needed here
    features, _ = load_data(df_input, choice)
    # explain the fitted tree model on those features
    explainer = shap.TreeExplainer(trained_model)
    shap_values = explainer.shap_values(features)
    # show=False keeps the figure open so that it can still be saved below
    shap.summary_plot(shap_values, features, plot_type="bar", max_display=max_n, show=False)
    if SAVE_PLOT:
        # SAVE_PLOT_PATH is a directory prefix that already ends with a slash
        plot_file = "{}{}_top_{}_features_full.pdf".format(SAVE_PLOT_PATH, choice, max_n)
        plt.savefig(plot_file, dpi=300, bbox_inches="tight")
    # tabulate the SHAP importance of the max_n most important features
    df_important_features = get_important_df(features, shap_values, n_max=max_n)
    return df_important_features

def get_important_df(x_train, shap_values, n_max):
    """Tabulate the ``n_max`` features with the largest mean absolute SHAP value.

    Helper for ``get_important_features()``.

    Parameters
    ----------
    x_train : pandas.DataFrame
        Feature table whose ``columns`` name the features, in the same order as
        the feature axis of ``shap_values``.
    shap_values
        One of:
        * a 2-D array ``(n_samples, n_features)`` (single-output model);
        * a list/tuple of such 2-D arrays, one per output/class;
        * a 3-D array, treated as ``(n_samples, n_features, n_outputs)``.
        For multi-output input the per-output mean absolute values are summed,
        which matches the total bar length of SHAP's stacked bar summary plot.
    n_max : int
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature`` and ``shap_values``, sorted by descending
        ``shap_values`` with a fresh 0..k-1 index, truncated to ``n_max`` rows.

    Raises
    ------
    ValueError
        If the number of per-feature importances does not match the number of
        columns in ``x_train``.
    """
    if isinstance(shap_values, (list, tuple)):
        # one (n_samples, n_features) matrix per output: average |SHAP| over the
        # samples of each output, then add the outputs together
        per_output = [np.abs(np.asarray(values)).mean(axis=0) for values in shap_values]
        feature_shap_values = np.sum(per_output, axis=0)
    else:
        # average |SHAP| over the sample axis
        feature_shap_values = np.abs(np.asarray(shap_values)).mean(axis=0)
        if feature_shap_values.ndim == 2:
            # 3-D input: what remains is (n_features, n_outputs); add the outputs
            feature_shap_values = feature_shap_values.sum(axis=-1)

    feature_names = list(x_train.columns)
    if np.ndim(feature_shap_values) != 1 or len(feature_shap_values) != len(feature_names):
        # zip() would otherwise silently pair names with the wrong values
        raise ValueError(
            "shap_values yields importances of shape {} but x_train has {} columns".format(
                np.shape(feature_shap_values), len(feature_names)
            )
        )

    # create a dataframe with the feature names and shap values
    df_feature_importance = pd.DataFrame(list(zip(feature_names, feature_shap_values)),
                                         columns=["feature", "shap_values"])
    # sort the dataframe by descending shap values
    df_feature_importance = df_feature_importance.sort_values(by=["shap_values"], ascending=False, ignore_index=True)
    # return the n most important features
    return df_feature_importance.head(n_max)

# Read in the processed data

In [5]:
# Load the processed feature table from the Excel workbook configured in PROCESSED_PATH
df = pd.read_excel(PROCESSED_PATH)
# bare expression: the notebook renders the frame as this cell's output
df

Unnamed: 0,Compound,Label,struct_file_path,range_MendeleevNumber,mean_MendeleevNumber,avg_dev_MendeleevNumber,maximum_AtomicWeight,range_AtomicWeight,mean_AtomicWeight,avg_dev_AtomicWeight,...,avg_mx_dists,max_xx_dists,min_xx_dists,avg_xx_dists,v_m,v_x,iv_p1,est_hubbard_u,est_charge_trans,volume_per_site
0,Ba(FeSb3)4,0,../data/Structures/Metals/BaFe4Sb12_CollCode_6...,76,73.470588,16.276817,137.32700,81.482000,107.166294,24.151197,...,2.596481,3.450762,2.871024,3.266718,-23.735803,9.622199,54.91000,21.129322,19.461062,22.917528
1,Ba(NiP)2,0,../data/Structures/Metals/BaNi2P2_SD_1701656.cif,74,59.400000,20.160000,137.32700,106.353238,63.332265,29.597894,...,2.259574,3.709116,3.554335,3.631725,-22.390084,28.414957,35.18700,11.858755,35.445979,18.414152
2,Ba(PIr)2,0,../data/Structures/Metals/BaIr2P2_CollCode_957...,74,59.000000,20.000000,192.21700,161.243238,116.741705,68.614354,...,2.355169,3.794974,3.713769,3.754372,-20.210211,27.763527,28.00000,5.839285,34.042180,19.575756
3,Ba(PRh)2,0,../data/Structures/Metals/BaRh2P2_CollCode_501...,74,58.600000,19.840000,137.32700,106.353238,81.017105,40.034674,...,2.346291,3.776568,3.737587,3.757078,-20.397126,27.833127,31.06000,7.810114,33.195560,19.512571
4,Ba(Sb3Os)4,0,../data/Structures/Metals/BaOs4Sb12_CollCode_6...,76,73.941176,15.612457,190.23000,68.470000,138.786294,24.208803,...,2.635420,3.502512,2.914080,3.315708,-23.385103,9.480030,41.00000,12.916563,10.425355,23.964132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338,VO2,2,../data/Structures/MIT_materials/HighT/VO2_HT_...,41,73.333333,18.222222,50.94150,34.942100,27.646767,15.529822,...,1.927294,2.725617,2.575183,2.675472,-45.333235,26.337259,65.28165,14.484546,9.779242,9.863262
339,YCoO3,2,../data/Structures/MIT_materials/HighT/YCoO3_C...,75,66.200000,24.960000,88.90585,72.906450,39.167449,27.801659,...,1.933264,3.527817,2.699880,2.881036,-37.430821,24.043918,51.27000,13.895560,12.815560,10.252012
340,YFe4(CuO4)3,2,../data/Structures/MIT_materials/HighT/YCu3Fe4...,75,73.400000,16.320000,88.90585,72.906450,34.745832,22.495719,...,2.356918,2.896772,2.559157,2.745188,-26.680740,24.867599,37.86795,12.807177,16.608178,9.756232
341,YNiO3,2,../data/Structures/MIT_materials/HighT/YNiO3_6...,75,66.800000,24.240000,88.90585,72.906450,39.119490,27.744108,...,1.963601,3.184308,2.736503,2.827539,-36.802534,23.821848,54.92000,15.898371,10.393278,10.686816


In [6]:
# Columns carried forward into the reduced table, in display order:
#   1. three bookkeeping columns kept for human reference (compound name,
#      class label, path of the structure file),
#   2. the hand-picked descriptor columns.
selected_features = (
    ["Compound", "Label", "struct_file_path"]
    + ["gii", "est_hubbard_u", "est_charge_trans", "ewald_energy_per_atom"]
    + ["avg_dev_Electronegativity"]
    + ["range_MendeleevNumber", "avg_dev_CovalentRadius"]
    + ["avg_mm_dists", "avg_mx_dists", "avg_xx_dists"]
)
# column subset of the full table, same row order
df_reduced = df[selected_features]

# Split the train test data

In [7]:
# NOTE: df_X is the whole reduced frame, so it still carries "Label" and the other
# two reference columns -- it is not a feature-only matrix. The project helpers
# (load_data / tune_hyperparam) receive this frame together with a `choice`;
# presumably they separate features from labels themselves -- confirm in model_building.
df_X = df_reduced
# single-column DataFrame holding the class labels; used as the stratification target
df_y = df_reduced[["Label"]]

In [8]:
# One stratified 90/10 train/test split per seed in SPLIT_RANDOM_SEEDS.
# train_test_split returns [X_train, X_test, y_train, y_test] for the two inputs,
# so splits[k][0] is the training frame of the k-th split.
splits = [train_test_split(df_X, df_y, test_size=0.1, random_state=split_seed, stratify=df_y) for split_seed in SPLIT_RANDOM_SEEDS]

# Set up models and their respective default hyperparameter search grid

In [9]:
# Model classes to tune. Only XGBClassifier is active; the other scikit-learn
# classifiers are kept commented out so they can be re-enabled together with
# their grids below.
# lst_of_models and lst_of_param_grids are consumed pairwise (zipped) by the
# tuning loop, so entry k of one list must correspond to entry k of the other.
lst_of_models = [xgb.XGBClassifier#, DummyClassifier, LogisticRegression, 
                 #DecisionTreeClassifier, RandomForestClassifier, GradientBoostingClassifier
                ]

# Hyperparameter search grid for each model, in the same order as lst_of_models.
# None is passed as `param_grid` for XGBClassifier; presumably tune_hyperparam
# then falls back to its built-in default grid -- confirm in model_building.
lst_of_param_grids = [
    #XGBClassifier
    None#,
#     {#DummyClassifier
#         "strategy": ["uniform"],
#         "random_state": [TRAIN_RANDOM_SEED]
#     },
#     {#LogisticRegression with L2 penalty (Ridge classifier)
#         "penalty": ["l2"],
#         "C": np.logspace(-3, 3, num=7),
#         "class_weight": ["balanced"],
#         "solver": ["newton-cg"],
#         "random_state": [TRAIN_RANDOM_SEED]
#     }
#     ,
#     {#DecisionTreeClassifier
#         "criterion": ["gini", "entropy"],
#         "max_leaf_nodes": [5, 10, 15],
#         "class_weight": ["balanced"],
#         "random_state": [TRAIN_RANDOM_SEED]
#     },
#     {#RandomForestClassifier
#         "n_estimators": [100, 150, 200],
#         "max_depth": [2, 3, 4, 5],
#         "criterion": ["gini", "entropy"],
#         "max_leaf_nodes": [5, 10, 15],
#         "class_weight": ["balanced"],
#         # max_samples: None means use all samples
#         "max_samples": [0.5, 0.6, 0.7, 0.8, 0.9, None],
#         "random_state": [TRAIN_RANDOM_SEED]
#     },
#     {#GradientBoostingClassifier
#         "n_estimators": [10, 20, 30, 40, 80, 100, 150, 200],
#         "max_depth": [2, 3, 4, 5],
#         "learning_rate": np.logspace(-3, 2, num=6),
#         "subsample": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
#         "random_state": [TRAIN_RANDOM_SEED]
#     }
]

# Tune the hyperparameters with 5-fold cv

The default parameter grid for `XGBClassifier()` is as follows:

    n_estimators: [10, 20, 30, 40, 80, 100, 150, 200],
    max_depth: [2, 3, 4, 5],
    learning_rate: np.logspace(-3, 2, num=6),
    subsample: [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    scale_pos_weight: [np.sum(y_labels == 0) / np.sum(y_labels == 1)],
    base_score: [0.3, 0.5, 0.7],
    random_state: [seed]
    
The scoring metric for the grid-search CV is `f1_weighted`, which you can change by specifying the `scoring_metric_for_tuning` parameter of the **tune_hyperparam()** function. You can also change the number of CV folds by specifying `num_folds`, and override the default parameter grid by specifying `param_grid`.

**Note**: the `scale_pos_weight` parameter is not used when tuning for multiclass classification problems. Class imbalance is instead handled through the `sample_weight` parameter of the [.fit()](https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.XGBClassifier.fit) method of the XGBoost scikit-learn API.

In [10]:
# Tune every model on the training part of every train/test split.
# Result layout: best_params[str(eval_seed)][model class name][choice] holds
# whatever tune_hyperparam() returns for that combination.

# The splits were generated from SPLIT_RANDOM_SEEDS but the results are stored
# under EVAL_RANDOM_SEEDS, so the three sequences have to line up one-to-one.
if not (len(EVAL_RANDOM_SEEDS) == len(SPLIT_RANDOM_SEEDS) == len(splits)):
    raise ValueError("EVAL_RANDOM_SEEDS, SPLIT_RANDOM_SEEDS and splits must have the same length")
# the models and their grids are paired positionally as well
if len(lst_of_models) != len(lst_of_param_grids):
    raise ValueError("lst_of_models and lst_of_param_grids must have the same length")

best_params = OrderedDict()
for eval_seed, split_seed, split in zip(EVAL_RANDOM_SEEDS, SPLIT_RANDOM_SEEDS, splits):
    seed_key = str(eval_seed)
    best_params[seed_key] = OrderedDict()
    # first element of a split is the training frame
    df_train = split[0]
    # report the seed that actually generated this split (ANSI green background)
    print("\n------------------------------------------------------\nTrain test split seed: \033[42m{}\033[0m".format(split_seed))
    for model, params in zip(lst_of_models, lst_of_param_grids):
        model_type = model.__name__
        print("\n------------------------------------------------------\nModel type: {}".format(model_type))
        # one tuning run per classification task; the fold count comes from the
        # notebook-level NUM_FOLDS constant instead of the helper's default
        best_params[seed_key][model_type] = {
            choice: tune_hyperparam(df_train, choice, TRAIN_RANDOM_SEED, model=model,
                                    param_grid=params, num_folds=NUM_FOLDS)
            for choice in ["Metal", "Insulator", "MIT", "Multiclass"]
        }
        # pause the execution for 1 second to ensure proper printout format
        time.sleep(1)


------------------------------------------------------
Train test split seed: [42m0[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 976 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 2976 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 5776 tasks      | elapsed:   28.5s
[Parallel(n_jobs=-1)]: Done 9376 tasks      | elapsed:   49.5s
[Parallel(n_jobs=-1)]: Done 13776 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   28.9s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   49.2s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 568 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 2536 tasks      | elapsed:   22.4s
[Parallel(n_jobs=-1)]: Done 5336 tasks      | elapsed:   34.9s
[Parallel(n_jobs=-1)]: Done 8936 tasks      | elapsed:   54.8s
[Parallel(n_jobs=-1)]: Done 13336 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 17040 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.6min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 864 tasks      | elapsed:   11.3s
[Parallel(n_jobs=-1)]: Done 1448 tasks      | elapsed:   20.8s
[Parallel(n_jobs=-1)]: Done 2736 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done 5600 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 8000 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 10368 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 13264 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 15960 tasks      | elapsed:  3.1min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.3min finished



------------------------------------------------------
Train test split seed: [42m1[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1488 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 3376 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done 6176 tasks      | elapsed:   35.0s
[Parallel(n_jobs=-1)]: Done 9776 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 14176 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.5min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.0s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   19.3s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   31.7s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   54.6s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.5min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.6s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   30.5s
[Parallel(n_jobs=-1)]: Done 9584 tasks      | elapsed:   50.2s
[Parallel(n_jobs=-1)]: Done 13184 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 1850 tasks      | elapsed:   25.4s
[Parallel(n_jobs=-1)]: Done 2912 tasks      | elapsed:   40.9s
[Parallel(n_jobs=-1)]: Done 4712 tasks      | elapsed:   54.9s
[Parallel(n_jobs=-1)]: Done 7584 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 10896 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 14360 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.0min finished



------------------------------------------------------
Train test split seed: [42m2[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   29.1s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   50.4s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   56.6s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.2s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   18.7s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   49.8s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 1906 tasks      | elapsed:   26.8s
[Parallel(n_jobs=-1)]: Done 3392 tasks      | elapsed:   44.7s
[Parallel(n_jobs=-1)]: Done 6642 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 8666 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 12816 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 17152 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.0min finished



------------------------------------------------------
Train test split seed: [42m3[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.5s
[Parallel(n_jobs=-1)]: Done 9056 tasks      | elapsed:   47.3s
[Parallel(n_jobs=-1)]: Done 12656 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 17056 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    6.9s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   50.5s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.1s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   55.2s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   12.0s
[Parallel(n_jobs=-1)]: Done 1850 tasks      | elapsed:   24.6s
[Parallel(n_jobs=-1)]: Done 3000 tasks      | elapsed:   40.4s
[Parallel(n_jobs=-1)]: Done 6600 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 7928 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 11912 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 14864 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.0min finished



------------------------------------------------------
Train test split seed: [42m4[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 8928 tasks      | elapsed:   46.1s
[Parallel(n_jobs=-1)]: Done 12528 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 16928 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.8s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   56.4s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.1s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   18.5s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   28.9s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   49.3s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   12.0s
[Parallel(n_jobs=-1)]: Done 1850 tasks      | elapsed:   25.2s
[Parallel(n_jobs=-1)]: Done 2976 tasks      | elapsed:   41.3s
[Parallel(n_jobs=-1)]: Done 6544 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 8608 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 12514 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 16312 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.0min finished



------------------------------------------------------
Train test split seed: [42m5[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.6s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   56.3s
[Parallel(n_jobs=-1)]: Done 16016 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.9s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   30.7s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   57.3s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.6s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done 8928 tasks      | elapsed:   45.4s
[Parallel(n_jobs=-1)]: Done 12528 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 16928 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 812 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 1360 tasks      | elapsed:   17.8s
[Parallel(n_jobs=-1)]: Done 2760 tasks      | elapsed:   37.6s
[Parallel(n_jobs=-1)]: Done 4560 tasks      | elapsed:   53.6s
[Parallel(n_jobs=-1)]: Done 7272 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 9872 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 13400 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.0min finished



------------------------------------------------------
Train test split seed: [42m6[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.9s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   56.6s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.9s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.8s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   56.5s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done 16064 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 1760 tasks      | elapsed:   24.0s
[Parallel(n_jobs=-1)]: Done 3160 tasks      | elapsed:   42.9s
[Parallel(n_jobs=-1)]: Done 4968 tasks      | elapsed:   56.1s
[Parallel(n_jobs=-1)]: Done 7640 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 10248 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 14072 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.0min finished



------------------------------------------------------
Train test split seed: [42m7[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.8s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   30.0s
[Parallel(n_jobs=-1)]: Done 11760 tasks      | elapsed:   56.6s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 2160 tasks      | elapsed:   12.7s
[Parallel(n_jobs=-1)]: Done 6160 tasks      | elapsed:   30.7s
[Parallel(n_jobs=-1)]: Done 8608 tasks      | elapsed:   48.7s
[Parallel(n_jobs=-1)]: Done 11536 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 15936 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.5min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   21.1s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   31.9s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   55.2s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.5min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 696 tasks      | elapsed:    8.9s
[Parallel(n_jobs=-1)]: Done 1448 tasks      | elapsed:   21.2s
[Parallel(n_jobs=-1)]: Done 2848 tasks      | elapsed:   45.6s
[Parallel(n_jobs=-1)]: Done 3896 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 6096 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 7807 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 9832 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 12698 tasks      | elapsed:  2.9min
[Parallel(n_jobs=-1)]: Done 14600 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.9min finished



------------------------------------------------------
Train test split seed: [42m8[0m

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.6s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   37.9s
[Parallel(n_jobs=-1)]: Done 8712 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 12416 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 16896 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  2.1min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:   11.6s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   28.5s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   44.6s
[Parallel(n_jobs=-1)]: Done 8916 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 12496 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 17242 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  2.4min finished



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 656 tasks      | elapsed:    6.3s
[Parallel(n_jobs=-1)]: Done 1656 tasks      | elapsed:   18.7s
[Parallel(n_jobs=-1)]: Done 3056 tasks      | elapsed:   33.1s
[Parallel(n_jobs=-1)]: Done 4856 tasks      | elapsed:   45.4s
[Parallel(n_jobs=-1)]: Done 7056 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 9656 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 15344 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  2.3min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 696 tasks      | elapsed:   10.2s
[Parallel(n_jobs=-1)]: Done 1496 tasks      | elapsed:   22.4s
[Parallel(n_jobs=-1)]: Done 2208 tasks      | elapsed:   37.0s
[Parallel(n_jobs=-1)]: Done 3520 tasks      | elapsed:   59.2s
[Parallel(n_jobs=-1)]: Done 5720 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 8320 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 11320 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 13828 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 16720 tasks      | elapsed:  3.6min
[Parallel(n_jobs=-1)]: Done 17265 out of 17280 | elapsed:  3.6min remaining:    0.2s
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.6min finished



------------------------------------------------------
Train test split seed: 9

------------------------------------------------------
Model type: XGBClassifier

Tuning for Metal vs. non-Metal binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 2368 tasks      | elapsed:   21.7s
[Parallel(n_jobs=-1)]: Done 5168 tasks      | elapsed:   39.5s
[Parallel(n_jobs=-1)]: Done 8768 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 13168 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  2.0min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Insulator vs. non-Insulator binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    9.8s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   25.3s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   40.9s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.8min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for MIT vs. non-MIT binary classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 1200 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 3200 tasks      | elapsed:   18.7s
[Parallel(n_jobs=-1)]: Done 6000 tasks      | elapsed:   29.3s
[Parallel(n_jobs=-1)]: Done 9600 tasks      | elapsed:   50.8s
[Parallel(n_jobs=-1)]: Done 14000 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.



Tuning for Multiclass classifier
Fitting 5 folds for each of 3456 candidates, totalling 17280 fits


[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   12.3s
[Parallel(n_jobs=-1)]: Done 1850 tasks      | elapsed:   25.4s
[Parallel(n_jobs=-1)]: Done 3000 tasks      | elapsed:   41.6s
[Parallel(n_jobs=-1)]: Done 6467 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 8528 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 12224 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 14424 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 17280 out of 17280 | elapsed:  3.1min finished


In [11]:
# pretty-print the tuned hyperparameters, nested by split seed -> model type -> classification task
pprint.pprint(best_params)

OrderedDict([('0',
              OrderedDict([('XGBClassifier',
                            {'Insulator': {'base_score': 0.3,
                                           'learning_rate': 0.1,
                                           'max_depth': 5,
                                           'n_estimators': 20,
                                           'random_state': 31415926,
                                           'scale_pos_weight': 0.9130434782608695,
                                           'subsample': 0.8},
                             'MIT': {'base_score': 0.3,
                                     'learning_rate': 0.1,
                                     'max_depth': 3,
                                     'n_estimators': 20,
                                     'random_state': 31415926,
                                     'scale_pos_weight': 4.049180327868853,
                                     'subsample': 0.5},
                             'Metal': {'base_score': 

# Evaluate the tuned model with the test set

In [15]:
# Refit every tuned model on the training portion of each train/test split, score it on the
# matching test portion, and summarise each metric by its median and IQR across the splits.
# One record per (model type, classification task, metric) is appended to `metrics_dicts`.
metrics_dicts = []
metric_funcs = [precision_score, recall_score, roc_auc_score, f1_score]
# the metric names only depend on `metric_funcs`, so compute them once up front
metric_names = [func.__name__ for func in metric_funcs]
average_method = "weighted"
# iterate through all the model types
for model in lst_of_models:
    # get the model type name
    model_name = model.__name__
    # iterate through the classification tasks
    for choice in ["Metal", "Insulator", "MIT"]:
        # initialize a dictionary to store the metric results, one list of per-split values per metric
        results = OrderedDict((metric_name, []) for metric_name in metric_names)
        # iterate through all the train test split seeds
        for i, split_seed in enumerate(SPLIT_RANDOM_SEEDS):
            # get the tuned hyperparameters for a given combo of split seed, model name, and classification task
            best_params_split_seed = best_params[str(split_seed)][model_name][choice]
            # initialize the model with tuned hyperparameters
            eval_model = model(**best_params_split_seed)
            # get the train test splits
            df_eval_train = splits[i][0]
            df_eval_test = splits[i][1]
            # process the data into the proper input format
            X_train, y_train = load_data(df_eval_train, choice)
            X_test, y_test = load_data(df_eval_test, choice)
            # fit the model on the training data set
            eval_model.fit(X_train, y_train)
            # predict the labels for the test set
            # NOTE(review): every metric below, including roc_auc_score, is computed from these
            # hard label predictions rather than from predicted probabilities
            y_pred = eval_model.predict(X_test)
            # iterate through the metrics
            for metric_func in metric_funcs:
                # get the metric value
                # the task list above has no "Multiclass" entry, so this branch only runs if one is added
                if choice == "Multiclass" and metric_func.__name__ == "roc_auc_score":
                    ohot = LabelBinarizer()
                    ytest = ohot.fit_transform(y_test)
                    # reuse the encoding fitted on the true labels: re-fitting on the predictions
                    # would drop the column of any class that was never predicted and misalign
                    # the two indicator matrices
                    ypred = ohot.transform(y_pred)
                    metric_value = metric_func(ytest, ypred, average=average_method, multi_class="ovr")
                else:
                    metric_value = metric_func(y_test, y_pred, average=average_method)
                # add the metric value to the list
                results[metric_func.__name__].append(metric_value)
        print("\n----------------------------------------------------------------------")
        print("Model type: %s" % model_name)
        if choice == "Multiclass":
            print("Evaluating the Multiclass classifier on train test splits with the following seeds\n%s" % SPLIT_RANDOM_SEEDS)
        else:
            print("Evaluating the %s vs. non-%s binary classifier on train test splits with the following seeds\n%s" % (choice, choice, SPLIT_RANDOM_SEEDS))
        for key, metric_values in results.items():
            # summarise the per-split values; the median is both printed and stored
            full_metric_name = key+"_"+average_method
            median_value = np.median(metric_values)
            print("Median %s: %.2f w/ IQR: %.2f" % (full_metric_name, median_value, iqr(metric_values)))
            metrics_dict = {"model_type": model_name, "feature_set": "holdout_test", "positive_class": choice,
                            "metric_name": full_metric_name, "metric_value": median_value, "raw_metric": metric_values}
            metrics_dicts.append(metrics_dict)


----------------------------------------------------------------------
Model type: XGBClassifier
Evaluating the Metal vs. non-Metal binary classifier on train test splits with the following seeds
[0 1 2 3 4 5 6 7 8 9]
Median precision_score_weighted: 0.90 w/ IQR: 0.06
Median recall_score_weighted: 0.90 w/ IQR: 0.06
Median roc_auc_score_weighted: 0.84 w/ IQR: 0.08
Median f1_score_weighted: 0.90 w/ IQR: 0.06

----------------------------------------------------------------------
Model type: XGBClassifier
Evaluating the Insulator vs. non-Insulator binary classifier on train test splits with the following seeds
[0 1 2 3 4 5 6 7 8 9]
Median precision_score_weighted: 0.83 w/ IQR: 0.07
Median recall_score_weighted: 0.81 w/ IQR: 0.06
Median roc_auc_score_weighted: 0.82 w/ IQR: 0.06
Median f1_score_weighted: 0.81 w/ IQR: 0.06

----------------------------------------------------------------------
Model type: XGBClassifier
Evaluating the MIT vs. non-MIT binary classifier on train test splits wi

In [19]:
# assemble the per-metric records collected during evaluation into a single table
holdout_test_metrics = pd.DataFrame.from_records(metrics_dicts)
# display the table
holdout_test_metrics

Unnamed: 0,model_type,feature_set,positive_class,metric_name,metric_value,raw_metric
0,XGBClassifier,holdout_test,Metal,precision_score_weighted,0.899115,"[0.9182900432900433, 0.7908163265306122, 0.884..."
1,XGBClassifier,holdout_test,Metal,recall_score_weighted,0.9,"[0.9142857142857143, 0.8, 0.8857142857142857, ..."
2,XGBClassifier,holdout_test,Metal,roc_auc_score_weighted,0.845,"[0.91, 0.7100000000000001, 0.83, 0.81, 0.9, 0...."
3,XGBClassifier,holdout_test,Metal,f1_score_weighted,0.895356,"[0.9154518950437319, 0.788013318534961, 0.8815..."
4,XGBClassifier,holdout_test,Insulator,precision_score_weighted,0.833123,"[0.8732919254658386, 0.8013071895424837, 0.858..."
5,XGBClassifier,holdout_test,Insulator,recall_score_weighted,0.814286,"[0.8285714285714286, 0.8, 0.8571428571428571, ..."
6,XGBClassifier,holdout_test,Insulator,roc_auc_score_weighted,0.816993,"[0.8333333333333333, 0.8006535947712418, 0.857..."
7,XGBClassifier,holdout_test,Insulator,f1_score_weighted,0.812143,"[0.8242857142857143, 0.8, 0.8571428571428571, ..."
8,XGBClassifier,holdout_test,MIT,precision_score_weighted,0.907029,"[0.8857142857142857, 0.9225806451612902, 0.942..."
9,XGBClassifier,holdout_test,MIT,recall_score_weighted,0.9,"[0.8857142857142857, 0.9142857142857143, 0.942..."


In [20]:
# shorten the metric labels in the "metric_name" column
# (e.g. "precision_score_weighted" becomes "precision_weighted")
metric_name_aliases = {
    "precision_score_weighted": "precision_weighted",
    "recall_score_weighted": "recall_weighted",
    "roc_auc_score_weighted": "roc_auc",
    "f1_score_weighted": "f1_weighted",
}
holdout_test_metrics = holdout_test_metrics.replace({"metric_name": metric_name_aliases})

In [21]:
# write the hold-out metrics table to an Excel workbook, leaving out the row index
metrics_output_path = "../data/processed/train_test_split_model_metrics.xlsx"
holdout_test_metrics.to_excel(metrics_output_path, index=False)