In [7]:
# Evaluation metrics
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.metrics import cohen_kappa_score, matthews_corrcoef
from imblearn.metrics import geometric_mean_score


def socre(y_true, y_pred, y_proba, k):
    """Print a set of binary-classification metrics for one batch of predictions.

    The name is a misspelling of "score"; it is kept as-is because other
    cells call the function under this name.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels. The confusion matrix is built with
        ``labels=[0, 1]``, so 0 is treated as negative and 1 as positive.
    y_pred : array-like
        Predicted hard labels.
    y_proba : array-like
        Scores handed to ``roc_auc_score`` (callers in this notebook pass the
        ``proba_smote_1`` column — presumably the positive-class probability).
    k : int
        Number of decimals used when rounding every printed metric,
        including G-mean.

    Returns
    -------
    None
        The metrics are only printed.
    """
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    score_roc_auc = roc_auc_score(y_true, y_proba)
    score_accuracy = accuracy_score(y_true, y_pred)
    score_precision = precision_score(y_true, y_pred)
    score_f1 = f1_score(y_true, y_pred)
    score_recall = recall_score(y_true, y_pred)  # sensitivity
    specificity = tn / (tn + fp)
    # G-mean is the geometric mean of specificity and sensitivity.
    g_mean = np.sqrt(specificity * score_recall)
    score_kappa = cohen_kappa_score(y_true, y_pred)
    score_mcc = matthews_corrcoef(y_true, y_pred)

    print(f"tn: {tn}, fp: {fp}, fn: {fn}, tp: {tp}")
    named_scores = (
        ('roc_auc_score', score_roc_auc),
        ('accuracy_score', score_accuracy),
        ('precision_score', score_precision),
        ('f1_score', score_f1),
        ('recall_score', score_recall),
        ('cohen_kappa_score', score_kappa),
        ('matthews_corrcoef', score_mcc),
    )
    for metric_name, metric_value in named_scores:
        print(f'{metric_name} : {round(metric_value, k)}')
    # Honour k here as well; this line used to be hard-coded to 4 decimals.
    print(f'G-mean: {round(g_mean, k)}')

In [None]:
import os
import pandas as pd
from autogluon.tabular import TabularPredictor


# Preset list that is handed to predictor.fit() further down in this cell.
presets = ['best_quality']

# Predictor for the `label1` target column. `save_path` is the directory
# AutoGluon uses for its output, and models are evaluated by ROC AUC.
predictor = TabularPredictor(
    label=label1,
    path=save_path,
    eval_metric='roc_auc',
)
# Model families to train, keyed by AutoGluon model code. A value is either a
# single config dict ({} = that family's defaults) or a list of config dicts.
#
# Alternatives that are deliberately left out of this run:
#   - GBM: the default config ({}) and 'GBMLarge'
#   - FASTAI: {}
#   - RF / XT: 'entropy' criterion (suffix 'Entr', binary/multiclass) and
#     'squared_error' criterion (suffix 'MSE', regression only)
#   - KNN: 'uniform' (suffix 'Unif') and 'distance' (suffix 'Dist') weights
hyperparameters = {
    # Extra-trees variant of GBM; trained models get the 'XT' name suffix.
    'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}}],
    'CAT': {},
    'XGB': {},
    # RF and XT use the same gini configuration. The comprehension builds a
    # separate dict for each family so the two entries never share state.
    **{
        forest_family: [
            {
                'criterion': 'gini',
                'ag_args': {
                    'name_suffix': 'Gini',
                    'problem_types': ['binary', 'multiclass'],
                },
            },
        ]
        for forest_family in ('RF', 'XT')
    },
}

# Ten-fold bagging (a single bagging repeat) with one extra stacking level,
# restricted to the model families listed in `hyperparameters`.
#
# Options that are left switched off for this run:
#   ag_args_fit={'num_gpus': 1}   (train on GPU)
#   tuning_data=val_data
#   auto_stack=True
#   use_bag_holdout=True
#   holdout_frac=0.3
predictor.fit(
    train_data=train_data,
    presets=presets,
    hyperparameters=hyperparameters,
    # bagging / stacking layout
    num_bag_folds=10,
    num_bag_sets=1,
    num_stack_levels=1,
    # overall training budget
    time_limit=3000,
)

# Draw the stacked-ensemble architecture of the models trained by fit().
predictor.plot_ensemble_model()

# Evaluate the trained models on the validation cohort, write the leaderboard
# table to <save_path>/result.csv, and show it as this cell's output.
result = predictor.leaderboard(data=val_data, silent=True)
result.to_csv(
    os.path.join(save_path, "result.csv"),
    index=False,
)
result

In [None]:
# Train set: out-of-fold predictions of the selected model.

model_name = 'WeightedEnsemble_L2'

# Only the first 971 rows of the combined frame are kept and scored.
# NOTE(review): presumably these are the original training samples, with
# SMOTE-generated rows appended after them — confirm against the cell that
# builds `train_data`.
N_TRAIN_ROWS_KEPT = 971

pred_train = predictor.get_oof_pred(model=model_name)
# Build the one-column frame via Series(name=...).to_frame():
# pd.DataFrame(series, columns=[...]) yields an empty column whenever the
# Series already carries a different name.
pred_train_df = pd.Series(pred_train, name='pred_smote').to_frame()
proba_train_df = predictor.get_oof_pred_proba(model=model_name)
proba_train_df.columns = ['proba_smote_0', 'proba_smote_1']

# Column-wise concat aligns the pieces on their index.
res_train = pd.concat([train_data, pred_train_df, proba_train_df, train_name], axis=1)
res_train = res_train.iloc[0:N_TRAIN_ROWS_KEPT, :]
res_train.to_csv(os.path.join(save_path, 'train_pred.csv'), index=False, encoding='utf')
print(res_train.shape)
# A bare `res_train.head()` in the middle of a cell is never rendered;
# display() makes the preview actually appear.
display(res_train.head())

print('Train: \n')
socre(res_train['label1'],
      res_train['pred_smote'],
      res_train['proba_smote_1'],
      k=4)

In [None]:
# Validation set: predictions of the same model (`model_name`) on val_data.

pred_val = predictor.predict(data=val_data,
                             model=model_name)
proba_val_df = predictor.predict_proba(data=val_data,
                                       model=model_name)
proba_val_df.columns = ['proba_smote_0', 'proba_smote_1']
# Name the prediction column in one step, independent of whatever name the
# returned Series carries.
pred_val_df = pd.Series(pred_val, name='pred_smote').to_frame()

# Column-wise concat aligns the pieces on their index.
res_val = pd.concat([val_data, pred_val_df, proba_val_df, val_name], axis=1)
res_val.to_csv(os.path.join(save_path, 'val_pred.csv'),
               index=False,
               encoding='utf')

print(res_val.shape)
# A bare `res_val.head()` in the middle of a cell is never rendered;
# display() makes the preview actually appear.
display(res_val.head())

print('Validation: \n')
socre(res_val['label1'],
      res_val['pred_smote'],
      res_val['proba_smote_1'],
      k=4)