In [1]:
import pandas as pd
import numpy as np
import random as rd
import warnings, glob, os, sys, shutil
from tqdm.notebook import tqdm
from sklearn.metrics import precision_score, f1_score, recall_score, accuracy_score, confusion_matrix, auc, roc_auc_score
from sklearn.metrics import classification_report
pd.set_option("display.max_columns", None)
warnings.filterwarnings("ignore")
# from sklearn.svm import SVC
# from catboost import CatBoostClassifier, Pool

In [118]:

# Performance evaluation on labelled data
def metric_calc(y_test, y_pred, ths=0.5):
    """Compute classification metrics for one set of predictions.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels. The confusion matrix is unpacked into four
        cells, so exactly two classes are expected.
    y_pred : array-like
        Prediction scores. They are passed unchanged to ``roc_auc_score``.
        When ``ths`` is not None they are then binarised with
        ``score > ths``; when ``ths`` is None they are used as-is for the
        label-based metrics, so they must already be class labels.
    ths : float or None, default 0.5
        Decision threshold, or None to skip the binarisation step.

    Returns
    -------
    list
        ``[accuracy, macro precision, macro recall, specificity, macro F1,
        ROC AUC, "TP / TN", "FP / FN"]`` -- the last two entries are
        formatted strings built from the confusion-matrix counts.
    """
    # Accept plain Python lists as well as arrays / Series for the comparison below.
    y_pred = np.asarray(y_pred)

    # AUC is computed on the raw scores, before any thresholding.
    # (Named auc_score so the imported sklearn.metrics.auc is not shadowed.)
    auc_score = roc_auc_score(y_test, y_pred)
    if ths is not None:
        y_pred = (y_pred > ths).astype(int)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='macro')
    # Macro-averaged recall (i.e. averaged over both classes).
    sens = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    # For a 2x2 matrix, ravel() yields the cells in the order tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    spec = tn / (tn + fp)
    return [acc, prec, sens, spec, f1, auc_score, f"{tp} / {tn}", f"{fp} / {fn}"]

def neg_pos_cross_table(ans, ped):
    """Pair ground-truth labels with predictions and split the pairs by class.

    Parameters
    ----------
    ans : array-like
        Ground-truth labels; a value equal to 0 is tagged 'neg', anything
        else is tagged 'pos'.
    ped : array-like
        Predictions stored next to the labels.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``(positives, negatives)``; both frames carry the columns
        'answer', 'predict' and 'type' and keep their original row labels.
    """
    def _kind(label):
        # Only a value equal to 0 counts as negative.
        if label == 0:
            return 'neg'
        return 'pos'

    paired = pd.DataFrame({'answer': ans, "predict": ped})
    paired['type'] = list(map(_kind, paired['answer']))

    is_positive = paired['type'] == 'pos'
    is_negative = paired['type'] == 'neg'
    return paired[is_positive], paired[is_negative]

In [140]:
# --- Evaluation configuration ---------------------------------------------
model_list = ['catboost']
data_type = ['A', 'B', 'C']
cv_list = [1, 2, 3, 4, 5]
thersholds = 0.5  # decision threshold handed to metric_calc (variable name kept as-is)

RESULT_DIR = './XX.model_prob_metric_save/'
PROB_COLUMN = "fold_prob_score_1"   # predicted-probability column read from each result file
LABEL_COLUMN = 'Final Judge'        # ground-truth column read from each result file

SCORE_COLUMNS = ['Accuracy', 'Precision [macro]', 'Sensitivity [macro]', 'Specificity', 'F1-score [macro]', 'AUC']
FOLD_COLUMNS = ['Model'] + SCORE_COLUMNS + ['TP / TN', 'FP / FN']
SUMMARY_COLUMNS = ['Model', 'Accuracy [mean±std]', 'Precision [mean±std]', 'Sensitivity [mean±std]',
                   'Specificity [mean±std]', 'F1-score [mean±std]', 'AUC [mean±std]']
MEAN_DECIMALS = 3
STD_DECIMALS = 2  # NOTE(review): coarser than the mean, so small stds render as "0.0"

# --- Collect per-fold metrics and per-(model, dataset) summaries -----------
fold_rows, fold_index = [], []
summary_rows, summary_index = [], []
performace_table = pd.DataFrame([], columns=FOLD_COLUMNS)

# next_num numbers every (model, dataset, fold) combination in iteration order;
# it is used as the row label so fold rows are 0, 1, 2, ... and each summary row
# carries the label of the last fold in its group.
next_num = 0
for model_name in model_list:
    for db_name in data_type:
        group_rows = []
        for fold in cv_list:
            row_id = next_num
            next_num += 1

            csv_path = os.path.join(RESULT_DIR, f"{model_name}", f"{model_name}_fold_{fold}_results_{db_name}.csv")
            metric_df = pd.read_csv(csv_path)
            if PROB_COLUMN not in metric_df.columns:
                # Skip only this fold and keep going. print() is used because
                # warnings are silenced globally in the import cell.
                print(f"[skip] column '{PROB_COLUMN}' not found in {csv_path}")
                continue

            pred_prob = metric_df[PROB_COLUMN].values
            gt = metric_df[LABEL_COLUMN].values
            temp_metric = [f"{model_name} = CV:{fold} | DB:{db_name}"] + metric_calc(gt, pred_prob, ths=thersholds)
            group_rows.append(temp_metric)
            fold_rows.append(temp_metric)
            fold_index.append(row_id)

        if not group_rows:
            # No usable fold for this model/dataset pair: nothing to summarise.
            continue

        # Mean and standard deviation over the folds that were available for this pair.
        performace_table = pd.DataFrame(group_rows, columns=FOLD_COLUMNS)
        single_model_meanstd_df = performace_table[SCORE_COLUMNS].agg(['mean', 'std'])
        mean_std_text = (
            single_model_meanstd_df.loc['mean'].round(MEAN_DECIMALS).astype(str)
            + "±"
            + single_model_meanstd_df.loc['std'].round(STD_DECIMALS).astype(str)
        )
        summary_rows.append([f"{model_name}-{db_name}"] + mean_std_text.tolist())
        summary_index.append(next_num - 1)

# Build both result tables once instead of growing them row by row.
overview_performace_table = pd.DataFrame(fold_rows, columns=FOLD_COLUMNS, index=fold_index)
total_performace_table = pd.DataFrame(summary_rows, columns=SUMMARY_COLUMNS, index=summary_index)
            

In [141]:
# Display the summary table: one row per "<model>-<dataset>" with "mean±std" strings for each metric.
total_performace_table

Unnamed: 0,Model,Accuracy [mean±std],Precision [mean±std],Sensitivity [mean±std],Specificity [mean±std],F1-score [mean±std],AUC [mean±std]
4,catboost-A,0.903±0.0,0.845±0.01,0.753±0.02,0.971±0.0,0.787±0.01,0.926±0.01
9,catboost-B,0.845±0.0,0.671±0.05,0.524±0.01,0.989±0.0,0.509±0.02,0.691±0.03
14,catboost-C,0.846±0.01,0.657±0.06,0.519±0.01,0.989±0.0,0.501±0.01,0.683±0.03


In [143]:
# Display the per-fold metrics: one row per model / CV fold / dataset combination.
overview_performace_table

Unnamed: 0,Model,Accuracy,Precision [macro],Sensitivity [macro],Specificity,F1-score [macro],AUC,TP / TN,FP / FN
0,catboost = CV:1 | DB:A,0.899477,0.846961,0.732121,0.974883,0.772203,0.923253,3242 / 35127,905 / 3383
1,catboost = CV:2 | DB:A,0.907127,0.858372,0.757626,0.974584,0.795365,0.933666,3583 / 35086,915 / 3044
2,catboost = CV:3 | DB:A,0.899873,0.832911,0.749229,0.967166,0.781651,0.917061,3489 / 34788,1181 / 3078
3,catboost = CV:4 | DB:A,0.899064,0.829399,0.749805,0.965719,0.780994,0.923584,3505 / 34734,1233 / 3060
4,catboost = CV:5 | DB:A,0.909451,0.85504,0.774213,0.970688,0.806519,0.932285,3846 / 35003,1057 / 2811
5,catboost = CV:1 | DB:B,0.843112,0.641323,0.516111,0.98984,0.495739,0.662932,279 / 35560,365 / 6304
6,catboost = CV:2 | DB:B,0.845594,0.685973,0.523774,0.990332,0.509315,0.704047,377 / 35543,347 / 6212
7,catboost = CV:3 | DB:B,0.841862,0.622779,0.516589,0.987161,0.497959,0.680876,301 / 35367,460 / 6240
8,catboost = CV:4 | DB:B,0.844732,0.658091,0.519068,0.989896,0.501281,0.67028,315 / 35467,362 / 6215
9,catboost = CV:5 | DB:B,0.850916,0.748034,0.542972,0.990066,0.542455,0.735311,635 / 35580,357 / 5988
