In [None]:
import pandas as pd
import glob
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, auc, precision_recall_curve

In [4]:
def culc_integrate_upper_limit(
    run_name_list,
    target_emo,
    epoch=None,
    root_dir="/mnt/iot-qnap3/mochida/medical-care/emotionestimation/reports/PIMD_A",
):
    """Report the oracle ("upper limit") score of an ensemble of runs.

    For every sample the ensemble is counted as correct when at least one
    run predicted the ground-truth label; otherwise the row-wise maximum of
    the runs' predictions is used as the ensemble prediction.

    Parameters
    ----------
    run_name_list : sequence of str
        Run directory names under ``root_dir``. Each run must contain a
        ``*_pred_all.csv`` file with the columns ``img_path``, ``emo_gt``,
        ``emo_pred`` and ``emo_pos``.
    target_emo : str
        ``'comfort'`` maps ground-truth label 2 to 0; ``'discomfort'`` maps
        label 1 to 0 and then label 2 to 1. Any other value leaves the
        ground truth unchanged.
    epoch : sequence, optional
        ``epoch[i]`` selects the ``epoch{...}`` sub-directory of run ``i``.
        When omitted, the CSV is looked up directly inside the run directory.
    root_dir : str, optional
        Directory that contains the run directories.

    Returns
    -------
    pandas.DataFrame
        Columns ``emo_gt``, one ``<run>_emo_pred`` per run, and
        ``upper_limit``.

    Raises
    ------
    ValueError
        If ``run_name_list`` is empty or ``epoch`` has fewer entries than
        ``run_name_list``.
    RuntimeError
        If no ``*_pred_all.csv`` is found for a run.
    """
    if len(run_name_list) == 0:
        raise ValueError("run_name_list must not be empty")
    if epoch is not None and len(epoch) < len(run_name_list):
        raise ValueError(
            f"epoch has {len(epoch)} entries but {len(run_name_list)} runs were given"
        )

    preds = pd.DataFrame()
    for i, run_name in enumerate(run_name_list):
        run_dir = f"{root_dir}/{run_name}"
        if epoch is not None:
            run_dir = f"{run_dir}/epoch{epoch[i]}"

        # Glob once; sort so the chosen file is deterministic if several match.
        matches = sorted(glob.glob(f"{run_dir}/*_pred_all.csv"))
        if not matches:
            raise RuntimeError(f"{run_name} is not found")
        _pred = pd.read_csv(matches[0])

        # The first run supplies the sample identifiers and the ground truth.
        # NOTE(review): later runs are aligned by row position only; this
        # assumes every CSV lists the same samples in the same order - confirm.
        if i == 0:
            preds['img_path'] = _pred['img_path']
            preds['emo_gt'] = _pred['emo_gt']

        preds[f'{run_name}_emo_pred'] = _pred['emo_pred']
        preds[f'{run_name}_emo_pos'] = _pred['emo_pos']

    # Binarize the ground truth for the requested target emotion.
    gt_list = preds['emo_gt'].copy()
    if target_emo == 'comfort':
        gt_list = gt_list.replace(2, 0)
    elif target_emo == 'discomfort':
        gt_list = gt_list.replace(1, 0)
        gt_list = gt_list.replace(2, 1)

    # Keep only the hard predictions (columns whose name contains 'emo_pred').
    run_preds = preds.filter(like='emo_pred')
    gt_and_pred = pd.DataFrame({'emo_gt': gt_list}).join(run_preds)

    # If any run matches the ground truth, the oracle takes the ground truth;
    # otherwise it falls back to the row-wise maximum of the run predictions.
    # Computed in one vectorized step over the prediction columns only, which
    # avoids chained assignment (`df[col][i] = ...`).
    pred_values = gt_and_pred.iloc[:, 1:]
    any_run_correct = pred_values.eq(gt_and_pred['emo_gt'], axis=0).any(axis=1)
    gt_and_pred['upper_limit'] = gt_and_pred['emo_gt'].where(
        any_run_correct, pred_values.max(axis=1)
    )

    # calculate metrics
    # NOTE(review): the oracle has no continuous score, so both AUC values
    # below are computed from hard labels.
    print(classification_report(gt_and_pred['emo_gt'], gt_and_pred['upper_limit']))

    print(f"roc_auc_score: {roc_auc_score(gt_and_pred['emo_gt'], gt_and_pred['upper_limit'])}")
    pre, rec, _ = precision_recall_curve(gt_and_pred['emo_gt'], gt_and_pred['upper_limit'])
    print(f"pr_auc_score: {auc(rec, pre)}")

    return gt_and_pred

In [5]:
# Oracle ("upper limit") evaluation of the three discomfort runs.
# `epoch` is positional: entry i selects the `epoch<N>` sub-directory read for run i.
dis_run_name_list = ['4_d_a', '4_d_g', '4_d_h']

p = culc_integrate_upper_limit(dis_run_name_list, 'discomfort', epoch=[10, 10, 5])

              precision    recall  f1-score   support

           0       0.93      0.98      0.95       308
           1       0.98      0.92      0.95       308

    accuracy                           0.95       616
   macro avg       0.95      0.95      0.95       616
weighted avg       0.95      0.95      0.95       616

roc_auc_score: 0.952922077922078
pr_auc_score: 0.9718689614883386


In [6]:
# Oracle ("upper limit") evaluation of the three comfort runs.
# `epoch` is positional: entry i selects the `epoch<N>` sub-directory read for run i.
# NOTE: this rebinds `p`, replacing the result of the discomfort cell.
com_run_name_list = ['4_c_a', '4_c_g', '4_c_h']

p = culc_integrate_upper_limit(com_run_name_list, 'comfort', epoch=[10, 10, 5])

              precision    recall  f1-score   support

           0       0.96      1.00      0.98       775
           1       1.00      0.96      0.98       717

    accuracy                           0.98      1492
   macro avg       0.98      0.98      0.98      1492
weighted avg       0.98      0.98      0.98      1492

roc_auc_score: 0.9790794979079498
pr_auc_score: 0.9891331172108988


In [61]:
def culc_integrate_average(
    run_name_list,
    target_emo,
    threshold=0.5,
    epoch=None,
    root_dir="/mnt/iot-qnap3/mochida/medical-care/emotionestimation/reports/PIMD_A",
):
    """Evaluate an ensemble that averages the runs' ``emo_pos`` scores.

    The ``emo_pos`` columns of all runs are averaged per sample; a sample is
    predicted as 1 when the average is greater than or equal to
    ``threshold`` and as 0 otherwise.

    Parameters
    ----------
    run_name_list : sequence of str
        Run directory names under ``root_dir``. Each run must contain a
        ``*_pred_all.csv`` file with the columns ``img_path``, ``emo_gt``,
        ``emo_pred`` and ``emo_pos``.
    target_emo : str
        ``'comfort'`` maps ground-truth label 2 to 0; ``'discomfort'`` maps
        label 1 to 0 and then label 2 to 1. Any other value leaves the
        ground truth unchanged.
    threshold : float, optional
        Decision threshold applied to the averaged score.
    epoch : sequence, optional
        When given, ``epoch[i]`` selects the ``epoch{...}`` sub-directory of
        run ``i``. When omitted, the CSV is looked up directly inside the
        run directory.
    root_dir : str, optional
        Directory that contains the run directories.

    Returns
    -------
    pandas.DataFrame
        Columns ``emo_gt``, one ``<run>_emo_pos`` per run,
        ``average_emo_pos`` and ``average_emo_pred``.

    Raises
    ------
    ValueError
        If ``run_name_list`` is empty or ``epoch`` has fewer entries than
        ``run_name_list``.
    RuntimeError
        If no ``*_pred_all.csv`` is found for a run.
    """
    if len(run_name_list) == 0:
        raise ValueError("run_name_list must not be empty")
    if epoch is not None and len(epoch) < len(run_name_list):
        raise ValueError(
            f"epoch has {len(epoch)} entries but {len(run_name_list)} runs were given"
        )

    preds = pd.DataFrame()
    for i, run_name in enumerate(run_name_list):
        run_dir = f"{root_dir}/{run_name}"
        if epoch is not None:
            run_dir = f"{run_dir}/epoch{epoch[i]}"

        # Glob once; sort so the chosen file is deterministic if several match.
        matches = sorted(glob.glob(f"{run_dir}/*_pred_all.csv"))
        if not matches:
            raise RuntimeError(f"{run_name} is not found")
        _pred = pd.read_csv(matches[0])

        # The first run supplies the sample identifiers and the ground truth.
        # NOTE(review): later runs are aligned by row position only; this
        # assumes every CSV lists the same samples in the same order - confirm.
        if i == 0:
            preds['img_path'] = _pred['img_path']
            preds['emo_gt'] = _pred['emo_gt']

        preds[f'{run_name}_emo_pred'] = _pred['emo_pred']
        preds[f'{run_name}_emo_pos'] = _pred['emo_pos']

    # Binarize the ground truth for the requested target emotion.
    gt_list = preds['emo_gt'].copy()
    if target_emo == 'comfort':
        gt_list = gt_list.replace(2, 0)
    elif target_emo == 'discomfort':
        gt_list = gt_list.replace(1, 0)
        gt_list = gt_list.replace(2, 1)

    # Average the per-run scores (columns whose name contains 'emo_pos'),
    # then threshold the average to obtain the hard ensemble prediction.
    run_scores = preds.filter(like='emo_pos')
    gt_and_pred = pd.DataFrame({'emo_gt': gt_list}).join(run_scores)
    gt_and_pred['average_emo_pos'] = gt_and_pred.iloc[:, 1:].mean(axis=1)
    gt_and_pred['average_emo_pred'] = (
        gt_and_pred['average_emo_pos'].ge(threshold).astype(int)
    )

    # calculate metrics
    # The classification report needs hard labels. The ranking metrics are
    # computed from the continuous averaged score: feeding thresholded 0/1
    # labels would reduce each curve to a single operating point.
    print(classification_report(gt_and_pred['emo_gt'], gt_and_pred['average_emo_pred']))
    print(f"roc_auc_score: {roc_auc_score(gt_and_pred['emo_gt'], gt_and_pred['average_emo_pos'])}")
    pre, rec, _ = precision_recall_curve(gt_and_pred['emo_gt'], gt_and_pred['average_emo_pos'])
    print(f"pr_auc_score: {auc(rec, pre)}")

    return gt_and_pred