In [1]:
#imports to work with...
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay, average_precision_score
import torch

In [2]:
# Experiment registry. Each pair is (short name used for the output folder,
# directory that holds the per-seed runs of that experiment). The two parallel
# lists consumed by the cells below are derived from it, so they cannot drift.
_EXPERIMENTS = (
    ('no_freeze_baseline', '../runs_trainings/no_freeze/multilabel/weighted'),
    ('no_freeze_joint', '../runs_trainings/no_freeze/joint_incremental_multilabel/weighted'),
    ('no_freeze_incdec', '../runs_trainings/no_freeze/incremental_decremental_multilabel/weighted'),
    ('no_freeze_dec', '../runs_trainings/no_freeze/decremental_multilabel/weighted'),

    ('freeze_baseline', '../runs_trainings/freeze_backbone/baseline_multilabel/weighted/new'),
    ('freeze_joint', '../runs_trainings/freeze_backbone/joint_incremental_multilabel/weighted/new'),
    ('freeze_incdec', '../runs_trainings/freeze_backbone/incremental_decremental_multilabel/weighted/new'),
    ('freeze_dec', '../runs_trainings/freeze_backbone/decremental_multilabel/weighted/new'),

    ('lwf_baseline', '../runs_trainings/lwf/baseline/lambda_1'),
    ('lwf_joint', '../runs_trainings/lwf/joint_incremental/lambda_1'),
    ('lwf_incdec', '../runs_trainings/lwf/incremental_decremental/lambda_1/temp_1'),
    ('lwf_dec', '../runs_trainings/lwf/decremental/lambda_1'),

    # NOTE(review): 'fd_baseline' reads from the lwf baseline folder, the same
    # path as 'lwf_baseline' -- confirm this is intended and not a copy-paste slip.
    ('fd_baseline', '../runs_trainings/lwf/baseline/lambda_1'),
    ('fd_joint', '../runs_trainings/fd/joint_incremental/lambda_01'),
    ('fd_incdec', '../runs_trainings/fd/incremental_decremental/lambda_01'),
    ('fd_dec', '../runs_trainings/fd/decremental/lambda_01'),
)

results_path = [run_dir for _, run_dir in _EXPERIMENTS]
save_exp_name = [short_name for short_name, _ in _EXPERIMENTS]


In [3]:
def extract_data(task_dataframe):
    """
    Pull the per-class score and target columns out of one error-analysis table.

    For each of the five classes the column named after the class and the
    column 'target_<class>' are read and converted to plain lists.

    Returns (probabilities, targets): two lists with one inner list per class,
    in the order food, phone, smoking, fatigue, selfcare.
    """
    class_names = ('food', 'phone', 'smoking', 'fatigue', 'selfcare')

    probabilities, targets = [], []
    for name in class_names:
        targets.append(task_dataframe['target_' + name].tolist())
        probabilities.append(task_dataframe[name].tolist())
    return probabilities, targets


def extract_subcategory_data(task_dataframe, class_name, subcategory):
    """
    Build per-class score/target lists restricted to one subcategory of a class.

    The rows kept are, in this order: every row whose "subcategory" column
    equals `subcategory`, followed by every row whose 'target_<class_name>'
    value is not 1. The two selections are concatenated as they are, so a row
    matching both conditions appears twice.

    Raises ValueError when `class_name` is not one of the five known classes.

    Returns (probabilities, targets): two lists with one inner list per class,
    in the order food, phone, smoking, fatigue, selfcare.
    """
    class_names = ['food', 'phone', 'smoking', 'fatigue', 'selfcare']

    subcategory_rows = task_dataframe[task_dataframe["subcategory"] == subcategory]
    class_position = class_names.index(class_name)
    class_target = 'target_' + class_names[class_position]
    other_class_rows = task_dataframe[task_dataframe[class_target] != 1]
    selected = pd.concat([subcategory_rows, other_class_rows])

    probabilities, targets = [], []
    for name in class_names:
        targets.append(selected['target_' + name].tolist())
        probabilities.append(selected[name].tolist())
    return probabilities, targets


def get_precision_recall_for_prcurve(probabilities, labels, num_classes):
    """
    Compute the precision-recall curve and average precision of every class.

    `labels` and `probabilities` are sliced as [:, class_index], one column
    per class, for class_index in range(num_classes).

    Returns (precision, recall, ap): three dicts keyed by class index holding
    the outputs of precision_recall_curve (thresholds are dropped) and
    average_precision_score.
    """
    precision, recall, ap = {}, {}, {}
    for class_index in range(num_classes):
        curve = precision_recall_curve(labels[:, class_index], probabilities[:, class_index])
        precision[class_index], recall[class_index], _ = curve
        ap[class_index] = average_precision_score(labels[:, class_index], probabilities[:, class_index])
    return precision, recall, ap


def get_precision_recall_for_prcurve_subcat(probabilities, labels, num_classes, idx_class):
    """
    Compute the precision-recall curve and average precision of a single class.

    Only column `idx_class` of `labels` and `probabilities` is evaluated; the
    other classes are not touched, so their content cannot slow down or break
    the computation of the requested one.

    Parameters:
        probabilities: 2-D array-like sliced as [:, idx_class].
        labels: 2-D array-like sliced as [:, idx_class].
        num_classes: number of valid class indices.
        idx_class: index of the class to evaluate.

    Returns (precision, recall, ap) for class `idx_class`.

    Raises KeyError when `idx_class` is not in range(num_classes).
    """
    # Same exception type callers got from the former per-class dict lookup.
    if idx_class not in range(num_classes):
        raise KeyError(idx_class)

    class_labels = labels[:, idx_class]
    class_scores = probabilities[:, idx_class]
    precision, recall, _ = precision_recall_curve(class_labels, class_scores)
    ap = average_precision_score(class_labels, class_scores)
    return precision, recall, ap


def plot_pr_curve(precision, recall, ap, num_tasks, idx_class, class_name, output_path):
    """
    Draw the precision-recall curves of one class for the first and the last
    task on a single 8x8 figure and save it.

    `precision`, `recall` and `ap` are indexed as [task_id][idx_class]. Only
    task 0 and task num_tasks - 1 are drawn: task 0 with the default line
    style, any other drawn task dashed. The figure is written to
    output_path + '/' + class_name + '.png' and then closed; nothing is returned.
    """
    figure, ax = plt.subplots(figsize=(8, 8))
    last_task = num_tasks - 1
    for task_id in range(num_tasks):
        if task_id != 0 and task_id != last_task:
            continue
        curve = PrecisionRecallDisplay(
            recall=recall[task_id][idx_class],
            precision=precision[task_id][idx_class],
            average_precision=ap[task_id][idx_class],
        )
        line_style = {} if task_id == 0 else {'ls': '--'}
        curve.plot(ax=ax, name=f"PR Curve for task_id {task_id} for class {class_name}", **line_style)
        ax.legend(loc=0)
    figure.savefig(output_path + '/' + class_name + '.png')

    plt.close(figure)


def _seed_entry(per_seed, position, seed):
    # Dicts are looked up by seed value; any other container is read by position.
    return per_seed[seed] if isinstance(per_seed, dict) else per_seed[position]


def plot_pr_curve_seeds(precision, recall, ap, seeds, num_tasks, idx_class, class_name, output_path):
    """
    Overlay the first- and last-task precision-recall curves of one class for
    several seeds on a single 8x8 figure and save it.

    Parameters:
        precision, recall, ap: per-seed containers indexed as
            [seed][task_id][idx_class]. A list/tuple holds one entry per seed
            in the same order as `seeds`; a dict is keyed by seed value.
        seeds: seed identifiers; used for the legend labels.
        num_tasks: only task 0 and task num_tasks - 1 are drawn.
        idx_class: index of the class to plot.
        class_name: class label used in the legend and the file name.
        output_path: folder of the output image.

    Task 0 uses the default line style, any other drawn task is dashed. The
    figure is written to output_path + '/' + class_name + '.png' and then
    closed; nothing is returned.
    """
    figure, ax = plt.subplots(figsize=(8, 8))
    # Sequences are read by position, not by seed value: indexing a list with
    # the seed itself only works when the seeds happen to be 0..n-1.
    for position, seed in enumerate(seeds):
        seed_recall = _seed_entry(recall, position, seed)
        seed_precision = _seed_entry(precision, position, seed)
        seed_ap = _seed_entry(ap, position, seed)
        for task_id in range(num_tasks):
            if task_id != 0 and task_id != num_tasks - 1:
                continue
            display = PrecisionRecallDisplay(
                recall=seed_recall[task_id][idx_class],
                precision=seed_precision[task_id][idx_class],
                average_precision=seed_ap[task_id][idx_class],
            )
            line_style = {} if task_id == 0 else {'ls': '--'}
            display.plot(
                ax=ax,
                name=f"PR Curve for task_id {task_id} for class {class_name} seed {seed}",
                **line_style,
            )
            ax.legend(loc=0)
    figure.savefig(output_path + '/' + class_name + '.png')

    plt.close(figure)

def plot_pr_curve_subcat(precision, recall, ap, num_tasks, subcat_name, output_path):
    """
    Draw the precision-recall curves of one subcategory for the first and the
    last task on a single 8x8 figure and save it.

    `precision`, `recall` and `ap` are indexed as [task_id]. Only task 0 and
    task num_tasks - 1 are drawn, both with the default line style. The figure
    is written to output_path + '/' + subcat_name + '.png' and then closed;
    nothing is returned.
    """
    figure, ax = plt.subplots(figsize=(8, 8))
    last_task = num_tasks - 1
    for task_id in range(num_tasks):
        if task_id != 0 and task_id != last_task:
            continue
        curve = PrecisionRecallDisplay(
            recall=recall[task_id],
            precision=precision[task_id],
            average_precision=ap[task_id],
        )
        curve.plot(ax=ax, name=f"Precision-recall for task_id {task_id} for class {subcat_name}")
    figure.savefig(output_path + '/' + subcat_name + '.png')
    plt.close(figure)

In [4]:
# Three-seed version: for every experiment, overlay the first- and last-task
# PR curves of all seeds in one figure per class.
seeds = [0,1,2]
output_path = '../statistics_to_save/pr_curves/three_seeds/'
for idx_exp in range(len(results_path)):
    output_name_path = os.path.join(output_path, save_exp_name[idx_exp])
    # makedirs also creates missing parent folders and does nothing when the
    # folder already exists, so the cell works on a fresh checkout.
    os.makedirs(output_name_path, exist_ok=True)

    # One entry per seed (same order as `seeds`), each a list with one
    # {class_index: value} dict per task file read.
    precision = []
    recall = []
    ap = []

    # Not used in this cell; kept in case other cells rely on these names.
    precision_micro = []
    recall_micro = []
    for idx_seed in seeds:
        seed_path = os.path.join(results_path[idx_exp], 'seed_' + str(idx_seed))
        seed_precision = []
        seed_recall = []
        seed_ap = []
        # NOTE(review): results of every run folder found under the seed are
        # appended to the same lists, while plot_pr_curve_seeds only reads
        # entries 0 and 5 -- presumably there is exactly one run folder per
        # seed; confirm.
        for name_exp in os.listdir(seed_path):
            exp_path = os.path.join(seed_path, name_exp)
            ea_name = 'error_analysis'
            ea_path = os.path.join(exp_path, ea_name)
            for i in range(6):
                task_name = 'task_' + str(i) + '_test_error_analysis.csv'
                task_ea_path = os.path.join(ea_path, task_name)
                task_dataframe = pd.read_csv(task_ea_path)
                tmp_probabilities, tmp_targets = extract_data(task_dataframe)
                # extract_data returns one list per class; permute(1,0) puts
                # samples on rows and classes on columns.
                probabilities = torch.Tensor(tmp_probabilities).permute(1,0).numpy()
                targets = torch.Tensor(tmp_targets).permute(1,0).numpy()

                tmp_precision, tmp_recall, tmp_ap = get_precision_recall_for_prcurve(probabilities, targets, 5)
                seed_precision.append(tmp_precision)
                seed_recall.append(tmp_recall)
                seed_ap.append(tmp_ap)
        precision.append(seed_precision)
        recall.append(seed_recall)
        ap.append(seed_ap)

    classes = ['food', 'phone','smoking','fatigue','selfcare']
    for idx_class in range(len(classes)):
        plot_pr_curve_seeds(precision, recall, ap, seeds, 6, idx_class, classes[idx_class], output_name_path)

In [5]:
# Per-seed version: every seed gets its own output folder, with one figure per
# class showing the first- and last-task PR curves of that seed.
seeds = [0,1,2]

for idx_exp in range(len(results_path)):
    # Not used in this cell; kept in case other cells rely on these names.
    precision_micro = []
    recall_micro = []
    for idx_seed in seeds:
        seed_path = os.path.join(results_path[idx_exp], 'seed_' + str(idx_seed))
        output_path = '../statistics_to_save/pr_curves/{}_seed/'.format(idx_seed)
        output_name_path = os.path.join(output_path, save_exp_name[idx_exp])
        # makedirs creates output_path and any missing parent folders along
        # with the experiment folder, and does nothing when they already exist.
        os.makedirs(output_name_path, exist_ok=True)

        # One {class_index: value} dict per task file read.
        precision = []
        recall = []
        ap = []
        for name_exp in os.listdir(seed_path):
            exp_path = os.path.join(seed_path, name_exp)
            ea_name = 'error_analysis'
            ea_path = os.path.join(exp_path, ea_name)
            for i in range(6):
                task_name = 'task_' + str(i) + '_test_error_analysis.csv'
                task_ea_path = os.path.join(ea_path, task_name)
                task_dataframe = pd.read_csv(task_ea_path)
                tmp_probabilities, tmp_targets = extract_data(task_dataframe)
                # extract_data returns one list per class; permute(1,0) puts
                # samples on rows and classes on columns.
                probabilities = torch.Tensor(tmp_probabilities).permute(1,0).numpy()
                targets = torch.Tensor(tmp_targets).permute(1,0).numpy()

                tmp_precision, tmp_recall, tmp_ap = get_precision_recall_for_prcurve(probabilities, targets, 5)
                precision.append(tmp_precision)
                recall.append(tmp_recall)
                ap.append(tmp_ap)

            # NOTE(review): plotting happens once per run folder, but the lists
            # above are not reset between folders and plot_pr_curve only reads
            # entries 0 and 5 -- presumably there is exactly one run folder per
            # seed; confirm.
            classes = ['food', 'phone','smoking','fatigue','selfcare']
            for idx_class in range(len(classes)):
                plot_pr_curve(precision, recall, ap, 6, idx_class, classes[idx_class], output_name_path)

In [6]:
""" data_dict = {
    'food': [
        'eating burger', 'eating cake', 'eating carrots', 'eating chips', 'eating doughnuts',
        'eating hotdog', 'eating ice cream', 'eating spaghetti', 'eating watermelon',
        'sucking lolly', 'tasting beer', 'tasting food', 'tasting wine', 'sipping cup'
    ],
    'phone': [
        'texting', 'talking on cell phone', 'looking at phone'
    ],
    'smoking': [
        'smoking', 'smoking hookah', 'smoking pipe'
    ],
    'fatigue': [
        'sleeping', 'yawning', 'headbanging', 'headbutting', 'shaking head'
    ],
    'selfcare': [
        'scrubbing face', 'putting in contact lenses', 'putting on eyeliner', 'putting on foundation',
        'putting on lipstick', 'putting on mascara', 'brushing hair', 'brushing teeth', 'braiding hair',
        'combing hair', 'dyeing eyebrows', 'dyeing hair'
    ]
    } """

" data_dict = {\n    'food': [\n        'eating burger', 'eating cake', 'eating carrots', 'eating chips', 'eating doughnuts',\n        'eating hotdog', 'eating ice cream', 'eating spaghetti', 'eating watermelon',\n        'sucking lolly', 'tasting beer', 'tasting food', 'tasting wine', 'sipping cup'\n    ],\n    'phone': [\n        'texting', 'talking on cell phone', 'looking at phone'\n    ],\n    'smoking': [\n        'smoking', 'smoking hookah', 'smoking pipe'\n    ],\n    'fatigue': [\n        'sleeping', 'yawning', 'headbanging', 'headbutting', 'shaking head'\n    ],\n    'selfcare': [\n        'scrubbing face', 'putting in contact lenses', 'putting on eyeliner', 'putting on foundation',\n        'putting on lipstick', 'putting on mascara', 'brushing hair', 'brushing teeth', 'braiding hair',\n        'combing hair', 'dyeing eyebrows', 'dyeing hair'\n    ]\n    } "

In [7]:
""" #this is a version where i do it just for 1 seed each but for each subcategory
seeds = [0,1,2]


    
for idx_exp in range (len(results_path)): 
    precision_micro = []
    recall_micro = []
    for idx_seed in seeds:
        seed_path = os.path.join(results_path[idx_exp],'seed_' + str(idx_seed))
        output_path = '../statistics_to_save/pr_curves/{}_seed/'.format(idx_seed)
        if not os.path.exists(output_path):
            os.mkdir(output_path)

        subcat_path = os.path.join(output_path,'subcategories')
        if not os.path.exists(subcat_path):
            os.mkdir(subcat_path)
         
        output_name_path = os.path.join(subcat_path,save_exp_name[idx_exp])
        if not os.path.exists(output_name_path):
                os.mkdir(output_name_path)
        
        for name_exp in os.listdir(seed_path):
            exp_path = os.path.join(seed_path,name_exp)
            ea_name = 'error_analysis'
            ea_path = os.path.join(exp_path,ea_name)

            # now iterate over subcategories

            classes = ['food', 'phone','smoking','fatigue','selfcare']

            for class_name in classes:
                for subcat in data_dict[class_name]:
                    precision = []
                    recall = []
                    ap = []
                    for i in range(6):
                        task_name = 'task_' + str(i) + '_test_error_analysis.csv'
                        task_ea_path = os.path.join(ea_path,task_name)
                        task_dataframe = pd.read_csv(task_ea_path)
                        tmp_probabilities, tmp_targets = extract_subcategory_data(task_dataframe,class_name,subcat)
                        probabilities = torch.Tensor(tmp_probabilities).permute(1,0).numpy()
                        targets = torch.Tensor(tmp_targets).permute(1,0).numpy()
                        
                        tmp_precision, tmp_recall, tmp_ap = get_precision_recall_for_prcurve_subcat(probabilities,targets,len(classes),classes.index(class_name))
                        precision.append(tmp_precision)
                        recall.append(tmp_recall)
                        ap.append(tmp_ap)
                    plot_pr_curve_subcat(precision,recall,ap,6,subcat,output_name_path)
 """

" #this is a version where i do it just for 1 seed each but for each subcategory\nseeds = [0,1,2]\n\n\n    \nfor idx_exp in range (len(results_path)): \n    precision_micro = []\n    recall_micro = []\n    for idx_seed in seeds:\n        seed_path = os.path.join(results_path[idx_exp],'seed_' + str(idx_seed))\n        output_path = '../statistics_to_save/pr_curves/{}_seed/'.format(idx_seed)\n        if not os.path.exists(output_path):\n            os.mkdir(output_path)\n\n        subcat_path = os.path.join(output_path,'subcategories')\n        if not os.path.exists(subcat_path):\n            os.mkdir(subcat_path)\n         \n        output_name_path = os.path.join(subcat_path,save_exp_name[idx_exp])\n        if not os.path.exists(output_name_path):\n                os.mkdir(output_name_path)\n        \n        for name_exp in os.listdir(seed_path):\n            exp_path = os.path.join(seed_path,name_exp)\n            ea_name = 'error_analysis'\n            ea_path = os.path.join(exp_pat

In [8]:
""" seeds = [0,1,2]
#this is a version where i put all the examples from all three seeds
output_path = '../statistics_to_save/pr_curves/'
for idx_exp in range (len(results_path)):  
    output_name_path = os.path.join(output_path,save_exp_name[idx_exp])
    if not os.path.exists(output_name_path):
            os.mkdir(output_name_path)

    precision = []
    recall = []
    precision_micro = []
    recall_micro = []
    for i in range(6):
        task_name = 'task_' + str(i) + '_test_error_analysis.csv'
        probabilities = []
        targets = []
        
        for idx_seed in seeds:  
            seed_path = os.path.join(results_path[idx_exp],'seed_' + str(idx_seed))
            for name_exp in os.listdir(seed_path):
                exp_path = os.path.join(seed_path,name_exp)
                ea_name = 'error_analysis'
                ea_path = os.path.join(exp_path,ea_name)
                
                task_ea_path = os.path.join(ea_path,task_name)
                task_dataframe = pd.read_csv(task_ea_path)
                tmp_probabilities, tmp_targets = extract_data(task_dataframe)
                probabilities.append(torch.Tensor(tmp_probabilities).permute(1,0))
                targets.append(torch.Tensor(tmp_targets).permute(1,0))

        probabilities = torch.cat(probabilities,0).numpy()
        targets = torch.cat(targets,0).numpy()
        
        tmp_precision, tmp_recall = get_precision_recall_for_prcurve(probabilities,targets,5)
        precision.append(tmp_precision)
        recall.append(tmp_recall)



    classes = ['food', 'phone','smoking','fatigue','selfcare']
    for idx_class in range(len(classes)):
        plot_pr_curve(precision, recall, 6, idx_class, classes[idx_class], output_name_path) """

" seeds = [0,1,2]\n#this is a version where i put all the examples from all three seeds\noutput_path = '../statistics_to_save/pr_curves/'\nfor idx_exp in range (len(results_path)):  \n    output_name_path = os.path.join(output_path,save_exp_name[idx_exp])\n    if not os.path.exists(output_name_path):\n            os.mkdir(output_name_path)\n\n    precision = []\n    recall = []\n    precision_micro = []\n    recall_micro = []\n    for i in range(6):\n        task_name = 'task_' + str(i) + '_test_error_analysis.csv'\n        probabilities = []\n        targets = []\n        \n        for idx_seed in seeds:  \n            seed_path = os.path.join(results_path[idx_exp],'seed_' + str(idx_seed))\n            for name_exp in os.listdir(seed_path):\n                exp_path = os.path.join(seed_path,name_exp)\n                ea_name = 'error_analysis'\n                ea_path = os.path.join(exp_path,ea_name)\n                \n                task_ea_path = os.path.join(ea_path,task_name)\n  