In [1]:
#imports to work with...
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from torch.utils.data import DataLoader
import torch
import torchvision
from torchvision import transforms
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import label_binarize

from cl_framework.continual_learning.metrics.metric_evaluator_incdec import MetricEvaluatorIncDec
from cl_framework.utilities.matrix_logger import IncDecLogger
from torchmetrics import Recall

In [2]:
# this is used later to create the class_to_idx
def kinetics_classes(classes_csv):
    """Group the subcategories listed in a classes CSV by their class.

    Args:
        classes_csv: Path (or any other source accepted by ``pd.read_csv``)
            of a CSV that has a ``Class`` column and a ``Subcategory`` column.

    Returns:
        dict: maps each ``Class`` value to the list of ``Subcategory`` values
        found on its rows, in file order. Keys appear in order of first
        occurrence.
    """
    table = pd.read_csv(classes_csv)
    grouped = {}

    for _, record in table.iterrows():
        # setdefault inserts an empty list the first time a class shows up,
        # then the row's subcategory is appended to that class' list.
        grouped.setdefault(record['Class'], []).append(record['Subcategory'])

    return grouped

In [3]:
# Build the class -> behaviors mapping from the classes CSV
folder_csv = '../Kinetics/Info/'
class_csv = os.path.join(folder_csv, 'classes.csv')
classes_behaviors = kinetics_classes(class_csv)

# Assign an integer index to each class, following the mapping's key order -- {class: idx}
class_to_idx = {name: position for position, name in enumerate(classes_behaviors)}


In [4]:
# Names of the per-class prediction columns; the matching target columns
# carry the same names with a 'target_' prefix.
predictions_names = ['food', 'phone', 'smoking', 'fatigue', 'selfcare']
targets_names = ['target_' + name for name in predictions_names]

# Behavior (class) -> list of the subcategories grouped under it.
all_behaviors_dict = {
    'food': [
        'eating burger', 'eating cake', 'eating carrots', 'eating chips',
        'eating doughnuts', 'eating hotdog', 'eating ice cream',
        'eating spaghetti', 'eating watermelon', 'sucking lolly',
        'tasting beer', 'tasting food', 'tasting wine', 'sipping cup',
    ],
    'phone': ['texting', 'talking on cell phone', 'looking at phone'],
    'smoking': ['smoking', 'smoking hookah', 'smoking pipe'],
    'fatigue': [
        'sleeping', 'yawning', 'headbanging', 'headbutting', 'shaking head',
    ],
    'selfcare': [
        'scrubbing face', 'putting in contact lenses', 'putting on eyeliner',
        'putting on foundation', 'putting on lipstick', 'putting on mascara',
        'brushing hair', 'brushing teeth', 'braiding hair', 'combing hair',
        'dyeing eyebrows', 'dyeing hair',
    ],
}

# Evaluation mode: 'multiclass' (active) or 'multilabel'.
criterion_type = 'multiclass'
#criterion_type = 'multilabel'

# Number of tasks to evaluate and number of behavior classes.
n_task = 6
total_classes = len(predictions_names)

In [5]:
# Output directory handed to IncDecLogger (as out_path) by the evaluation cells.
out_path = '../tmp_results_data/'
# No task dictionary is supplied: IncDecLogger receives task_dict=None.
task_dict = None
def extract_data(task_data, metric_eval):
    """Turn one error-analysis table into tensors and push them to ``metric_eval``.

    Reads the module-level ``criterion_type``, ``predictions_names`` and
    ``targets_names``.

    Args:
        task_data: DataFrame with one column per entry of
            ``predictions_names`` plus ``subcategory`` and ``video_path``
            columns. In ``'multilabel'`` mode it also needs one column per
            entry of ``targets_names``; otherwise it needs a ``target`` column
            (presumably integer class ids in ``0..n_classes-1`` -- confirm
            against the code that writes the CSV).
        metric_eval: Object exposing
            ``update(targets, binarized_targets, predictions, subcategory, data_path)``.

    Returns:
        None. The only effect is the single call to ``metric_eval.update``.
    """
    n_classes = len(predictions_names)

    # One list of scores per class gives (n_classes, n_samples); permute to
    # (n_samples, n_classes).
    predictions = torch.Tensor(
        [task_data[name].tolist() for name in predictions_names]
    ).permute(1, 0)

    if criterion_type == 'multilabel':
        # Targets are already stored one column per class.
        binarized_targets = torch.Tensor(
            [task_data[name].tolist() for name in targets_names]
        ).permute(1, 0)
        # argmax already returns an integer index tensor, so it is used
        # directly instead of being re-wrapped in the legacy torch.Tensor
        # constructor.
        # NOTE(review): that constructor is float-typed and may reject an
        # int64 tensor on older PyTorch releases -- confirm on the target version.
        targets = torch.argmax(binarized_targets, dim=1)
    else:
        # .tolist() keeps this independent of the DataFrame's index labels.
        targets = torch.Tensor(task_data['target'].tolist())
        # Binarize against as many classes as there are prediction columns
        # rather than a hard-coded 5.
        binarized_targets = torch.Tensor(
            label_binarize(targets, classes=list(range(n_classes)))
        )

    metric_eval.update(
        targets,
        binarized_targets,
        predictions,
        task_data['subcategory'],
        task_data['video_path'],
    )

In [6]:
# Directory prefix of the per-task error-analysis CSVs; file names are appended
# to it by plain string concatenation, so the trailing slash is required.
error_analysis_csv_path = '../runs_trainings/baseline_run/new_test/weighted/seed_2/incdec_khujcts6/error_analysis/'

In [7]:
# Feed the saved test-split error-analysis CSV of every task through the
# metric evaluator and collect the results in one logger.
logger = IncDecLogger(
    out_path=out_path,
    n_task=n_task,
    task_dict=task_dict,
    all_behaviors_dict=all_behaviors_dict,
    class_to_idx=class_to_idx,
    num_classes=total_classes,
    criterion_type=criterion_type,
)

for i in range(n_task):
    task_csv = f'{error_analysis_csv_path}task_{i}_test_error_analysis.csv'
    task_data = pd.read_csv(task_csv)

    # A new evaluator is created for each task's CSV.
    metric_eval = MetricEvaluatorIncDec(
        '../random_tries/',
        num_classes=total_classes,
        criterion_type=criterion_type,
        all_behaviors_dict=all_behaviors_dict,
        class_to_idx=class_to_idx,
    )
    extract_data(task_data, metric_eval)

    (
        acc, ap, acc_per_class, mean_ap, map_weighted,
        precision_per_class, recall_per_class, exact_match,
        ap_per_subcategory, recall_per_subcategory, accuracy_per_subcategory,
    ) = metric_eval.get(verbose=True)

    logger.update_accuracy(
        current_training_task_id=i,
        acc_value=acc,
        ap_value=ap,
        acc_per_class=acc_per_class,
        mean_ap=mean_ap,
        map_weighted=map_weighted,
        precision_per_class=precision_per_class,
        recall_per_class=recall_per_class,
        exact_match=exact_match,
        ap_per_subcategory=ap_per_subcategory,
        recall_per_subcategory=recall_per_subcategory,
        accuracy_per_subcategory=accuracy_per_subcategory,
    )
    logger.update_forgetting(current_training_task_id=i)
    logger.print_latest(current_training_task_id=i)

# Aggregate over all tasks, then write the report.
logger.compute_average()
logger.print_file()

 - task accuracy: 0.4837837837837838
 - task average precision: tensor([0.6329, 0.1542, 0.1359, 0.3116, 0.6469])
 - task acc per class: [0.7, 0.8810810810810811, 0.8297297297297297, 0.827027027027027, 0.7297297297297297]
 - task precision per class: tensor([0.6239, 0.2083, 0.1633, 0.3542, 0.5758])
 - task recall per class: tensor([0.5214, 0.1667, 0.2667, 0.3400, 0.6333])
 - task mAP: 0.37630367279052734
 - task weighted mAP: 0.514910101890564

 >>> Test on task  0 : acc= 48.4%,  |  <<<
 - task accuracy: 0.44324324324324327
 - task average precision: tensor([0.6269, 0.2143, 0.1522, 0.2568, 0.6662])
 - task acc per class: [0.6891891891891891, 0.8162162162162162, 0.7918918918918919, 0.8189189189189189, 0.7702702702702703]
 - task precision per class: tensor([0.6506, 0.2121, 0.1948, 0.2927, 0.6699])
 - task recall per class: tensor([0.3857, 0.4667, 0.5000, 0.2400, 0.5750])
 - task mAP: 0.38326960802078247
 - task weighted mAP: 0.5176829695701599

 >>> Test on task  1 : acc= 44.3%,  |  <<<


In [8]:
# Same procedure on the validation-split error-analysis CSVs, collected in a
# logger created with validation_mode=True.
val_logger = IncDecLogger(
    out_path=out_path,
    n_task=n_task,
    task_dict=task_dict,
    all_behaviors_dict=all_behaviors_dict,
    class_to_idx=class_to_idx,
    num_classes=total_classes,
    criterion_type=criterion_type,
    validation_mode=True,
)

for i in range(n_task):
    task_csv = f'{error_analysis_csv_path}task_{i}_validation_error_analysis.csv'
    task_data = pd.read_csv(task_csv)

    # A new evaluator is created for each task's CSV.
    metric_eval = MetricEvaluatorIncDec(
        '../random_tries/',
        num_classes=total_classes,
        criterion_type=criterion_type,
        all_behaviors_dict=all_behaviors_dict,
        class_to_idx=class_to_idx,
    )
    extract_data(task_data, metric_eval)

    (
        acc, ap, acc_per_class, mean_ap, map_weighted,
        precision_per_class, recall_per_class, exact_match,
        ap_per_subcategory, recall_per_subcategory, accuracy_per_subcategory,
    ) = metric_eval.get(verbose=True)

    val_logger.update_accuracy(
        current_training_task_id=i,
        acc_value=acc,
        ap_value=ap,
        acc_per_class=acc_per_class,
        mean_ap=mean_ap,
        map_weighted=map_weighted,
        precision_per_class=precision_per_class,
        recall_per_class=recall_per_class,
        exact_match=exact_match,
        ap_per_subcategory=ap_per_subcategory,
        recall_per_subcategory=recall_per_subcategory,
        accuracy_per_subcategory=accuracy_per_subcategory,
    )
    val_logger.update_forgetting(current_training_task_id=i)
    val_logger.print_latest(current_training_task_id=i)

# Aggregate over all tasks, then write the report.
val_logger.compute_average()
val_logger.print_file()

 - task accuracy: 0.41081081081081083
 - task average precision: tensor([0.5686, 0.1525, 0.2019, 0.2648, 0.6739])
 - task acc per class: [0.6756756756756757, 0.7810810810810811, 0.8108108108108109, 0.8, 0.754054054054054]
 - task precision per class: tensor([0.6429, 0.1600, 0.2143, 0.2857, 0.6465])
 - task recall per class: tensor([0.3214, 0.4000, 0.5000, 0.3200, 0.5333])
 - task mAP: 0.3723345994949341
 - task weighted mAP: 0.498223215341568

 >>> Test on task  4 : acc= 41.1%,  |  <<<
 - task accuracy: 0.3972972972972973
 - task average precision: tensor([0.6105, 0.2198, 0.1913, 0.2821, 0.6792])
 - task acc per class: [0.6810810810810811, 0.7864864864864864, 0.7081081081081081, 0.845945945945946, 0.772972972972973]
 - task precision per class: tensor([0.6964, 0.1733, 0.1750, 0.3600, 0.6915])
 - task recall per class: tensor([0.2786, 0.4333, 0.7000, 0.1800, 0.5417])
 - task mAP: 0.3965661823749542
 - task weighted mAP: 0.5227201581001282

 >>> Test on task  5 : acc= 39.7%,  |  <<<
 - t

 - task accuracy: 0.32432432432432434
 - task average precision: tensor([0.6063, 0.2730, 0.1416, 0.2645, 0.6970])
 - task acc per class: [0.6459459459459459, 0.8351351351351352, 0.5972972972972973, 0.8054054054054054, 0.7648648648648648]
 - task precision per class: tensor([0.6098, 0.1961, 0.1394, 0.2800, 0.7619])
 - task recall per class: tensor([0.1786, 0.3333, 0.7667, 0.2800, 0.4000])
 - task mAP: 0.39647096395492554
 - task weighted mAP: 0.5248225927352905

 >>> Test on task  3 : acc= 32.4%,  |  <<<
 - task accuracy: 0.37027027027027026
 - task average precision: tensor([0.6048, 0.1996, 0.1419, 0.2685, 0.6603])
 - task acc per class: [0.6621621621621622, 0.7675675675675676, 0.7432432432432432, 0.8, 0.7675675675675676]
 - task precision per class: tensor([0.6190, 0.1957, 0.1649, 0.2600, 0.7500])
 - task recall per class: tensor([0.2786, 0.6000, 0.5333, 0.2600, 0.4250])
 - task mAP: 0.3750515878200531
 - task weighted mAP: 0.50700443983078

 >>> Test on task  4 : acc= 37.0%,  |  <<<
