In [66]:
from typing import List, Dict

import numpy as np
import pandas as pd
import glob
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
from fitter import Fitter, get_common_distributions

from src.NoXi.preprocessing.labels_preprocessing import read_noxi_label_file, clean_labels

<h3> Functions for reading labels </h3>

In [67]:
# reads a batch of annotation files in one call
def load_annotation_files_by_paths(paths:List[str])->Dict[str, np.ndarray]:
    """Reads every NoXi *.annotation~ file listed in ``paths``.

    Each file is read with ``read_noxi_label_file``; the files are read in the
    order in which their paths are given.

    :param paths: List[str]
            Paths to the annotation files to load.
    :return: Dict[str, np.ndarray]
            Dictionary in the format Dict[path->labels], where the labels are
            whatever ``read_noxi_label_file`` returned for that path.
    """
    return {annotation_path: read_noxi_label_file(annotation_path) for annotation_path in paths}

In [68]:
# separate paths according to the filenames
def separate_paths_according_to_filenames(paths:List[str])->Dict[str, List[str]]:
    """Groups the provided paths by the session name they contain.

    The session name is the part of a path that starts with digits followed by
    an underscore and ends with one of the site names Nottingham, Paris or
    Augsburg (for example "084_2016-05-31_Augsburg"). A path belongs to a
    session when the session name occurs anywhere in it as a substring.

    :param paths: List[str]
            List of paths
    :return: Dict[str, List[str]]
            Dictionary in the format Dict[session_name->List[full_path]].
            Keys appear in the order in which the session names are first met.
    :raises ValueError:
            If no session name can be extracted from one of the paths.
    """
    filename_to_paths={}
    for path in paths:
        # extract the session name; the greedy ".*" extends the match up to the
        # LAST site name that occurs in the path
        match=re.search(r"\d+_.*(Nottingham|Paris|Augsburg)", path)
        if match is None:
            # re.search returns None when nothing matches; fail with a clear message
            # instead of an AttributeError on ``None.group()``
            raise ValueError(f"Cannot extract a NoXi session name from path: {path!r}")
        filename=match.group()
        # this session has already been collected - scanning all paths again
        # would only rebuild the very same list
        if filename in filename_to_paths:
            continue
        # take all paths with this filename
        filename_to_paths[filename]=[item for item in paths if filename in item]
    return filename_to_paths

In [69]:
# root directory with the NoXi annotation files
path_to_labels=r'E:\Databases\NoXi\NoXi_annotations_all'
# all patterns look exactly two directory levels below the root: glob is called
# without recursive=True, so '**' matches a single directory level, just like '*'
_annotation_dirs=os.path.join(path_to_labels,'**','**')
# engagement files of the novices first, then the ones of the experts
gold_standard_paths=[
    *glob.glob(os.path.join(_annotation_dirs,'engagement_novice.annotation~')),
    *glob.glob(os.path.join(_annotation_dirs,'engagement_expert.annotation~')),
]
all_paths=glob.glob(os.path.join(_annotation_dirs,'*.annotation~'))
all_experts_paths=glob.glob(os.path.join(_annotation_dirs,'*expert*.annotation~'))
all_novices_paths=glob.glob(os.path.join(_annotation_dirs,'*novice*.annotation~'))

In [70]:
# read the annotation files of every path collection (same order as before:
# gold standard, all, experts, novices)
(gold_standard_labels,
 all_labels,
 all_experts_labels,
 all_novices_labels)=map(
    load_annotation_files_by_paths,
    (gold_standard_paths, all_paths, all_experts_paths, all_novices_paths),
)
# group the paths of every collection by the session name they contain
(gold_standard_paths_separated,
 all_paths_separated,
 all_experts_paths_separated,
 all_novices_paths_separated)=map(
    separate_paths_according_to_filenames,
    (gold_standard_paths, all_paths, all_experts_paths, all_novices_paths),
)

<h3> Check the correlation coefficients of annotations </h3>

In [106]:
# from the paths grouped under session "084_2016-05-31_Augsburg", keep the ones
# containing "expert" and the ones containing "novice", respectively
expert_files_to_check=list(filter(lambda annotation_path: "expert" in annotation_path, all_paths_separated["084_2016-05-31_Augsburg"]))
novice_files_to_check=list(filter(lambda annotation_path: "novice" in annotation_path, all_paths_separated["084_2016-05-31_Augsburg"]))

In [107]:
# take the first column of every selected annotation and turn it into an (n, 1) column
loaded_labels_expert=[
    np.reshape(all_labels[annotation_path][:,0], (-1,1))
    for annotation_path in expert_files_to_check
]
loaded_labels_novice=[
    np.reshape(all_labels[annotation_path][:,0], (-1,1))
    for annotation_path in novice_files_to_check
]

In [108]:
# put the label columns side by side - one DataFrame column per annotation file,
# named by the part of the file name that follows its last "_"
# NOTE: both names are rebound here from a list of arrays to a DataFrame, so this
# cell can only be re-run after the cell that builds the lists has been re-run
loaded_labels_expert=pd.DataFrame(
    np.concatenate(loaded_labels_expert, axis=1),
    columns=[path.rsplit(os.path.sep, 1)[-1].rsplit("_", 1)[-1] for path in expert_files_to_check],
)
loaded_labels_novice=pd.DataFrame(
    np.concatenate(loaded_labels_novice, axis=1),
    columns=[path.rsplit(os.path.sep, 1)[-1].rsplit("_", 1)[-1] for path in novice_files_to_check],
)

In [109]:
# pairwise Pearson correlation (the pandas default) between the columns of the expert labels
loaded_labels_expert.corr(method="pearson")

Unnamed: 0,jorrit.annotation~,marc.annotation~,silvan.annotation~,wladislaw.annotation~
jorrit.annotation~,1.0,0.629407,0.439191,0.027911
marc.annotation~,0.629407,1.0,0.478344,-0.043209
silvan.annotation~,0.439191,0.478344,1.0,0.148129
wladislaw.annotation~,0.027911,-0.043209,0.148129,1.0


In [110]:
# pairwise Pearson correlation (the pandas default) between the columns of the novice labels
loaded_labels_novice.corr(method="pearson")

Unnamed: 0,jorrit.annotation~,marc.annotation~,silvan.annotation~,wladislaw.annotation~
jorrit.annotation~,1.0,0.825991,0.671234,0.226491
marc.annotation~,0.825991,1.0,0.600186,0.273559
silvan.annotation~,0.671234,0.600186,1.0,0.286381
wladislaw.annotation~,0.226491,0.273559,0.286381,1.0
