### Multilab - Cleanlab extension for multi-label multi-annotator dataset
This notebook demonstrates an approach to label quality and active learning for training classification models with multilab.

In [67]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt

### Import dependencies and get data
Please install the dependencies specified in this [requirements.txt](Multilab/main/requirements.txt) file before running the notebook.
We load the following datafiles:

labels_multiannotator is a DataFrame that contains labels from multiple annotators for each example (different number of annotations per example)  

pred_probs are are the predicted probabilites for examples that have existing annotator labels or are the predicted probabilites for examples that do not have any annotator labels  


In [3]:
dataset = pd.read_csv("grouped_df.csv")

In [12]:
pred_probs_df = pd.read_csv("logs/2023-10-13T16-49-47_four/Predcition_prob.csv")

In [13]:
labels_df=pd.read_csv("logs/2023-10-13T16-49-47_four/Predcition_pred.csv")

In [14]:
#get image name from path
pred_probs_df['image_name'] = pred_probs_df['path'].apply(lambda x: x.split('/')[-1])

In [15]:
#get image name from path
labels_df['image_name'] = labels_df['path'].apply(lambda x: x.split('/')[-1])

In [16]:
#retaining the images that are common in dataset and predicted probabilities
df1 = dataset[dataset['image_name'].isin(pred_probs_df['image_name'])]
df1=df1.drop(columns=['comments','_id','date','end_time', 'filepath','start_time', 'time_in_seconds','no_particular_expression',
'patientID', 'smile', 'start_time', 'time_in_seconds','wrinkled_forehead', 'unclear'])

In [17]:
#renaming columns for consistency
name_map={'au10_raising_of_upper_lip':'AU10',
       'au12_pulling_at_corner_lip':'AU12', 'au20_lip_stretcher':'AU20', 'au24_lip_presser':'AU24',
       'au25_parting_lips':'AU25', 'au26_jaw_drop':'AU26', 'au27_mouth_stretch':'AU27',
       'au43_eyes_closed':'AU43', 'au4_brow_lowering':'AU4', 'au6_cheek_raising':'AU6',
       'au7_tightning_of_eyelids':'AU7', 'au9_wrinkling_of_nose':'AU9'}
df1.rename(columns=name_map, inplace=True)

In [18]:
#grouping by image_name
grouped_df = df1.groupby(['image_name','annotator'], as_index=False).first()
grouped_df

Unnamed: 0,image_name,annotator,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43,AU4,AU6,AU7,AU9
0,2021-06-01 15-01-02_000000000519.jpg,babatundeshofolu,0,0,0,0,1,0,0,1,0,0,0,0
1,2021-06-01 15-01-02_000000000519.jpg,hannahweisman,0,0,0,0,1,0,0,1,0,0,0,0
2,2021-06-01 15-01-02_000000000519.jpg,jennifer.noa,0,0,0,0,1,0,0,1,0,0,0,0
3,2021-06-01 15-01-02_000000000519.jpg,rishika.patel@ufl.edu\health,0,0,0,0,1,0,0,1,0,0,0,0
4,2021-06-01 15-01-02_000000000525.jpg,babatundeshofolu,0,0,0,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128047,2022-03-30 17-14-24_000000001763.jpg,aribahali,0,0,0,0,1,0,0,0,0,0,0,0
128048,2022-03-30 17-14-24_000000001764.jpg,aribahali,0,0,0,0,0,1,0,0,0,0,0,0
128049,2022-03-30 17-14-24_000000001765.jpg,aribahali,0,0,0,0,0,1,0,0,0,0,0,0
128050,2022-03-30 17-14-24_000000001767.jpg,aribahali,0,0,0,0,1,0,0,0,0,0,0,0


In [19]:
#drop the uncommon AU's
labels_df=labels_df.drop(columns=['path','AU1','AU2', 'AU14','AU15', 'AU17','AU23'])

In [20]:
order_mapping = {value: index for index, value in enumerate(labels_df['image_name'])}
grouped_df['order'] = grouped_df['image_name'].map(order_mapping)

In [21]:
grouped_df = grouped_df.sort_values(by='order').drop(columns='order')

In [23]:
grouped_df

Unnamed: 0,image_name,annotator,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43,AU4,AU6,AU7,AU9
79919,2022-01-29 08-54-02_000000000260.jpg,hannahweisman,0,0,0,0,1,0,0,1,0,0,0,0
79918,2022-01-29 08-54-02_000000000260.jpg,ezequielbautista,0,0,0,0,1,0,0,1,0,0,0,0
79920,2022-01-29 08-54-02_000000000260.jpg,kmaisuria,0,0,0,0,0,1,0,1,0,0,0,0
79921,2022-01-29 08-54-02_000000000260.jpg,wkratochvil,0,0,0,0,0,1,0,1,0,0,0,0
79924,2022-01-29 09-09-07_000000000184.jpg,kmaisuria,0,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71516,2022-01-20 08-37-38_000000000430.jpg,hannahweisman,0,0,0,0,1,0,0,1,0,0,0,0
71517,2022-01-20 08-37-38_000000000459.jpg,hannahweisman,0,0,0,0,0,0,0,1,0,0,0,0
71518,2022-01-20 08-37-38_000000000460.jpg,hannahweisman,0,0,0,0,1,0,0,1,0,0,0,0
71519,2022-01-20 08-37-38_000000000462.jpg,hannahweisman,0,0,0,0,1,0,0,1,0,0,0,0


In [25]:
df = grouped_df
annotators = df['annotator'].unique()
labels = df.columns[2:]

In [29]:
AU=['AU4', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU20', 'AU24', 'AU25', 'AU26','AU27', 'AU43'] #replace with the labels in your dataset

In [31]:
df

Unnamed: 0,image_name,annotator,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
79919,2022-01-29 08-54-02_000000000260.jpg,hannahweisman,0,0,0,0,0,0,0,0,1,0,0,1
79918,2022-01-29 08-54-02_000000000260.jpg,ezequielbautista,0,0,0,0,0,0,0,0,1,0,0,1
79920,2022-01-29 08-54-02_000000000260.jpg,kmaisuria,0,0,0,0,0,0,0,0,0,1,0,1
79921,2022-01-29 08-54-02_000000000260.jpg,wkratochvil,0,0,0,0,0,0,0,0,0,1,0,1
79924,2022-01-29 09-09-07_000000000184.jpg,kmaisuria,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71516,2022-01-20 08-37-38_000000000430.jpg,hannahweisman,0,0,0,0,0,0,0,0,1,0,0,1
71517,2022-01-20 08-37-38_000000000459.jpg,hannahweisman,0,0,0,0,0,0,0,0,0,0,0,1
71518,2022-01-20 08-37-38_000000000460.jpg,hannahweisman,0,0,0,0,0,0,0,0,1,0,0,1
71519,2022-01-20 08-37-38_000000000462.jpg,hannahweisman,0,0,0,0,0,0,0,0,1,0,0,1


In [33]:
#converting the dataframe into a array of multiple given labels for each label per example with shape (N, M,K)
annotators = df['annotator'].unique()
indices = df.columns.difference(['image_name', 'annotator'])
classes = indices.reindex(AU)[0]  
num_samples = len(df['image_name'].unique())
num_annotators = len(annotators)
num_classes = len(classes)
labels_multiannotator = np.full((num_samples, num_annotators, num_classes), np.nan)

for i, row in df.iterrows():
    sample_idx = np.where(df['image_name'].unique() == row['image_name'])[0][0]
    annotator_idx = np.where(annotators == row['annotator'])[0][0]
    class_indices = [np.where(classes == col)[0][0] for col in classes]
    labels_multiannotator[sample_idx, annotator_idx, class_indices] = row[classes].values

In [36]:
pred_prob = pred_probs_df[pred_probs_df['image_name'].isin(df['image_name'])]

In [37]:
pred_prob=pred_prob.drop_duplicates(subset=['image_name'])

In [38]:
pred_prob=pred_prob.drop(columns=['path','AU1','AU2', 'AU14','AU15', 'AU17','AU23'])

In [39]:
pred_prob=pred_prob.drop(columns=['image_name'])

In [43]:
pred_probs=pred_prob.iloc[:, 0:].values

### Checking format and shape of parameters
 labels_multiannotator : 3D pandas DataFrame or array of multiple given labels per class for each example with shape (N, M, K)  
   
 N is the number of examples, M is the number of annotators. labels_multiannotator[n][m][k] - label for n-th example given by m-th annotator for k-th class.  
 
 For a dataset with K classes, each given label must be binary either 0(absent), 1(present) or NaN if this annotator did not label a particular example.  
        
pred_probs : np.ndarray
        An array of shape (N, K) of predicted class probabilities from a trained classifier model.

In [64]:
labels_multiannotator[0]

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  1.],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]])

In [48]:
labels_multiannotator.shape #shape is (N,M,K)

(54714, 9, 12)

In [66]:
pred_probs[0]

array([0.00962529, 0.00341886, 0.00177602, 0.00043265, 0.00053205,
       0.00328887, 0.00169592, 0.0208411 , 0.02978184, 0.01950531,
       0.20444597, 0.01785214])

In [47]:
pred_probs.shape #shape is (N,K)

(54714, 12)

### Import and run code

In [49]:
from main import multiannotators
from multiannotators import get_label_quality_multiannotator

  from .autonotebook import tqdm as notebook_tqdm


In [50]:
import imp
imp.reload(multiannotators)

  import imp


<module 'multiannotators' from '/home/ponnurus/AUDetection-lightning/multiannotators.py'>

In [51]:
results=get_label_quality_multiannotator(labels_multiannotator,pred_probs,consensus_method = "majority_vote",quality_method = "crowdlab",verbose=False)

In [52]:
results["label_quality"]["consensus_quality_score"]

0        0.383235
1        0.515467
2        0.725891
3        0.478902
4        0.873535
           ...   
54709    0.458766
54710    0.867633
54711    0.383977
54712    0.449378
54713    0.371080
Name: consensus_quality_score, Length: 54714, dtype: float64

In [53]:
results["annotator_stats"]

Unnamed: 0,annotator_quality,agreement_with_consensus,num_of_exampels_labelled
ezequielbautista,0.787998,0.787998,1204
hannahweisman,0.860456,0.860457,3269
kaelynnrodriguez,0.877053,0.877053,9923
aribahali,0.904396,0.913755,17714
rishika.patel@ufl.edu\health,0.923018,0.923019,5930
babatundeshofolu,0.924886,0.924886,20632
wkratochvil,0.939487,0.96046,45039
jennifer.noa,0.953227,0.953227,8160
kmaisuria,0.958181,0.958181,16181


### Obtaining active learning score

We get the active learning scores for each datapoint (both labeled and unlabeled) by using a combination of the annotators' agremeent and model confidence. These scores represent how confident we are about an example's true label based on the currently obtained annotations; examples with the lowest scores are those for which additional labels should be collected (i.e. likely the most informative). These scores are estimated via an ActiveLab algorithm developed by the Cleanlab team, and may sometimes prioritize an already-labeled example over an unlabeled example if the annotations for the labeled example are deemed unreliable (ActiveLab appropriately estimates the value of collecting new annotations for unlabeled data vs already-labeled data).

Similar to above, the labels_multiannotator DataFrame here should only include examples that have received at least one annotation (labels_multiannotator should have the same number of rows as pred_probs). This method returns one of the two dataframes: active_learning_scores represents the scores for examples with existing annotations, and active_learning_scores_unlabeled represents the scores for examples with no annotations so far.

In [54]:
from multiannotators import get_active_learning_scores

In [55]:
import imp
imp.reload(multiannotators)

<module 'multiannotators' from '/home/ponnurus/AUDetection-lightning/multiannotators.py'>

In [56]:
column_name=['AU4', 'AU6', 'AU7', 'AU9', 'AU10', 'AU12', 'AU20', 'AU24', 'AU25', 'AU26','AU27', 'AU43']

In [57]:
active_learning_scores= get_active_learning_scores(labels_multiannotator,pred_probs, None,column_name)

In [58]:
active_learning_scores

Unnamed: 0,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
0,0.619422,0.624329,0.625628,0.626690,0.998926,0.993412,0.625691,0.610555,0.506242,0.960706,0.465404,0.519698
1,0.706572,0.706909,0.689189,0.705619,0.999467,0.996132,0.706889,0.689597,0.696428,0.994309,0.706253,0.546671
2,0.833510,0.833983,0.783315,0.834174,0.998912,0.995485,0.834060,0.826635,0.794973,0.998783,0.833135,0.474916
3,0.669159,0.682863,0.593585,0.684491,0.999671,0.995341,0.683989,0.659051,0.536766,0.953165,0.682875,0.700518
4,0.914878,0.921079,0.922453,0.922803,0.999491,0.996736,0.922408,0.875625,0.894174,0.997225,0.922588,0.962588
...,...,...,...,...,...,...,...,...,...,...,...,...
54709,0.666753,0.670474,0.671441,0.671916,0.981322,0.994080,0.665226,0.403166,0.521079,0.898190,0.669189,0.678015
54710,0.917620,0.919761,0.919764,0.919720,0.999420,0.997256,0.919277,0.883785,0.875861,0.961017,0.910288,0.884032
54711,0.627287,0.625733,0.626968,0.627449,0.998762,0.996850,0.627035,0.581895,0.542570,0.940850,0.618491,0.382151
54712,0.663857,0.666632,0.666259,0.666305,0.995527,0.996345,0.665010,0.659515,0.561601,0.992634,0.666815,0.600494


In [59]:
active_learning_scores_unlabeled= get_active_learning_scores(None, None,pred_probs,column_name)

In [60]:
active_learning_scores_unlabeled

Unnamed: 0,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
0,0.526300,0.536209,0.538831,0.540976,0.540817,0.536416,0.538959,0.508395,0.494122,0.510528,0.215280,0.513167
1,0.540563,0.541445,0.495165,0.538074,0.541124,0.537682,0.541392,0.496231,0.514071,0.535897,0.539730,0.178627
2,0.540127,0.541122,0.530032,0.541525,0.540765,0.537916,0.541284,0.525663,0.516501,0.540703,0.539339,0.105022
3,0.494083,0.535553,0.539751,0.540481,0.541283,0.536104,0.538961,0.463493,0.301084,0.485842,0.535589,0.487778
4,0.495293,0.528551,0.535920,0.537798,0.540591,0.534750,0.535679,0.284753,0.471760,0.535911,0.536646,0.462348
...,...,...,...,...,...,...,...,...,...,...,...,...
54709,0.539247,0.540760,0.541152,0.541345,0.538668,0.540717,0.538627,0.432141,0.490063,0.525329,0.540237,0.231713
54710,0.540313,0.541554,0.541555,0.541530,0.541534,0.541038,0.541273,0.520697,0.516103,0.532738,0.536062,0.105602
54711,0.541518,0.540323,0.541273,0.541643,0.541293,0.540712,0.541324,0.506617,0.524581,0.523726,0.534755,0.122234
54712,0.539641,0.541413,0.541175,0.541204,0.540540,0.540747,0.540377,0.536870,0.502679,0.539837,0.541530,0.093988


### Results
Lastly, we can obtain relevant findings of the examples by their active learning scores, and obtain the index of the examples with the lowest scores or by setting a threshold; these are the least confident examples which we will want to collect more labels for.


In [61]:
image_names = df['image_name'].unique()

In [63]:
mask = active_learning_scores < 0.5 #set a threshold
rows, cols = (mask & (active_learning_scores < 0.5)).values.nonzero()
for row, col in zip(rows, cols):
    print(f"Image: {image_names[row]}, Label: {active_learning_scores.columns[col]}, Value: {active_learning_scores.iloc[row, col]}")

Image: 2022-01-29 08-54-02_000000000260.jpg, Label: AU27, Value: 0.4654036388334286
Image: 2022-01-29 09-09-07_000000000728.jpg, Label: AU43, Value: 0.4749162732690343
Image: 2022-01-29 09-24-12_000000000168.jpg, Label: AU7, Value: 0.4537036218751608
Image: 2022-01-29 09-24-12_000000000168.jpg, Label: AU25, Value: 0.46351799458836035
Image: 2022-01-29 09-24-12_000000000278.jpg, Label: AU24, Value: 0.48570374462862426
Image: 2022-01-29 09-24-12_000000000278.jpg, Label: AU25, Value: 0.4803763102106576
Image: 2022-01-29 09-24-12_000000000305.jpg, Label: AU25, Value: 0.4942741770930204
Image: 2022-01-29 09-24-12_000000000308.jpg, Label: AU25, Value: 0.48354881274829425
Image: 2022-01-29 09-24-12_000000000315.jpg, Label: AU43, Value: 0.43951379100953075
Image: 2022-01-29 09-24-12_000000000469.jpg, Label: AU43, Value: 0.08454542879407033
Image: 2022-01-29 09-39-17_000000000872.jpg, Label: AU25, Value: 0.419222087883731
Image: 2022-01-29 09-39-17_000000000872.jpg, Label: AU43, Value: 0.391402