### Multilab - Cleanlab extension for multi-label multi-annotator dataset
This notebook demonstrates an approach to label quality and active learning for training classification models with multilab.

In [3]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

Matplotlib is building the font cache; this may take a moment.


### Import dependencies and get data
Please install the dependencies specified in this [requirements.txt](Multilab/main/requirements.txt) file before running the notebook.
We load the following datafiles:

labels_multiannotator is a DataFrame that contains labels from multiple annotators for each example (different number of annotations per example)  

pred_probs are the predicted probabilities from the model, either for examples that already have annotator labels or for examples that do not have any annotator labels yet  


In [4]:
# Load the annotation table (one row per annotation) into `dataset`.
# The project folder can be overridden with the MULTILAB_ROOT environment
# variable; when it is unset the original location is used, so the file that
# gets read is unchanged.
MULTILAB_ROOT = os.environ.get("MULTILAB_ROOT", "/Users/keerthanaponnuru/Downloads/Multilab-master")
dataset = pd.read_csv(os.path.join(MULTILAB_ROOT, "annotations.annotations.face 3.csv"))

In [11]:
# Load the predicted-probability table into `pred_probs_df`.
# The project folder can be overridden with the MULTILAB_ROOT environment
# variable; when it is unset the original location is used, so the file that
# gets read is unchanged.
MULTILAB_ROOT = os.environ.get("MULTILAB_ROOT", "/Users/keerthanaponnuru/Downloads/Multilab-master")
pred_probs_df = pd.read_csv(os.path.join(MULTILAB_ROOT, "probs.csv"))

In [12]:
# Read preds.csv into `labels_df`; the bare file name is resolved against the
# current working directory.
preds_csv = "preds.csv"
labels_df = pd.read_csv(preds_csv)

In [None]:
# Cleaning pred probs: remove the 'dataset' column from the probability table.
# errors='ignore' keeps the cell idempotent -- without it, re-running the cell
# after the column has already been removed raises KeyError.
pred_probs_df = pred_probs_df.drop(columns=['dataset'], errors='ignore')

In [22]:
# Derive `image_name` (the text after the final '/') from each table's path
# column so the three tables can be matched on a common key.
pred_probs_df['image_name'] = [p.rsplit('/', 1)[-1] for p in pred_probs_df['path']]
labels_df['image_name'] = [p.rsplit('/', 1)[-1] for p in labels_df['path']]
dataset['image_name'] = [p.rsplit('/', 1)[-1] for p in dataset['filePathRaw']]

In [None]:
# Keep only the annotation rows whose image also appears in the predicted
# probabilities, then drop four columns from the result.
df1 = (
    dataset
    .loc[lambda frame: frame['image_name'].isin(pred_probs_df['image_name'])]
    .drop(columns=['smile', 'wrinkledForehead', 'filePathRaw', 'tool'])
)

In [44]:
# Rename the verbose annotation columns to short AU codes for consistency.
name_map = {
    'Au4BrowLowering': 'AU4',
    'Au6CheekRaising': 'AU6',
    'Au7TightningOfEyelids': 'AU7',
    'Au9WrinklingOfNose': 'AU9',
    'Au10RaisingOfUpperLip': 'AU10',
    'Au12PullingAtCornerLip': 'AU12',
    'Au20LipStretcher': 'AU20',
    'Au24LipPresser': 'AU24',
    'Au25PartingLips': 'AU25',
    'Au26JawDrop': 'AU26',
    'Au27MouthStretch': 'AU27',
    'Au43EyesClosed': 'AU43',
}
# rename() returns a new frame, so df1 ends up as an independent, renamed copy.
df1 = df1.rename(columns=name_map)

In [46]:
# Collapse to one row per (image_name, annotator) pair; for each remaining
# column, first() keeps the first non-null value found within the pair.
pair_groups = df1.groupby(['image_name', 'annotator'], as_index=False)
grouped_df = pair_groups.first()

In [40]:
# Record, for every image, its row position in labels_df (the last position
# wins if a name repeats) and attach it to the annotations as 'order'.
order_mapping = dict(zip(labels_df['image_name'], range(len(labels_df))))
grouped_df['order'] = grouped_df['image_name'].map(order_mapping)

In [41]:
# Sort the annotations by the 'order' column, then remove that column.
grouped_df = grouped_df.sort_values('order').drop(columns=['order'])

In [None]:
#grouping by annotators
grouped_df

Unnamed: 0,image_name,annotator,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
0,2021-06-23 12-17-44_000000000593.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
1,2021-06-23 12-17-44_000000000662.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
2,2021-06-23 12-17-44_000000000719.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
3,2021-06-26 11-35-02_000000000117.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
4,2021-06-26 11-35-02_000000000246.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14599,2022-03-30 17-14-24_000000001763.jpg,ali_aribah,0,0,0,0,0,0,0,0,1,0,0,0
14600,2022-03-30 17-14-24_000000001764.jpg,ali_aribah,0,0,0,0,0,0,0,0,0,1,0,0
14601,2022-03-30 17-14-24_000000001765.jpg,ali_aribah,0,0,0,0,0,0,0,0,0,1,0,0
14602,2022-03-30 17-14-24_000000001767.jpg,ali_aribah,0,0,0,0,0,0,0,0,1,0,0,0


In [48]:
# `df` is another name for grouped_df (same object, not a copy).
df = grouped_df
# Every column after the first two.
labels = df.columns[2:]
# Distinct annotator names, in order of first appearance.
annotators = df['annotator'].unique()

In [49]:
# Replace with the labels in your dataset; the order given here is the class
# order used when the label array is built.
AU = [f'AU{number}' for number in (4, 6, 7, 9, 10, 12, 20, 24, 25, 26, 27, 43)]

In [50]:
df

Unnamed: 0,image_name,annotator,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
0,2021-06-23 12-17-44_000000000593.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
1,2021-06-23 12-17-44_000000000662.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
2,2021-06-23 12-17-44_000000000719.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
3,2021-06-26 11-35-02_000000000117.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
4,2021-06-26 11-35-02_000000000246.jpg,hannah_weisman,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14599,2022-03-30 17-14-24_000000001763.jpg,ali_aribah,0,0,0,0,0,0,0,0,1,0,0,0
14600,2022-03-30 17-14-24_000000001764.jpg,ali_aribah,0,0,0,0,0,0,0,0,0,1,0,0
14601,2022-03-30 17-14-24_000000001765.jpg,ali_aribah,0,0,0,0,0,0,0,0,0,1,0,0
14602,2022-03-30 17-14-24_000000001767.jpg,ali_aribah,0,0,0,0,0,0,0,0,1,0,0,0


In [51]:
# Convert the long-format table (one row per image/annotator pair) into an
# array of given labels with shape (N, M, K): N images, M annotators,
# K classes. Entries stay NaN where an annotator did not label an image.
annotators = df['annotator'].unique()
indices = df.columns.difference(['image_name', 'annotator'])
# Reorder the label columns so the class axis follows the order of AU.
classes = indices.reindex(AU)[0]

# factorize() numbers values in order of first appearance -- the same order as
# .unique() -- so these codes are the row / annotator positions that the
# original per-row np.where lookups produced.
sample_idx, unique_image_names = pd.factorize(df['image_name'])
annotator_idx = pd.factorize(df['annotator'])[0]

num_samples = len(unique_image_names)
num_annotators = len(annotators)
num_classes = len(classes)
labels_multiannotator = np.full((num_samples, num_annotators, num_classes), np.nan)

# A single vectorised assignment replaces the row-by-row loop, which recomputed
# df['image_name'].unique() for every row (quadratic in the number of rows).
labels_multiannotator[sample_idx, annotator_idx, :] = df[classes].to_numpy(dtype=float)

In [66]:
# Predicted probabilities for the annotated images: one row per image, in the
# first-appearance order of df['image_name'] -- the row order used for
# labels_multiannotator. A plain isin() filter would keep probs.csv's own row
# order, which matches the label array only if both happen to be sorted alike.
# A left merge preserves the order of the left-hand keys.
pred_prob = (
    pd.DataFrame({'image_name': df['image_name'].unique()})
    .merge(
        pred_probs_df.drop_duplicates(subset=['image_name']),
        on='image_name',
        how='left',
        validate='one_to_one',
    )
)

In [67]:
# Keep a single row per image: the first occurrence of each image_name.
pred_prob = pred_prob.drop_duplicates(subset='image_name', keep='first')

In [68]:
# Remove the path column and the six AU columns that are not in the AU list.
pred_prob = pred_prob.drop(
    columns=['path', *(f'AU{number}' for number in (1, 2, 14, 15, 17, 23))]
)

In [69]:
# Remove the image_name key column from the probability table.
pred_prob = pred_prob.drop(columns='image_name')

In [70]:
# Build the (N, K) probability array, selecting the class columns by name in
# the order of AU. Taking "all remaining columns" would silently tie the class
# axis to the column order of the CSV, which need not match the class order
# used for labels_multiannotator; selecting by name also fails loudly
# (KeyError) if an expected class column is missing.
pred_probs = pred_prob[AU].to_numpy()

### Checking format and shape of parameters
 labels_multiannotator : 3D pandas DataFrame or array of multiple given labels per class for each example with shape (N, M, K)  
   
 N is the number of examples, M is the number of annotators. labels_multiannotator[n][m][k] - label for n-th example given by m-th annotator for k-th class.  
 
 For a dataset with K classes, each given label must be binary, either 0 (absent) or 1 (present), or NaN if this annotator did not label a particular example.  
        
pred_probs : np.ndarray
        An array of shape (N, K) of predicted class probabilities from a trained classifier model.

In [407]:
labels_multiannotator[0][1]

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])

In [842]:
labels_multiannotator.shape #shape is (N,M,K)

(12549, 7, 12)

In [73]:
pred_probs[0]

array([0.00319901, 0.00647417, 0.00382968, 0.00013262, 0.00198555,
       0.00170082, 0.00148687, 0.11618687, 0.09490481, 0.04801873,
       0.02248868, 0.00936568])

In [74]:
pred_probs.shape #shape is (N,K)

(12549, 12)

### Import and run code

In [902]:
import sys

# Add the Multilab 'main' folder to the import path. The project folder can be
# overridden with the MULTILAB_ROOT environment variable; when it is unset the
# original location is used, so the appended path is unchanged.
MULTILAB_ROOT = os.environ.get("MULTILAB_ROOT", "/Users/keerthanaponnuru/Downloads/Multilab-master")
multilab_main_dir = os.path.join(MULTILAB_ROOT, "main")
# Guard so that re-running the cell does not append the same entry again.
if multilab_main_dir not in sys.path:
    sys.path.append(multilab_main_dir)
# NOTE(review): the notebook later runs `import main`, which needs the folder
# that CONTAINS 'main' to be importable (e.g. via the working directory) --
# confirm whether MULTILAB_ROOT itself should be on sys.path as well.

In [906]:
import main

from main import multiannotators
from main import multiannotator_utils

from main.multiannotators import get_label_quality_multiannotator
from main.multiannotators import get_multilabel_active_learning_scores
from main.multiannotators import get_majority_vote_label

In [938]:
import importlib

# Re-execute main/multiannotators.py so that edits made on disk take effect.
multiannotators = importlib.reload(multiannotators)

# reload() does not update names that were bound with `from ... import ...`;
# without rebinding, the notebook keeps calling the pre-reload function objects,
# which can then disagree with the reloaded helpers they look up at call time.
get_label_quality_multiannotator = multiannotators.get_label_quality_multiannotator
get_multilabel_active_learning_scores = multiannotators.get_multilabel_active_learning_scores
get_majority_vote_label = multiannotators.get_majority_vote_label

# Keep the module repr as the cell's displayed output.
multiannotators

<module 'main.multiannotators' from '/Users/keerthanaponnuru/Downloads/Multilab-master/main/multiannotators.py'>

In [937]:
# Compute consensus labels and quality scores for the annotated examples.
# The function is looked up on the module at call time so that, after an
# importlib.reload(multiannotators), the current definition is used rather than
# a stale object bound earlier by `from main.multiannotators import ...`.
results = multiannotators.get_label_quality_multiannotator(
    labels_multiannotator,
    pred_probs,
    consensus_method="majority_vote",
    quality_method="crowdlab",
    verbose=False,
)

TypeError: get_majority_vote_label() missing 1 required positional argument: 'annotator_agreement_with_annotators'

In [927]:
results["label_quality"]

Unnamed: 0,num_annotations,consensus_label,annotator_agreement,consensus_quality_score
0,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.999181
1,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.996293
2,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.996436
3,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.999290
4,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.998118
...,...,...,...,...
12544,1,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.207891
12545,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.208352
12546,1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.208068
12547,1,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",0.207774


In [861]:
results["detailed_label_quality"]

Unnamed: 0,Quality_hannah_weisman,Quality_ezequiel_bautista,Quality_kaelynn_rodriguez,Quality_lucas_geraldi,Quality_jay_maisuria,Quality_william_kratochvil,Quality_ali_aribah
0,0.999181,,,,,,
1,0.996293,,,,,,
2,0.996436,,,,,,
3,0.999290,,,,,,
4,0.998118,,,,,,
...,...,...,...,...,...,...,...
12544,,,,,,,0.207891
12545,,,,,,,0.208352
12546,,,,,,,0.208068
12547,,,,,,,0.207774


In [862]:
results["annotator_stats"]

Unnamed: 0,annotator_quality,agreement_with_consensus,worst_class,num_of_examples_labeled
ali_aribah,0.874497,0.979855,AU24,848
hannah_weisman,0.939333,0.992706,AU25,11734
kaelynn_rodriguez,0.950956,0.952495,AU4,942
lucas_geraldi,0.959549,0.961028,AU25,201
ezequiel_bautista,0.970369,0.971742,AU43,867
jay_maisuria,0.970984,0.972222,AU25,6
william_kratochvil,0.998705,1.0,AU4,6


### Obtaining active learning score

We get the active learning scores for each datapoint (both labeled and unlabeled) by using a combination of the annotators' agreement and model confidence. These scores represent how confident we are about an example's true label based on the currently obtained annotations; examples with the lowest scores are those for which additional labels should be collected (i.e. likely the most informative). These scores are estimated via an ActiveLab algorithm developed by the Cleanlab team, and may sometimes prioritize an already-labeled example over an unlabeled example if the annotations for the labeled example are deemed unreliable (ActiveLab appropriately estimates the value of collecting new annotations for unlabeled data vs already-labeled data).

Similar to above, the labels_multiannotator DataFrame here should only include examples that have received at least one annotation (labels_multiannotator should have the same number of rows as pred_probs). This method returns one of the two dataframes: active_learning_scores represents the scores for examples with existing annotations, and active_learning_scores_unlabeled represents the scores for examples with no annotations so far.

In [917]:
from main.multiannotators import get_active_learning_scores

In [918]:
# Score the annotated examples: the labels and their predicted probabilities
# are passed, with None as the third argument.
labelled_result = get_active_learning_scores(labels_multiannotator, pred_probs, None)
active_learning_scores_labelled, relabelling_labeled = labelled_result

version3


In [922]:
active_learning_scores_labelled

Unnamed: 0,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
0,0.999309,0.999338,0.999324,0.999324,0.999339,0.999315,0.999325,0.999675,0.999559,0.999415,0.999372,0.999292
1,0.999277,0.999286,0.999271,0.999269,0.999284,0.999257,0.999360,0.999218,0.999582,0.999585,0.999267,0.999311
2,0.999290,0.999304,0.999283,0.999280,0.999294,0.999271,0.999361,0.999231,0.999580,0.999607,0.999280,0.999311
3,0.999421,0.999344,0.999357,0.999349,0.999350,0.999329,0.999338,0.999798,0.999346,0.999399,0.999339,0.999303
4,0.999072,0.999099,0.999091,0.999100,0.999112,0.999084,0.999095,0.999039,0.999074,0.999197,0.999147,0.999031
...,...,...,...,...,...,...,...,...,...,...,...,...
12544,0.578509,0.591615,0.587267,0.590681,0.597813,0.584623,0.589910,0.570306,0.566705,0.589438,0.595005,0.603015
12545,0.578549,0.591680,0.588884,0.590871,0.597889,0.584412,0.589958,0.566509,0.568580,0.610977,0.591937,0.620876
12546,0.578785,0.591869,0.587527,0.590995,0.598000,0.584835,0.590112,0.581782,0.584438,0.591096,0.603065,0.653358
12547,0.578313,0.591380,0.587300,0.590628,0.597610,0.584112,0.589711,0.565274,0.563061,0.585002,0.591137,0.582376


##### Images that need to be re-labelled, based on the criteria and threshold set in multiannotators.get_active_learning_scores

In [920]:
relabelling_labeled

Unnamed: 0,Index,ClassLabel,ActiveLearningScore,QualityOfConsensusLabeled,AnnotatorLabels,MajorityLabel,Pred,ConfidenceScore,NormalizedSumAnnotatorWeights
2627,218,AU43,0.260896,0.055079,"[0.0, nan, nan, 1.0, nan, nan, nan]",1,0.960023,0.920047,0.497878
3503,291,AU43,0.249104,0.055758,"[1.0, nan, 0.0, nan, nan, nan, nan]",1,0.982531,0.965063,0.482335
3860,321,AU25,0.359855,0.207492,"[0.0, nan, nan, 1.0, nan, nan, nan]",1,0.839037,0.678075,0.491986
5311,442,AU24,0.436341,0.207223,"[0.0, nan, nan, 1.0, nan, nan, nan]",1,0.571928,0.143856,0.482760
8027,668,AU43,0.400967,0.214210,"[0.0, nan, 1.0, nan, nan, nan, nan]",0,0.333501,0.332998,0.482335
...,...,...,...,...,...,...,...,...,...
131999,10999,AU43,0.365518,0.211628,"[0.0, nan, nan, 1.0, nan, nan, nan]",1,0.840550,0.681100,0.497878
138179,11514,AU43,0.342679,0.212251,"[0.0, nan, nan, 1.0, nan, nan, nan]",1,0.975813,0.951626,0.497878
142736,11894,AU25,0.346748,0.208509,"[nan, nan, 1.0, nan, nan, nan, 0.0]",1,0.915049,0.830098,0.492577
144047,12003,AU43,0.452360,0.208924,"[nan, nan, 1.0, nan, nan, nan, 0.0]",1,0.542724,0.085449,0.484951


In [923]:
# Rows where the majority label is 1 but the predicted probability is below 0.5.
false_negatives = relabelling_labeled.loc[
    relabelling_labeled['MajorityLabel'].eq(1) & relabelling_labeled['Pred'].lt(0.5)
]

#### Unlabelled data

In [100]:
# Score examples as unlabeled: only the third argument (predicted
# probabilities) is supplied; the first two are None.
unlabeled_result = get_active_learning_scores(None, None, pred_probs)
active_learning_scores_unlabeled, relabelling_unlabeled = unlabeled_result

In [101]:
active_learning_scores_unlabeled

Unnamed: 0,AU4,AU6,AU7,AU9,AU10,AU12,AU20,AU24,AU25,AU26,AU27,AU43
0,0.739673,0.729100,0.737637,0.749572,0.743590,0.744509,0.745200,0.374930,0.443632,0.594988,0.677403,0.719766
1,0.708050,0.727116,0.735932,0.749732,0.745800,0.748605,0.631435,0.749408,0.407267,0.421254,0.749465,0.625935
2,0.704639,0.717417,0.735126,0.749745,0.747450,0.745345,0.641247,0.748813,0.412218,0.411768,0.748070,0.638161
3,0.609719,0.743918,0.712506,0.734353,0.749711,0.748164,0.749348,0.315462,0.661202,0.630455,0.748282,0.727803
4,0.747141,0.749511,0.745821,0.745815,0.749781,0.747325,0.749261,0.744711,0.693973,0.613511,0.692635,0.747538
...,...,...,...,...,...,...,...,...,...,...,...,...
12544,0.749422,0.749329,0.747909,0.749935,0.749462,0.747562,0.749476,0.729549,0.726386,0.713649,0.736801,0.649480
12545,0.749875,0.749697,0.744291,0.749981,0.749793,0.748653,0.749889,0.739706,0.722287,0.665043,0.745124,0.614338
12546,0.749249,0.749195,0.747767,0.749647,0.749496,0.747547,0.749478,0.702231,0.684813,0.710163,0.717382,0.556969
12547,0.749848,0.749854,0.747743,0.749989,0.749907,0.748797,0.749908,0.742220,0.735459,0.724363,0.746563,0.694650


In [102]:
relabelling_unlabeled

Unnamed: 0,Row_Index,AU,Predicted_Probability
0,0,AU24,0.374930
1,0,AU25,0.443632
2,1,AU25,0.407267
3,1,AU26,0.421254
4,2,AU25,0.412218
...,...,...,...
13228,12511,AU43,0.443640
13229,12520,AU26,0.486238
13230,12520,AU43,0.259947
13231,12526,AU43,0.399810


### Results
Lastly, we can obtain relevant findings of the examples by their active learning scores, and obtain the index of the examples with the lowest scores or by setting a threshold; these are the least confident examples which we will want to collect more labels for.


In [103]:
# Distinct image names in order of first appearance in df.
image_names = pd.unique(df['image_name'])

In [104]:
# Attach the image file name by using each row's 'Index' value as a position
# into image_names.
relabelling_labeled['Image_Name'] = image_names[relabelling_labeled['Index'].to_numpy()]

In [105]:
relabelling_labeled

Unnamed: 0,Index,ClassLabel,ActiveLearningScore,QualityOfConsensusLabeled,AnnotatorLabels,CL,Pred,ConfidenceScore,NormalizedSumAnnotatorWeights,Image_Name
285,23,AU26,0.387681,0.288413,"[0.0, 1.0, nan, nan, nan, nan, nan]",0,0.013281,0.973437,0.499570,2021-06-26 11-35-02_000000000471.jpg
352,29,AU10,0.312791,0.132179,"[0.0, nan, 1.0, nan, nan, nan, nan]",0,0.000395,0.999211,0.498963,2021-06-26 11-35-02_000000000478.jpg
839,69,AU43,0.383267,0.294422,"[0.0, 1.0, nan, nan, nan, nan, nan]",0,0.009751,0.980499,0.492112,2021-07-20 20-02-15_000000000005.jpg
1411,117,AU24,0.270786,0.087907,"[1.0, 0.0, nan, nan, nan, nan, nan]",0,0.000173,0.999653,0.496328,2021-07-20 20-02-15_000000000068.jpg
1495,124,AU24,0.344932,0.144712,"[1.0, nan, nan, 0.0, nan, nan, nan]",0,0.226397,0.547206,0.482760,2021-07-20 20-02-15_000000000075.jpg
...,...,...,...,...,...,...,...,...,...,...
148014,12334,AU20,0.481658,0.489556,"[nan, nan, 1.0, nan, nan, nan, 0.0]",0,0.000378,0.999245,0.473185,2022-03-07 18-00-58_000000001203.jpg
148806,12400,AU20,0.493753,0.512929,"[nan, nan, 1.0, nan, nan, nan, 0.0]",0,0.000455,0.999089,0.473185,2022-03-07 18-00-58_000000001757.jpg
149756,12479,AU25,0.399075,0.342705,"[nan, nan, nan, 0.0, nan, nan, 1.0]",0,0.016159,0.967682,0.468114,2022-03-30 17-14-24_000000001648.jpg
149954,12496,AU7,0.440652,0.419281,"[nan, nan, nan, 1.0, nan, nan, 0.0]",0,0.014709,0.970583,0.463305,2022-03-30 17-14-24_000000001679.jpg


In [106]:
# Attach the image file name by using each row's 'Row_Index' value as a
# position into image_names.
relabelling_unlabeled['Image_Name'] = image_names[relabelling_unlabeled['Row_Index'].to_numpy()]

In [107]:
relabelling_unlabeled

Unnamed: 0,Row_Index,AU,Predicted_Probability,Image_Name
0,0,AU24,0.374930,2021-06-23 12-17-44_000000000593.jpg
1,0,AU25,0.443632,2021-06-23 12-17-44_000000000593.jpg
2,1,AU25,0.407267,2021-06-23 12-17-44_000000000662.jpg
3,1,AU26,0.421254,2021-06-23 12-17-44_000000000662.jpg
4,2,AU25,0.412218,2021-06-23 12-17-44_000000000719.jpg
...,...,...,...,...
13228,12511,AU43,0.443640,2022-03-30 17-14-24_000000001701.jpg
13229,12520,AU26,0.486238,2022-03-30 17-14-24_000000001712.jpg
13230,12520,AU43,0.259947,2022-03-30 17-14-24_000000001712.jpg
13231,12526,AU43,0.399810,2022-03-30 17-14-24_000000001722.jpg
