# Imports

In [1]:
import pandas as pd
import numpy as np
import statsmodels.stats.inter_rater as IR
from nltk.metrics.agreement import AnnotationTask
import pingouin as pg

# Selected Frames

In [2]:
df = pd.read_csv('Selction_Frame.csv', sep=';')
df = df.set_index(df['Question']).drop(columns=['Question'])

In [3]:
df['model_label'] = df['flow_prediction'].apply(lambda x: int(x>=0.5))
df.head()

Unnamed: 0_level_0,flow_prediction,file_path,filename,vidnr,flow_annotated,model_eq_label,control_flow,control_no_flow,model_label
Question,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.588452,[b'data/processed/YouTube/img/35_1486_F_0_.jpg'],35_1486_F_0_.jpg,35,0,False,False,True,1
2,0.701101,[b'data/processed/YouTube/img/7_3413_F_1_.jpg'],7_3413_F_1_.jpg,7,1,True,True,False,1
3,0.586361,[b'data/processed/YouTube/img/59_4840_F_1_.jpg'],59_4840_F_1_.jpg,59,1,True,False,False,1
4,0.574279,[b'data/processed/YouTube/img/1_222_F_1_.jpg'],1_222_F_1_.jpg,1,1,True,False,False,1
5,0.281444,[b'data/processed/YouTube/img/45_3484_F_0_.jpg'],45_3484_F_0_.jpg,45,0,True,False,False,0


In [4]:
dupli = df['file_path']
dupli = dupli[dupli.duplicated(keep=False)]
dupli = dupli.groupby(list(dupli)).apply(lambda x: tuple(x.index)).tolist()

In [5]:
control = [x[1] for x in dupli]

In [6]:
df.drop(control)['file_path'][df.drop(control)['file_path'].duplicated(keep=False)]

Series([], Name: file_path, dtype: object)

# Survey Answers

In [7]:
survey_df = pd.read_csv('responses.csv')

In [8]:
# Transpose and Extract Survey Answers
survey_answers = survey_df.transpose().iloc[1:151].reset_index(drop=True)
survey_answers = survey_answers.add_suffix('_survey')
survey_answers.index = list(range(1,151))
survey_answers.tail()

Unnamed: 0,0_survey,1_survey,2_survey,3_survey,4_survey,5_survey,6_survey
146,0,0,0,0,0,0,0
147,1,1,1,0,1,0,1
148,1,0,1,0,0,0,1
149,1,1,1,0,1,1,1
150,1,0,0,0,1,0,1


# Combined

In [9]:
cols_to_keep = ['model_label', 'flow_annotated']
answers = pd.merge(df[cols_to_keep], survey_answers, left_index=True, right_index=True)
answers.tail()

Unnamed: 0_level_0,model_label,flow_annotated,0_survey,1_survey,2_survey,3_survey,4_survey,5_survey,6_survey
Question,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
146,0,0,0,0,0,0,0,0,0
147,1,1,1,1,1,0,1,0,1
148,1,1,1,0,1,0,0,0,1
149,1,1,1,1,1,0,1,1,1
150,0,1,1,0,0,0,1,0,1


In [10]:
cols_drop = ['model_label']

In [11]:
answers_control = answers.loc[control,:]
answers_no_control = answers.drop(control).drop(columns=cols_drop)
answers=answers.drop(columns=cols_drop)

In [12]:
answers.head()

Unnamed: 0_level_0,flow_annotated,0_survey,1_survey,2_survey,3_survey,4_survey,5_survey,6_survey
Question,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0,0,1,1,0,1,0,0
2,1,1,0,1,1,1,1,1
3,1,1,1,1,1,1,1,1
4,1,0,1,1,0,1,1,0
5,0,0,1,1,0,1,0,0


# Kippendorf Alpha, Fleiss, Scotts

In [13]:
kappa_data = answers_no_control.copy()

In [14]:
formatted_codes=list()
for rater in kappa_data.columns:
    for row in kappa_data.index:
        formatted_codes.append([rater, row, kappa_data.loc[row, rater]])
        
complete = AnnotationTask(data=formatted_codes)

print('Krippendorff\'s alpha:',complete.alpha()*100)
print('Scott\'s pi:',complete.pi())
print('Fleiss\'s Kappa:',round(complete.multi_kappa(),4))

Krippendorff's alpha: 30.653517967677203
Scott's pi: 0.30566726375646736
Fleiss's Kappa: 0.3117


# Pairwise Agreement

In [15]:
pe_pa = list()
for rater in kappa_data.drop(columns=['flow_annotated']).columns:
    pe_pa.append(complete.Ao('flow_annotated',rater)*100)
    print(rater, complete.Ao('flow_annotated',rater)*100)

0_survey 73.0
1_survey 68.0
2_survey 61.0
3_survey 70.0
4_survey 74.0
5_survey 73.0
6_survey 74.0


In [16]:
pd.DataFrame({'a':pe_pa}).describe()

Unnamed: 0,a
count,7.0
mean,70.428571
std,4.720775
min,61.0
25%,69.0
50%,73.0
75%,73.5
max,74.0


# Total Agreement with flow_annotated

In [17]:
kappa_data.apply(lambda x: sum(x['flow_annotated'] == x[1:]), axis=1).value_counts(normalize=True, sort=False)*100

0     1.0
1     4.0
2     4.0
3     8.0
4    19.0
5    21.0
6    25.0
7    18.0
dtype: float64

In [18]:
kappa_data.apply(lambda x: sum(x['flow_annotated'] == x[1:]), axis=1).describe()

count    100.000000
mean       4.930000
std        1.659104
min        0.000000
25%        4.000000
50%        5.000000
75%        6.000000
max        7.000000
dtype: float64

# Majority Vote


In [19]:
crowd_labels = kappa_data.drop(columns=['flow_annotated']).sum(axis=1).apply(lambda x: (x>3)).apply(int)

In [20]:
sum(crowd_labels==kappa_data['flow_annotated'])

83

# Vitality Score

https://arxiv.org/pdf/1912.10107.pdf

In [21]:
for dropped_rater in kappa_data.columns:
    temp_codes=list()
    for rater in kappa_data.drop(columns=[dropped_rater]).columns:
        for row in kappa_data.index:
            temp_codes.append([rater, row, kappa_data.loc[row, rater]])
    vitality =round(complete.alpha()- AnnotationTask(data=temp_codes).alpha(),4)
    print(dropped_rater, vitality)

flow_annotated 0.0346
0_survey -0.015
1_survey 0.0069
2_survey -0.0342
3_survey -0.0192
4_survey 0.0249
5_survey 0.012
6_survey -0.0037


# Cronbach


In [22]:
pg.cronbach_alpha(data=kappa_data.transpose().apply(pd.to_numeric))

(0.9096065351925338, array([0.792, 0.978]))

# Backup

In [23]:
result = {
    'Question':[],
    'all':[],
    'flow_annotated':[],
    '0_survey':[],
    '1_survey':[],
    '2_survey':[],
    '3_survey':[],
    '4_survey':[],
    '5_survey':[],
    '6_survey':[]
}

for row in kappa_data.index:
    temp_all = list()
    dropped = dict()
    
    for rater in kappa_data.columns:
        temp_all.append([rater, row, kappa_data.loc[row, rater]])
        
        
        dropped[rater] = list()
        for other_rater in kappa_data.drop(columns=[rater]).columns:
            dropped[rater].append([other_rater, row, kappa_data.loc[row, other_rater]])
    
    ratingtask = AnnotationTask(data=temp_all)
    K_a_all = ratingtask.alpha()
    
    result['Question'].append(row)
    result['all'].append(K_a_all)
    
    for dropped_rater, codes in dropped.items():
        d_task = AnnotationTask(data=codes)
        vitality = K_a_all - d_task.alpha()
        result[dropped_rater].append(vitality)
pd.DataFrame(result)

Unnamed: 0,Question,all,flow_annotated,0_survey,1_survey,2_survey,3_survey,4_survey,5_survey,6_survey
0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0
2,3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
95,135,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96,136,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97,137,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0
98,139,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
ratings=pd.DataFrame()
ratings['flow'] = kappa_data.sum(axis=1).astype(int)
ratings['no_flow'] = (len(kappa_data.columns)- ratings['flow']).astype(int)
ratings

Unnamed: 0_level_0,flow,no_flow
Question,Unnamed: 1_level_1,Unnamed: 2_level_1
1,3,5
2,7,1
3,8,0
4,5,3
5,3,5
...,...,...
135,5,3
136,3,5
137,7,1
139,8,0
