# Comparison against Human Reliability

In [1]:
import ast
import copy
import json

import pandas as pd
from scipy import stats as st

from memolon.src import utils, constants

In [2]:
def comparison_against_human_one_dataset(
        target_gold1,
        target_gold2,
        target_pred,
        min_overlap=50):
    """Correlate two gold datasets with each other and with predictions.

    All three frames are restricted to the row labels and the columns that
    they have in common; Pearson's r is then computed column by column.

    Parameters
    ----------
    target_gold1, target_gold2 : pandas.DataFrame
        The two gold-standard frames to compare. Neither is modified.
    target_pred : pandas.DataFrame
        The predicted values. Not modified.
    min_overlap : int, default 50
        A pair whose number of shared row labels is ``min_overlap`` or fewer
        is considered too small; only the overlap count is returned for it.

    Returns
    -------
    dict
        Always contains ``'overlap'`` (number of shared row labels). When the
        overlap exceeds ``min_overlap`` it additionally maps every shared
        column name to a dict with the Pearson correlations ``'g1vg2'``
        (gold1 vs. gold2), ``'g1vpr'`` (gold1 vs. prediction) and ``'g2vpr'``
        (gold2 vs. prediction).
    """
    rt = {}

    # Row labels present in all three frames, de-duplicated and kept in the
    # order of target_gold1 so that repeated runs give identical results
    # (iterating over a set of strings is not stable between interpreter runs).
    gold2_rows = set(target_gold2.index)
    pred_rows = set(target_pred.index)
    shared_rows = list(dict.fromkeys(
        row for row in target_gold1.index
        if row in gold2_rows and row in pred_rows))

    rt['overlap'] = len(shared_rows)

    if rt['overlap'] <= min_overlap:
        # too little overlap, exclude pair of datasets
        return rt

    # Columns must exist in the prediction frame as well; selecting a column
    # that target_pred lacks would raise a KeyError in .loc below.
    gold2_columns = set(target_gold2.columns)
    pred_columns = set(target_pred.columns)
    shared_columns = [
        col for col in dict.fromkeys(target_gold1.columns)
        if col in gold2_columns and col in pred_columns]

    # List-based .loc selection returns new objects, so the caller's frames
    # stay untouched without an explicit deep copy.
    gold1 = target_gold1.loc[shared_rows, shared_columns]
    gold2 = target_gold2.loc[shared_rows, shared_columns]
    pred = target_pred.loc[shared_rows, shared_columns]

    for c in shared_columns:
        rt[c] = {
            'g1vg2': st.pearsonr(gold1[c], gold2[c])[0],
            'g1vpr': st.pearsonr(gold1[c], pred[c])[0],
            'g2vpr': st.pearsonr(pred[c], gold2[c])[0],
        }

    return rt

Run analysis for all datasets and save results to disk:

In [3]:
# Evaluate every unordered pair of distinct conditions whose 'emo' and 'iso'
# entries match (presumably emotion format and language code -- verify against
# utils.conditions). Each pair is evaluated once, in the order first seen.
results = {}
for key1, value1 in utils.conditions.items():
    for key2, value2 in utils.conditions.items():
        # Progress indicator; the carriage return overwrites the previous line.
        print(key1, key2, end='\r')

        if key1 == key2:
            continue
        if not (value1['emo'] == value2['emo']):
            continue
        if not (value1['iso'] == value2['iso']):
            continue
        if (key2, key1) in results:
            # The mirrored pair has already been evaluated.
            continue

        results[(key1, key2)] = comparison_against_human_one_dataset(
            target_gold1=value1['get'](),
            target_gold2=value2['get'](),
            target_pred=utils.get_TargetPred(value1['iso'], split='test'),
        )

# JSON object keys must be strings, so the tuple keys are stringified first.
results = {str(pair): outcome for pair, outcome in results.items()}

with open(constants.COMPARISON_AGAINST_HUMAN_RELIABILITY, 'w') as f:
    json.dump(results, f)

en1 en1en1 en2

  exec(code_obj, self.user_global_ns, self.user_ns)


hr hr22

Loading stored results...

In [4]:
# Read the previously stored comparison results back from disk.
with open(constants.COMPARISON_AGAINST_HUMAN_RELIABILITY, 'r') as f:
    results = json.load(f)

# Keep only the dataset pairs that share more than 100 rows.
results = dict(
    (pair, outcome)
    for pair, outcome in results.items()
    if outcome['overlap'] > 100
)
results

{"('en1', 'en2')": {'overlap': 1032,
  'arousal': {'g1vg2': 0.7595171525999059,
   'g1vpr': 0.7606949829924109,
   'g2vpr': 0.7111269484013218},
  'valence': {'g1vg2': 0.9525522303285292,
   'g1vpr': 0.9405435609709295,
   'g2vpr': 0.9215537996480875},
  'dominance': {'g1vg2': 0.7944747393076227,
   'g1vpr': 0.8789710139173657,
   'g2vpr': 0.7818546349491288}},
 "('es1', 'es2')": {'overlap': 610,
  'arousal': {'g1vg2': 0.7580136953288056,
   'g1vpr': 0.7144294971195944,
   'g2vpr': 0.7248922028605523},
  'valence': {'g1vg2': 0.9755136936906599,
   'g1vpr': 0.9050852715975625,
   'g2vpr': 0.9119167020199449}},
 "('es2', 'es3')": {'overlap': 222,
  'arousal': {'g1vg2': 0.710390831345051,
   'g1vpr': 0.7244598469013257,
   'g2vpr': 0.6905900644162456},
  'valence': {'g1vg2': 0.9763604326812766,
   'g1vpr': 0.9060814638417142,
   'g2vpr': 0.9068022366205312}},
 "('de2', 'de3')": {'overlap': 498,
  'arousal': {'g1vg2': 0.7603380554631205,
   'g1vpr': 0.7206806675167734,
   'g2vpr': 0.663448

In [5]:
# Flatten the nested results into one row per (dataset pair, emotion variable).
# Rows are collected in a list and the frame is built once, instead of growing
# the DataFrame row by row. As a consequence the '#' column gets an integer
# dtype rather than a dtype inferred during enlargement.
records = []
for key, value in results.items():
    # Keys are stringified tuples such as "('en1', 'en2')". literal_eval only
    # parses Python literals, so nothing read from the results file can be
    # executed (unlike eval).
    gold1, gold2 = ast.literal_eval(key)
    for var in ['valence', 'arousal', 'dominance']:
        # Not every dataset pair has all three variables.
        if var in value:
            scores = value[var]
            records.append([gold1, gold2, value['overlap'], var,
                            scores['g1vg2'], scores['g1vpr'], scores['g2vpr']])

df = pd.DataFrame(
    records,
    columns=['G1', 'G2', '#', 'emo', 'G1vsG2', 'G1vsPred', 'G2vsPred'])
df = df.round(3)
df

Unnamed: 0,G1,G2,#,emo,G1vsG2,G1vsPred,G2vsPred
0,en1,en2,1032,valence,0.953,0.941,0.922
1,en1,en2,1032,arousal,0.76,0.761,0.711
2,en1,en2,1032,dominance,0.794,0.879,0.782
3,es1,es2,610,valence,0.976,0.905,0.912
4,es1,es2,610,arousal,0.758,0.714,0.725
5,es2,es3,222,valence,0.976,0.906,0.907
6,es2,es3,222,arousal,0.71,0.724,0.691
7,de2,de3,498,valence,0.963,0.806,0.812
8,de2,de3,498,arousal,0.76,0.721,0.663
9,pl1,pl2,445,valence,0.943,0.838,0.852


In [6]:
def _drop_leading_zero(x):
    """Format a float with three decimals and strip leading zeros (0.953 -> .953)."""
    return "{:.3f}".format(x).lstrip('0')


# Export the summary table as LaTeX, then abbreviate the variable names.
s = df.to_latex(index=False, float_format=_drop_leading_zero)
for long_name, short_name in (('valence', 'V'), ('arousal', 'A'), ('dominance', 'D')):
    s = s.replace(long_name, short_name)
print(s)

\begin{tabular}{llllrrr}
\toprule
  G1 &   G2 &     \# &        emo &  G1vsG2 &  G1vsPred &  G2vsPred \\
\midrule
 en1 &  en2 &  1032 &    V &    .953 &      .941 &      .922 \\
 en1 &  en2 &  1032 &    A &    .760 &      .761 &      .711 \\
 en1 &  en2 &  1032 &  D &    .794 &      .879 &      .782 \\
 es1 &  es2 &   610 &    V &    .976 &      .905 &      .912 \\
 es1 &  es2 &   610 &    A &    .758 &      .714 &      .725 \\
 es2 &  es3 &   222 &    V &    .976 &      .906 &      .907 \\
 es2 &  es3 &   222 &    A &    .710 &      .724 &      .691 \\
 de2 &  de3 &   498 &    V &    .963 &      .806 &      .812 \\
 de2 &  de3 &   498 &    A &    .760 &      .721 &      .663 \\
 pl1 &  pl2 &   445 &    V &    .943 &      .838 &      .852 \\
 pl1 &  pl2 &   445 &    A &    .725 &      .764 &      .643 \\
 zh1 &  zh2 &   140 &    V &    .932 &      .918 &      .898 \\
 zh1 &  zh2 &   140 &    A &    .482 &      .556 &      .455 \\
\bottomrule
\end{tabular}



---