# Gold vs. Silver Evaluation

In [1]:
import numpy as np
import pandas as pd
from scipy import stats as st

from memolon.src import utils, constants

Load gold and silver evaluation results from disk:

In [2]:
gold = pd.read_csv(constants.GOLD_EVALUATION_RESULTS, index_col=0)
gold

Unnamed: 0,valence,arousal,dominance,joy,anger,sadness,fear,disgust,shared,(%)
en1,0.940544,0.760695,0.878971,,,,,,1032.0,100.0
en2,0.921625,0.708846,0.782097,,,,,,1034.0,100.0
en3,,,,0.890146,0.834981,0.79838,0.821245,0.776637,1033.0,99.0
es1,0.905345,0.714292,0.823378,,,,,,612.0,59.0
es2,0.789475,0.637861,,,,,,,7685.0,54.0
es3,0.909323,0.729571,,,,,,,363.0,41.0
es4,,,,0.864622,0.841729,0.840375,0.840676,0.757108,363.0,41.0
es5,,,,0.642162,0.71511,0.720232,0.720716,0.625003,6096.0,58.0
es6,,,,0.800913,0.741935,0.706522,0.721831,0.683599,992.0,43.0
de1,0.888657,0.780253,0.679127,,,,,,677.0,67.0


In [3]:
silver = pd.read_csv(constants.SILVER_EVALUATION_RESULTS, index_col=0)
silver

Unnamed: 0_level_0,valence,arousal,dominance,joy,anger,sadness,fear,disgust
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
af,0.804851,0.575980,0.740162,0.759676,0.747276,0.736776,0.708647,0.743772
sq,0.799467,0.567820,0.734991,0.746040,0.752304,0.749164,0.719019,0.722643
am,0.558573,0.307365,0.518451,0.484960,0.531145,0.542322,0.524378,0.470908
ar,0.777428,0.531497,0.696889,0.700904,0.752839,0.722357,0.705438,0.741181
hy,0.796443,0.517502,0.721456,0.748405,0.769858,0.733432,0.708661,0.734218
...,...,...,...,...,...,...,...,...
vi,0.649125,0.474731,0.581106,0.594995,0.646116,0.590801,0.580929,0.621701
cy,0.721699,0.509873,0.670642,0.643234,0.687167,0.648685,0.638461,0.657627
fy,0.607579,0.434331,0.541094,0.530889,0.602396,0.591576,0.553233,0.579467
yi,0.485836,0.343337,0.401658,0.433388,0.500003,0.465789,0.453527,0.443429


In [4]:
utils.language_table

Unnamed: 0_level_0,fastText_fullname,google_fullname,google_iso,articles_wiki
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
af,Afrikaans,afrikaans,af,85954
sq,Albanian,albanian,sq,77869
am,Amharic,amharic,am,14849
ar,Arabic,arabic,ar,964345
hy,Armenian,armenian,hy,259237
...,...,...,...,...
vi,Vietnamese,vietnamese,vi,1237440
cy,Welsh,welsh,cy,106263
fy,West Frisian,frisian,fy,42880
yi,Yiddish,yiddish,yi,14881


Merge cases with multiple datasets for the same language in gold evaluation (averaging their values):

In [5]:
isos = {value['iso'] for key, value in utils.conditions.items()}
isos

{'de', 'el', 'en', 'es', 'hr', 'id', 'it', 'nl', 'pl', 'pt', 'tr', 'zh'}

In [6]:
tmp = pd.DataFrame(columns=constants.emotions)
for iso in isos:
    cnds = [key for key,value in utils.conditions.items() if value['iso'] == iso]
    rt = gold.loc[cnds]
    if isinstance(rt, pd.DataFrame):
        rt = rt.mean(axis=0)
    tmp.loc[iso] = rt
gold = tmp
gold

Unnamed: 0,valence,arousal,dominance,joy,anger,sadness,fear,disgust
id,0.840994,0.639077,0.62548,,,,,
it,0.885933,0.626435,0.764268,,,,,
de,0.812331,0.695353,0.679127,0.801323,0.660418,0.516198,0.679258,0.424616
pt,0.893192,0.706353,0.752721,,,,,
el,0.860182,0.495823,0.73738,,,,,
en,0.931084,0.734771,0.830534,0.890146,0.834981,0.79838,0.821245,0.776637
hr,0.80981,0.663992,,,,,,
nl,0.850305,0.57869,,,,,,
pl,0.823053,0.672962,0.602694,0.775178,0.707091,0.659587,0.686863,0.710734
es,0.868048,0.693908,0.823378,0.769232,0.766258,0.755709,0.761074,0.68857


In [7]:
results = pd.DataFrame(index=constants.emotions, columns=['pearson', 'mae', '#langs'])
for e in constants.emotions:
    isos = gold[~(gold[e]).isna()].index
    pearson = st.pearsonr(gold.loc[isos, e], silver.loc[isos, e])
    mae = np.mean(np.abs(gold.loc[isos, e] - silver.loc[isos, e]))
    #print(mae)
    results.loc[e] = {'pearson': pearson[0], 'mae':mae, '#langs': len(isos)}

results = results.T.loc[['#langs', 'pearson', 'mae']]
results.to_csv(constants.GOLD_SILVER_AGREEMENT)

Loading and formatting results:

In [8]:
results = pd.read_csv(constants.GOLD_SILVER_AGREEMENT, index_col=0).round(2)
results["mean"] = results.mean(axis=1)
results

Unnamed: 0,valence,arousal,dominance,joy,anger,sadness,fear,disgust,mean
#langs,12.0,12.0,8.0,5.0,5.0,5.0,5.0,5.0,7.125
pearson,0.54,0.57,0.52,0.91,0.85,0.57,0.87,0.27,0.6375
mae,0.03,0.06,0.08,0.02,0.12,0.14,0.08,0.16,0.08625


In [9]:
s = results[constants.emotions].to_latex()
print(s)

\begin{tabular}{lrrrrrrrr}
\toprule
{} &  valence &  arousal &  dominance &   joy &  anger &  sadness &  fear &  disgust \\
\midrule
\#langs  &    12.00 &    12.00 &       8.00 &  5.00 &   5.00 &     5.00 &  5.00 &     5.00 \\
pearson &     0.54 &     0.57 &       0.52 &  0.91 &   0.85 &     0.57 &  0.87 &     0.27 \\
mae     &     0.03 &     0.06 &       0.08 &  0.02 &   0.12 &     0.14 &  0.08 &     0.16 \\
\bottomrule
\end{tabular}



---