In [1]:
import utils
import relations

import pandas as pd
import os
import csv

from collections import defaultdict

pd.set_option('display.max_rows', None)


def get_overview(metrics, level, model_name, evidence_type):
    """Collect per-metric CSV scores into one table (rows: targets, columns: metrics).

    For every metric a CSV file is read from ``../analysis/{model_name}/{level}/``.
    At the 'properties' level the file is ``{metric}.csv``; at the 'relations' and
    'relations-hyp' levels it is ``{metric}_{evidence_type}.csv``.

    Parameters
    ----------
    metrics : iterable of str
        Metric names; each one becomes a column of the result.
    level : str
        One of 'properties', 'relations' or 'relations-hyp'.
    model_name : str
        Name of the analysis sub-directory to read from.
    evidence_type : str
        Name of the CSV column holding the score to extract.

    Returns
    -------
    pandas.DataFrame
        One row per property/relation and one column per metric, plus an
        'n_pairs' column whenever the CSV files provide one.

    Raises
    ------
    ValueError
        If ``level`` is not one of the supported levels.
    KeyError
        If a CSV file has no ``evidence_type`` column.
    FileNotFoundError
        If a metric file does not exist.
    """
    # Validate up front: previously an unknown level left `target` unbound and
    # surfaced as an UnboundLocalError in the middle of the loop.
    if level == 'properties':
        target = 'property'
    elif level in ('relations', 'relations-hyp'):
        target = 'relation'
    else:
        raise ValueError(
            f"unknown level {level!r}; expected 'properties', 'relations' or 'relations-hyp'"
        )

    path_dir = f'../analysis/{model_name}/{level}'
    overview_dict = defaultdict(dict)
    for metric in metrics:
        # Relation-level files carry the evidence type in their file name.
        if target == 'relation':
            metric_f = metric + '_' + evidence_type
        else:
            metric_f = metric
        path = f'{path_dir}/{metric_f}.csv'
        # newline='' is what the csv module expects for files it parses itself.
        with open(path, newline='') as infile:
            data = list(csv.DictReader(infile))
        for d in data:
            # Files written with an unnamed index column expose the row label
            # under the empty-string header.
            prop = d[target] if target in d else d['']
            sc = d[evidence_type]
            if sc == '':
                # An empty cell is treated as a score of 0.
                sc = 0
            overview_dict[prop][metric] = float(sc)
            if 'n_pairs' in d:
                overview_dict[prop]['n_pairs'] = float(d['n_pairs'])

    # The dict is keyed by target first, so transpose to get targets as rows.
    return pd.DataFrame(overview_dict).T

# Overview properties

## Giga

In [11]:
# Property-level overview for the giga model, evidence type 'all'.
# Metrics to load; each name corresponds to one CSV file read by get_overview.
metrics = ['proportion', 'diversity', 'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]
level = 'properties'
evidence_type = 'all'
model_name = 'giga_full_updated'
df = get_overview(metrics, level, model_name, evidence_type)
# Re-express the raw scores relative to the 'median' reference row and add the
# 'sum' and 'bin' summary columns.
# NOTE(review): exact semantics live in utils.raw_to_distance, not visible here.
df = utils.raw_to_distance(df, score_names = metrics, reference_name = 'median', 
                            score = 'dist-percent', sum_scores = ['sum', 'bin'])

# Highest 'bin' score first.
df = df.sort_values('bin', ascending=False)
#print(df[3:7].round(4).to_latex())
df

Unnamed: 0,proportion,diversity,coherence,dist-mean,dist-max,str-mean,str-max,sum,bin
median-reference,1.0,798.0,0.102214,0.622362,0.859828,0.003332,0.036232,114.374853,1.0
used_in_cooking,0.0,-0.283208,0.67042,0.203307,0.103085,0.871576,0.68337,0.321221,0.714286
wheels,0.0,-0.694236,0.063788,0.031268,0.036579,0.665159,0.594954,0.099645,0.714286
square,0.0,0.884712,-0.00029,0.038753,0.163024,0.026938,0.757645,0.267254,0.714286
cold,0.0,0.354637,-0.025727,-0.068152,0.044456,0.113811,0.580755,0.142825,0.571429
sweet,0.0,-0.95614,0.783795,0.169767,0.03439,1.167841,-0.251945,0.135387,0.571429
juicy,0.0,0.015038,0.18759,-0.021475,-0.037137,0.320877,0.700845,0.166534,0.571429
blue,0.0,1.799499,-0.095464,0.055059,0.055487,-0.444086,0.00148,0.195997,0.571429
roll,0.0,3.868421,-0.096532,0.043449,0.038384,-0.320745,0.083552,0.516647,0.571429
female,0.0,-0.943609,0.468208,0.115295,-0.01054,0.879962,0.181409,0.098675,0.571429


In [105]:
# Raw (un-normalised) property-level scores for the giga model, restricted to
# the 'prop-specific' evidence type.
metrics = ['proportion', 
           'diversity', 
           'coherence', 
           'dist-mean', 
           'dist-max', 
           'str-mean', 
           'str-max'
          ]
level = 'properties'
evidence_type = 'prop-specific'
model_name = 'giga_full_updated'
df = get_overview(metrics, level, model_name, evidence_type)

# The distance conversion and alternative sortings below are disabled, so the
# table shown is the raw output of get_overview.
#df = utils.raw_to_distance(df, score_names = metrics, reference_name = 'median', 
 #                          score = 'dist-percent', sum_scores = ['sum', 'bin'])
#df['sum'] = df.sum(axis=1)
#df = df.sort_values('sum', ascending=False)
#print(df.round(2).fillna('-').to_latex())
#df.round(2)
# Display sorted by ascending 'str-mean', rounded to four decimals.
df.sort_values('str-mean').round(4)

Unnamed: 0,proportion,diversity,coherence,dist-mean,dist-max,str-mean,str-max
square,0.0,0.0,0.0,0.0,0.0,0.0,0.0
dangerous,0.0206,23.0,0.2389,0.7224,0.8404,0.0034,0.0095
swim,0.0007,1.0,0.0,0.6726,0.6726,0.0035,0.0035
warm,0.0043,9.0,0.2714,0.5767,0.7465,0.004,0.0085
fly,0.0095,9.0,0.3466,0.8334,0.9007,0.0043,0.0084
round,0.0038,2.0,0.1244,0.5884,0.6811,0.0047,0.0083
female,0.0667,3.0,0.5798,0.6333,0.6949,0.0049,0.0078
roll,0.0026,10.0,0.2076,0.6724,0.7864,0.0052,0.0156
wings,0.0125,7.0,0.3916,0.759,0.8531,0.0058,0.0084
cold,0.0056,6.0,0.3368,0.6641,0.7741,0.0068,0.0113


# Both models

In [2]:
from collections import defaultdict
import numpy as np
import csv
import os

# Compare the evidence-type scores of every property with the outcome of the
# diagnostic classification and store the result as a CSV file.
metrics = [
    'proportion',
    'diversity',
    'coherence',
    'dist-mean', 'dist-max',
    'str-mean', 'str-max'
]

level = 'properties'
evidence_types = ['all', 'prop-specific', 'non-specific', 'u']

# model_name = 'wiki_updated'
# model_name_d = 'wiki_corpus'
model_name = 'giga_full_updated'
model_name_d = 'giga_corpus'
# load diagnostic results
path = '../data/diagnostic_classification/selectivity-giga_corpus_wiki_corpus_googlenews.csv'
df_d = pd.read_csv(path, index_col=0)

# Space-separated property lists for the selected model. They do not depend on
# the evidence type, so they are read once instead of once per loop iteration.
learned = df_d[model_name_d]['learned'].split(' ')
not_learned = df_d[model_name_d]['not learned'].split(' ')

model_dict = defaultdict(dict)
new_model_dict = dict()

for evidence_type in evidence_types:
    # 'proportion' is left out for the 'all' evidence type.
    if evidence_type == 'all':
        metrics_new = [m for m in metrics if m != 'proportion']
    else:
        metrics_new = metrics
    df = get_overview(metrics_new, level, model_name, evidence_type)
    # NOTE(review): semantics of the conversion live in utils.raw_to_distance.
    df = utils.raw_to_distance(df, score_names = metrics_new, reference_name = 'median',
                               score = 'dist-percent', sum_scores = ['sum', 'bin'])
    for i, row in df.iterrows():
        model_dict[i][f'{evidence_type}-sum'] = round(row['sum'], 2)
        model_dict[i][f'{evidence_type}-bin'] = round(row['bin'], 2)

for p, d in model_dict.items():
    # Reference rows are not properties.
    if p in ['median', 'median-reference']:
        continue
    if p in learned:
        diag = True
    elif p in not_learned:
        diag = False
    elif p == 'female':
        # 'female' is hard-coded as learned.
        diag = True
    else:
        # Previously `diag` silently kept the value of the preceding property
        # (or was unbound for the first one); fail loudly instead.
        raise ValueError(
            f'property {p!r} is neither in the learned nor in the not-learned '
            f'list of {model_name_d!r}'
        )
    d['cl'] = diag
    # Evidence types whose (rounded) sum is positive and whose bin score
    # exceeds 0.5.
    evidence_types_enough = set()
    for e in evidence_types:
        s = d[f'{e}-sum']
        b = d[f'{e}-bin']
        if s > 0.0 and b > 0.5:
            evidence_types_enough.add(e)
    d['et_enough'] = ' '.join(sorted(evidence_types_enough))
    new_model_dict[p] = d


df = pd.DataFrame(new_model_dict).T.fillna('-')
#print(df.to_latex())
# The comparison file is read back by get_comparison_evidence.
path_dir = '../analysis/diagnostic_classification'
os.makedirs(path_dir, exist_ok=True)
df.to_csv(f'{path_dir}/comparison-{model_name}.csv')

df.sort_values('cl')

Unnamed: 0,all-sum,all-bin,prop-specific-sum,prop-specific-bin,non-specific-sum,non-specific-bin,u-sum,u-bin,cl,et_enough
warm,0.27,0.33,0.14,0.14,0.06,0.29,0.34,0.43,False,
black,-0.0,0.33,-0.22,0.0,-0.27,0.0,0.09,0.43,False,
red,0.15,0.5,0.23,0.67,0.05,0.29,0.13,0.57,False,prop-specific u
dangerous,0.0,0.5,1.42,0.43,0.07,0.57,0.05,0.57,False,non-specific u
round,-0.07,0.33,-0.33,0.0,0.03,0.43,0.01,0.43,False,
cold,0.17,0.67,0.14,0.29,-0.05,0.29,0.26,0.71,False,all u
hot,0.09,0.33,1.15,0.57,0.49,0.57,-0.08,0.29,False,non-specific prop-specific
roll,0.6,0.67,0.25,0.29,0.0,0.14,0.64,0.71,False,all u
green,-0.04,0.33,-0.01,0.67,0.31,0.71,-0.1,0.29,False,non-specific
made_of_wood,-0.15,0.0,0.34,0.71,-0.21,0.0,-0.14,0.29,False,prop-specific


In [3]:
# compare evidence type scores

def get_comparison_evidence(evidence_type, model_name, metrics):
    """Summarise raw property scores split by the diagnostic label 'cl'.

    Reads ``../analysis/diagnostic_classification/comparison-{model_name}.csv``
    for the per-property 'cl' value and loads the raw property-level scores for
    ``evidence_type`` through ``get_overview`` ('proportion' is not requested
    when ``evidence_type`` is 'all'). The 'median' and 'median-reference' rows
    are ignored.

    Returns
    -------
    (df_mean, df_total)
        ``df_mean`` has one row per metric and the columns ('mean', True),
        ('median', True), ('std', True), ('mean', False), ('median', False),
        ('std', False); the boolean tells whether the statistic was computed
        over properties whose 'cl' is not False (True) or is False (False).
        ``df_total`` has one row per property with its raw scores and 'cl'.

    Raises
    ------
    KeyError
        If a property in the overview has no row in the comparison file.
    """
    comparison_dir = '../analysis/diagnostic_classification'
    labels_by_prop = pd.read_csv(
        f'{comparison_dir}/comparison-{model_name}.csv', index_col = 0
    ).to_dict('index')

    if evidence_type == 'all':
        selected_metrics = [m for m in metrics if m != 'proportion']
    else:
        selected_metrics = metrics
    overview = get_overview(selected_metrics, 'properties', model_name, evidence_type)

    rows_enough = dict()
    rows_not_enough = dict()
    rows_total = dict()
    for prop, scores in overview.to_dict('index').items():
        if prop in ['median', 'median-reference']:
            continue
        cl = labels_by_prop[prop]['cl']
        # Explicit comparison with False is kept on purpose: any other value
        # (True, '-', NaN, ...) counts as the "enough" group.
        bucket = rows_not_enough if cl == False else rows_enough
        bucket[prop + '-' + evidence_type] = scores
        rows_total[prop] = {**scores, 'cl': cl}

    df_enough = pd.DataFrame(rows_enough).T.sort_index()
    df_not_enough = pd.DataFrame(rows_not_enough).T.sort_index()

    # Columns are added one by one with tuple keys, True group first.
    df_mean = pd.DataFrame()
    for flag, frame in ((True, df_enough), (False, df_not_enough)):
        df_mean[('mean', flag)] = frame.mean(axis = 0, skipna = True)
        df_mean[('median', flag)] = frame.median(axis = 0, skipna = True)
        df_mean[('std', flag)] = frame.std(axis = 0, skipna = True)

    return df_mean, pd.DataFrame(rows_total).T




In [4]:

# Mean / median / std of the raw scores per diagnostic label ('cl') for the
# wiki model and the 'all-p' evidence type ('coherence' is excluded here).
# NOTE(review): get_comparison_evidence reads comparison-wiki_updated.csv, which
# the comparison cell only writes for the model_name it was last run with.
metrics = [
            'proportion',
            'diversity', 
           #'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


evidence_type = 'all-p' # other values used in this notebook: 'prop-specific', 'non-specific', 'u'
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# Statistics as rows, metrics as columns.
df_mean.round(4).T.sort_index()
#df_total.sort_values('cl')
#df_mean
#print(df_mean.round(3).T.sort_index().to_latex())

Unnamed: 0,Unnamed: 1,proportion,diversity,dist-mean,dist-max,str-mean,str-max
mean,False,0.1271,70.8,0.6088,0.7896,0.007,0.0263
mean,True,0.2134,55.8333,0.7006,0.8639,0.0084,0.0302
median,False,0.0753,70.0,0.591,0.7942,0.0054,0.0284
median,True,0.1842,30.0,0.6945,0.865,0.0073,0.0267
std,False,0.1247,44.594,0.0749,0.0527,0.0043,0.008
std,True,0.1531,70.5161,0.065,0.0668,0.004,0.0146


In [53]:
#df_total.sort_values('cl')

In [86]:

# Mean / median / std of the raw scores per diagnostic label ('cl') for the
# wiki model and the 'prop-specific' evidence type, printed as LaTeX.
metrics = [
            'proportion',
            'diversity', 
           'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

evidence_type = 'prop-specific' # other values used in this notebook: 'non-specific', 'u'
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# NOTE(review): the two bare expressions below are not the last statement of the
# cell, so they display nothing; only the print() produces output.
df_mean.round(4).T.sort_index()
#df_total.sort_values('cl')
df_mean.round(4).T.sort_index()
print(df_mean.round(3).T.sort_index().to_latex())

\begin{tabular}{llrrrrrrr}
\toprule
    &       &  proportion &  diversity &  coherence &  dist-mean &  dist-max &  str-mean &  str-max \\
\midrule
mean & False &       0.015 &      3.900 &      0.159 &      0.683 &     0.740 &     0.010 &    0.015 \\
    & True  &       0.019 &      3.833 &      0.277 &      0.560 &     0.602 &     0.008 &    0.011 \\
median & False &       0.004 &      3.000 &      0.203 &      0.686 &     0.777 &     0.010 &    0.014 \\
    & True  &       0.011 &      4.000 &      0.333 &      0.745 &     0.780 &     0.005 &    0.007 \\
std & False &       0.028 &      3.348 &      0.144 &      0.130 &     0.115 &     0.005 &    0.008 \\
    & True  &       0.026 &      3.326 &      0.228 &      0.341 &     0.367 &     0.010 &    0.012 \\
\bottomrule
\end{tabular}



In [90]:

# Raw per-property scores with their diagnostic label ('cl') for the giga model
# and the 'non-specific' evidence type.
metrics = [
            'proportion',
            'diversity', 
           'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


model_name = 'giga_full_updated'
#model_name = 'wiki_updated'


evidence_type = 'non-specific' # other values used in this notebook: 'prop-specific', 'u'
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# NOTE(review): not the last statement of the cell, so this displays nothing.
df_mean.round(4).T.sort_index()

#print(df_mean.round(3).T.sort_index().to_latex())
# Displayed result: one row per property, sorted by the 'cl' label.
df_total.round(4).sort_values('cl')

Unnamed: 0,proportion,diversity,coherence,dist-mean,dist-max,str-mean,str-max,cl
warm,0.0531561,112,0.138535,0.528577,0.703606,0.00559121,0.0352123,False
black,0.0478677,55,0.13123,0.582553,0.731915,0.00323633,0.0107202,False
red,0.0537634,95,0.139391,0.536143,0.708003,0.00361831,0.0487161,False
dangerous,0.0987433,110,0.133403,0.679184,0.859854,0.00421924,0.0171904,False
round,0.0769231,40,0.217503,0.561775,0.777071,0.00791498,0.0260727,False
cold,0.0619796,67,0.14359,0.564065,0.728242,0.00598912,0.0263585,False
hot,0.261905,22,0.215453,0.622759,0.731384,0.0120889,0.0349191,False
roll,0.0357786,139,0.148745,0.619351,0.793596,0.00380414,0.0186652,False
green,0.16436,95,0.206876,0.585602,0.774663,0.00565863,0.0342444,False
made_of_wood,0.0585242,46,0.136994,0.58646,0.730592,0.0050408,0.0125254,False


In [88]:

# Mean / median / std of the raw scores per diagnostic label ('cl') for the
# wiki model and the 'u' evidence type, printed as LaTeX.
metrics = [
            'proportion',
            'diversity', 
           'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


#model_name = 'giga_full_updated'
model_name = 'wiki_updated'


evidence_type = 'u' # other values used in this notebook: 'prop-specific', 'non-specific'
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# NOTE(review): not the last statement of the cell, so this displays nothing.
df_mean.round(4).T.sort_index()

print(df_mean.round(3).T.sort_index().to_latex())

\begin{tabular}{llrrrrrrr}
\toprule
    &       &  proportion &  diversity &  coherence &  dist-mean &  dist-max &  str-mean &  str-max \\
\midrule
mean & False &       0.873 &   1016.100 &      0.108 &      0.588 &     0.813 &     0.003 &    0.029 \\
    & True  &       0.786 &    346.583 &      0.120 &      0.631 &     0.856 &     0.004 &    0.020 \\
median & False &       0.924 &    871.500 &      0.101 &      0.588 &     0.807 &     0.002 &    0.030 \\
    & True  &       0.816 &    189.000 &      0.113 &      0.630 &     0.851 &     0.003 &    0.019 \\
std & False &       0.125 &    966.329 &      0.012 &      0.061 &     0.081 &     0.001 &    0.016 \\
    & True  &       0.153 &    444.579 &      0.022 &      0.059 &     0.072 &     0.002 &    0.006 \\
\bottomrule
\end{tabular}



# Overview relations

In [34]:
import numpy as np

# Relation-level overview: strict vs. loose ('relations-hyp') relation sets
# side by side for one evidence type.
# Only these metrics are loaded; coherence and the distance metrics are not
# used at the relation level.
metrics = [
    'proportion',
    'str-mean',
    'str-max']

columns = ['proportion', 'str-mean', 'str-max', 'n_pairs']
evidence_type = 'non-specific'
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

# Strict relation sets.
level = 'relations'
df = get_overview(metrics, level, model_name, evidence_type)
df_strict = df[columns].fillna(0.0)

# Loose relation sets.
level = 'relations-hyp'
df = get_overview(metrics, level, model_name, evidence_type)
df_hyp = df[columns].fillna(0.0)

# Relations of the loose table first (original row order), followed by any
# relation that only exists in the strict table. Previously strict-only
# relations were silently dropped.
all_rels = list(df_hyp.index) + [rel for rel in df_strict.index if rel not in df_hyp.index]

dict_total = dict()
for rel in all_rels:
    in_strict = rel in df_strict.index
    in_hyp = rel in df_hyp.index
    row_total = dict()
    for k in columns:
        # A relation missing on one side gets NaN for that side.
        row_total[f'{k}-strict'] = df_strict.loc[rel, k] if in_strict else np.nan
        row_total[f'{k}-loose'] = df_hyp.loc[rel, k] if in_hyp else np.nan
    dict_total[rel] = row_total

# Output column order: all strict columns, then all loose columns.
columns = [f'{k}-strict' for k in columns] + [f'{k}-loose' for k in columns]

df = pd.DataFrame(dict_total).T[columns]
#print(df.round(4).to_latex())
df.round(4)

Unnamed: 0,proportion-strict,str-mean-strict,str-max-strict,n_pairs-strict,proportion-loose,str-mean-loose,str-max-loose,n_pairs-loose
pos,0.1061,0.0065,0.0798,1766.0,0.1061,0.0065,0.0798,1766.0
neg,0.0592,0.0017,0.0211,1186.0,0.0592,0.0017,0.0211,1186.0
all,0.1576,0.0076,0.0927,1021.0,0.1576,0.0076,0.0927,1021.0
some,0.0777,0.0053,0.0645,745.0,0.0777,0.0053,0.0645,745.0
few,0.0592,0.0017,0.0211,1186.0,0.0592,0.0017,0.0211,1186.0
evidence,0.1174,0.0069,0.0805,1054.0,0.1174,0.0069,0.0805,1054.0
no_evidence_pos,0.0757,0.0057,0.0759,342.0,0.0757,0.0057,0.0759,342.0
no_evidence_neg,0.0595,0.0017,0.0199,1082.0,0.0595,0.0017,0.0199,1082.0
implied_category,0.0674,0.006,0.1025,16.0,0.1479,0.007,0.0774,106.0
typical_of_concept,0.217,0.0187,0.1086,4.0,0.0875,0.0055,0.0609,94.0


In [32]:


# Relation-level overview: strict vs. loose ('relations-hyp') relation sets
# side by side for the 'l' evidence type.
# Only these metrics are loaded; coherence and the distance metrics are not
# used at the relation level.
metrics = [
    'proportion',
    'str-mean',
    'str-max']

columns = ['proportion', 'str-mean', 'str-max', 'n_pairs']
evidence_type = 'l'
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

# Strict relation sets.
level = 'relations'
df = get_overview(metrics, level, model_name, evidence_type)
df_strict = df[columns].fillna(0.0)

# Loose relation sets.
level = 'relations-hyp'
df = get_overview(metrics, level, model_name, evidence_type)
df_hyp = df[columns].fillna(0.0)

# Relations of the loose table first (original row order), followed by any
# relation that only exists in the strict table. Previously strict-only
# relations were silently dropped.
all_rels = list(df_hyp.index) + [rel for rel in df_strict.index if rel not in df_hyp.index]

dict_total = dict()
for rel in all_rels:
    in_strict = rel in df_strict.index
    in_hyp = rel in df_hyp.index
    row_total = dict()
    for k in columns:
        # A relation missing on one side gets NaN for that side.
        row_total[f'{k}-strict'] = df_strict.loc[rel, k] if in_strict else np.nan
        row_total[f'{k}-loose'] = df_hyp.loc[rel, k] if in_hyp else np.nan
    dict_total[rel] = row_total

# Output column order: all strict columns, then all loose columns.
columns = [f'{k}-strict' for k in columns] + [f'{k}-loose' for k in columns]

df = pd.DataFrame(dict_total).T[columns]
#print(df.round(4).to_latex())
df.round(4)

#0.0096	0.0075	0.0103	4.0	
#0.0031	0.0051	0.0088	4.0

#0.0338	0.0092	0.0097	4.0	
#0.0026	0.0138	0.0268

Unnamed: 0,proportion-strict,str-mean-strict,str-max-strict,n_pairs-strict,proportion-loose,str-mean-loose,str-max-loose,n_pairs-loose
pos,0.0,0.0,0.0,1766.0,0.0,0.0,0.0,1766.0
neg,0.0,0.0,0.0,1186.0,0.0,0.0,0.0,1186.0
all,0.0,0.0,0.0,1021.0,0.0,0.0,0.0,1021.0
some,0.0,0.0,0.0,745.0,0.0,0.0,0.0,745.0
few,0.0,0.0005,0.0,1186.0,0.0,0.0005,0.0,1186.0
evidence,0.0,0.0292,0.0,1054.0,0.0,0.0292,0.0,1054.0
no_evidence_pos,0.0,0.0,0.0,342.0,0.0,0.0,0.0,342.0
no_evidence_neg,0.0,0.0,0.0,1082.0,0.0,0.0,0.0,1082.0
implied_category,0.0002,0.0086,0.0185,16.0,0.0,0.0,0.0,106.0
typical_of_concept,0.0,0.0,0.0,4.0,0.0,0.0,0.0,94.0


## Properties

In [23]:
import relations, utils
from collections import defaultdict, Counter

def load_all_pairs_with_relation(rel):
    """Return the set of pairs found in every relation file whose name contains ``rel``.

    Files are read from ``../data/relations``. Each non-blank line is split on
    ',' and its first two fields form one pair; additional fields are ignored.
    Blank lines and empty files are skipped.

    Parameters
    ----------
    rel : str
        Relation name; a file is used when this string occurs in its name.

    Returns
    -------
    set of (str, str)
        All (first field, second field) pairs of the matching files.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two comma-separated fields.
    """
    path_dir = '../data/relations'
    all_pairs = set()
    for f in os.listdir(path_dir):
        # NOTE(review): substring match, so e.g. 'variability_limited' also
        # selects 'variability_limited_scalar' files -- confirm this is intended.
        if rel not in f:
            continue
        with open(f'{path_dir}/{f}') as infile:
            lines = infile.read().strip().split('\n')
        for line in lines:
            # An empty file yields [''] and files may contain blank lines;
            # both used to crash with an IndexError.
            if not line.strip():
                continue
            fields = line.split(',')
            all_pairs.add((fields[0], fields[1]))
    return all_pairs


def get_prop_rel_cnts(model_name, mode):
    """Return, per property, the share of positive examples falling under each relation.

    Parameters
    ----------
    model_name : str
        Passed to ``utils.get_examples`` to fetch the positive examples.
    mode : str
        How the (property, concept) pairs are loaded: 'strict'
        (``relations.load_relation_pairs``), 'hyp'
        (``relations.load_relation_pairs_hyp``) or 'all'
        (``load_all_pairs_with_relation``).

    Returns
    -------
    dict
        Maps each property to a ``Counter`` from relation name to
        (number of the property's concepts with that relation that occur in
        its positive examples) / (number of positive examples).

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported modes.
    """
    # Validate up front: an unknown mode used to leave `pairs` unbound.
    if mode not in ('strict', 'hyp', 'all'):
        raise ValueError(f"unknown mode {mode!r}; expected 'strict', 'hyp' or 'all'")

    rels =  ['implied_category', 
             'typical_of_concept', 'typical_of_property', 
             'affording_activity', 'afforded_usual', 'afforded_unusual',
             'variability_limited', 'variability_open',
             'variability_limited_scalar', 'variability_open_scalar',
             #'rare', 'unusual', 'impossible', 'creative'
            ]

    label = 'pos'

    # property -> concept -> relation. When a pair occurs under several
    # relations, the relation listed last in `rels` wins.
    prop_rel_dict = defaultdict(dict)
    for rel in rels:
        if mode == 'strict':
            pairs = relations.load_relation_pairs([rel], order=True)
        elif mode == 'hyp':
            pairs = relations.load_relation_pairs_hyp([rel])
        else:
            pairs = load_all_pairs_with_relation(rel)

        for prop, c in pairs:
            prop_rel_dict[prop][c] = rel

    prop_rel_cnts = dict()
    for prop, c_rel_dict in prop_rel_dict.items():
        examples_pos = utils.get_examples(model_name, prop, label)
        # Only concepts that are among the positive examples are counted.
        rels_prop = [r for c, r in c_rel_dict.items() if c in examples_pos]
        n_examples = len(examples_pos)
        # Build a fresh Counter instead of overwriting values while iterating.
        rel_cnt = Counter()
        for rel, cnt in Counter(rels_prop).items():
            rel_cnt[rel] = cnt/n_examples
        prop_rel_cnts[prop] = rel_cnt
    return prop_rel_cnts
    
    
def get_prop_categories():
    """Read ``../data/property_types.csv`` into a {property: type} dict.

    The first line of the file is treated as a header and skipped; every
    following line must consist of exactly two comma-separated fields.
    """
    with open('../data/property_types.csv') as infile:
        rows = infile.read().strip().split('\n')[1:]
    return {prop: t for prop, t in (row.split(',') for row in rows)}
    
    

In [37]:



# Property-level scores for the wiki model ('prop-specific' evidence), labelled
# with the property type and sorted by the 'sum' score.
metrics = [
    'proportion', 
#     'coherence',
#     'dist-mean',
#     'dist-max', 
    'str-mean', 
    'str-max']


evidence_type = 'prop-specific'
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

mode = 'all'


level = 'properties'
df = get_overview(metrics, level, model_name, evidence_type)
# NOTE(review): semantics of the conversion live in utils.raw_to_distance.
df = utils.raw_to_distance(df, score_names = metrics, reference_name = 'median', 
                           score = 'dist-percent', sum_scores = ['sum', 'bin'])
# prop_rel_cnts is only consumed by the disabled top-relations code below.
prop_rel_cnts = get_prop_rel_cnts(model_name, mode)
prop_type_dict = get_prop_categories()


full_dict = dict()
columns = ['proportion', 'str-mean', 'str-max', 'sum', 'bin']
for prop,  row in df.iterrows():
    # Keep only properties with a known type; 'female' is excluded explicitly.
    if prop in prop_type_dict and prop != 'female':
        #columns = ['proportion', 'str-mean', 'str-max', 'sum']
        t = prop_type_dict[prop]
        # Rows are keyed by (type, property).
        prop_t = (t,  prop)
        # Disabled: add the top-3 relations per property and their shares.
#         rel_cnt = prop_rel_cnts[prop]
#         n_rels = 3
#         n_rels_found = len(rel_cnt)
#         if n_rels > n_rels_found:
#             n_rels = n_rels_found
#         top_rels = rel_cnt.most_common(n_rels)
#         for i in range(n_rels):
#             top_r,  p  = top_rels[i]
#             row[f'top_rel{i}'] = top_r
#             row[f'p{i}']  = p
#             columns.append(f'top_rel{i}')
#             columns.append(f'p{i}')
#         for k,  v in row.items():
#             if type(v) == float:
#                 row[k] = round(v, 4)
      
        full_dict[prop_t] = row
df_full = pd.DataFrame(full_dict)
# Transpose so that each (type, property) pair is a row; largest 'sum' first.
df_full.T.sort_index()[columns].sort_values('sum', ascending=False)


#df.sort_values('str-mean', ascending=False).round(4)

Unnamed: 0,Unnamed: 1,proportion,str-mean,str-max,sum,bin
temperature,hot,15.095267,0.677351,1.222811,5.665143,1.0
function-action,fly,15.82687,-0.49535,-0.608436,4.907695,0.333333
part,wheels,7.814075,0.803044,0.948919,3.188679,1.0
taste,sweet,6.11906,0.902888,0.144135,2.388694,1.0
taste,juicy,0.517177,3.416682,1.655585,1.863148,1.0
material,made_of_wood,2.351831,0.343825,0.701833,1.132497,1.0
function-action,used_in_cooking,1.362922,0.0,0.720362,0.694428,0.666667
part,wings,2.345101,-0.368379,-0.428978,0.515915,0.333333
function-action,roll,-0.468727,0.072591,1.064981,0.222948,0.666667
color,red,-0.854827,1.124502,0.277383,0.182353,0.666667


## Relations and properties - correlation checks

In [75]:
import relations
from collections import Counter

def relation_prop_overview(mode = 'strict'):
    """Return the distribution of properties within each relation.

    Parameters
    ----------
    mode : str
        'strict' loads pairs with ``relations.load_relation_pairs`` and 'hyp'
        with ``relations.load_relation_pairs_hyp``.

    Returns
    -------
    pandas.DataFrame
        One column per relation and one row per property; each cell is the
        fraction of the relation's pairs that involve the property (NaN when
        the property does not occur in the relation).

    Raises
    ------
    ValueError
        If ``mode`` is neither 'strict' nor 'hyp'.
    """
    # Validate up front: an unknown mode used to leave `pairs` unbound.
    if mode not in ('strict', 'hyp'):
        raise ValueError(f"unknown mode {mode!r}; expected 'strict' or 'hyp'")

    rels =  ['pos', 'neg', 'all', 'some', 'few',
                         'evidence', 'no_evidence_pos', 'no_evidence_neg',
                         'implied_category', 
                         'typical_of_concept', 'typical_of_property', 
                         'affording_activity', 'afforded_usual', 'afforded_unusual',
                         'variability_limited', 'variability_open',
                         'variability_limited_scalar', 'variability_open_scalar',
                         'rare', 'unusual', 'impossible', 'creative']

    relation_prop_dict = dict()
    for rel in rels:
        if mode == 'strict':
            pairs = relations.load_relation_pairs([rel], order=True)
        else:
            pairs = relations.load_relation_pairs_hyp([rel])
        # Number of pairs per property (first element of each pair).
        prop_counter = Counter(p for p, c in pairs)
        total = sum(prop_counter.values())
        # Normalise to fractions; a relation without pairs yields an empty dict.
        relation_prop_dict[rel] = {prop: c/total for prop, c in prop_counter.items()}
    return pd.DataFrame(relation_prop_dict)

In [76]:
# Show every column of the wide relation x property tables.
pd.options.display.max_columns = None
# Transpose so that relations are rows and properties are columns; a property
# that never occurs in a relation gets 0.0.
df_hyp = relation_prop_overview(mode = 'hyp').T.fillna(0.0)
df_strict = relation_prop_overview(mode = 'strict').T.fillna(0.0)
#df['sum'] = df.sum(axis=1)
df_strict.round(2)

Unnamed: 0,made_of_wood,lay_eggs,hot,dangerous,used_in_cooking,warm,red,wheels,juicy,round,green,blue,cold,sweet,black,fly,square,swim,wings,roll,yellow
pos,0.06,0.04,0.06,0.04,0.06,0.07,0.05,0.04,0.05,0.06,0.05,0.03,0.04,0.05,0.05,0.04,0.05,0.06,0.05,0.03,0.02
neg,0.04,0.06,0.04,0.05,0.05,0.03,0.06,0.02,0.05,0.02,0.06,0.09,0.02,0.05,0.04,0.09,0.02,0.04,0.07,0.03,0.07
all,0.04,0.07,0.05,0.06,0.08,0.05,0.02,0.07,0.06,0.04,0.03,0.01,0.02,0.05,0.01,0.06,0.03,0.11,0.07,0.06,0.03
some,0.07,0.0,0.07,0.01,0.02,0.1,0.09,0.01,0.05,0.08,0.07,0.06,0.08,0.06,0.09,0.0,0.07,0.0,0.0,0.01,0.04
few,0.03,0.06,0.03,0.05,0.05,0.03,0.05,0.02,0.05,0.02,0.06,0.09,0.02,0.05,0.04,0.08,0.02,0.04,0.07,0.04,0.09
evidence,0.07,0.04,0.07,0.04,0.06,0.09,0.04,0.04,0.06,0.04,0.05,0.02,0.05,0.07,0.05,0.04,0.04,0.04,0.04,0.02,0.02
no_evidence_pos,0.02,0.05,0.01,0.03,0.03,0.02,0.07,0.05,0.03,0.11,0.06,0.08,0.01,0.03,0.07,0.03,0.07,0.1,0.04,0.05,0.03
no_evidence_neg,0.03,0.06,0.03,0.05,0.06,0.03,0.05,0.02,0.06,0.01,0.06,0.09,0.02,0.05,0.04,0.09,0.02,0.04,0.08,0.03,0.08
implied_category,0.0,0.19,0.0,0.06,0.0,0.06,0.0,0.19,0.0,0.19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.12,0.06,0.0
typical_of_concept,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0


In [74]:
# get top 3 per relation

def get_top_props(df):
    """List, per row of ``df``, the columns with the three highest distinct scores.

    Columns sharing exactly the same score are grouped together, so more than
    three names can be returned. Scores that are not greater than 0.0 are left
    out.

    Returns a DataFrame indexed by 'relation' (the row labels of ``df``) with
    two string columns: 'top_prop' (space-separated column names, best score
    first) and 'top_p' (the matching scores rounded to two decimals, one entry
    per name).
    """
    records = []
    for relation, scores in df.iterrows():
        # score -> names of all columns holding exactly that score
        names_by_score = {}
        for name, score in scores.items():
            names_by_score.setdefault(score, []).append(name)

        best_scores = [
            score
            for score in sorted(names_by_score, reverse = True)[:3]
            if score > 0.0
        ]
        grouped_names = [' '.join(names_by_score[score]) for score in best_scores]
        # Repeat each score once for every name that shares it.
        shown_scores = [
            str(round(score, 2))
            for score in best_scores
            for _ in names_by_score[score]
        ]
        records.append({
            'relation': relation,
            'top_prop': ' '.join(grouped_names),
            'top_p': ' '.join(shown_scores),
        })

    return pd.DataFrame(records).set_index('relation')


# Top properties per relation for the strict and the loose relation sets.
df_prop_strict = get_top_props(df_strict)
df_prop_hyp = get_top_props(df_hyp)

# Merge both tables row by row: loose columns first, then strict columns.
dict_total = dict()
for r, row in df_prop_strict.iterrows():
    row_hyp = df_prop_hyp.loc[r]
    row_new = {f'{k}-loose': v for k, v in row_hyp.items()}
    row_new.update({f'{k}-strict': v for k, v in row.items()})
    dict_total[r] = row_new
df_total = pd.DataFrame(dict_total).T


print(df_total.to_latex())

\begin{tabular}{lllll}
\toprule
{} &                           top\_prop-loose &               top\_p-loose &                                    top\_prop-strict &                             top\_p-strict \\
\midrule
pos                        &           warm used\_in\_cooking hot round &       0.07 0.06 0.06 0.06 &                     warm used\_in\_cooking hot round &                      0.07 0.06 0.06 0.06 \\
neg                        &                          blue fly yellow &            0.09 0.09 0.07 &                                    blue fly yellow &                           0.09 0.09 0.07 \\
all                        &               swim used\_in\_cooking wings &            0.11 0.08 0.07 &                         swim used\_in\_cooking wings &                           0.11 0.08 0.07 \\
some                       &                           warm black red &             0.1 0.09 0.09 &                                     warm black red &                            0.1