In [1]:
import utils
import relations

import pandas as pd
import os
import csv

from collections import defaultdict

pd.set_option('display.max_rows', None)


def get_overview(metrics, level, model_name, evidence_type):
    """Collect per-metric scores from the analysis CSV files into one table.

    One CSV is read per metric from ``../analysis/{model_name}/{level}/``.
    For ``level == 'properties'`` the file is ``{metric}.csv``; for
    ``'relations'`` and ``'relations-hyp'`` it is
    ``{metric}_{evidence_type}.csv``.

    Parameters
    ----------
    metrics : iterable of str
        Metric names; each becomes one column of the result.
    level : str
        ``'properties'``, ``'relations'`` or ``'relations-hyp'``.
    model_name : str
        Name of the model directory below ``../analysis``.
    evidence_type : str
        Name of the CSV column holding the score to extract.

    Returns
    -------
    pandas.DataFrame
        One row per property/relation and one column per metric, plus an
        ``n_pairs`` column when the CSV rows carry one.

    Raises
    ------
    ValueError
        If ``level`` is not one of the supported values.
    """
    # Name of the CSV column that identifies the row, per level.
    targets = {
        'properties': 'property',
        'relations': 'relation',
        'relations-hyp': 'relation',
    }
    if level not in targets:
        raise ValueError(
            f'unknown level {level!r}; expected one of {sorted(targets)}')
    target = targets[level]

    path_dir = f'../analysis/{model_name}/{level}'
    overview_dict = defaultdict(dict)
    for metric in metrics:
        # Relation-level files carry the evidence type in the file name.
        if target == 'relation':
            metric_f = metric+'_'+evidence_type
        else:
            metric_f = metric
        path = f'{path_dir}/{metric_f}.csv'
        # newline='' is what the csv module asks for when reading files.
        with open(path, newline='') as infile:
            data = list(csv.DictReader(infile))
        for d in data:
            # Fall back to the unnamed first column when the identifying
            # column has no header.
            prop = d[target] if target in d else d['']
            sc = d[evidence_type]
            if sc == '':
                # An empty cell is counted as a score of 0.
                sc = 0
            overview_dict[prop][metric] = float(sc)
            if 'n_pairs' in d:
                overview_dict[prop]['n_pairs'] = float(d['n_pairs'])

    # Built with properties/relations as columns, so transpose to rows.
    return pd.DataFrame(overview_dict).T

In [10]:
# test
from sklearn.metrics.pairwise import cosine_similarity

In [21]:
# Print every item returned by utils.get_properties(), one per line.
for p in utils.get_properties():
    print(p)

square
warm
black
red
fly
dangerous
wings
sweet
hot
used_in_cooking
juicy
green
made_of_wood
blue
yellow
roll
female
cold
round
wheels
lay_eggs
swim


# Overview properties

## Giga

In [29]:
# Property-level overview for the Giga model, using prop-specific evidence.
metrics = [
    'proportion', 
    #'diversity', 
    'coherence', 
    'dist-mean', 
    'dist-max', 
    'str-mean', 
    'str-max'
          ]
level = 'properties'
evidence_type = 'prop-specific'
model_name = 'giga_full_updated'
# Raw scores: one row per property, one column per metric.
df = get_overview(metrics, level, model_name, evidence_type)
# NOTE(review): raw_to_distance is defined in utils and not visible here; it
# presumably re-expresses each score relative to the 'median' reference and
# adds the 'sum' and 'bin' columns used below -- confirm in utils.
df = utils.raw_to_distance(df, score_names = metrics, reference_name = 'median', 
                            score = 'dist-percent', sum_scores = ['sum', 'bin'])

# Rows with the highest 'bin' value come first.
df = df.sort_values('bin', ascending=False)
#print(df[3:7].round(4).to_latex())
#print(df.round(2).fillna('-').to_latex())
# Display rounded values, with missing values shown as '-'.
df.round(2).fillna('-')

Unnamed: 0,proportion,coherence,dist-mean,dist-max,str-mean,str-max,sum,bin
median-reference,0.01,0.39,0.75,0.79,0.01,0.01,0.33,1.0
sweet,9.74,0.21,0.1,0.12,0.17,0.18,1.75,1.0
used_in_cooking,1.63,-0,0.1,0.17,0.31,0.53,0.46,0.83
lay_eggs,4.08,1.15,0.11,0.06,0.43,-0.06,0.96,0.83
red,-0.89,1.59,0.06,0,2.09,0.73,0.6,0.83
wheels,2.08,0,0.06,0.06,0.48,0.51,0.53,0.83
yellow,2.61,1.59,0.06,-0,0.91,0.07,0.87,0.83
green,-0.67,1.59,0.15,0.08,0.91,0.07,0.36,0.83
juicy,-0.54,0.65,0.06,0,3.3,3.38,1.14,0.67
made_of_wood,-0.04,0.14,0,0.02,0.79,0.73,0.27,0.67


In [24]:
# Same setup as the previous cell, but the raw scores are printed as a LaTeX
# table without the raw_to_distance step (which is commented out below).
metrics = ['proportion', 
           #'diversity', 
           'coherence', 
           'dist-mean', 
           'dist-max', 
           'str-mean', 
           'str-max'
          ]
level = 'properties'
evidence_type = 'prop-specific'
model_name = 'giga_full_updated'
df = get_overview(metrics, level, model_name, evidence_type)

#df = utils.raw_to_distance(df, score_names = metrics, reference_name = 'median', 
 #                          score = 'dist-percent', sum_scores = ['sum', 'bin'])
#df['sum'] = df.sum(axis=1)
#df = df.sort_values('sum', ascending=False)
#print(df.round(2).fillna('-').to_latex())
#df.round(2)
# Rows are ordered by ascending 'str-mean' and rounded to 4 decimals.
print(df.sort_values('str-mean').round(4).to_latex())

\begin{tabular}{lrrrrrr}
\toprule
{} &  proportion &  coherence &  dist-mean &  dist-max &  str-mean &  str-max \\
\midrule
square          &      0.0000 &     0.0000 &     0.0000 &    0.0000 &    0.0000 &   0.0000 \\
dangerous       &      0.0206 &     0.2389 &     0.7224 &    0.8404 &    0.0034 &   0.0095 \\
swim            &      0.0007 &     1.0000 &     0.6726 &    0.6726 &    0.0035 &   0.0035 \\
warm            &      0.0043 &     0.2714 &     0.5767 &    0.7465 &    0.0040 &   0.0085 \\
fly             &      0.0095 &     0.3466 &     0.8334 &    0.9007 &    0.0043 &   0.0084 \\
round           &      0.0038 &     0.1244 &     0.5884 &    0.6811 &    0.0047 &   0.0083 \\
female          &      0.0667 &     0.5798 &     0.6333 &    0.6949 &    0.0049 &   0.0078 \\
roll            &      0.0026 &     0.2076 &     0.6724 &    0.7864 &    0.0052 &   0.0156 \\
wings           &      0.0125 &     0.3916 &     0.7590 &    0.8531 &    0.0058 &   0.0084 \\
cold            &      0.0056 

# Comparison to diagnostic classification

In [36]:
from collections import defaultdict
import numpy as np
import csv
import os

# Compare the evidence scores of each property with the outcome of the
# diagnostic classification ('learned' / 'not learned') and store the result
# as ../analysis/diagnostic_classification/comparison-{model_name}.csv.
metrics = [
    'proportion',
    #'diversity',
    'coherence',
    'dist-mean', 'dist-max',
    'str-mean', 'str-max',
]

level = 'properties'
evidence_types = ['all', 'prop-specific', 'non-specific', 'u']

# model_name = 'wiki_updated'
# model_name_d = 'wiki_corpus'
model_name = 'giga_full_updated'
model_name_d = 'giga_corpus'

# Load the diagnostic results. The 'learned' and 'not learned' cells of the
# model's column hold space-separated property names. They do not depend on
# the evidence type, so they are read once instead of on every loop pass.
path = '../data/diagnostic_classification/selectivity-giga_corpus_wiki_corpus_googlenews.csv'
df_d = pd.read_csv(path, index_col=0)
learned = df_d[model_name_d]['learned'].split(' ')
not_learned = df_d[model_name_d]['not learned'].split(' ')

model_dict = defaultdict(dict)
new_model_dict = dict()

# Collect the 'sum' and 'bin' summary scores per property and evidence type.
for evidence_type in evidence_types:
    if evidence_type == 'all':
        # 'proportion' is not loaded for the 'all' evidence type.
        metrics_new = [m for m in metrics if m != 'proportion']
    else:
        metrics_new = metrics
    df = get_overview(metrics_new, level, model_name, evidence_type)
    df = utils.raw_to_distance(df, score_names = metrics_new, reference_name = 'median',
                               score = 'dist-percent', sum_scores = ['sum', 'bin'])
    for i, row in df.iterrows():
        model_dict[i][f'{evidence_type}-sum'] = round(row['sum'], 2)
        model_dict[i][f'{evidence_type}-bin'] = round(row['bin'], 2)

for p, d in model_dict.items():
    # The reference rows are not properties.
    if p in ['median', 'median-reference']:
        continue
    if p in learned:
        diag = True
    elif p in not_learned:
        diag = False
    elif p == 'female':
        # NOTE(review): 'female' is hard-coded as learned, presumably because
        # it is missing from both lists in the CSV -- confirm.
        diag = True
    else:
        # Previously this fell through and silently reused the label of the
        # preceding property (or raised a NameError on the first one).
        raise ValueError(
            f'property {p!r} is in neither the learned nor the not-learned '
            f'list of {model_name_d!r}')
    d['cl'] = diag
    # An evidence type counts as 'enough' when its summed score is positive
    # and its 'bin' score is above 0.5.
    evidence_types_enough = set()
    for e in evidence_types:
        s = d[f'{e}-sum']
        b = d[f'{e}-bin']
        if s > 0.0 and b > 0.5:
            evidence_types_enough.add(e)
    d['et_enough'] = ' '.join(sorted(evidence_types_enough))
    new_model_dict[p] = d


df = pd.DataFrame(new_model_dict).T.fillna('-')
# print(df.to_latex())
path_dir = '../analysis/diagnostic_classification'
os.makedirs(path_dir, exist_ok=True)
df.to_csv(f'{path_dir}/comparison-{model_name}.csv')

df = df.sort_values('cl')
#print(df.to_latex())
df

Unnamed: 0,all-sum,all-bin,prop-specific-sum,prop-specific-bin,non-specific-sum,non-specific-bin,u-sum,u-bin,cl,et_enough
warm,-0.0,0.2,-0.28,0.0,-0.08,0.17,0.11,0.33,False,
black,-0.09,0.2,-0.23,0.0,-0.3,0.0,0.02,0.33,False,
red,-0.06,0.4,0.6,0.83,-0.05,0.17,-0.06,0.5,False,prop-specific
dangerous,-0.08,0.4,0.27,0.33,-0.06,0.5,0.0,0.5,False,
round,-0.01,0.4,-0.35,0.0,0.09,0.5,0.07,0.5,False,
cold,0.13,0.6,-0.09,0.17,-0.08,0.17,0.23,0.67,False,all u
hot,0.29,0.4,1.3,0.5,0.68,0.67,0.06,0.33,False,non-specific
roll,-0.05,0.6,-0.22,0.17,-0.22,0.0,0.05,0.67,False,u
green,0.0,0.4,0.36,0.83,0.26,0.67,-0.06,0.33,False,non-specific prop-specific
made_of_wood,-0.18,0.0,0.27,0.67,-0.2,0.0,-0.16,0.17,False,prop-specific


In [3]:
# compare evidence type scores

def get_comparison_evidence(evidence_type, model_name, metrics):
    """Summarise raw evidence scores by diagnostic-classification outcome.

    Reads the ``cl`` label of every property from
    ``../analysis/diagnostic_classification/comparison-{model_name}.csv`` and
    the raw property-level scores via ``get_overview``. The ``median`` and
    ``median-reference`` rows are ignored. For the ``'all'`` evidence type
    the ``'proportion'`` metric is left out.

    Returns
    -------
    (df_mean, df_total)
        ``df_mean`` has one row per metric and columns keyed
        ``(statistic, flag)`` with statistic in mean/median/std; flag is
        ``False`` for properties whose ``cl`` equals False and ``True`` for
        all others. ``df_total`` has one row per property with its raw
        scores and its ``cl`` label.
    """
    comparison_dir = '../analysis/diagnostic_classification'
    comparison = pd.read_csv(f'{comparison_dir}/comparison-{model_name}.csv', index_col = 0)
    labels_by_prop = comparison.to_dict('index')

    if evidence_type == 'all':
        used_metrics = [m for m in metrics if m != 'proportion']
    else:
        used_metrics = metrics
    overview = get_overview(used_metrics, 'properties', model_name, evidence_type)
    scores_by_prop = overview.to_dict('index')

    rows_enough = dict()
    rows_not_enough = dict()
    rows_total = dict()
    reference_rows = ['median', 'median-reference']

    for prop, scores in scores_by_prop.items():
        if prop in reference_rows:
            continue
        cl = labels_by_prop[prop]['cl']
        row_key = prop+'-'+evidence_type
        if cl == False:
            rows_not_enough[row_key] = scores
        else:
            rows_enough[row_key] = scores
        # Copy the scores so that adding 'cl' does not touch the rows above.
        combined = dict()
        combined.update(scores)
        combined['cl'] = cl
        rows_total[prop] = combined

    df_enough = pd.DataFrame(rows_enough).T.sort_index()
    df_not_enough = pd.DataFrame(rows_not_enough).T.sort_index()

    # Columns are added one by one, the True group first and then the False
    # group, each in the order mean, median, std.
    df_mean = pd.DataFrame()
    for flag, frame in ((True, df_enough), (False, df_not_enough)):
        for statistic in ('mean', 'median', 'std'):
            df_mean[(statistic, flag)] = getattr(frame, statistic)(axis = 0)

    return df_mean, pd.DataFrame(rows_total).T




In [4]:

# Mean/median/std of the raw scores for the 'all-p' evidence type (wiki
# model), split by the diagnostic-classification label.
metrics = [
            'proportion',
            'diversity', 
           #'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


# get_comparison_evidence only drops 'proportion' when the evidence type is
# exactly 'all', so for 'all-p' it is kept.
evidence_type = 'all-p' #'prop-specific'#, 'non-specific', 'u']
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# One row per (statistic, label) pair, one column per metric.
df_mean.round(4).T.sort_index()
#df_total.sort_values('cl')
#df_mean
#print(df_mean.round(3).T.sort_index().to_latex())

Unnamed: 0,Unnamed: 1,proportion,diversity,dist-mean,dist-max,str-mean,str-max
mean,False,0.1271,70.8,0.6088,0.7896,0.007,0.0263
mean,True,0.2134,55.8333,0.7006,0.8639,0.0084,0.0302
median,False,0.0753,70.0,0.591,0.7942,0.0054,0.0284
median,True,0.1842,30.0,0.6945,0.865,0.0073,0.0267
std,False,0.1247,44.594,0.0749,0.0527,0.0043,0.008
std,True,0.1531,70.5161,0.065,0.0668,0.004,0.0146


In [53]:
#df_total.sort_values('cl')

In [86]:

# Mean/median/std of the raw scores for the 'prop-specific' evidence type
# (wiki model), split by the diagnostic-classification label.
metrics = [
            'proportion',
            'diversity', 
           'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

evidence_type = 'prop-specific'#, 'non-specific', 'u']
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# NOTE: the two bare expressions below are not displayed, because neither is
# the last statement of the cell; only the LaTeX table is printed.
df_mean.round(4).T.sort_index()
#df_total.sort_values('cl')
df_mean.round(4).T.sort_index()
print(df_mean.round(3).T.sort_index().to_latex())

\begin{tabular}{llrrrrrrr}
\toprule
    &       &  proportion &  diversity &  coherence &  dist-mean &  dist-max &  str-mean &  str-max \\
\midrule
mean & False &       0.015 &      3.900 &      0.159 &      0.683 &     0.740 &     0.010 &    0.015 \\
    & True  &       0.019 &      3.833 &      0.277 &      0.560 &     0.602 &     0.008 &    0.011 \\
median & False &       0.004 &      3.000 &      0.203 &      0.686 &     0.777 &     0.010 &    0.014 \\
    & True  &       0.011 &      4.000 &      0.333 &      0.745 &     0.780 &     0.005 &    0.007 \\
std & False &       0.028 &      3.348 &      0.144 &      0.130 &     0.115 &     0.005 &    0.008 \\
    & True  &       0.026 &      3.326 &      0.228 &      0.341 &     0.367 &     0.010 &    0.012 \\
\bottomrule
\end{tabular}



In [90]:

# Raw scores per property for the 'non-specific' evidence type (giga model),
# ordered by the diagnostic-classification label.
metrics = [
            'proportion',
            'diversity', 
           'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


model_name = 'giga_full_updated'
#model_name = 'wiki_updated'


evidence_type = 'non-specific'#, 'non-specific', 'u']
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# NOTE: not displayed, because it is not the last statement of the cell.
df_mean.round(4).T.sort_index()

#print(df_mean.round(3).T.sort_index().to_latex())
# Displayed result: one row per property, rows with cl == False first.
df_total.round(4).sort_values('cl')

Unnamed: 0,proportion,diversity,coherence,dist-mean,dist-max,str-mean,str-max,cl
warm,0.0531561,112,0.138535,0.528577,0.703606,0.00559121,0.0352123,False
black,0.0478677,55,0.13123,0.582553,0.731915,0.00323633,0.0107202,False
red,0.0537634,95,0.139391,0.536143,0.708003,0.00361831,0.0487161,False
dangerous,0.0987433,110,0.133403,0.679184,0.859854,0.00421924,0.0171904,False
round,0.0769231,40,0.217503,0.561775,0.777071,0.00791498,0.0260727,False
cold,0.0619796,67,0.14359,0.564065,0.728242,0.00598912,0.0263585,False
hot,0.261905,22,0.215453,0.622759,0.731384,0.0120889,0.0349191,False
roll,0.0357786,139,0.148745,0.619351,0.793596,0.00380414,0.0186652,False
green,0.16436,95,0.206876,0.585602,0.774663,0.00565863,0.0342444,False
made_of_wood,0.0585242,46,0.136994,0.58646,0.730592,0.0050408,0.0125254,False


In [88]:

# Mean/median/std of the raw scores for the 'u' evidence type (wiki model),
# split by the diagnostic-classification label.
metrics = [
            'proportion',
            'diversity', 
           'coherence', 
           'dist-mean', 'dist-max', 
           'str-mean', 'str-max'
          ]


#model_name = 'giga_full_updated'
model_name = 'wiki_updated'


evidence_type = 'u'#, 'non-specific', 'u']
df_mean, df_total = get_comparison_evidence(evidence_type, model_name, metrics)
# NOTE: not displayed, because it is not the last statement of the cell.
df_mean.round(4).T.sort_index()

print(df_mean.round(3).T.sort_index().to_latex())

\begin{tabular}{llrrrrrrr}
\toprule
    &       &  proportion &  diversity &  coherence &  dist-mean &  dist-max &  str-mean &  str-max \\
\midrule
mean & False &       0.873 &   1016.100 &      0.108 &      0.588 &     0.813 &     0.003 &    0.029 \\
    & True  &       0.786 &    346.583 &      0.120 &      0.631 &     0.856 &     0.004 &    0.020 \\
median & False &       0.924 &    871.500 &      0.101 &      0.588 &     0.807 &     0.002 &    0.030 \\
    & True  &       0.816 &    189.000 &      0.113 &      0.630 &     0.851 &     0.003 &    0.019 \\
std & False &       0.125 &    966.329 &      0.012 &      0.061 &     0.081 &     0.001 &    0.016 \\
    & True  &       0.153 &    444.579 &      0.022 &      0.059 &     0.072 &     0.002 &    0.006 \\
\bottomrule
\end{tabular}



# Overview relations

In [59]:
import numpy as np

# Relation-level overview: scores under the strict relation definition
# ('relations') next to those under the loose one ('relations-hyp').
metrics = [
    'proportion',
#     'coherence',
#     'dist-mean',
#     'dist-max',
    'str-mean',
    'str-max']

columns = ['proportion', 'str-mean', 'str-max', 'n_pairs']
evidence_type = 'prop-specific'
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

level = 'relations'
df = get_overview(metrics, level, model_name, evidence_type)
df_strict = df[columns].fillna(0.0)

level = 'relations-hyp'
df = get_overview(metrics, level, model_name, evidence_type)
df_hyp = df[columns].fillna(0.0)

# The loose table determines which relations are shown.
all_rels = df_hyp.index

# Left join on the relation name: every loose row is kept, and relations
# that are missing from the strict table get NaN in the '-strict' columns.
# This replaces a manual row-by-row merge whose "relation missing from
# df_hyp" branch could never run, since the loop iterated over df_hyp.index.
df = df_hyp.add_suffix('-loose').join(df_strict.add_suffix('-strict'), how='left')

columns = ['proportion-strict', 'str-mean-strict', 'str-max-strict', 'n_pairs-strict',
           'proportion-loose', 'str-mean-loose', 'str-max-loose', 'n_pairs-loose']

df = df[columns]
#print(df.round(4).to_latex())
df.round(4)

Unnamed: 0,proportion-strict,str-mean-strict,str-max-strict,n_pairs-strict,proportion-loose,str-mean-loose,str-max-loose,n_pairs-loose
pos,0.0043,0.0052,0.0114,1766.0,0.0043,0.0052,0.0114,1766.0
neg,0.0,0.0013,0.003,1186.0,0.0,0.0013,0.003,1186.0
all,0.0084,0.0056,0.0135,1021.0,0.0084,0.0056,0.0135,1021.0
some,0.0029,0.005,0.0091,745.0,0.0029,0.005,0.0091,745.0
few,0.0,0.0013,0.003,1186.0,0.0,0.0013,0.003,1186.0
evidence,0.0055,0.0053,0.0125,1054.0,0.0055,0.0053,0.0125,1054.0
no_evidence_pos,0.0018,0.0052,0.0106,342.0,0.0018,0.0052,0.0106,342.0
no_evidence_neg,0.0,0.0012,0.0027,1082.0,0.0,0.0012,0.0027,1082.0
implied_category,0.0022,0.0037,0.0043,16.0,0.0045,0.0035,0.0085,106.0
typical_of_concept,0.0353,0.0042,0.0051,4.0,0.0062,0.0096,0.0193,94.0


In [60]:


# Relation-level overview: scores under the strict relation definition
# ('relations') next to those under the loose one ('relations-hyp').
# NOTE(review): this cell repeats the previous overview cell with the same
# settings; consider keeping only one of them.
metrics = [
    'proportion',
#     'coherence',
#     'dist-mean',
#     'dist-max',
    'str-mean',
    'str-max']

columns = ['proportion', 'str-mean', 'str-max', 'n_pairs']
evidence_type = 'prop-specific'
#model_name = 'giga_full_updated'
model_name = 'wiki_updated'

level = 'relations'
df = get_overview(metrics, level, model_name, evidence_type)
df_strict = df[columns].fillna(0.0)

level = 'relations-hyp'
df = get_overview(metrics, level, model_name, evidence_type)
df_hyp = df[columns].fillna(0.0)

# The loose table determines which relations are shown.
all_rels = df_hyp.index

# Left join on the relation name: every loose row is kept, and relations
# that are missing from the strict table get NaN in the '-strict' columns.
# This replaces a manual row-by-row merge whose "relation missing from
# df_hyp" branch could never run, since the loop iterated over df_hyp.index.
df = df_hyp.add_suffix('-loose').join(df_strict.add_suffix('-strict'), how='left')

columns = ['proportion-strict', 'str-mean-strict', 'str-max-strict', 'n_pairs-strict',
           'proportion-loose', 'str-mean-loose', 'str-max-loose', 'n_pairs-loose']

df = df[columns]
#print(df.round(4).to_latex())
df.round(4)

# NOTE(review): the values below look like numbers noted down from earlier
# runs; their origin is not recorded here.
#0.0096	0.0075	0.0103	4.0	
#0.0031	0.0051	0.0088	4.0

#0.0338	0.0092	0.0097	4.0	
#0.0026	0.0138	0.0268

Unnamed: 0,proportion-strict,str-mean-strict,str-max-strict,n_pairs-strict,proportion-loose,str-mean-loose,str-max-loose,n_pairs-loose
pos,0.0043,0.0052,0.0114,1766.0,0.0043,0.0052,0.0114,1766.0
neg,0.0,0.0013,0.003,1186.0,0.0,0.0013,0.003,1186.0
all,0.0084,0.0056,0.0135,1021.0,0.0084,0.0056,0.0135,1021.0
some,0.0029,0.005,0.0091,745.0,0.0029,0.005,0.0091,745.0
few,0.0,0.0013,0.003,1186.0,0.0,0.0013,0.003,1186.0
evidence,0.0055,0.0053,0.0125,1054.0,0.0055,0.0053,0.0125,1054.0
no_evidence_pos,0.0018,0.0052,0.0106,342.0,0.0018,0.0052,0.0106,342.0
no_evidence_neg,0.0,0.0012,0.0027,1082.0,0.0,0.0012,0.0027,1082.0
implied_category,0.0022,0.0037,0.0043,16.0,0.0045,0.0035,0.0085,106.0
typical_of_concept,0.0353,0.0042,0.0051,4.0,0.0062,0.0096,0.0193,94.0


## Properties

In [2]:
import relations, utils
from collections import defaultdict, Counter

def load_all_pairs_with_relation(rel, path_dir='../data/relations'):
    """Collect the pairs from every relation file whose name contains ``rel``.

    Each matching file is read as comma-separated lines; the first two
    fields of a line form one pair. Blank lines and lines with fewer than
    two fields are skipped, so empty files contribute nothing.

    Parameters
    ----------
    rel : str
        Relation name to look for in the file names.
    path_dir : str, optional
        Directory holding the relation files.

    Returns
    -------
    set of (str, str)
        All distinct (first field, second field) pairs that were found.
    """
    all_pairs = set()
    for f in os.listdir(path_dir):
        # NOTE(review): this is a substring match, so e.g. 'variability_open'
        # also selects files named after 'variability_open_scalar' -- confirm
        # that this is intended.
        if rel not in f:
            continue
        with open(f'{path_dir}/{f}') as infile:
            lines = infile.read().strip().split('\n')
        for line in lines:
            fields = line.split(',')
            if len(fields) < 2:
                # Blank or malformed line: no pair to extract.
                continue
            all_pairs.add((fields[0], fields[1]))
    return all_pairs


def get_prop_rel_cnts(model_name, mode):
    """Per property, the share of positive examples that falls under each relation.

    Parameters
    ----------
    model_name : str
        Passed on to ``utils.get_examples``.
    mode : str
        How the pairs are loaded: ``'strict'``
        (``relations.load_relation_pairs``), ``'hyp'``
        (``relations.load_relation_pairs_hyp``) or ``'all'``
        (``load_all_pairs_with_relation``).

    Returns
    -------
    dict
        Maps each property to a ``Counter`` from relation name to the number
        of its concepts found among the positive examples, divided by the
        number of positive examples.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """
    if mode not in ('strict', 'hyp', 'all'):
        raise ValueError(
            f"unknown mode {mode!r}; expected 'strict', 'hyp' or 'all'")

    rels =  ['implied_category',
             'typical_of_concept', 'typical_of_property',
             'affording_activity', 'afforded_usual', 'afforded_unusual',
             'variability_limited', 'variability_open',
             'variability_limited_scalar', 'variability_open_scalar',
             #'rare', 'unusual', 'impossible', 'creative'
            ]

    label = 'pos'

    # property -> {concept: relation}. When a pair occurs under several
    # relations, the relation listed last in `rels` is the one kept.
    prop_rel_dict = defaultdict(dict)
    for rel in rels:
        if mode == 'strict':
            pairs = relations.load_relation_pairs([rel], order=True)
        elif mode == 'hyp':
            pairs = relations.load_relation_pairs_hyp([rel])
        else:
            pairs = load_all_pairs_with_relation(rel)

        for prop, c in pairs:
            prop_rel_dict[prop][c] = rel

    prop_rel_cnts = dict()
    for prop, c_rel_dict in prop_rel_dict.items():
        examples_pos = utils.get_examples(model_name, prop, label)
        # Only concepts that occur among the positive examples are counted.
        rel_cnt = Counter(
            rel for c, rel in c_rel_dict.items() if c in examples_pos)
        n_examples = len(examples_pos)
        # Convert counts to proportions; the Counter is empty whenever there
        # are no examples, so no division by zero can occur here.
        for rel, cnt in list(rel_cnt.items()):
            rel_cnt[rel] = cnt/n_examples
        prop_rel_cnts[prop] = rel_cnt
    return prop_rel_cnts
    
    
def get_prop_categories(path='../data/property_types.csv'):
    """Read the property -> property-type mapping from a two-column CSV file.

    The first line is treated as a header and skipped, and blank lines are
    ignored. Every other line must consist of exactly two comma-separated
    fields: the property and its type.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file.

    Returns
    -------
    dict
        Maps each property name to its type.
    """
    prop_type_dict = dict()
    with open(path) as infile:
        lines = infile.read().strip().split('\n')
    for line in lines[1:]:
        if not line.strip():
            # A blank line inside the file would otherwise break the unpacking.
            continue
        prop, t = line.split(',')
        prop_type_dict[prop] = t
    return prop_type_dict
    
    

In [49]:



# Raw property-level scores of both models side by side, grouped by
# property type.
metrics = [
    'proportion',
    'diversity',
#     'coherence',
#     'dist-mean',
#     'dist-max',
    'str-mean',
    'str-max']


evidence_type = 'prop-specific'
model_names = ['giga_full_updated', 'wiki_updated']
#model_name = 'wiki_updated'


level = 'properties'

#prop_rel_cnts = get_prop_rel_cnts(model_name, mode)
prop_type_dict = get_prop_categories()


# (property type, property) -> {'<metric>-<model name>': score}
full_dict = defaultdict(dict)
columns = ['proportion', 'diversity', 'str-mean', 'str-max']#, 'sum', 'bin']
columns_new = []
for model_name in model_names:
    columns_new.extend(f'{c}-{model_name}' for c in columns)
    df = get_overview(metrics, level, model_name, evidence_type)
#     df = utils.raw_to_distance(df, score_names = metrics, reference_name = 'median',
#                            score = 'dist-percent', sum_scores = ['sum', 'bin'])
    for prop, row in df.iterrows():
        # NOTE(review): 'female' is left out on purpose here; the reason is
        # not recorded in this notebook.
        if prop in prop_type_dict and prop != 'female':
            prop_t = (prop_type_dict[prop], prop)
            # Build the model-suffixed entries in a fresh dict. The row from
            # iterrows() is no longer modified while being iterated, and no
            # un-suffixed columns (which held only the last model's values)
            # end up in df_full.
            full_dict[prop_t].update(
                {f'{k}-{model_name}': v for k, v in row.items()})
df_full = pd.DataFrame(full_dict)
df = df_full.T.sort_index()[columns_new].round(3)#.sort_values('proportion', ascending=False)

#print(df.to_latex())
df

Unnamed: 0,Unnamed: 1,proportion-giga_full_updated,diversity-giga_full_updated,str-mean-giga_full_updated,str-max-giga_full_updated,proportion-wiki_updated,diversity-wiki_updated,str-mean-wiki_updated,str-max-wiki_updated
color,black,0.002,2.0,0.008,0.014,0.005,4.0,0.007,0.017
color,blue,0.001,2.0,0.008,0.013,0.001,1.0,0.013,0.013
color,green,0.002,1.0,0.015,0.015,0.002,1.0,0.011,0.011
color,red,0.001,1.0,0.024,0.024,0.001,1.0,0.017,0.017
color,yellow,0.019,1.0,0.015,0.015,0.0,0.0,0.0,0.0
encyclopedic,dangerous,0.021,23.0,0.003,0.009,0.01,6.0,0.005,0.008
function-action,fly,0.009,9.0,0.004,0.008,0.091,5.0,0.004,0.005
function-action,roll,0.003,10.0,0.005,0.016,0.003,10.0,0.009,0.027
function-action,swim,0.001,1.0,0.003,0.003,0.001,2.0,0.002,0.003
function-action,used_in_cooking,0.014,8.0,0.01,0.022,0.013,9.0,0.008,0.023


## Relations and properties - correlation checks

In [43]:
import relations
from collections import Counter

def relation_prop_overview(mode = 'strict'):
    """Show how the pairs of each relation are distributed over properties.

    Parameters
    ----------
    mode : str, optional
        ``'strict'`` loads pairs with ``relations.load_relation_pairs``;
        ``'hyp'`` loads them with ``relations.load_relation_pairs_hyp``.

    Returns
    -------
    pandas.DataFrame
        One column per relation and one row per property. Each cell is the
        fraction of that relation's pairs which involve the property (NaN
        when the property does not occur with the relation).

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'strict'`` nor ``'hyp'``.
    """
    if mode not in ('strict', 'hyp'):
        raise ValueError(f"unknown mode {mode!r}; expected 'strict' or 'hyp'")

    rels =  ['pos', 'neg', 'all', 'some', 'few',
                         'evidence', 'no_evidence_pos', 'no_evidence_neg',
                         'implied_category',
                         'typical_of_concept', 'typical_of_property',
                         'affording_activity', 'afforded_usual', 'afforded_unusual',
                         'variability_limited', 'variability_open',
                         'variability_limited_scalar', 'variability_open_scalar',
                         'rare', 'unusual', 'impossible', 'creative']

    relation_prop_dict = dict()
    for rel in rels:
        if mode == 'strict':
            pairs = relations.load_relation_pairs([rel], order=True)
        else:
            pairs = relations.load_relation_pairs_hyp([rel])
        # The first element of each pair is the property.
        prop_counter = Counter(p for p, _ in pairs)
        total = sum(prop_counter.values())
        # With no pairs the counter is empty, so nothing is divided by zero.
        relation_prop_dict[rel] = {
            prop: cnt/total for prop, cnt in prop_counter.items()}
    df = pd.DataFrame(relation_prop_dict)#.set_index('relation')
    return df

In [44]:
# Show all columns when a frame is displayed.
pd.options.display.max_columns = None
# One row per relation, one column per property; a property that does not
# occur with a relation gets 0.0.
df_hyp = relation_prop_overview(mode = 'hyp').T.fillna(0.0)
df_strict = relation_prop_overview(mode = 'strict').T.fillna(0.0)
#df['sum'] = df.sum(axis=1)
df_strict.round(2)

Unnamed: 0,swim,used_in_cooking,juicy,roll,wheels,green,made_of_wood,warm,wings,fly,dangerous,lay_eggs,black,yellow,round,sweet,square,red,blue,cold,hot
pos,0.06,0.06,0.05,0.03,0.04,0.05,0.06,0.07,0.05,0.04,0.04,0.04,0.05,0.02,0.06,0.05,0.05,0.05,0.03,0.04,0.06
neg,0.04,0.05,0.05,0.03,0.02,0.06,0.04,0.03,0.07,0.09,0.05,0.06,0.04,0.07,0.02,0.05,0.02,0.06,0.09,0.02,0.04
all,0.11,0.08,0.06,0.06,0.07,0.03,0.04,0.05,0.07,0.06,0.06,0.07,0.01,0.03,0.04,0.05,0.03,0.02,0.01,0.02,0.05
some,0.0,0.02,0.05,0.01,0.01,0.07,0.07,0.1,0.0,0.0,0.01,0.0,0.09,0.04,0.08,0.06,0.07,0.09,0.06,0.08,0.07
few,0.04,0.05,0.05,0.04,0.02,0.06,0.03,0.03,0.07,0.08,0.05,0.06,0.04,0.09,0.02,0.05,0.02,0.05,0.09,0.02,0.03
evidence,0.04,0.06,0.06,0.02,0.04,0.05,0.07,0.09,0.04,0.04,0.04,0.04,0.05,0.02,0.04,0.07,0.04,0.04,0.02,0.05,0.07
no_evidence_pos,0.1,0.03,0.03,0.05,0.05,0.06,0.02,0.02,0.04,0.03,0.03,0.05,0.07,0.03,0.11,0.03,0.07,0.07,0.08,0.01,0.01
no_evidence_neg,0.04,0.06,0.06,0.03,0.02,0.06,0.03,0.03,0.08,0.09,0.05,0.06,0.04,0.08,0.01,0.05,0.02,0.05,0.09,0.02,0.03
implied_category,0.12,0.0,0.0,0.06,0.19,0.0,0.0,0.06,0.12,0.0,0.06,0.19,0.0,0.0,0.19,0.0,0.0,0.0,0.0,0.0,0.0
typical_of_concept,0.0,0.0,0.25,0.0,0.25,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [47]:
# get top 3 per relation

def get_top_props(df):
    """List, per row of ``df``, the columns holding the three highest values.

    Columns that share a value are kept together, so a row can list more
    than three columns. Values that are not greater than 0 are never listed.

    Returns a DataFrame indexed by ``relation`` (the row labels of ``df``)
    with two string columns: ``top_prop``, the space-separated column names
    in descending order of value, and ``top_p``, the matching values rounded
    to two decimals, one entry per listed name.
    """
    records = []
    for relation, scores in df.iterrows():
        # Group the column names by their value in this row.
        props_by_score = {}
        for prop_name, score in scores.items():
            props_by_score.setdefault(score, []).append(prop_name)

        # The three largest distinct values, restricted to positive ones.
        ranked = sorted(props_by_score, reverse = True)[:3]
        best = [score for score in ranked if score > 0.0]

        names = [' '.join(props_by_score[score]) for score in best]
        # Repeat each value once per name that carries it.
        shown = [str(round(score, 2))
                 for score in best
                 for _ in props_by_score[score]]

        records.append({
            'relation': relation,
            'top_prop': ' '.join(names),
            'top_p': ' '.join(shown),
        })

    return pd.DataFrame(records).set_index('relation')


# Top properties per relation under the strict and the loose definition.
df_prop_strict = get_top_props(df_strict)
df_prop_hyp = get_top_props(df_hyp)

# Merge both tables into one row per relation: the loose columns get the
# suffix '-loose', the strict ones '-strict'.
dict_total = dict()
for r, row in df_prop_strict.iterrows():
    # Raises a KeyError when a strict relation is missing from the loose table.
    row_hyp = df_prop_hyp.loc[r]
    row_new = dict()
    for k, v in row_hyp.items():
        k_new = f'{k}-loose'
        row_new[k_new] = v
    for k, v in row.items():
        k_new = f'{k}-strict'
        row_new[k_new] = v
    dict_total[r] = row_new
# Built with relations as columns, so transpose to one row per relation.
df_total = pd.DataFrame(dict_total).T


#print(df_total.to_latex())
df_total

Unnamed: 0,top_prop-loose,top_p-loose,top_prop-strict,top_p-strict
pos,warm used_in_cooking round hot,0.07 0.06 0.06 0.06,warm used_in_cooking round hot,0.07 0.06 0.06 0.06
neg,blue fly yellow,0.09 0.09 0.07,blue fly yellow,0.09 0.09 0.07
all,swim used_in_cooking wings,0.11 0.08 0.07,swim used_in_cooking wings,0.11 0.08 0.07
some,warm black red,0.1 0.09 0.09,warm black red,0.1 0.09 0.09
few,blue yellow fly,0.09 0.09 0.08,blue yellow fly,0.09 0.09 0.08
evidence,warm hot made_of_wood,0.09 0.07 0.07,warm hot made_of_wood,0.09 0.07 0.07
no_evidence_pos,round swim blue,0.11 0.1 0.08,round swim blue,0.11 0.1 0.08
no_evidence_neg,fly blue yellow wings,0.09 0.09 0.08 0.08,fly blue yellow wings,0.09 0.09 0.08 0.08
implied_category,lay_eggs round roll,0.17 0.15 0.11,wheels lay_eggs round swim wings roll warm dan...,0.19 0.19 0.19 0.12 0.12 0.06 0.06 0.06
typical_of_concept,green made_of_wood yellow red dangerous,0.14 0.09 0.09 0.09 0.08,juicy wheels made_of_wood fly,0.25 0.25 0.25 0.25


## Genre

In [63]:
import numpy as np
# Strict vs. loose relation scores for a selection of relations, for both
# models, printed as one LaTeX table.
metrics = [
    'proportion',
#     'coherence',
#     'dist-mean',
#     'dist-max',
    'str-mean',
    'str-max']

columns = ['proportion', 'str-mean', 'str-max', 'n_pairs']
evidence_type = 'prop-specific'

# Named `relation_names` rather than `relations`: assigning this list to
# `relations` would overwrite the imported `relations` module and break
# every later call such as relations.load_relation_pairs.
relation_names = [
                        #'pos', 'neg', 'all', 'some', 'few',
                        # 'evidence', 'no_evidence_pos', 'no_evidence_neg',
                         'implied_category',
                         #'typical_of_concept', 'typical_of_property',
                         'affording_activity', 'afforded_usual', 'afforded_unusual',
                         #'variability_limited', 'variability_open',
                         #'variability_limited_scalar', 'variability_open_scalar',
                         #'rare', 'unusual', 'impossible', 'creative'
]

model_names = ['giga_full_updated', 'wiki_updated']
dict_total = dict()
for model_name in model_names:
    level = 'relations'
    df = get_overview(metrics, level, model_name, evidence_type)
    df_strict = df[columns].fillna(0.0)

    level = 'relations-hyp'
    df = get_overview(metrics, level, model_name, evidence_type)
    df_hyp = df[columns].fillna(0.0)

    for rel in relation_names:
        # A relation that is missing from a table contributes NaN for all of
        # that table's columns.
        row_strict = df_strict.loc[rel] if rel in df_strict.index else dict()
        row_hyp = df_hyp.loc[rel] if rel in df_hyp.index else dict()
        row_total = dict()
        for k in columns:
            row_total[f'{k}-strict'] = row_strict[k] if k in row_strict else np.nan
            row_total[f'{k}-loose'] = row_hyp[k] if k in row_hyp else np.nan
        dict_total[(rel, model_name)] = row_total


columns = ['proportion-strict', 'str-mean-strict', 'str-max-strict', 'n_pairs-strict',
           'proportion-loose', 'str-mean-loose', 'str-max-loose', 'n_pairs-loose']

# Rows are keyed by (relation, model name).
df = pd.DataFrame(dict_total).T[columns]
#print(df.round(4).to_latex())
print(df.round(4).sort_index().to_latex())

\begin{tabular}{llrrrrrrrr}
\toprule
                 &              &  proportion-strict &  str-mean-strict &  str-max-strict &  n\_pairs-strict &  proportion-loose &  str-mean-loose &  str-max-loose &  n\_pairs-loose \\
\midrule
afforded\_unusual & giga\_full\_updated &             0.0013 &           0.0013 &          0.0017 &            19.0 &            0.0012 &          0.0013 &         0.0017 &           41.0 \\
                 & wiki\_updated &             0.0010 &           0.0012 &          0.0018 &            20.0 &            0.0014 &          0.0016 &         0.0027 &           42.0 \\
afforded\_usual & giga\_full\_updated &             0.0714 &           0.0552 &          0.0721 &             2.0 &            0.0057 &          0.0021 &         0.0032 &           31.0 \\
                 & wiki\_updated &             0.0000 &           0.0000 &          0.0000 &             3.0 &            0.0000 &          0.0012 &         0.0035 &           54.0 \\
affording\_activity &