# Analyze based on semantic categories

1.) change tfidf so we compare equivalent categories only - done
2.) update ranking accordingly

In [1]:
import os
from collections import Counter, defaultdict
import csv
import pandas as pd
import numpy as np


pd.set_option('display.max_rows', None)
from sklearn.metrics import precision_recall_fscore_support

In [3]:
f_original = os.listdir('../contexts/giga_full/vocab')
print(len(f_original))
f_update = os.listdir('../contexts/giga_full_updated/vocab')
print(len(f_update))

1446
1636


In [2]:
f_original = os.listdir('../contexts/wiki/vocab')
print(len(f_original))
f_update = os.listdir('../contexts/wiki_updated/vocab')
print(len(f_update))

1669
1874


In [4]:
def get_categories(prop, model_name):
    analysis_type = 'tfidf-raw-10000/each_target_vs_corpus_per_category'
    path_dir = f'../results/{model_name}/{analysis_type}'
    path_dir = f'{path_dir}/{prop}'
    categories = set()
    for d in os.listdir(path_dir):
        categories.add(d)
    return categories

def get_context_cnts(prop, cat, label, model_name):
    
    analysis_type = 'tfidf-raw-10000/each_target_vs_corpus_per_category'
    path_dir = f'../results/{model_name}/{analysis_type}'
    path_dir = f'{path_dir}/{prop}'
    path_label = f'{path_dir}/{cat}/{label}'
    
    context_cnt = Counter()
    for f in os.listdir(path_label):
        full_path = f'{path_label}/{f}'
        if full_path.endswith('.csv'):
            with open(full_path) as infile:
                data = list(csv.DictReader(infile))
            for d in data:
                context = d['']
                diff = float(d['diff'])
                if diff > 0:
                    context_cnt[context] += 1
    return context_cnt
    
def get_n_concepts_total(prop, cat, model_name):
    
    analysis_type = 'tfidf-raw-10000/each_target_vs_corpus_per_category'
    path_dir = f'../results/{model_name}/{analysis_type}'
    path_dir = f'{path_dir}/{prop}'
    label = 'pos'
    path_pos = f'{path_dir}/{cat}/{label}'
    label = 'neg'
    path_neg = f'{path_dir}/{cat}/{label}'
    
    files_pos = [f for f in os.listdir(path_pos) if f.endswith('.csv')]
    files_neg = [f for f in os.listdir(path_neg) if f.endswith('.csv')]
    
    return len(files_pos), len(files_neg)

def get_f1_distinctiveness(n_pos, n_neg, total_pos, total_neg):
    
   
    total_instances = total_pos + total_neg
    labels = []
    [labels.append('pos') for i in range(total_pos)]
    [labels.append('neg') for i in range(total_neg)]
    pred_labels_pos = []
    for i in range(total_pos):
        if i < n_pos:
            pred_labels_pos.append('pos')
        else:
            pred_labels_pos.append('neg')
#     print(n_pos, total_pos)
#     print(pred_labels_pos.count('pos'), pred_labels_pos.count('neg'))
    
    pred_labels_neg = []
    for i in range(total_neg):
        if i < n_neg:
            pred_labels_neg.append('pos')
        else:
            pred_labels_neg.append('neg')
#     print(n_neg, total_neg)
#     print(pred_labels_neg.count('pos'), pred_labels_neg.count('neg'))
    
    predictions = pred_labels_pos + pred_labels_neg
    
    
    #print(len(labels), len(predictions))
    #print(pos_predictions, neg_predictions)
    
    p, r, f1, supp = precision_recall_fscore_support(labels, predictions, average = 'weighted', 
                                                     zero_division=0)
    #average='weighted'
    
    return p, r, f1


    
def aggregate_contexts(prop, cutoff, model_name):
    aggregation_name = 'aggregated-tfidf-raw-10000-categories'
    path_dir_agg = f'../analysis/{model_name}/{aggregation_name}/{prop}'
    os.makedirs(path_dir_agg, exist_ok = True)
    
    context_cnts_all = Counter()
    context_cat_dict = defaultdict(set)

    cats = get_categories(prop, model_name)

    for cat in cats:
        context_cnts_pos = get_context_cnts(prop, cat, 'pos', model_name)
        context_cnts_neg = get_context_cnts(prop, cat, 'neg', model_name)
        total_pos, total_neg = get_n_concepts_total(prop, cat, model_name)
        
        context_f1_dict = Counter()
        context_score_dict = defaultdict(dict)
        
        # get distinctiveness
        for c, cnt_pos in context_cnts_pos.most_common():
            cnt_neg = context_cnts_neg[c]
            p, r, f1 = get_f1_distinctiveness(cnt_pos, cnt_neg, total_pos, total_neg)
            context_f1_dict[c] = f1
            context_score_dict[c] = {'p': p,'r':r, 'f1': f1}
        
        table = []
        for c, f1 in context_f1_dict.most_common():
            scores = context_score_dict[c]
            d = dict()
            d['context'] = c
            d.update(scores)
            d['n_pos'] = context_cnts_pos[c]
            d['total_pos'] = total_pos
            d['n_neg'] = context_cnts_neg[c]
            d['total_neg'] = total_neg
            table.append(d)
        
        # collect and write to file
        f = f'{path_dir_agg}/{cat}.csv'
        
        header = table[0].keys()
        with open(f, 'w') as outfile:
            writer = csv.DictWriter(outfile, fieldnames = header)
            writer.writeheader()
            for d in table:
                writer.writerow(d)
        
                
def prepare_annotation(prop, model_name, cutoff=3, cutoff_concepts = 5):
    
    annotation_name = f'annotation-tfidf-top_{cutoff}_{cutoff_concepts}-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    os.makedirs(path_dir_annotation, exist_ok = True)
    f_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}/annotation-updated.csv'
    
    # paths aggregated files:
    aggregation_name = 'aggregated-tfidf-raw-10000-categories'
    path_dir_agg = f'../analysis/{model_name}/{aggregation_name}/{prop}'

    
    # get categories
    cats = get_categories(prop, model_name)
    
    # collect all contexts and categories 
    context_cats_dict = defaultdict(set)
    
    # load top per category
    for cat in cats:
        path = f'{path_dir_agg}/{cat}.csv'
        with open(path) as infile:
            data = list(csv.DictReader(infile))
        # sort by f1
        f1_dict  = defaultdict(list)
        for d in data:
            f1 = d['f1']
            f1_dict[f1].append(d)
        scores = sorted(list(f1_dict.keys()), reverse=True)
        top_scores = scores[:cutoff]
        top_context_dicts = []
        for ts in top_scores:
            dicts = f1_dict[ts]
            for d in dicts:
                n_pos = int(d['n_pos'])
                if n_pos > cutoff_concepts:
                    top_context_dicts.append(d)
    
        contexts = [d['context'] for d in top_context_dicts]
        # record categories
        for c in contexts:
            context_cats_dict[c].add(cat)
    
    with open(f_annotation, 'w') as outfile:
        outfile.write('context,evidence_type,categories\n')
        for c, cats in context_cats_dict.items():
            outfile.write(f'{c}, ,{" ".join(cats)}\n')

def get_properties():
    properties = []
    for path in os.listdir('../data/aggregated/'):
        prop = path.split('.')[0]
        if 'female-' not in prop and prop != '':
            properties.append(prop)
    return properties

def get_top_distinctive_contexts(properties, model_name, top_cutoff=3, concept_cutoff=3):
    aggregation_name = 'aggregated-tfidf-raw-10000-categories'
    ann_name = f'annotation-tfidf-top_{top_cutoff}_{concept_cutoff}-raw-10000-categories'
    path_results = f'../results/{model_name}/tfidf-raw-10000/each_target_vs_corpus_per_category'
    table = []
    for prop in properties:
        path_dir_agg = f'../analysis/{model_name}/{aggregation_name}/{prop}'
        path = path = f'{path_dir_agg}/all.csv'
        # load file containing all contexts
        with open(path) as infile:
            data = list(csv.DictReader(infile))
        # top distinctive context
        d_prop = dict()
        d_prop['property'] = prop
        # sort data by f1
        f1_dict = defaultdict(list)
        for d in data:
            f1 = d['f1']
            f1_dict[f1].append(d)
            
        # get n extracted candidates
        f_ann =  f'../analysis/{model_name}/{ann_name}/{prop}/annotation-updated.csv'
        with open(f_ann) as infile:
            data = list(csv.DictReader(infile))
        n_contexts = len(data)
        
        # get number concepts
        dir_results = f'{path_results}/{prop}/all/pos/'
        n_files = len([f for f in os.listdir(dir_results) if f.endswith('.csv')])
        
        top_score = max(list(f1_dict.keys()))
        top_dicts = f1_dict[top_score]
        top_context_dict = top_dicts[0]
        top_contexts = ' '.join([d['context'] for d in top_dicts])
        d_prop['n_contexts'] = n_contexts
        d_prop['n_concepts'] = n_files

        for k, v in top_context_dict.items():
            if k != 'context':
                v = float(v)
                d_prop[k] = v
        d_prop['contexts'] = top_contexts
        table.append(d_prop)
    return table

In [6]:
model_name = 'wiki_updated'
properties = get_properties()
#properties_test = ['dangerous', 'cold', 'lay_eggs']
#properties = [p for p in properties if p not in properties_test]
#properties = properties_test
cutoff = 3
cutoff_concepts = 3

for prop in properties:
    print(prop)
    
    aggregate_contexts(prop, cutoff, model_name)
    prepare_annotation(prop, model_name, cutoff, cutoff_concepts) 

square
warm
black
red
fly
dangerous
wings
sweet
hot
used_in_cooking
juicy
green
made_of_wood
blue
yellow
roll
female
cold
round
wheels
lay_eggs
swim


In [32]:
# get top distinctive contexts per prop

model_name = 'wiki_updated'
properties = get_properties()
table = get_top_distinctive_contexts(properties, model_name)
df = pd.DataFrame(table)
df.sort_values('f1', ascending = False).round(2)

Unnamed: 0,property,n_contexts,n_concepts,p,r,f1,n_pos,total_pos,n_neg,total_neg,contexts
9,used_in_cooking,705,102,0.93,0.92,0.92,88.0,102.0,0.0,64.0,meat
6,wings,332,81,0.91,0.89,0.89,64.0,81.0,1.0,84.0,birds
4,fly,55,63,0.89,0.89,0.88,47.0,63.0,3.0,104.0,bird
16,female,109,122,0.88,0.88,0.87,98.0,122.0,10.0,150.0,she
13,blue,1819,60,0.88,0.85,0.83,34.0,60.0,0.0,109.0,evening
19,wheels,105,78,0.89,0.82,0.83,59.0,78.0,0.0,27.0,chassis
15,roll,3485,55,0.83,0.82,0.83,45.0,55.0,7.0,42.0,from
14,yellow,111,42,0.85,0.83,0.81,21.0,42.0,1.0,85.0,tribe
0,square,442,90,0.89,0.78,0.81,67.0,90.0,1.0,21.0,built
11,green,550,91,0.84,0.81,0.81,64.0,91.0,4.0,68.0,green


In [33]:
# latex table for paper:
cols = ['property', 'n_concepts', 'n_contexts', 'f1', 'contexts']
df = df.sort_values('f1', ascending = False).round(2)
print(df[cols].to_latex(index=False))

\begin{tabular}{lrrrl}
\toprule
        property &  n\_concepts &  n\_contexts &    f1 &    contexts \\
\midrule
 used\_in\_cooking &         102 &         705 &  0.92 &        meat \\
           wings &          81 &         332 &  0.89 &       birds \\
             fly &          63 &          55 &  0.88 &        bird \\
          female &         122 &         109 &  0.87 &         she \\
            blue &          60 &        1819 &  0.83 &     evening \\
          wheels &          78 &         105 &  0.83 &     chassis \\
            roll &          55 &        3485 &  0.83 &        from \\
          yellow &          42 &         111 &  0.81 &       tribe \\
          square &          90 &         442 &  0.81 &       built \\
           green &          91 &         550 &  0.81 &       green \\
       dangerous &          76 &         627 &  0.79 &     killing \\
        lay\_eggs &          72 &          26 &  0.79 &        bird \\
             hot &         102 &          92

In [14]:
# get top distinctive contexts per prop

# model_name = 'wiki_updated'
# properties = get_properties()
# table = get_top_distinctive_contexts(properties, model_name)
# df = pd.DataFrame(table)
# df.sort_values('f1', ascending = False).round(2)

### Transfer old annotations to new files


In [2]:
properties = get_properties()
model_name_current = 'giga_full_updated'
model_name_old = 'giga_full'


for prop in properties:
    # current file:
    annotation_name = 'annotation-tfidf-top20-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    f_annotation_new = f'{path_dir_annotation}/annotation.csv'
    f_annotation_tr = f'{path_dir_annotation}/annotation-transferred.csv'

    # old file:
    annotation_name = 'annotation-tfidf-top20-raw-10000'
    path_dir_annotation = f'../analysis/{model_name_old}/{annotation_name}/{prop}-pos'
    f_annotation_old = f'{path_dir_annotation}/annotation-done.csv'

    # load old annotations
    context_annotation_dict=dict()
    with open(f_annotation_old) as infile:
        data = list(csv.DictReader(infile))
        for d in data:
            c = d['context']
            et = d['evidence']
            context_annotation_dict[c] = et
            #c = d['context']

    # load new candidates

    with open(f_annotation_new) as infile:
        data = list(csv.DictReader(infile))

    # fill in old annotations
    for d in data:
        c = d['context']
        if c in context_annotation_dict:
            et = context_annotation_dict[c]
        else:
            et = 'NA'
        d['evidence_type'] = et

    # write to new file

    with open(f_annotation_tr, 'w') as outfile:
        writer = csv.DictWriter(outfile, fieldnames = data[0].keys())
        writer.writeheader()
        for d in data:
            writer.writerow(d)

NameError: name 'get_properties' is not defined

In [29]:
# transfer new annotations to updated f1 scores

properties = get_properties()
#properties = ['dangerous']
model_name_current = 'giga_full_updated'
model_name_old = 'giga_full'


for prop in properties:
    # current file:
    annotation_name = 'annotation-tfidf-top_3_3-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    f_annotation_new = f'{path_dir_annotation}/annotation-updated.csv'
    f_annotation_tr = f'{path_dir_annotation}/annotation-transferred-updated.csv'

    # old file:
    annotation_name = 'annotation-tfidf-top_3_5-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    f_annotation_old = f'{path_dir_annotation}/annotation-updated-done.csv'

    # load old annotations
    if os.path.isfile(f_annotation_old):
        print('found file')
        context_annotation_dict=dict()
        with open(f_annotation_old) as infile:
            data = list(csv.DictReader(infile))
            for d in data:
                c = d['context']
                et = d['evidence_type']
                context_annotation_dict[c] = et
                #c = d['context']

        # load new candidates

        with open(f_annotation_new) as infile:
            data = list(csv.DictReader(infile))

        # fill in old annotations
        for d in data:
            c = d['context']
            if c in context_annotation_dict:
                et = context_annotation_dict[c]
            else:
                et = 'NA'
            d['evidence_type'] = et

        # write to new file

        with open(f_annotation_tr, 'w') as outfile:
            writer = csv.DictWriter(outfile, fieldnames = data[0].keys())
            writer.writeheader()
            for d in data:
                writer.writerow(d)

found file
found file
found file


In [12]:
# transfer giga annotations to wiki

properties = get_properties()
#properties = ['dangerous']
model_name_current = 'wiki_updated'
model_name_old = 'giga_full_updated'


for prop in properties:
    print(prop)
    # current file:
    annotation_name = 'annotation-tfidf-top_3_3-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name_current}/{annotation_name}/{prop}'
    f_annotation_new = f'{path_dir_annotation}/annotation-updated.csv'
    f_annotation_tr = f'{path_dir_annotation}/annotation-transferred-updated.csv'

    # old file:
    annotation_name = 'annotation-tfidf-top_3_3-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name_old}/{annotation_name}/{prop}'
    f_annotation_old = f'{path_dir_annotation}/annotation-updated-done.csv'

    # load old annotations
    if os.path.isfile(f_annotation_old):
        print('found file')
        context_annotation_dict=dict()
        with open(f_annotation_old) as infile:
            data = list(csv.DictReader(infile))
            for d in data:
                c = d['context']
                et = d['evidence_type']
                context_annotation_dict[c] = et
                #c = d['context']

        # load new candidates

        with open(f_annotation_new) as infile:
            data = list(csv.DictReader(infile))

        # fill in old annotations
        for d in data:
            c = d['context']
            if c in context_annotation_dict:
                et = context_annotation_dict[c]
            else:
                et = 'NA'
            d['evidence_type'] = et

        # write to new file

        with open(f_annotation_tr, 'w') as outfile:
            writer = csv.DictWriter(outfile, fieldnames = data[0].keys())
            writer.writeheader()
            for d in data:
                writer.writerow(d)

square
found file
warm
found file
black
found file
red
found file
fly
found file
dangerous
found file
wings
found file
sweet
found file
hot
found file
used_in_cooking
found file
juicy
found file
green
found file
made_of_wood
found file
blue
found file
yellow
found file
roll
found file
female
found file
cold
found file
round
found file
wheels
found file
lay_eggs
found file
swim
found file


### Complete annotations

In [13]:
from collections import defaultdict
import os
import csv

In [30]:
def get_annotation_status(model_name, top_cutoff, concept_cutoff):
    dir_path = f'../analysis/{model_name}'
    dir_annotations = f'{dir_path}/annotation-tfidf-top_{top_cutoff}_{concept_cutoff}-raw-10000-categories'
    annotation_dict = defaultdict(set)
    line_dict = dict()

    for f in os.listdir(dir_annotations):
        if  not f.endswith('.csv') and not f.endswith('.ipynb_checkpoints'):
            prop = f.split('/')[-1]
            full_path = f'{dir_annotations}/{f}'
            
            #print(full_path)
            # get categories:
            files = os.listdir(full_path)
            # get number of words
            path_file = f'{full_path}/annotation-transferred-updated.csv'
            with open(path_file) as infile:
                lines = infile.read().strip().split('\n')
                not_annotated = [l for l in lines if l.strip().split(',')[1] == 'NA']
            line_dict[prop] = (len(lines), len(not_annotated), len(lines)-len(not_annotated))
            if 'annotation-updated-done.csv' in files:
                annotation_dict['complete'].add(prop)
            else:
                annotation_dict['incomplete'].add(prop)
                
    return annotation_dict, line_dict

def show_annotation_status(model_name, top_cutoff, concept_cutoff):
    annotation_dict, line_dict = get_annotation_status(model_name, 
                                        top_cutoff, concept_cutoff)
    # same category not annotated:
    print('completed:\n')
    for prop in sorted(list(annotation_dict['complete'])):
        # cats open:
        print(prop, line_dict[prop])
    print()
    print('Incomplete:\n')
    for prop in sorted(annotation_dict['incomplete']):
        if prop not in annotation_dict['complete']:
            print(prop, line_dict[prop])
    return annotation_dict
            
            
def get_evidence_dict(model_name, prop, top_cutoff, concept_cutoff):
    
    annotation_name = f'annotation-tfidf-top_{top_cutoff}_{concept_cutoff}-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    f_annotation = f'{path_dir_annotation}/annotation-updated-done.csv'
    
    ev_dict = dict()
    
    with open(f_annotation) as infile:
        data = list(csv.DictReader(infile))
    for d in data:
        et = d['evidence_type']
        ev = d['context']
        ev_dict[ev] = et
    return ev_dict
        
            

def get_evidence_distribution(model_name, prop, top_cutoff, concept_cutoff):
    
    # current file:
    annotation_name = f'annotation-tfidf-top_{top_cutoff}_{concept_cutoff}-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    f_annotation = f'{path_dir_annotation}/annotation-updated-done.csv'
    
    ev_dict = get_evidence_dict(model_name, prop, top_cutoff, concept_cutoff)
    
    ev_cnts = Counter()
    
    for e, et in ev_dict.items():
        ev_cnts[et] += 1
        if et != 'u':
            ev_cnts['all'] += 1
        if et in ['p', 'l', 'n']:
            ev_cnts['prop_specific'] += 1
        elif et in ['i', 'r', 'b']:
            ev_cnts['non-specific'] += 1
    
    total_contexts = len(ev_dict)
    
    ev_counts_norm = dict()
    for ev, cnt in ev_cnts.items():
        ev_counts_norm[ev]  = cnt/total_contexts
    return ev_counts_norm

In [31]:
model_name = 'wiki_updated'
top_cutoff = 3
concept_cutoff = 3
ann_dict = show_annotation_status(model_name, top_cutoff, concept_cutoff)

completed:


Incomplete:

black (821, 634, 187)
blue (1820, 1018, 802)
cold (591, 382, 209)
dangerous (628, 392, 236)
female (110, 97, 13)
fly (56, 41, 15)
green (551, 402, 149)
hot (93, 78, 15)
juicy (123, 59, 64)
lay_eggs (27, 18, 9)
made_of_wood (498, 373, 125)
red (1276, 781, 495)
roll (3486, 1464, 2022)
round (1035, 886, 149)
square (443, 214, 229)
sweet (27, 20, 7)
swim (1701, 1225, 476)
used_in_cooking (706, 492, 214)
warm (1182, 573, 609)
wheels (106, 66, 40)
wings (333, 234, 99)
yellow (112, 108, 4)


In [15]:
model_name = 'giga_full_updated'
properties = ann_dict['complete']
top_cutoff = 3
concept_cutoff = 3

cols = ['property', 'u', 'all', 'prop_specific', 'non-specific', 'p', 'n', 'l', 'i', 'r', 'b']
table = []
for prop in properties:
    d = dict()
    d['property'] =  prop
    d.update(get_evidence_distribution(model_name, prop, top_cutoff, concept_cutoff))
    for c in cols:
        if c not in d:
            d[c] = np.nan
    table.append(d)
   
cols = ['property', 'u', 'all', 'prop_specific', 'non-specific', 'p', 'n', 'l', 'i', 'r', 'b']
df = pd.DataFrame(table)[cols]
df = df[cols].sort_values('all', ascending = False).round(3)
df

Unnamed: 0,property,u,all,prop_specific,non-specific,p,n,l,i,r,b
4,used_in_cooking,0.357,0.643,0.014,0.629,0.007,0.003,0.003,0.302,0.327,
1,sweet,0.4,0.6,0.057,0.543,0.057,,,0.543,,
6,hot,0.702,0.298,0.036,0.262,0.024,0.012,,0.167,0.095,
7,lay_eggs,0.703,0.297,0.027,0.27,0.027,,,0.176,0.095,
21,female,0.711,0.289,0.044,0.244,,,0.044,0.156,0.022,0.067
10,green,0.839,0.161,0.002,0.159,0.002,,,0.145,0.014,
19,wheels,0.84,0.16,0.016,0.143,0.008,,0.008,0.049,0.094,
13,wings,0.848,0.152,0.011,0.139,0.002,,0.009,0.045,0.068,0.027
17,juicy,0.88,0.12,0.002,0.117,0.002,,,0.102,0.015,
3,dangerous,0.883,0.117,0.021,0.096,0.002,0.006,0.013,0.037,0.054,0.005


In [17]:
#print(df.round(3).fillna('-').to_latex(index=False))

## Evidence strength and distinctiveness

In [68]:
from statistics import stdev
import pandas as pd

def get_categories(prop, model_name):
    analysis_type = 'tfidf-raw-10000/each_target_vs_corpus_per_category'
    path_dir = f'../results/{model_name}/{analysis_type}'
    path_dir = f'{path_dir}/{prop}'
    categories = set()
    for d in os.listdir(path_dir):
        if '.' not in d:
            categories.add(d)
    return categories

In [104]:
# get all mean tfidf values per evidence word and category ranked by f1-score


# table (1 per property)

# word, evidence_type, f1-score, all, cat1, cat2, cat3 

# get mean tfidf score + std

def get_tfidf_scores(prop, model_name, evidence_dict):
    categories = get_categories(prop, model_name)
    dir_tfidf = f'../results/{model_name}/tfidf-raw-10000/each_target_vs_corpus_per_category/{prop}'
    word_cat_mean_dict = defaultdict(dict)
    #n_concepts_cat = []
    for cat in categories:
        dir_cat_tfidf = f'{dir_tfidf}/{cat}/pos'
        f_concepts = os.listdir(dir_cat_tfidf)
        word_tfidfs = defaultdict(list)
        word_concepts = defaultdict(list)
        n_concepts_total = 0
        for f in f_concepts:
            full_f = f'{dir_cat_tfidf}/{f}'
            if full_f.endswith('.csv'):
                n_concepts_total += 1
                with open(full_f) as infile:
                    data = list(csv.DictReader(infile))
                for d in data:
                    word = d['']
                    if word in evidence_dict and float(d['diff']) > 0:
                        word_tfidfs[word].append(float(d['target']))
                        word_concepts[word].append(full_f.split('/')[-1].split('.')[0])
        for word, tfidfs in word_tfidfs.items():
            if len(tfidfs) > 0:
                mean = sum(tfidfs)/len(tfidfs)
            else:
                mean = 0
            word_cat_mean_dict[word]['ev_type'] = evidence_dict[word]
            word_cat_mean_dict[word][cat] = mean
    return word_cat_mean_dict

    

prop = 'fly'
model_name = 'giga_full_updated'
top_cutoff = 3
concept_cutoff = 3
evidence_dict = get_evidence_dict(model_name, prop, top_cutoff, concept_cutoff)
# make selection
evidence_dict_reduced = dict()
selected_words = ['fly', 'plane', 'landing']
for w in selected_words:
    evidence_dict_reduced[w] = evidence_dict[w]
# 
word_cat_dict_tfidf = get_tfidf_scores(prop, model_name, evidence_dict_reduced)
df = pd.DataFrame(word_cat_dict_tfidf)
df.round(2).T.sort_values('all', ascending=False)

Unnamed: 0,ev_type,mammal,no-cat,relation,object,communication,measure,bird,all,fish,vehicle,food,animal
landing,l,0.00109967,0.0384938,0.0305578,0.0599175,0.0615637,,0.00984135,0.0562959,0.0237487,0.0914492,0.0214683,0.0101632
fly,p,0.0016877,0.0211931,0.0264586,0.0184811,0.022011,0.00223312,0.0110777,0.018222,0.0025445,0.0410519,0.0033475,0.00963089
plane,i,0.00468728,0.0181112,0.0435988,0.0142364,0.00361045,0.0122682,0.00622109,0.0149158,0.00102178,0.026819,0.00460734,0.00695006


## Evidence distribution per semantic category

In [34]:
import json
import csv
import os
from collections import Counter, defaultdict
import pandas as pd
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [86]:
def load_prop_data(prop):
    
    path = f'../data/aggregated_semantic_info/{prop}.json'
    with open(path) as infile:
        concept_dict = json.load(infile)
    return concept_dict


def load_concept_evidence(concept, prop, model_name, categories):
    
    categories.add('all')
    contexts = set()
    dir_path = f'../results/{model_name}/tfidf-raw-10000/each_target_vs_corpus_per_category'
    
    for cat in categories:
        f_path = f'{dir_path}/{prop}/{cat}/pos/{concept}.csv'
        if os.path.isfile(f_path):
            with open(f_path) as infile:
                data = list(csv.DictReader(infile))
            for d in data:
                context = d['']
                diff = float(d['diff'])
                if diff > 0:
                    contexts.add(context)
    return contexts  

def get_categories(prop, model_name):
    analysis_type = 'tfidf-raw-10000/each_target_vs_corpus_per_category'
    path_dir = f'../results/{model_name}/{analysis_type}'
    path_dir = f'{path_dir}/{prop}'
    categories = set()
    for d in os.listdir(path_dir):
        if '.' not in d:
            categories.add(d)
    return categories


def get_top_ev_categories(prop, model_name, top_cutoff, concept_cutoff):
    table = dict()
    aggregation_name = f'aggregated-tfidf-raw-10000-categories'
    categories = get_categories(prop, model_name)
    
    path_dir_agg = f'../analysis/{model_name}/{aggregation_name}/{prop}'
    evidence_dict = get_evidence_dict(model_name, prop, top_cutoff, concept_cutoff)
    
    et_context_dict = defaultdict(set)
    for c, et in evidence_dict.items():
        et_context_dict[et].add(c)
    
    # get top performance per evidence type for each category
    for cat in categories:
        path = path = f'{path_dir_agg}/{cat}.csv'
        # load file containing all concepts and simply load first one
        with open(path) as infile:
            data = list(csv.DictReader(infile))
        # sort by performance:
        perf_data = defaultdict(list)
        for d in data:
            f1 = d['f1']
            perf_data[f1].append(d)
        perf_ranked = sorted(list(perf_data.keys()), reverse = True)
        for et, contexts in et_context_dict.items():
            for f1 in perf_ranked:
                data = perf_data[f1]
                d_perf = dict()
                for k, v in d.items():
                    if k != 'context':
                        d_perf[k] = round(float(v), 2)
                contexts_ev = set()
                for d in data:
                    context = d['context']
                    if context in contexts:
                        contexts_ev.add(context)
                if contexts_ev:
                    d_perf['n_c'] = len(contexts_ev)
                    d_perf['contexts'] = ' '.join(contexts_ev)
                    table[(cat, et)] = d_perf
                    break
                
    return table

In [115]:
prop = 'female'
model_name = 'giga_full_updated'
top_cutoff = 3
concept_cutoff = 3
table = get_top_ev_categories(prop, model_name, top_cutoff, concept_cutoff)
df = pd.DataFrame(table)
df.T

Unnamed: 0,Unnamed: 1,p,r,f1,n_pos,total_pos,n_neg,total_neg,n_c,contexts
mammal,i,0.89,0.86,0.86,7,10,0,12,4,herself actress her lady
mammal,u,0.89,0.86,0.86,7,10,0,12,1,grass
mammal,r,0.86,0.82,0.81,6,10,0,12,1,birth
mammal,l,0.89,0.86,0.86,7,10,0,12,2,pregnant pregnancy
mammal,b,0.72,0.68,0.66,4,10,1,12,1,baby
no-cat,i,1.0,1.0,1.0,1,1,0,17,1,herself
no-cat,u,0.97,0.94,0.95,1,1,1,17,3,about media so
no-cat,l,0.96,0.89,0.91,1,1,2,17,1,pregnancy
no-cat,b,0.96,0.89,0.91,1,1,2,17,2,baby beautiful
relation,i,0.17,0.25,0.2,1,2,2,2,5,girl her she actress lady


In [116]:
# get mean per cat

properties = get_properties()
prop_table = []
for prop in properties:
    table = get_top_ev_categories(prop, model_name, top_cutoff, concept_cutoff)
    type_scores = defaultdict(list)
    ev_type_mean_scores = dict()
    for (cat, ev_type), f1_dict in table.items():
        type_scores[ev_type].append(f1_dict['f1'])
    for ev_type, scores in type_scores.items():
        mean = sum(scores)/len(scores)
        prop_dict = dict()
        ev_type_mean_scores[ev_type] = mean
    prop_dict['property'] = prop
    prop_dict.update(ev_type_mean_scores)
    prop_table.append(prop_dict)
    
cols = ['property', 'p', 'n', 'l', 'i', 'r', 'b', 'u']
df = pd.DataFrame(prop_table)
df = df[cols].sort_values('p', ascending = False).round(2).fillna('-')

In [117]:
df

Unnamed: 0,property,p,n,l,i,r,b,u
9,used_in_cooking,0.97,0.81,0.94,0.37,0.98,-,0.94
19,wheels,0.92,-,0.87,0.92,0.2,-,0.86
15,roll,0.9,-,0.83,0.9,0.93,-,0.28
4,fly,0.89,0.82,0.84,0.88,0.9,0.78,0.44
6,wings,0.88,-,0.86,0.84,0.9,0.81,0.51
20,lay_eggs,0.86,-,-,0.51,0.91,-,0.91
7,sweet,0.86,-,-,0.55,-,-,0.88
11,green,0.85,-,-,0.9,0.85,-,0.29
5,dangerous,0.85,0.91,0.88,0.95,0.95,0.82,0.32
10,juicy,0.85,-,-,0.9,0.87,-,0.53


In [118]:
print(df.to_latex(index=False))

\begin{tabular}{llllrllr}
\toprule
        property &     p &     n &     l &     i &     r &     b &     u \\
\midrule
 used\_in\_cooking &  0.97 &  0.81 &  0.94 &  0.37 &  0.98 &     - &  0.94 \\
          wheels &  0.92 &     - &  0.87 &  0.92 &   0.2 &     - &  0.86 \\
            roll &   0.9 &     - &  0.83 &  0.90 &  0.93 &     - &  0.28 \\
             fly &  0.89 &  0.82 &  0.84 &  0.88 &   0.9 &  0.78 &  0.44 \\
           wings &  0.88 &     - &  0.86 &  0.84 &   0.9 &  0.81 &  0.51 \\
        lay\_eggs &  0.86 &     - &     - &  0.51 &  0.91 &     - &  0.91 \\
           sweet &  0.86 &     - &     - &  0.55 &     - &     - &  0.88 \\
           green &  0.85 &     - &     - &  0.90 &  0.85 &     - &  0.29 \\
       dangerous &  0.85 &  0.91 &  0.88 &  0.95 &  0.95 &  0.82 &  0.32 \\
           juicy &  0.85 &     - &     - &  0.90 &  0.87 &     - &  0.53 \\
             hot &  0.84 &  0.74 &     - &  0.31 &  0.91 &     - &  0.89 \\
             red &  0.84 &     - &     - 

## Evidence strength


In [18]:
def get_evidence_dict(model_name, prop, top_cutoff, concept_cutoff):
    
    annotation_name = f'annotation-tfidf-top_{top_cutoff}_{concept_cutoff}-raw-10000-categories'
    path_dir_annotation = f'../analysis/{model_name}/{annotation_name}/{prop}'
    f_annotation = f'{path_dir_annotation}/annotation-updated-done.csv'
    
    ev_dict = dict()
    
    with open(f_annotation) as infile:
        data = list(csv.DictReader(infile))
    for d in data:
        et = d['evidence_type']
        ev = d['context']
        ev_dict[ev] = et
    return ev_dict
        
    

In [22]:
model_name = 'giga_full_updated'
prop = 'wings'
top_cutoff = 3
concept_cutoff = 3

evidence_dict = get_evidence_dict(model_name, prop, top_cutoff, concept_cutoff)
evidence_dict

{'crashed': 'u',
 'similar': 'u',
 'visibility': 'u',
 'indicating': 'u',
 'hunt': 'u',
 'northwest': 'u',
 'piloted': 'r',
 'propeller': 'r',
 'conducting': 'u',
 'fighter': 'u',
 'fighters': ' ',
 'supersonic': 'u',
 'pilot': 'r',
 'flew': 'l',
 'runway': 'u',
 'flying': 'l',
 'flown': 'l',
 'aviation': 'r',
 'cockpit': 'r',
 'downed': 'r',
 'wreckage': 'r',
 'peruvian': 'u',
 'spy': 'b',
 'downing': 'r',
 'radar': 'r',
 'russian': 'b',
 '270': 'u',
 'recorder': 'u',
 'missiles': 'r',
 'pentagon': 'b',
 'fuselage': 'r',
 'fired': 'u',
 'cuban': 'b',
 'us': 'b',
 'pakistani': 'b',
 'yemeni': 'b',
 '800': 'u',
 'intercept': 'r',
 'attacks': 'b',
 'pakistan': 'b',
 'descended': 'r',
 'yemen': 'b',
 'strikes': 'b',
 'terrorist': 'b',
 'terrorists': 'b',
 'locate': 'u',
 'iranian': 'b',
 'confirmed': 'u',
 'withstand': 'u',
 'controversial': 'u',
 'perished': 'u',
 'intelligence': 'u',
 'gather': 'u',
 'thwarted': 'u',
 'deciding': 'u',
 'publicly': 'u',
 'bird': 'i',
 'nest': 'r',
 'spec

## Relation analysis

In [7]:
from statistics import stdev
import numpy as np

In [8]:
def load_tfidf_score(prop, concept, evidence_word, label, 
                     model_name, top_cutoff, concept_cutoff):
    
    tfidf_scores = []
    dir_res = f'../results/{model_name}/tfidf-raw-10000/each_target_vs_corpus_per_category'
    categories = get_categories(prop, model_name)
    for cat in categories:
        f =  f'{dir_res}/{prop}/{cat}/{label}/{concept}.csv'
        if os.path.isfile(f):
            with open(f) as infile:
                data = list(csv.DictReader(infile))
            for d in data:
                context = d['']
                diff =  float(d['diff'])
                if context == evidence_word and diff > 0:
                    score = float(d['target'])
                    tfidf_scores.append(score)
    return tfidf_scores

def get_mean(numbers):
    if len(numbers) > 0:
        mean = sum(numbers)/len(numbers)
    else:
        mean = 0
    return mean

  
def get_relation_combinations(properties, combinations):
    
    relation_pair_dict = defaultdict(set)

    
    for prop in properties:
        prop_dict = load_prop_data(prop)
        for c, d in prop_dict.items():
            ml_label = d['ml_label']
            if ml_label in {'all', 'some', 'all-some', 'few-some'}:
                l = 'pos'
            elif ml_label in {'few'}:
                l = 'neg'
            relation_pair_dict[l].add((prop, c))
            if l == 'pos':
                rel_dict = d['relations']
                for combination in combinations:
                    relations = set([rel for rel, p in rel_dict.items() if p > 0.5])
                    if combination == relations:
                        l_comb = tuple(sorted(relations))
                        relation_pair_dict[l_comb].add((prop, c))
    return relation_pair_dict   

In [35]:
combinations = [
                    {'implied_category'},
                    {'implied_category', 'variability_limited'},
                    {'variability_limited'},
                    {'typical_of_property'},
                    {'typical_of_concept'},
                    {'implied_category', 'typical_of_concept'},
                    {'implied_cateogry', 'typical_of_property'},
                    {'typical_of_concept', 'typical_of_property'},
                    {'afforded_usual'},
                    {'afforded_unusual'}
                
                    ]

properties = get_properties()
relation_pair_dict = get_relation_combinations(properties, combinations)
for rel, pairs in relation_pair_dict.items():
    print(rel, len(pairs))
print()
print(relation_pair_dict[('implied_category',  )]) 
print()
print(relation_pair_dict[('implied_category', 'variability_limited', )])
print()
print(relation_pair_dict[('typical_of_concept',  )]) 
print()
print(relation_pair_dict[('implied_category', 'typical_of_concept')])
print()
print(relation_pair_dict[('typical_of_property',  )]) 
print()
print(relation_pair_dict[('implied_category', 'typical_of_property')])
print()
print(relation_pair_dict[('typical_of_concept', 'typical_of_property')])
print()
print(relation_pair_dict[('afforded_usual', )])
print()
print(relation_pair_dict[('afforded_unusual', )])







neg 1545
pos 2135
('implied_category', 'variability_limited') 21
('variability_limited',) 114
('implied_category',) 16
('implied_category', 'typical_of_concept') 13
('typical_of_concept', 'typical_of_property') 4
('typical_of_concept',) 4
('afforded_unusual',) 20
('afforded_usual',) 3

{('swim', 'cob'), ('warm', 'brogue'), ('wings', 'roach'), ('wheels', 'tank'), ('dangerous', 'pentobarbital'), ('round', 'pepperoni'), ('roll', 'bike'), ('round', 'patty'), ('round', 'cherry'), ('wheels', 'saloon'), ('lay_eggs', 'neritidae'), ('wings', 'cricket'), ('lay_eggs', 'crane'), ('swim', 'bay'), ('wheels', 'underframe'), ('lay_eggs', 'flounder')}

{('square', 'laptop'), ('sweet', 'breadfruit'), ('made_of_wood', 'ladle'), ('round', 'pineapple'), ('roll', 'cart'), ('made_of_wood', 'girder'), ('juicy', 'anjou'), ('fly', 'fowl'), ('round', 'gourd'), ('round', 'sapodilla'), ('round', 'cabbage'), ('square', 'computer'), ('round', 'onion'), ('round', 'lemon'), ('square', 'blackboard'), ('sweet', 'carrot'

In [44]:

label_rel = 'pos'
label = 'pos'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'
    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)

print(get_mean(all_means))

0.012380345716773471


In [43]:
label_rel = 'neg'
label = 'neg'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'
    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)

print(get_mean(all_means))

0.0019888443681951185


In [42]:
label_rel = ('implied_category', )
label = 'pos'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'

    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)

print(get_mean(all_means))

0.0018485234171035732


In [41]:
label_rel = ('variability_limited', )
label = 'pos'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'

    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)

print(get_mean(all_means))

0.011092839659587376


In [40]:
label_rel = ('implied_category', 'variability_limited')
label = 'pos'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'

    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)

print(get_mean(all_means))

0.006298089086881645


In [38]:
label_rel = ('afforded_usual', )
label = 'pos'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'

    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)


print(get_mean(all_means))

0.025515742044588785


In [39]:
label_rel = ('afforded_unusual', )
label = 'pos'

all_scores = []
all_means = []
for prop, concept in relation_pair_dict[label_rel]:
    evidence_word = prop
    if prop == 'lay_eggs':
        evidence_word = 'eggs'
    elif prop == 'used_in_cooking':
        evidence_word = 'cook'
    elif prop == 'made_of_wood':
        evidence_word = 'wood'
    elif prop == 'has_wings':
        evidence_word = 'wings'
    elif prop == 'has_wheels':
        evidence_word = 'wheels'

    scores = load_tfidf_score(prop, concept, evidence_word, label, 
                      model_name, top_cutoff, concept_cutoff)
    all_scores.extend(scores)
    mean = get_mean(scores)
    all_means.append(mean)

print(get_mean(all_means))

0.003130250052305159


In [102]:
prop = 'red'
concept = 'ambulance'
evidence_word = 'red'
label = 'pos'
#lay_eggs', 'neritidae
scores = load_tfidf_score(prop, concept, evidence_word, label, 
                     model_name, top_cutoff, concept_cutoff)
print(get_mean(scores))

0.021969832408808006


In [186]:
# check how often direct mentions of property words are mentioned 
# in the context of the target pairs vs the rest