# Corpus exploration

Can we find evidence among all contexts of positive examples of a property by comparing them to the contexts of all negative examples?

Here, similar concepts should come in handy, as there should be a lot of overlap in their contexts. Distinctive aspects should be all the more salient. 



In [1]:
import pandas as pd
import os

In [None]:
# run extract_contexts.py (vm)
# download contexts - out in ../contexts/
# run get_tfidf.py
# run process_tfidf.py
# annotate evidence candidates
# run 

In [2]:
# Sanity check: compare the size of the target vocabulary with the number of
# entries found in the vocab directory of each corpus (giga_full and wiki).

path_vocab = '../data/vocab.txt'
path_giga = '../contexts/giga_full/vocab/'
path_wiki = '../contexts/wiki/vocab/'

# Target vocabulary: the file is split on newlines and de-duplicated.
with open(path_vocab) as infile:
    vocab_lines = infile.read().strip().split('\n')
vocab = set(vocab_lines)
print(len(vocab))

# Each directory entry is reduced to the part of its name before the first '.'.
giga_vocab = [name.partition('.')[0] for name in os.listdir(path_giga)]
print(len(giga_vocab))

wiki_vocab = [name.partition('.')[0] for name in os.listdir(path_wiki)]
print(len(wiki_vocab))

1780
1445
1669


# TF-IDF analysis

In [2]:
# get overview of counts
from process_tfidf import get_table
from analyze_evidence import get_evidence_table, get_properties
import pandas as pd

In [7]:
# Before annotation: context tables for one property, ranked by n_concepts.

model = 'giga_full'
prop = 'female'
category = 'all-pos'
cnt_type = 'raw'
max_features = 10000

pos_dicts, neg_dicts = get_table(
    model, prop, category, cnt_type, max_features,
    rank_by='n_concepts', top_n=20,
)
# df_neg is built here as well so a later cell can display it.
df_pos, df_neg = pd.DataFrame(pos_dicts), pd.DataFrame(neg_dicts)
df_pos

../analysis/giga_full/tfidf_aggregated_concept_scores-raw-10000/female-pos/all-pos/female-pos.csv


Unnamed: 0,context,n_concepts,mean_tfidf,mean_diff
0,herself,77,0.02,0.02
1,she,72,0.05,0.04
2,beautiful,68,0.02,0.02
3,lady,65,0.02,0.02
4,anna,63,0.02,0.02
5,love,63,0.02,0.02
6,mary,61,0.03,0.03
7,actress,60,0.03,0.03
8,elizabeth,60,0.04,0.03
9,maria,59,0.03,0.03


In [8]:
# Display the table built from `neg_dicts` in the preceding cell
# (requires that cell to have been run first).
df_neg

Unnamed: 0,context,n_concepts,mean_tfidf,mean_diff
0,himself,98,0.01,0.01
1,michael,97,0.01,0.01
2,david,95,0.01,0.01
3,steve,94,0.01,0.01
4,brian,92,0.01,0.01
5,failed,91,0.0,0.0
6,signed,91,0.0,0.0
7,peter,91,0.01,0.01
8,james,90,0.01,0.01
9,bob,89,0.01,0.01


## Annotation procedure


## File handling:

* Annotate per semantic category and for all positive concepts
* Files: ../analysis/giga_full/annotation-tfidf-top10-raw-10000/dangerous-pos/animal/dangerous-pos.csv
* Go to dir
* Copy file: cp dangerous-pos.csv dangerous-pos-annotated.csv
* Annotate and save


## Annotation guidelines


Use labels in table

[insert table]


Ask yourself:

Can this word indicate the property given the semantic category? 

If you are annotating the general bin, just ask yourself whether this word can indicate the property.


## Inspect annotations

### Check annotation status

In [11]:
from analyze_evidence import show_annotation_status

In [18]:
# Report annotation progress for one model; judging by the saved output this
# lists the properties under "completed" and "Incomplete" headings.
model = 'wiki'
show_annotation_status(model)

completed:

black-pos
blue-pos

Incomplete:

cold-pos
dangerous-pos
female-pos
fly-pos
green-pos
hot-pos
juicy-pos
lay_eggs-pos
made_of_wood-pos
red-pos
roll-pos
round-pos
square-pos
sweet-pos
swim-pos
used_in_cooking-pos
warm-pos
wheels-pos
wings-pos
yellow-pos


### Inspect annotations per property and category

### Check annotation consistency


In [13]:
from analyze_evidence import summarize_annotation_table, get_property_annotations
import pandas as pd
# Show every row when a DataFrame is displayed. The fully qualified key is
# required: option names are matched as patterns, and the bare 'max_rows'
# matches more than one option in newer pandas, which raises OptionError.
pd.set_option('display.max_rows', None)

In [19]:
# Set up a new annotation for one property and model.
# NOTE: the argument order here is (prop, model), whereas
# get_property_annotations in the cells below is called as (model, prop).

prop = 'cold'
model = 'wiki'
summarize_annotation_table(prop, model)

In [23]:

# Annotations for one property, restricted to rows whose 'evidence' column is True.
prop, model = 'cold', 'giga_full'
annotations = get_property_annotations(model, prop)
df = annotations[annotations['evidence'] == True]
df

Unnamed: 0,cold,all-pos,object,measure,relation,food,evidence
bottles,-,i,i,-,i,-,True
frozen,-,n,n,-,n,n,True
beer,-,-,-,-,i,-,True
cold,p,p,p,p,p,p,True
glasses,-,i,i,-,-,-,True
frost,-,i,i,-,-,-,True
vanilla,-,-,-,-,-,r,True
chilled,-,-,-,-,-,n,True
cream,-,-,-,-,-,r,True


In [21]:
# Overview: annotations for one property, restricted to rows whose
# 'evidence' column is True.

prop, model = 'blue', 'wiki'
annotations = get_property_annotations(model, prop)
df = annotations[annotations['evidence'] == True]
df

Unnamed: 0,plant,all-pos,animal,object,relation,food,bird,evidence
feather,-,-,-,-,i,-,-,True
chemical,-,-,-,-,i,-,-,True
watches,-,-,-,-,i,-,-,True
panel,-,-,-,-,i,-,-,True
blue,p,p,p,p,p,p,p,True
plant,i,-,-,-,-,-,-,True
plants,i,-,-,-,-,-,-,True
bird,-,-,i,-,-,-,i,True
parrot,-,-,i,-,-,-,i,True
feathers,-,-,-,-,-,-,i,True


### Aggregate annotations on property level

* avoid duplicate counting

In [8]:
from analyze_evidence import get_concept_evidence_counts, get_evidence_words
import pandas as pd

In [2]:
# Per-word evidence counts for one property, highest distinctiveness first.
prop, model = 'yellow', 'giga_full'
evidence_counts = get_concept_evidence_counts(prop, model)
overview_table = pd.DataFrame(evidence_counts)
overview_table.sort_values(by='distinctiveness', ascending=False)

Unnamed: 0,word,evidence_type,distinctiveness,f1_dist,n_concepts-pos,mean_tfidf-pos,p_concepts-pos,t_concepts-pos,n_concepts-neg,mean_tfidf-neg,p_concepts-neg,t_concepts-neg
0,yellow,p,0.43,0.615385,20,0.05,0.5,40,5,0.06,0.07,74
6,apple,i,0.38,0.571429,18,0.05,0.45,40,5,0.03,0.07,74
1,peach,i,0.3,0.472727,13,0.09,0.33,40,2,0.04,0.03,74
2,apples,i,0.27,0.466667,14,0.02,0.35,40,6,0.01,0.08,74
8,flowers,i,0.27,0.466667,14,0.03,0.35,40,6,0.03,0.08,74
3,melon,i,0.25,0.440678,13,0.02,0.33,40,6,0.01,0.08,74
4,peaches,i,0.19,0.37931,11,0.02,0.28,40,7,0.02,0.09,74
7,apricot,i,0.18,0.363636,10,0.06,0.25,40,5,0.02,0.07,74
5,spice,i,-0.04,0.26087,9,0.01,0.23,40,20,0.01,0.27,74


In [3]:
# Per-word evidence counts for one property, highest distinctiveness first.
prop, model = 'wings', 'giga_full'
evidence_counts = get_concept_evidence_counts(prop, model)
overview_table = pd.DataFrame(evidence_counts)
overview_table.sort_values(by='distinctiveness', ascending=False)

Unnamed: 0,word,evidence_type,distinctiveness,f1_dist,n_concepts-pos,mean_tfidf-pos,p_concepts-pos,t_concepts-pos,n_concepts-neg,mean_tfidf-neg,p_concepts-neg,t_concepts-neg
0,bird,i,0.59,0.742268,36,0.04,0.6,60,1,0.04,0.01,77
2,wings,p,0.54,0.708333,34,0.02,0.57,60,2,0.01,0.03,77
1,birds,i,0.54,0.714286,35,0.04,0.58,60,3,0.02,0.04,77
3,flies,r,0.46,0.645161,30,0.03,0.5,60,3,0.02,0.04,77
5,nest,r,0.41,0.581395,25,0.06,0.42,60,1,0.03,0.01,77
4,fly,r,0.38,0.577778,26,0.04,0.43,60,4,0.03,0.05,77
7,eggs,l,0.36,0.551724,24,0.05,0.4,60,3,0.05,0.04,77
6,flew,r,0.36,0.555556,25,0.01,0.42,60,5,0.01,0.06,77
18,turkey,i,0.31,0.5,21,0.01,0.35,60,3,0.0,0.04,77
16,hawk,i,0.29,0.469136,19,0.02,0.32,60,2,0.1,0.03,77


In [4]:
# Per-word evidence counts for one property, highest distinctiveness first.
prop, model = 'female', 'giga_full'
evidence_counts = get_concept_evidence_counts(prop, model)
overview_table = pd.DataFrame(evidence_counts)
overview_table.sort_values(by='distinctiveness', ascending=False)

Unnamed: 0,word,evidence_type,distinctiveness,f1_dist,n_concepts-pos,mean_tfidf-pos,p_concepts-pos,t_concepts-pos,n_concepts-neg,mean_tfidf-neg,p_concepts-neg,t_concepts-neg
0,herself,i,0.65,0.789744,77,0.02,0.71,109,9,0.02,0.06,144
1,she,i,0.59,0.753927,72,0.05,0.66,109,10,0.06,0.07,144
2,beautiful,b,0.56,0.731183,68,0.02,0.62,109,9,0.02,0.06,144
3,lady,i,0.56,0.722222,65,0.02,0.6,109,6,0.03,0.04,144
4,anna,i,0.55,0.715909,63,0.02,0.58,109,4,0.02,0.03,144
6,mary,i,0.53,0.701149,61,0.03,0.56,109,4,0.02,0.03,144
8,elizabeth,i,0.53,0.697674,60,0.04,0.55,109,3,0.03,0.02,144
9,maria,i,0.53,0.698225,59,0.03,0.54,109,1,0.04,0.01,144
10,lovely,b,0.51,0.682081,59,0.02,0.54,109,5,0.01,0.03,144
15,marie,i,0.5,0.674419,58,0.02,0.53,109,5,0.02,0.03,144


In [9]:
# Per-word evidence counts for one property, highest distinctiveness first.
prop, model = 'square', 'giga_full'
evidence_counts = get_concept_evidence_counts(prop, model)
overview_table = pd.DataFrame(evidence_counts)
overview_table.sort_values(by='distinctiveness', ascending=False)

Unnamed: 0,word,evidence_type,distinctiveness,f1_dist,n_concepts-pos,mean_tfidf-pos,p_concepts-pos,t_concepts-pos,n_concepts-neg,mean_tfidf-neg,p_concepts-neg,t_concepts-neg
1,rooms,i,0.73,0.84507,30,0.02,0.73,41,-,-,-,-
2,desk,i,0.66,0.794118,27,0.03,0.66,41,-,-,-,-
4,sheets,i,0.66,0.794118,27,0.01,0.66,41,-,-,-,-
6,table,i,0.66,0.794118,27,0.02,0.66,41,-,-,-,-
7,phone,i,0.66,0.794118,27,0.02,0.66,41,-,-,-,-
9,tables,i,0.63,0.776119,26,0.01,0.63,41,-,-,-,-
8,boxes,i,0.58,0.764706,26,0.01,0.63,41,1,0.01,0.05,21
10,room,i,0.58,0.764706,26,0.08,0.63,41,1,0.05,0.05,21
3,photograph,i,0.56,0.771429,27,0.01,0.66,41,2,0.01,0.1,21
5,note,i,0.56,0.771429,27,0.01,0.66,41,2,0.01,0.1,21


## Aggregate evidence and count concepts with prop evidence


In [2]:
from analyze_evidence import get_concept_context_overview
import pandas as pd

In [2]:
# Overview per evidence type for one property, rounded to two decimals.
# NOTE(review): in the saved output n_concepts_with_ev_neg equals
# n_concepts_with_ev_pos in every row and exceeds n_concepts_neg (97 vs 56);
# the helper's negative count looks suspicious. TODO confirm.
prop, model = 'used_in_cooking', 'giga_full'
table = get_concept_context_overview(prop, model)
df = pd.DataFrame(table)
df = df.round(2)
df

Unnamed: 0,evidence,distinctiveness,f1_dist,n_concepts_with_ev_pos,p_concepts_with_ev_pos,n_concepts_pos,n_concepts_with_ev_neg,p_concepts_with_ev_neg,n_concepts_neg,total_evidence_words
0,all,0.43,0.86,97,1.0,97,97,0.57,56,67
1,p,0.86,0.93,85,0.88,97,85,0.02,56,4
2,n,0.0,0.0,0,0.0,97,0,0.0,56,0
3,i,0.8,0.95,97,1.0,97,97,0.2,56,21
4,r,0.52,0.88,97,1.0,97,97,0.48,56,42
5,b,0.0,0.0,0,0.0,97,0,0.0,56,0
6,l,0.0,0.0,0,0.0,97,0,0.0,56,0


In [3]:
# Overview per evidence type for one property, rounded to two decimals.
# NOTE(review): in the saved output n_concepts_with_ev_neg equals
# n_concepts_with_ev_pos in every row. TODO confirm this is intended.
prop, model = 'female', 'giga_full'
table = get_concept_context_overview(prop, model)
df = pd.DataFrame(table)
df = df.round(2)
df

Unnamed: 0,evidence,distinctiveness,f1_dist,n_concepts_with_ev_pos,p_concepts_with_ev_pos,n_concepts_pos,n_concepts_with_ev_neg,p_concepts_with_ev_neg,n_concepts_neg,total_evidence_words
0,all,0.42,0.72,106,0.97,109,106,0.56,144,35
1,p,0.12,0.39,33,0.3,109,33,0.19,144,1
2,n,0.0,0.0,0,0.0,109,0,0.0,144,0
3,i,0.55,0.77,105,0.96,109,105,0.42,144,24
4,r,0.0,0.0,0,0.0,109,0,0.0,144,0
5,b,0.58,0.77,98,0.9,109,98,0.32,144,10
6,l,0.0,0.0,0,0.0,109,0,0.0,144,0


In [3]:
# Overview per evidence type for one property, rounded to two decimals.
# NOTE(review): in the saved output n_concepts_with_ev_neg equals
# n_concepts_with_ev_pos in every row. TODO confirm this is intended.
prop, model = 'wings', 'giga_full'
table = get_concept_context_overview(prop, model)
df = pd.DataFrame(table)
df = df.round(2)
df

Unnamed: 0,evidence,distinctiveness,f1_dist,n_concepts_with_ev_pos,p_concepts_with_ev_pos,n_concepts_pos,n_concepts_with_ev_neg,p_concepts_with_ev_neg,n_concepts_neg,total_evidence_words
0,all,0.46,0.73,54,0.9,60,54,0.44,77,20
1,p,0.56,0.73,38,0.63,60,38,0.08,77,2
2,n,0.0,0.0,0,0.0,60,0,0.0,77,0
3,i,0.5,0.73,48,0.8,60,48,0.3,77,10
4,r,0.52,0.74,46,0.77,60,46,0.25,77,7
5,b,0.0,0.0,0,0.0,60,0,0.0,77,0
6,l,0.36,0.55,24,0.4,60,24,0.04,77,1


### Get overview table with all properties:

* top distinctiveness
* mean distinctiveness
* n different evidence words
* n evidence words by type

In [4]:
from analyze_evidence import get_prop_overview, get_properties
import pandas as pd

In [5]:
# Build one overview record per property and collect them in a single table.
model = 'giga_full'
properties = get_properties()
prop_table = [get_prop_overview(model, prop) for prop in properties]
df = pd.DataFrame(prop_table)

In [6]:
# Show properties ordered by 'max_dist', highest first, rounded to two
# decimals. The result is only displayed; `df` itself is not reassigned.
df.sort_values('max_dist', ascending=False).round(2)

Unnamed: 0,property,n_evidence_words,p,n,i,r,b,l,combined_dist,combined_f1,max_dist,max_dist_f1,max_dist_ev,max_dist_t
9,used_in_cooking,67,4,0,21,42,0,0,0.43,0.86,0.94,0.97,add,r
0,square,11,1,0,10,0,0,0,0.62,0.91,0.73,0.85,rooms,i
19,wheels,20,3,0,4,12,0,1,0.67,0.94,0.7,0.84,wheel,p
10,juicy,32,3,0,27,2,0,0,0.43,0.83,0.69,0.81,pineapple,i
4,fly,25,7,0,9,9,0,0,0.38,0.59,0.68,0.8,flew,p
17,cold,9,1,2,4,2,0,0,0.75,0.95,0.66,0.8,frozen,n
5,dangerous,48,1,0,15,28,4,0,0.18,0.75,0.65,0.79,killed,r
7,sweet,32,2,0,24,5,1,0,0.4,0.81,0.65,0.79,jam,i
16,female,35,1,0,24,0,10,0,0.42,0.72,0.65,0.79,herself,i
21,swim,27,3,1,10,13,0,0,0.38,0.86,0.63,0.79,fish,i


### Ideas for further exploration:


**Combination of evidence and distinctiveness of evidence**

Is it possible to find a combination of words that is maximally distinctive between pos and neg class?

Find the max distinctive combination and measure distinctiveness in terms of difference pos-neg (e.g. 100% pos, 0% neg --> distinctiveness of 1, 100% pos 100% neg --> distinctiveness of 0).


**Cosine similarity of evidence words - how coherent is the group of evidence words?**

# Analysis of property evidence in concept-contexts [to adapt to new setup]

In [20]:
# create concept-context overview matrix for each property

from analyze_evidence import get_properties, get_prop_overview

# Property types

In [17]:
from analyze_evidence import get_prop_collection_overview, get_prop_types
import pandas as pd

In [23]:
# Overview table: one row per property type with its properties joined by spaces.

prop_collection_dict, collection_prop_dict = get_prop_types()
table = [
    {'prop_type': collection, 'properties': ' '.join(props)}
    for collection, props in collection_prop_dict.items()
]
df = pd.DataFrame(table)
df

Unnamed: 0,prop_type,properties
0,perceptual-shape,round square
1,percetual-heat,warm cold hot
2,perceptual-color,green red blue yellow black
3,activities,roll swim lay_eggs fly
4,complex,used_in_cooking dangerous
5,parts,wheels wings
6,perceptual,sweet juicy
7,part-material,made_of_wood
8,gender,female


In [24]:
# Print the current `df` as LaTeX source without the index column.
# Depends on `df` from the preceding cell.
print(df.to_latex(index=False))

\begin{tabular}{ll}
\toprule
        prop\_type &                   properties \\
\midrule
 perceptual-shape &                 round square \\
   percetual-heat &                warm cold hot \\
 perceptual-color &  green red blue yellow black \\
       activities &       roll swim lay\_eggs fly \\
          complex &    used\_in\_cooking dangerous \\
            parts &                 wheels wings \\
       perceptual &                  sweet juicy \\
    part-material &                 made\_of\_wood \\
           gender &                       female \\
\bottomrule
\end{tabular}



In [29]:
# LaTeX table of the per-collection overview, sorted by collection name.
model = 'giga_full'
props = get_properties()
collection_table = get_prop_collection_overview(props, model)
df = pd.DataFrame(collection_table)
df = df.sort_values(by='collection')
print(df.to_latex(index=False))

\begin{tabular}{lrrrrrr}
\toprule
       collection &  n\_props &  n\_ev &  total\_pos &  p\_ev\_pos &  total\_neg &  p\_ev\_neg \\
\midrule
       activities &        4 &    15 &        200 &      0.45 &        218 &      0.17 \\
          complex &        2 &     6 &        146 &      0.61 &        107 &      0.18 \\
           gender &        1 &     6 &        109 &      0.95 &        144 &      0.33 \\
    part-material &        1 &     2 &         43 &      0.05 &         33 &      0.18 \\
            parts &        2 &     3 &        125 &      0.46 &        102 &      0.06 \\
       perceptual &        2 &    13 &        106 &      0.77 &        123 &      0.23 \\
 perceptual-color &        5 &    10 &        126 &      0.64 &        356 &      0.09 \\
 perceptual-shape &        2 &     1 &         69 &      0.03 &         39 &      0.05 \\
   percetual-heat &        3 &     5 &        132 &      0.46 &         99 &      0.17 \\
\bottomrule
\end{tabular}



In [28]:
# LaTeX table of the per-collection overview, sorted by collection name.
model = 'wiki'
props = get_properties()
collection_table = get_prop_collection_overview(props, model)
df = pd.DataFrame(collection_table)
df = df.sort_values(by='collection')
print(df.to_latex(index=False))

\begin{tabular}{lrrrrrr}
\toprule
       collection &  n\_props &  n\_ev &  total\_pos &  p\_ev\_pos &  total\_neg &  p\_ev\_neg \\
\midrule
       activities &        4 &     8 &        279 &      0.34 &        262 &      0.13 \\
          complex &        2 &     3 &        154 &      0.25 &        123 &      0.07 \\
           gender &        1 &    10 &        122 &      0.97 &        150 &      0.46 \\
    part-material &        1 &     2 &         50 &      0.64 &         43 &      0.14 \\
            parts &        2 &     9 &        154 &      0.69 &        111 &      0.17 \\
       perceptual &        2 &     8 &        109 &      0.72 &        128 &      0.27 \\
 perceptual-color &        5 &     8 &        129 &      0.60 &        379 &      0.09 \\
 perceptual-shape &        2 &     3 &         73 &      0.40 &         39 &      0.15 \\
   percetual-heat &        3 &     5 &        140 &      0.54 &        101 &      0.12 \\
\bottomrule
\end{tabular}



## Relations

In [11]:
# def get hypotheses

from analyze_evidence import get_properties, get_relation_overview
import pandas as pd

In [37]:
# LaTeX table of the relation overview (rel_type 'top'), highest p_evidence first.
# To exclude gender, filter 'female' out of `props` before the call.
model = 'wiki'
props = get_properties()
relation_table = get_relation_overview(props, model, rel_type='top')
df = pd.DataFrame(relation_table).sort_values(by='p_evidence', ascending=False)
print(df.to_latex(index=False))

\begin{tabular}{lrr}
\toprule
            relation &  total\_concepts &  p\_evidence \\
\midrule
          gender-all &             152 &        0.78 \\
 typical\_of\_property &             127 &        0.68 \\
  affording\_activity &             407 &        0.59 \\
  typical\_of\_concept &             572 &        0.57 \\
    implied\_category &             629 &        0.50 \\
      afforded\_usual &             172 &        0.41 \\
          gender-few &             208 &        0.33 \\
    afforded\_unusual &              73 &        0.19 \\
 variability\_limited &             562 &        0.17 \\
             unusual &             578 &        0.16 \\
                rare &             308 &        0.15 \\
            creative &             152 &        0.12 \\
          impossible &             554 &        0.11 \\
    variability\_open &             443 &        0.08 \\
\bottomrule
\end{tabular}



In [33]:
# LaTeX table of the relation overview (rel_type 'hyp_top'), highest p_evidence first.
model = 'wiki'
props = get_properties()
relation_table = get_relation_overview(props, model, rel_type='hyp_top')
df = pd.DataFrame(relation_table).sort_values(by='p_evidence', ascending=False)
print(df.to_latex(index=False))

\begin{tabular}{lrr}
\toprule
            relation &  total\_concepts &  p\_evidence \\
\midrule
          gender-all &             152 &        0.78 \\
 typical\_of\_property &             211 &        0.59 \\
  affording\_activity &             555 &        0.54 \\
  typical\_of\_concept &              19 &        0.53 \\
      afforded\_usual &             211 &        0.37 \\
          gender-few &             208 &        0.33 \\
    implied\_category &              45 &        0.27 \\
    afforded\_unusual &              50 &        0.24 \\
 variability\_limited &             803 &        0.20 \\
                rare &             244 &        0.15 \\
             unusual &             477 &        0.14 \\
            creative &             140 &        0.13 \\
          impossible &             553 &        0.11 \\
    variability\_open &             285 &        0.04 \\
\bottomrule
\end{tabular}



In [39]:
# LaTeX table of the relation overview (rel_type 'top'), highest p_evidence first.
# To exclude gender, filter 'female' out of `props` before the call.
model = 'giga_full'
props = get_properties()
relation_table = get_relation_overview(props, model, rel_type='top')
df = pd.DataFrame(relation_table).sort_values(by='p_evidence', ascending=False)
print(df.to_latex(index=False))


\begin{tabular}{lrr}
\toprule
            relation &  total\_concepts &  p\_evidence \\
\midrule
          gender-all &             152 &        0.68 \\
 typical\_of\_property &             127 &        0.57 \\
  affording\_activity &             407 &        0.53 \\
  typical\_of\_concept &             572 &        0.52 \\
    implied\_category &             629 &        0.41 \\
      afforded\_usual &             172 &        0.38 \\
          gender-few &             208 &        0.23 \\
 variability\_limited &             562 &        0.19 \\
            creative &             152 &        0.17 \\
             unusual &             578 &        0.16 \\
                rare &             308 &        0.15 \\
    afforded\_unusual &              73 &        0.15 \\
          impossible &             554 &        0.10 \\
    variability\_open &             443 &        0.07 \\
\bottomrule
\end{tabular}



In [34]:
# LaTeX table of the relation overview (rel_type 'hyp_top'), highest p_evidence first.
model = 'giga_full'
props = get_properties()
relation_table = get_relation_overview(props, model, rel_type='hyp_top')
df = pd.DataFrame(relation_table).sort_values(by='p_evidence', ascending=False)
print(df.to_latex(index=False))

\begin{tabular}{lrr}
\toprule
            relation &  total\_concepts &  p\_evidence \\
\midrule
          gender-all &             152 &        0.68 \\
 typical\_of\_property &             211 &        0.52 \\
  affording\_activity &             555 &        0.49 \\
  typical\_of\_concept &              19 &        0.42 \\
      afforded\_usual &             211 &        0.37 \\
          gender-few &             208 &        0.23 \\
 variability\_limited &             803 &        0.21 \\
    implied\_category &              45 &        0.18 \\
            creative &             140 &        0.18 \\
    afforded\_unusual &              50 &        0.18 \\
             unusual &             477 &        0.14 \\
                rare &             244 &        0.12 \\
          impossible &             553 &        0.10 \\
    variability\_open &             285 &        0.03 \\
\bottomrule
\end{tabular}



## Development

In [216]:
# Scratch check: two sets with the same elements compare equal
# regardless of the order in which the elements were written.
s1 = set([1, 2, 3])
s2 = set([2, 3, 1])
s1 == s2

True

In [214]:
# Scratch assignment; no cell visible in this development section reads it.
prop = 'yellow'


In [19]:



# NOTE(review): `collect_search_log` is not imported or defined in any cell
# visible in this notebook, so this cell fails on a fresh kernel.
# TODO confirm its source module and add the import.
# NOTE(review): the saved output is keyed by 'swim' although 'fly' is
# requested here, so the output appears to come from an earlier run.
properties = ['fly']
prop_cat_syn_dict = collect_search_log(properties)
prop_cat_syn_dict

defaultdict(list,
            {'swim': [{'bird': {"Synset('bird.n.01')", "Synset('bird.n.02')"}},
              {'fish': {"Synset('fish.n.01')", "Synset('fish.n.02')"}},
              {'mammal': {"Synset('mammal.n.01')"}},
              {'bird': {"Synset('bird.n.01')", "Synset('bird.n.02')"}},
              {'fish': {"Synset('fish.n.01')", "Synset('fish.n.02')"}},
              {'mammal': {"Synset('mammal.n.01')"}}]})