# Relations


In [2]:
import glob
import csv 
import utils
from DataSet import DataSet
import pandas as pd
from collections import defaultdict

# Display DataFrames in full: lift the row and column limits so that no
# rows or columns are elided when a frame is rendered.
pd.set_option("display.max_rows", None, "display.max_columns", None)
# NOTE(review): a max_colwidth of 0 is presumably meant to disable the
# truncation of long cell contents — confirm with the installed pandas version.
pd.set_option('display.max_colwidth', 0)

In [58]:
# Load the lexical data and the annotations of every (group, run) listed below.


# Passed through to utils.load_data; the paths printed while loading contain
# this value as a directory component.
source = 'clean'

lexical_data = utils.load_lexical_data()

# Each tuple is (group, run).
# NOTE(review): `leopard` and `current` presumably name an earlier and the
# ongoing annotation round — confirm with the data owner.
leopard = [('experiment1', '3'), ('experiment2', '4')]
current = [('experiment3', '5_pilot'), ('scalar_heat', '5_scalar_heat')]
total = leopard +current
print(total)
# Accumulate the items returned for every (group, run) into one flat list.
all_data = []
for group, run in total:
    # Note the argument order: run comes before group.
    all_data.extend(utils.load_data(run, group, source))
print(len(all_data))


[('experiment1', '3'), ('experiment2', '4'), ('experiment3', '5_pilot'), ('scalar_heat', '5_scalar_heat')]
../data/clean/diagnostic_dataset/annotations_clean_contradictions_batch_0.5/run3-group_experiment1/*.csv
../data/clean/diagnostic_dataset/annotations_clean_contradictions_batch_0.5/run4-group_experiment2/*.csv
../data/clean/diagnostic_dataset/annotations_clean_contradictions_batch_0.5/run5_pilot-group_experiment3/*.csv
../data/clean/diagnostic_dataset/annotations_clean_contradictions_batch_0.5/run5_scalar_heat-group_scalar_heat/*.csv
253875


In [7]:
# Wrap the loaded annotations and lexical data in the project's DataSet.
# NOTE(review): metric='prop_true' presumably selects the proportion of "true"
# judgements as the per-unit score — confirm in DataSet.
data_set = DataSet(all_data, lexical_data, metric = 'prop_true')

In [10]:
# Build the overview table for the fine-grained relation inventory
# (presumably one dict per relation — verify in DataSet.get_relation_overview).
rel_overview_dicts = data_set.get_relation_overview(rel_type = 'fine-grained')
# Order by the 'alpha' column, highest first, and round to two decimals.
df_relations = pd.DataFrame(rel_overview_dicts).sort_values('alpha', ascending=False).round(2)
# `df` is the same frame under a second name; the disabled .drop(...) would
# remove the min/max columns from the displayed table.
df = df_relations #.drop(['alpha min', 'alpha max', 'unit min', 'unit max'], axis = 1)
df

Unnamed: 0,relation,alpha,seconds,pairs,properties,candidate pairs
11,afforded_usual,0.63,7.81,214,4,623
7,typical_of_concept,0.55,6.84,1056,21,3045
3,implied_category,0.53,7.73,1060,21,3045
8,affording_activity,0.51,7.22,732,17,2422
4,typical_of_property,0.42,7.23,633,21,3045
2,impossible,0.41,8.04,617,21,3095
9,variability_limited,0.34,9.32,967,21,3045
6,variability_open,0.32,9.05,672,17,2422
5,unusual,0.3,8.11,839,21,3095
1,rare,0.28,7.07,630,21,3095


## Evaluation by relation

For each relation, inspect a random sample of 20 examples (as a starting point).

In [11]:
import random
import uuid
import os
import pandas as pd

In [5]:
def draw_random_pairs(relation_set, n_samples=20):
    """Draw a random sample of units from ``relation_set.units_pos``.

    Args:
        relation_set: object exposing ``units_pos``, an indexable sequence
            of units.
        n_samples: number of distinct units to draw (sampling is without
            replacement).

    Returns:
        A tuple ``(random_units, random_indices)``: the sampled units and
        their positions in ``relation_set.units_pos``, in matching order.

    Raises:
        ValueError: raised by ``random.sample`` when ``n_samples`` is negative
            or larger than the number of available units.
    """
    units = relation_set.units_pos
    # Sample over every valid index. The previous bound, len(units) - 1,
    # silently excluded the last unit from ever being drawn.
    random_indices = random.sample(range(len(units)), n_samples)
    random_units = [units[i] for i in random_indices]
    return random_units, random_indices


def random_units_to_file(relation, n_samples, random_units, random_indices):
    """Write a sample of units to a uniquely named CSV file.

    The file is written to
    ``../analysis/relations/qualitative_analysis/n-samples{n_samples}/`` and is
    named ``{relation}_{run_id}.csv``, where ``run_id`` is a fresh
    ``uuid.uuid1()`` value, so repeated calls never overwrite each other.

    Args:
        relation: relation name, used as the file name prefix.
        n_samples: sample size, used only to pick the output directory.
        random_units: units to write; each must expose ``prop``, ``concept``,
            ``prop_true``, ``uas_true`` and a non-empty ``annotations``
            sequence whose first item has a ``description``.
        random_indices: index of each unit, paired with ``random_units``
            position by position.
    """
    run_id = uuid.uuid1()
    dir_path = f'../analysis/relations/qualitative_analysis/n-samples{n_samples}'
    f_path = f'{dir_path}/{relation}_{run_id}.csv'
    header = ['random_index', 'prop', 'concept', 'prop_true', 'uas_true', 'description']
    # The directory depends on n_samples, so it may not exist yet.
    os.makedirs(dir_path, exist_ok=True)
    # newline='' lets the csv module control line endings; without it, extra
    # blank rows appear on platforms that translate '\n' on write.
    with open(f_path, 'w', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=header)
        writer.writeheader()
        for i, u in zip(random_indices, random_units):
            writer.writerow({
                'random_index': i,
                'prop': u.prop,
                'concept': u.concept,
                'prop_true': u.prop_true,
                'uas_true': u.uas_true,
                # Only the first annotation's description is exported.
                'description': u.annotations[0].description,
            })
        

    

In [18]:
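# The samples have already been generated and written to disk. Do not re-run:
# every run draws a new random sample and writes it to new, uniquely named files.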
#n_samples = 30
#for rel, rel_set in data_set.relation_sets.items():
 #   random_units, random_indices = draw_random_pairs(rel_set, n_samples=n_samples)
  #  random_units_to_file(rel, n_samples, random_units, random_indices)

In [9]:
# Generate a LaTeX table from one of the sample files written earlier.
# The file name contains a generated id, so it has to be copied from a
# listing of the output directory (e.g. in the terminal).
path_dir = '../analysis/relations/qualitative_analysis/n-samples30/'
f = 'creative_afd8e242-7770-11eb-883d-acde48001122.csv'
df = pd.read_csv(f'{path_dir}{f}')
# Sort by 'prop_true' (highest first), round to two decimals, and leave out
# the 'description' and 'random_index' columns before rendering.
print(df.sort_values('prop_true', 
                     ascending=False).round(2).drop(
    ['description', 'random_index'],
    axis = 1).to_latex(index = False))

\begin{tabular}{llrr}
\toprule
         prop &      concept &  prop\_true &  uas\_true \\
\midrule
 fly &  car &  1.00 &  1.00 \\
 wings &  automobile &  1.00 &  1.00 \\
 fly &  boat &  0.86 &  0.85 \\
 roll &  plastic &  0.86 &  0.90 \\
 green &  raccoon &  0.86 &  0.85 \\
 blue &  giraffe &  0.78 &  0.78 \\
 juicy &  chip &  0.75 &  0.76 \\
 fly &  sharpie &  0.75 &  0.75 \\
 made\_of\_wood &  rock &  0.71 &  0.71 \\
 fly &  roebuck &  0.71 &  0.71 \\
 fly &  deer &  0.71 &  0.71 \\
 hot &  vegetable &  0.71 &  0.72 \\
 hot &  winter &  0.70 &  0.61 \\
 fly &  lion &  0.70 &  0.67 \\
 blue &  pit &  0.67 &  0.66 \\
 fly &  seal &  0.62 &  0.63 \\
 wings &  dozer &  0.62 &  0.64 \\
 dangerous &  club &  0.62 &  0.59 \\
 round &  chicken &  0.62 &  0.62 \\
 sweet &  bulgur &  0.62 &  0.63 \\
 lay\_eggs &  howler &  0.62 &  0.63 \\
 sweet &  vinaigrette &  0.62 &  0.62 \\
 wings &  admiral &  0.60 &  0.58 \\
 dangerous &  crease &  0.60 &  0.54 \\
 juicy &  emperor &  0.60 &  0.60 \\
 r

# Intersections of relations

In [41]:
import utils
from collections import defaultdict
import pandas as pd

In [55]:
# load aggregated data


def get_all_relations(properties):
    """Collect every relation name that occurs for any of the given properties.

    For each property the aggregated data is loaded with
    ``utils.load_prop_data_agg``; the keys of each entry's ``'relations'``
    mapping are gathered into a single set.

    Args:
        properties: iterable of property identifiers accepted by
            ``utils.load_prop_data_agg``.

    Returns:
        A set with all relation names found.
    """
    found = set()
    for prop in properties:
        for entry in utils.load_prop_data_agg(prop).values():
            found.update(entry['relations'].keys())
    return found
        
    
def get_rel_pairs(relations):
    """Return all unordered pairs of distinct relations.

    Each pair is a tuple whose two members are in sorted order, so (a, b)
    and (b, a) collapse into one entry.

    Args:
        relations: iterable of mutually comparable relation names.

    Returns:
        A set of 2-tuples.
    """
    return {
        tuple(sorted([first, second]))
        for first in relations
        for second in relations
        if first != second
    }
    
    
    
def get_rel_overlap(properties):
    """Measure how strongly pairs of relations co-occur on the same items.

    An item is a ``(prop, concept)`` key from the aggregated property data.
    For a given relation pair only items whose ``'relations'`` mapping
    contains BOTH relations are considered; a relation counts as applying
    to an item when its value is greater than 0.5.

    Args:
        properties: iterable of property identifiers accepted by
            ``utils.load_prop_data_agg``.

    Returns:
        A list of dicts, one per relation pair with a non-empty intersection,
        with the keys:
            'rel1', 'rel2': the two relation names (sorted order);
            'rel1_with_rel2': share of rel1 items that also have rel2;
            'rel2_with_rel1': share of rel2 items that also have rel1;
            'rel1_rel2': intersection over union of the two item sets.
        All ratios are rounded to two decimals.
    """
    relations = get_all_relations(properties)
    rel_pairs = get_rel_pairs(relations)

    # Load every property's data once, up front. The data does not depend on
    # the relation pair, so loading it inside the pair loop repeated the same
    # work for every pair.
    candidates = []
    for prop in properties:
        for c, d in utils.load_prop_data_agg(prop).items():
            rels = d['relations']
            # An item with fewer than two relations can never contain both
            # members of a pair, so it cannot contribute to any pair.
            if len(rels) < 2:
                continue
            applicable = {r for r, p in rels.items() if p > 0.5}
            candidates.append(((prop, c), rels, applicable))

    rel_overview = []
    for rel_pair in rel_pairs:
        r1, r2 = rel_pair
        pairs1 = set()
        pairs2 = set()
        for key, rels, applicable in candidates:
            # Both relations must have been available for this item.
            if r1 in rels and r2 in rels:
                if r1 in applicable:
                    pairs1.add(key)
                if r2 in applicable:
                    pairs2.add(key)
        i = pairs1.intersection(pairs2)
        # A non-empty intersection implies both sets are non-empty, so the
        # divisions below are safe.
        if len(i) > 0:
            u = pairs1.union(pairs2)
            d = dict()
            d['rel1'] = r1
            d['rel1_with_rel2'] = round(len(i)/len(pairs1), 2)
            d['rel1_rel2'] = round(len(i)/len(u), 2)
            d['rel2_with_rel1'] = round(len(i)/len(pairs2), 2)
            d['rel2'] = r2
            rel_overview.append(d)
    return rel_overview
                    
    

    
    

In [56]:
# properties = utils.get_properties()
# relations = get_all_relations(properties)
# rel_pairs = get_rel_pairs(relations)
#rel_pairs
# Compute the pairwise relation overlap for all properties and show it as a
# table, with the highest 'rel1_rel2' values first.
properties = utils.get_properties()
rel_overview = get_rel_overlap(properties)
df = pd.DataFrame(rel_overview)
df = df.sort_values('rel1_rel2', ascending  = False)
df

Unnamed: 0,rel1,rel1_with_rel2,rel1_rel2,rel2_with_rel1,rel2
54,afforded_usual,0.83,0.82,0.99,typical_of_concept
31,afforded_usual,0.97,0.81,0.83,implied_category
49,affording_activity,0.9,0.74,0.81,implied_category
11,implied_category,0.85,0.74,0.85,typical_of_concept
45,affording_activity,0.91,0.71,0.76,typical_of_concept
8,rare,0.88,0.61,0.66,unusual
21,typical_of_concept,0.59,0.58,0.98,typical_of_property
35,affording_activity,0.62,0.57,0.86,typical_of_property
5,implied_category,0.56,0.53,0.93,typical_of_property
26,afforded_usual,0.49,0.48,0.97,typical_of_property


In [57]:
# Render the overlap table as LaTeX source, without the index column.
print(df.to_latex(index=False))

\begin{tabular}{lrrrl}
\toprule
                rel1 &  rel1\_with\_rel2 &  rel1\_rel2 &  rel2\_with\_rel1 &                 rel2 \\
\midrule
      afforded\_usual &            0.83 &       0.82 &            0.99 &   typical\_of\_concept \\
      afforded\_usual &            0.97 &       0.81 &            0.83 &     implied\_category \\
  affording\_activity &            0.90 &       0.74 &            0.81 &     implied\_category \\
    implied\_category &            0.85 &       0.74 &            0.85 &   typical\_of\_concept \\
  affording\_activity &            0.91 &       0.71 &            0.76 &   typical\_of\_concept \\
                rare &            0.88 &       0.61 &            0.66 &              unusual \\
  typical\_of\_concept &            0.59 &       0.58 &            0.98 &  typical\_of\_property \\
  affording\_activity &            0.62 &       0.57 &            0.86 &  typical\_of\_property \\
    implied\_category &            0.56 &       0.53 &            0.93