In [1]:
import json
from tqdm import tqdm
import os
import numpy as np
import re

In [2]:
def load_dataset_and_extract_types(path):
    """Read a JSON-lines dataset and return the ``'y_str'`` field of each record.

    Args:
        path: Path to a text file holding one JSON object per line.

    Returns:
        A list with one entry per non-blank line, in file order, holding the
        value stored under that record's ``'y_str'`` key.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'y_str'`` key.
    """
    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(path, 'r', encoding='utf-8') as inp:
        lines = inp.readlines()
    # Blank lines (e.g. a trailing newline at the end of the file) are skipped
    # instead of making json.loads raise.
    return [json.loads(line)['y_str'] for line in tqdm(lines) if line.strip()]

In [3]:
import nltk
from collections import defaultdict
def count_types(list_of_types):
    """Compute the relative frequency of every type over all type occurrences.

    Args:
        list_of_types: Iterable of per-example iterables of type labels.

    Returns:
        A list of ``(type, frequency)`` tuples sorted by descending frequency,
        where frequency is the share of all label occurrences that belong to
        the type, rounded to two decimals.
    """
    # Flatten the per-example label lists into one long list of occurrences.
    flat = []
    for labels in tqdm(list_of_types):
        flat.extend(labels)
    total = len(flat)

    occurrences = defaultdict(int)
    for label in tqdm(flat):
        occurrences[label] += 1

    freqs = []
    for label in tqdm(set(flat)):
        freqs.append((label, round(occurrences[label] / total, 2)))

    # Stable sort, highest frequency first.
    freqs.sort(key=lambda pair: pair[1], reverse=True)
    return freqs

def count_types_for_example(list_of_types):
    """Count how many times each type label occurs across all examples.

    Args:
        list_of_types: Iterable of per-example iterables of type labels.

    Returns:
        A list of ``(type, count)`` tuples sorted by descending count.
    """
    occurrences = defaultdict(int)
    for example_types in tqdm(list_of_types):
        for label in example_types:
            occurrences[label] += 1

    pairs = []
    for label in tqdm(occurrences.keys()):
        # round() leaves the integer counts unchanged; it is kept so the
        # tuples are built the same way as in count_types.
        pairs.append((label, round(occurrences[label], 2)))

    # Stable sort, most frequent type first.
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs

In [6]:
# Directory that compute_corrs / compute_onoe_corr scan for prediction files.
# NOTE(review): the recorded compute_onoe_corr outputs list files under
# '../results/avgs_stds/', so this name must have been rebound in a cell that
# is not shown here before those calls ran -- confirm and make it explicit.
base_results_path = '../official_results/predictions/'

In [7]:
def trimmed_stats(x, sampled = True):
    """Mean and standard deviation of ``x`` after dropping its min and max.

    Args:
        x: Array-like of numbers.
        sampled: When true the standard deviation uses ``ddof=1`` (sample
            estimate); otherwise ``ddof=0`` (population estimate).

    Returns:
        A ``(mean, std)`` tuple computed over the sorted values with the first
        and last element removed.
    """
    ordered = np.sort(x)
    # Drop exactly one value from each end of the sorted array.
    core = ordered[1:-1]
    ddof = 1 if sampled else 0
    return core.mean(), core.std(ddof=ddof)

def compute_corrs(distribution, suffix, results_dir=None):
    """Correlate per-type F1 (averaged over model sections) with a distribution.

    Every file in ``results_dir`` whose name ends with ``suffix`` is parsed.
    The first line of a file is taken as a delimiter; each later line equal to
    it starts the next model section.  Every other line is split on tabs:
    field 0 is the type name (each ``_<digits>`` run is removed from it) and
    field 3 is parsed as the F1 score.

    Per type, at most one F1 value is recorded per model section.  The
    recorded values are reduced with ``trimmed_stats`` (mean after dropping
    the smallest and largest value) and those means are correlated, via
    ``np.corrcoef``, with ``distribution`` over the types found in both.

    NOTE(review): the first row ever seen for a type only registers it, and
    nothing is recorded while the section counter is still 0, so the first
    model section of each file never contributes an F1 value.  This looks like
    an off-by-one; it is preserved here so previously reported numbers do not
    change -- confirm whether it is intended.

    Args:
        distribution: Mapping from type name to a numeric value (e.g. count).
        suffix: Only files whose name ends with this string are processed.
        results_dir: Directory to scan.  Defaults to the module-level
            ``base_results_path`` when ``None``.

    Returns:
        Dict mapping each processed file path to the correlation coefficient
        (``nan`` for an empty file or when the correlation is undefined).

    Raises:
        KeyError: If a type present in ``distribution`` appears in the file
            but never got an F1 value recorded.
    """
    if results_dir is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        results_dir = base_results_path
    digit_suffix = re.compile(r'_[0-9]+')

    files = [os.path.join(results_dir, name)
             for name in os.listdir(results_dir)
             if name.endswith(suffix)]

    corrs = {}
    for f in files:
        print(f)
        with open(f, 'r') as inp:
            f_lines = inp.readlines()
        if not f_lines:
            # No delimiter line and no data: the correlation is undefined.
            corrs[f] = float('nan')
            continue

        delimiter = f_lines[0]
        model_i = 0
        f1s = defaultdict(list)
        update_i = {}    # type -> index of the last section that recorded it
        seen_types = {}  # dict used as an insertion-ordered set of types
        for l in f_lines[1:]:
            if l == delimiter:
                model_i += 1
                continue
            elems = l.split('\t')
            typ = digit_suffix.sub('', elems[0])
            if typ not in update_i:
                # First sighting only registers the type (see NOTE above).
                update_i[typ] = 0
            elif update_i[typ] != model_i:
                f1s[typ].append(float(elems[3]))
                update_i[typ] = model_i
            seen_types[typ] = None

        mean_f1 = {k: trimmed_stats(v)[0] for k, v in f1s.items()}
        shared = [t for t in seen_types if t in distribution]
        corrs[f] = np.corrcoef([mean_f1[t] for t in shared],
                               [distribution[t] for t in shared])[0][1]
    return corrs

def compute_onoe_corr(distribution, suffix, results_dir=None):
    """Correlate per-type scores read from result files with a distribution.

    Every file in ``results_dir`` whose name ends with ``suffix`` is parsed.
    The first line is skipped; every other non-blank line is split on tabs,
    field 0 (stripped) being the type name and field 1 the score, parsed as a
    float (the F1 value).  When a type occurs more than once only its first
    row is used.  The scores are correlated, via ``np.corrcoef``, with
    ``distribution`` over the types found in both.

    The path of each file and its parsed ``type -> score`` dict are printed.

    Args:
        distribution: Mapping from type name to a numeric value (e.g. count).
        suffix: Only files whose name ends with this string are processed.
        results_dir: Directory to scan.  Defaults to the module-level
            ``base_results_path`` when ``None``.

    Returns:
        Dict mapping each processed file path to the correlation coefficient
        (``nan`` when the correlation is undefined).
    """
    if results_dir is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        results_dir = base_results_path

    files = [os.path.join(results_dir, name)
             for name in os.listdir(results_dir)
             if name.endswith(suffix)]

    corrs = {}
    for f in files:
        print(f)
        f1s = {}  # insertion-ordered, so it also keeps the file order of types
        with open(f, 'r') as inp:
            next(inp, None)  # skip the first (header) line
            for line in inp:
                if not line.strip():
                    # Tolerate blank lines such as a trailing newline.
                    continue
                elems = line.split('\t')
                typ = elems[0].strip()
                if typ not in f1s:
                    f1s[typ] = float(elems[1])
        print(f1s)
        shared = [t for t in f1s if t in distribution]
        corrs[f] = np.corrcoef([f1s[t] for t in shared],
                               [distribution[t] for t in shared])[0][1]
    return corrs

# BBN

In [29]:
# BBN splits; each file is read line by line as JSON by
# load_dataset_and_extract_types.
# NOTE(review): these are hard-coded absolute paths on one machine; consider a
# configurable data directory so the notebook can run elsewhere.
train = '/datahdd/vmanuel/entity_typing_all_datasets/data/BBN/BBN/train_partitioned.json'
dev = '/datahdd/vmanuel/entity_typing_all_datasets/data/BBN/BBN/dev_partitioned.json'
test = '/datahdd/vmanuel/entity_typing_all_datasets/data/BBN/BBN/test_lines.json'

# Test files whose names indicate other datasets mapped into BBN
# (presumably into the BBN type set -- TODO confirm).
test_on_onto = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_Onto_into_BBN.json'
test_on_figer = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_figer_into_BBN.json'
test_on_choi = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_choi_into_BBN.json'

In [30]:
# Per-example 'y_str' values for each BBN split.
train_types = load_dataset_and_extract_types(train)
dev_types = load_dataset_and_extract_types(dev)
test_types = load_dataset_and_extract_types(test)

# Same extraction for the mapped test files.
on_onto_types = load_dataset_and_extract_types(test_on_onto)
on_figer_types = load_dataset_and_extract_types(test_on_figer)
on_choi_types = load_dataset_and_extract_types(test_on_choi)


100%|██████████| 84357/84357 [00:01<00:00, 74193.49it/s]
100%|██████████| 1721/1721 [00:00<00:00, 85612.25it/s]
100%|██████████| 12349/12349 [00:00<00:00, 111666.89it/s]
100%|██████████| 4261/4261 [00:00<00:00, 43702.74it/s]
100%|██████████| 522/522 [00:00<00:00, 64785.52it/s]
100%|██████████| 1475/1475 [00:00<00:00, 52884.14it/s]


In [8]:
# train_dist = count_types(train_types)
# dev_dist = count_types(dev_types)
# test_dist = count_types(test_types)

# on_onto_dist = count_types(on_onto_types)
# on_figer_dist = count_types(on_figer_types)
# on_choi_dist = count_types(on_choi_types)

In [31]:
# Each *_dist is a list of (type, count) tuples sorted by descending count.
train_dist = count_types_for_example(train_types)
dev_dist = count_types_for_example(dev_types)
test_dist = count_types_for_example(test_types)

# Type counts for the mapped test files.
on_onto_dist = count_types_for_example(on_onto_types)
on_figer_dist = count_types_for_example(on_figer_types)
on_choi_dist = count_types_for_example(on_choi_types)

100%|██████████| 84357/84357 [00:00<00:00, 1037403.47it/s]
100%|██████████| 47/47 [00:00<00:00, 138144.56it/s]
100%|██████████| 1721/1721 [00:00<00:00, 692345.79it/s]
100%|██████████| 42/42 [00:00<00:00, 95016.60it/s]
100%|██████████| 12349/12349 [00:00<00:00, 714341.32it/s]
100%|██████████| 39/39 [00:00<00:00, 87521.59it/s]
100%|██████████| 4261/4261 [00:00<00:00, 780127.00it/s]
100%|██████████| 25/25 [00:00<00:00, 50099.19it/s]
100%|██████████| 522/522 [00:00<00:00, 668527.23it/s]
100%|██████████| 17/17 [00:00<00:00, 70111.28it/s]
100%|██████████| 1475/1475 [00:00<00:00, 770627.60it/s]
100%|██████████| 40/40 [00:00<00:00, 126144.48it/s]


# Correlations with BBN from scratch models

In [33]:
# Filename suffix selecting the result files of models trained and tested on BBN.
suffix = 'trained_on_bbn_tested_on_bbn_test.txt'

In [34]:
# dict() turns the (type, count) list into the type -> count mapping that
# compute_corrs looks types up in; here the BBN training counts are used.
compute_corrs(dict(train_dist), suffix)

../official_results/predictions/adapter_2_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/adapter_16_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/bert_ft_2_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/bert_ft_0_trained_on_bbn_tested_on_bbn_test.txt


{'../official_results/predictions/adapter_2_trained_on_bbn_tested_on_bbn_test.txt': 0.386649024237096,
 '../official_results/predictions/adapter_16_trained_on_bbn_tested_on_bbn_test.txt': 0.3634725851542615,
 '../official_results/predictions/bert_ft_2_trained_on_bbn_tested_on_bbn_test.txt': 0.381467154495951,
 '../official_results/predictions/bert_ft_0_trained_on_bbn_tested_on_bbn_test.txt': 0.6545634506419999}

In [35]:
# Correlation between the scores in '*BBN_preds.txt' and BBN training counts.
# NOTE(review): the recorded output lists '../results/avgs_stds/onoe_BBN_preds.txt',
# which is not under the base_results_path value assigned in the visible
# configuration cell -- the directory was changed somewhere not shown here.
suffix = 'BBN_preds.txt'
compute_onoe_corr(dict(train_dist), suffix)

../results/avgs_stds/onoe_BBN_preds.txt
{'/PERSON': 0.72, '/ORGANIZATION': 0.87, '/ORGANIZATION/CORPORATION': 0.79, '/ORGANIZATION/GOVERNMENT': 0.72, '/GPE': 0.89, '/GPE/CITY': 0.82, '/LOCATION': 0.21, '/LOCATION/REGION': 0.38, '/SUBSTANCE': 0.9, '/SUBSTANCE/FOOD': 0.79, '/EVENT': 0.59, '/GAME': 0.06, '/GPE/STATE_PROVINCE': 0.79, '/GPE/COUNTRY': 0.92, '/WORK_OF_ART': 0.42, '/WORK_OF_ART/BOOK': 0.06, '/LOCATION/LAKE_SEA_OCEAN': 0.0, '/WORK_OF_ART/SONG': 0.0, '/ANIMAL': 0.65, '/LANGUAGE': 0.56, '/ORGANIZATION/EDUCATIONAL': 0.42, '/PRODUCT': 0.18, '/ORGANIZATION/HOTEL': 0.36, '/ORGANIZATION/POLITICAL': 0.75, '/DISEASE': 0.87, '/LOCATION/RIVER': 0.37, '/PRODUCT/VEHICLE': 0.3, '/LOCATION/CONTINENT': 0.97, '/LAW': 0.36, '/SUBSTANCE/CHEMICAL': 0.44, '/ORGANIZATION/RELIGIOUS': 0.29, '/EVENT/HURRICANE': 0.94, '/EVENT/WAR': 0.51, '/PLANT': 0.22, '/SUBSTANCE/DRUG': 0.39, '/ORGANIZATION/HOSPITAL': 0.0, '/WORK_OF_ART/PLAY': 0.0, '/CONTACT_INFO': 0.0, '/PRODUCT/WEAPON': 0.0}


{'../results/avgs_stds/onoe_BBN_preds.txt': 0.37372718303130786}

# Onto

In [36]:
# OntoNotes splits.  NOTE: this cell rebinds train/dev/test, the *_types and
# the *_dist names that the BBN section also assigns, so later cells see
# whichever dataset section was executed last.
# NOTE(review): hard-coded absolute paths on one machine.
train = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/ontonotes/g_train_tree.json'
dev = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/ontonotes/g_dev_tree.json'
test = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/ontonotes/g_test_tree.json'

# Test files whose names indicate other datasets mapped into OntoNotes
# (presumably into its type set -- TODO confirm).
test_on_bbn = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_BBN_into_Ontonotes.json'
test_on_figer = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_figer_into_Ontonotes.json'
test_on_choi = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_choi_into_onto.json'

# Per-example 'y_str' values for each split.
train_types = load_dataset_and_extract_types(train)
dev_types = load_dataset_and_extract_types(dev)
test_types = load_dataset_and_extract_types(test)

on_bbn_types = load_dataset_and_extract_types(test_on_bbn)
on_figer_types = load_dataset_and_extract_types(test_on_figer)
on_choi_types = load_dataset_and_extract_types(test_on_choi)

# (type, count) lists sorted by descending count.
train_dist = count_types_for_example(train_types)
dev_dist = count_types_for_example(dev_types)
test_dist = count_types_for_example(test_types)

on_bbn_dist = count_types_for_example(on_bbn_types)
on_figer_dist = count_types_for_example(on_figer_types)
on_choi_dist = count_types_for_example(on_choi_types)

100%|██████████| 251039/251039 [00:03<00:00, 63603.42it/s]
100%|██████████| 2202/2202 [00:00<00:00, 47730.78it/s]
100%|██████████| 8963/8963 [00:00<00:00, 44291.38it/s]
100%|██████████| 11483/11483 [00:00<00:00, 59309.53it/s]
100%|██████████| 528/528 [00:00<00:00, 59907.28it/s]
100%|██████████| 1532/1532 [00:00<00:00, 53553.98it/s]
100%|██████████| 251039/251039 [00:00<00:00, 995378.13it/s] 
100%|██████████| 89/89 [00:00<00:00, 241613.63it/s]
100%|██████████| 2202/2202 [00:00<00:00, 747479.56it/s]
100%|██████████| 45/45 [00:00<00:00, 85520.47it/s]
100%|██████████| 8963/8963 [00:00<00:00, 1078353.13it/s]
100%|██████████| 67/67 [00:00<00:00, 227729.63it/s]
100%|██████████| 11483/11483 [00:00<00:00, 1018895.55it/s]
100%|██████████| 29/29 [00:00<00:00, 114211.09it/s]
100%|██████████| 528/528 [00:00<00:00, 627718.97it/s]
100%|██████████| 35/35 [00:00<00:00, 55690.68it/s]
100%|██████████| 1532/1532 [00:00<00:00, 752244.64it/s]
100%|██████████| 69/69 [00:00<00:00, 87328.60it/s]


# Correlations with ONTO from scratch models

In [37]:
# Filename suffix selecting the result files of models trained and tested on OntoNotes.
suffix = 'trained_on_onto_tested_on_onto_test.txt'

In [38]:
# Correlate per-type F1 with the OntoNotes training counts (type -> count dict).
compute_corrs(dict(train_dist), suffix)

../official_results/predictions/adapter_2_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_0_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/adapter_16_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_2_trained_on_onto_tested_on_onto_test.txt


{'../official_results/predictions/adapter_2_trained_on_onto_tested_on_onto_test.txt': 0.886010273805676,
 '../official_results/predictions/bert_ft_0_trained_on_onto_tested_on_onto_test.txt': 0.7677132250452502,
 '../official_results/predictions/adapter_16_trained_on_onto_tested_on_onto_test.txt': 0.8892003310044112,
 '../official_results/predictions/bert_ft_2_trained_on_onto_tested_on_onto_test.txt': 0.7761324268909864}

In [39]:
# Correlation between the scores in '*onto_preds.txt' and OntoNotes training counts.
# NOTE(review): the recorded output lists a file under '../results/avgs_stds/',
# not under the base_results_path value assigned in the visible configuration cell.
suffix = 'onto_preds.txt'
compute_onoe_corr(dict(train_dist), suffix)

../results/avgs_stds/onoe_onto_preds.txt
{'/organization/company': 0.41, '/organization': 0.57, '/person': 0.55, '/organization/government': 0.39, '/other': 0.83, '/location': 0.69, '/location/structure': 0.12, '/other/legal': 0.04, '/other/currency': 0.55, '/location/country': 0.82, '/location/city': 0.59, '/person/title': 0.18, '/person/political_figure': 0.22, '/other/health': 0.11, '/other/product': 0.26, '/other/health/treatment': 0.19, '/person/legal': 0.0, '/location/geography': 0.11, '/other/art': 0.17, '/other/art/writing': 0.1, '/other/event': 0.1, '/other/event/violent_conflict': 0.23, '/other/heritage': 0.0, '/organization/company/news': 0.52, '/other/product/weapon': 0.0, '/organization/stock_exchange': 0.48, '/other/product/computer': 0.0, '/person/athlete': 0.0, '/other/event/sports_event': 0.0, '/other/product/car': 0.22, '/person/artist': 0.17, '/person/artist/author': 0.13, '/other/living_thing': 0.43, '/other/food': 0.16, '/other/living_thing/animal': 0.55, '/locatio

{'../results/avgs_stds/onoe_onto_preds.txt': 0.46716190103689564}

# FIGER

In [17]:
# FIGER splits.  NOTE: this cell rebinds train/dev/test, the *_types and the
# *_dist names that the other dataset sections also assign, so later cells see
# whichever dataset section was executed last.
# NOTE(review): hard-coded absolute paths on one machine.
train = '/datahdd/vmanuel/entity_typing_all_datasets/data/FIGER/train_partitioned.json'
dev = '/datahdd/vmanuel/entity_typing_all_datasets/data/FIGER/dev_partitioned.json'
test = '/datahdd/vmanuel/entity_typing_all_datasets/data/FIGER/test_lines.json'

# Test files whose names indicate other datasets mapped into FIGER
# (presumably into its type set -- TODO confirm).
test_on_bbn = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_BBN_into_FIGER.json'
test_on_onto = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_Onto_into_figer.json'
test_on_choi = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_choi_into_figer.json'

# Per-example 'y_str' values for each split.
train_types = load_dataset_and_extract_types(train)
dev_types = load_dataset_and_extract_types(dev)
test_types = load_dataset_and_extract_types(test)

on_bbn_types = load_dataset_and_extract_types(test_on_bbn)
on_onto_types = load_dataset_and_extract_types(test_on_onto)
on_choi_types = load_dataset_and_extract_types(test_on_choi)

# (type, count) lists sorted by descending count.
train_dist = count_types_for_example(train_types)
dev_dist = count_types_for_example(dev_types)
test_dist = count_types_for_example(test_types)

on_bbn_dist = count_types_for_example(on_bbn_types)
on_onto_dist = count_types_for_example(on_onto_types)
on_choi_dist = count_types_for_example(on_choi_types)

100%|██████████| 2684906/2684906 [00:46<00:00, 57134.63it/s]
100%|██████████| 5380/5380 [00:00<00:00, 62982.99it/s]
100%|██████████| 563/563 [00:00<00:00, 75579.09it/s]
100%|██████████| 11811/11811 [00:00<00:00, 62644.62it/s]
100%|██████████| 4475/4475 [00:00<00:00, 40812.60it/s]
100%|██████████| 1618/1618 [00:00<00:00, 52469.34it/s]
100%|██████████| 2684906/2684906 [00:02<00:00, 1190814.96it/s]
100%|██████████| 128/128 [00:00<00:00, 418123.76it/s]
100%|██████████| 5380/5380 [00:00<00:00, 561020.23it/s]
100%|██████████| 120/120 [00:00<00:00, 149618.45it/s]
100%|██████████| 563/563 [00:00<00:00, 780110.06it/s]
100%|██████████| 50/50 [00:00<00:00, 67890.97it/s]
100%|██████████| 11811/11811 [00:00<00:00, 821309.49it/s]
100%|██████████| 31/31 [00:00<00:00, 117561.87it/s]
100%|██████████| 4475/4475 [00:00<00:00, 1003019.85it/s]
100%|██████████| 56/56 [00:00<00:00, 87609.48it/s]
100%|██████████| 1618/1618 [00:00<00:00, 739096.48it/s]
100%|██████████| 90/90 [00:00<00:00, 92931.40it/s]


# Correlations with FIGER from scratch models

In [18]:
# Filename suffix selecting the result files of models trained and tested on FIGER.
suffix = 'trained_on_figer_tested_on_figer_test.txt'

In [19]:
# NOTE(review): this passes dev_dist, whereas the BBN and Onto sections pass
# train_dist to the same function -- confirm the difference is intentional.
compute_corrs(dict(dev_dist), suffix)

../official_results/predictions/bert_ft_0_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/adapter_2_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/bert_ft_2_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/adapter_16_trained_on_figer_tested_on_figer_test.txt


{'../official_results/predictions/bert_ft_0_trained_on_figer_tested_on_figer_test.txt': 0.488391971402729,
 '../official_results/predictions/adapter_2_trained_on_figer_tested_on_figer_test.txt': 0.17373599678207563,
 '../official_results/predictions/bert_ft_2_trained_on_figer_tested_on_figer_test.txt': 0.1836042070729137,
 '../official_results/predictions/adapter_16_trained_on_figer_tested_on_figer_test.txt': 0.1411657716000408}

In [20]:
# Correlation between the scores in '*figer_preds.txt' and FIGER dev counts.
# NOTE(review): uses dev_dist (BBN/Onto use train_dist), and the recorded
# output lists a file under '../results/avgs_stds/' rather than under the
# base_results_path value assigned in the visible configuration cell.
suffix = 'figer_preds.txt'
compute_onoe_corr(dict(dev_dist), suffix)

../results/avgs_stds/onoe_figer_preds.txt
{'/organization/educational_institution': 0.82, '/education': 0.29, '/organization': 0.9, '/education/department': 0.0, '/art': 0.25, '/written_work': 0.31, '/title': 0.8, '/person': 0.92, '/person/author': 0.0, '/event': 0.21, '/location': 0.85, '/location/city': 0.83, '/location/province': 0.82, '/time': 0.63, '/transportation': 0.5, '/transportation/road': 0.57, '/government_agency': 0.67, '/location/country': 1.0, '/government': 0.25, '/government/government': 0.22, '/person/politician': 0.67, '/organization/sports_league': 0.8, '/news_agency': 0.8, '/organization/company': 0.59, '/location/county': 1.0, '/law': 1.0, '/organization/sports_team': 0.85, '/building': 0.88, '/building/sports_facility': 0.89, '/person/coach': 1.0, '/person/athlete': 0.73, '/education/educational_degree': 0.67, '/person/artist': 0.17, '/people': 0.5, '/people/ethnicity': 0.44, '/building/hospital': 1.0, '/building/hotel': 0.0, '/person/doctor': 0.0, '/military': 

{'../results/avgs_stds/onoe_figer_preds.txt': 0.2145794898680103}

# Choi

In [8]:
# Choi splits.  NOTE: this cell rebinds train/dev/test, the *_types and the
# *_dist names that the other dataset sections also assign, so later cells see
# whichever dataset section was executed last.
# NOTE(review): hard-coded absolute paths on one machine.
train = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/choi/train.json'
dev = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/choi/dev.json'
test = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/choi/test_tree.json'

# Test files whose names indicate other datasets mapped into Choi
# (presumably into its type set -- TODO confirm).
test_on_bbn = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_BBN_into_choi.json'
test_on_onto = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_Onto_into_choi.json'
test_on_figer = '/datahdd/vmanuel/entity_typing_all_datasets/data/entity_typing_original_datasets/mapped_datasets/test_figer_into_choi.json'

# Per-example 'y_str' values for each split.
train_types = load_dataset_and_extract_types(train)
dev_types = load_dataset_and_extract_types(dev)
test_types = load_dataset_and_extract_types(test)

on_bbn_types = load_dataset_and_extract_types(test_on_bbn)
on_onto_types = load_dataset_and_extract_types(test_on_onto)
on_figer_types = load_dataset_and_extract_types(test_on_figer)

# (type, count) lists sorted by descending count.
train_dist = count_types_for_example(train_types)
dev_dist = count_types_for_example(dev_types)
test_dist = count_types_for_example(test_types)

on_bbn_dist = count_types_for_example(on_bbn_types)
on_onto_dist = count_types_for_example(on_onto_types)
on_figer_dist = count_types_for_example(on_figer_types)

100%|██████████| 3549962/3549962 [01:21<00:00, 43607.73it/s]
100%|██████████| 126792/126792 [00:01<00:00, 102156.43it/s]
100%|██████████| 1998/1998 [00:00<00:00, 60398.41it/s]
100%|██████████| 12156/12156 [00:00<00:00, 58259.68it/s]
100%|██████████| 4480/4480 [00:00<00:00, 43511.80it/s]
100%|██████████| 556/556 [00:00<00:00, 57205.34it/s]
100%|██████████| 3549962/3549962 [00:02<00:00, 1449189.27it/s]
100%|██████████| 8617/8617 [00:00<00:00, 765289.30it/s]
100%|██████████| 126792/126792 [00:00<00:00, 1330804.69it/s]
100%|██████████| 5761/5761 [00:00<00:00, 442690.68it/s]
100%|██████████| 1998/1998 [00:00<00:00, 501539.25it/s]
100%|██████████| 1682/1682 [00:00<00:00, 785264.84it/s]
100%|██████████| 12156/12156 [00:00<00:00, 1219497.22it/s]
100%|██████████| 35/35 [00:00<00:00, 120525.98it/s]
100%|██████████| 4480/4480 [00:00<00:00, 1016800.97it/s]
100%|██████████| 56/56 [00:00<00:00, 221377.03it/s]
100%|██████████| 556/556 [00:00<00:00, 637167.49it/s]
100%|██████████| 40/40 [00:00<00:00, 

# Correlations with choi from scratch models

In [23]:
# Filename suffix selecting the result files of models trained and tested on Choi.
suffix = 'trained_on_choi_tested_on_choi_test.txt'

In [25]:
# NOTE(review): this passes dev_dist, whereas the BBN and Onto sections pass
# train_dist.  The execution counts in this notebook are not sequential and
# dev_dist is rebound by every dataset section, so re-run top to bottom to be
# sure this call sees the Choi distribution.
compute_corrs(dict(dev_dist), suffix)

../official_results/predictions/adapter_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/adapter_16_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_0_trained_on_choi_tested_on_choi_test.txt


{'../official_results/predictions/adapter_2_trained_on_choi_tested_on_choi_test.txt': 0.17512590745707912,
 '../official_results/predictions/adapter_16_trained_on_choi_tested_on_choi_test.txt': 0.16174095795535645,
 '../official_results/predictions/bert_ft_2_trained_on_choi_tested_on_choi_test.txt': 0.16777639790511673,
 '../official_results/predictions/bert_ft_0_trained_on_choi_tested_on_choi_test.txt': 0.21952809429155928}

In [26]:
# Filename suffix selecting the '*choi_preds.txt' score file(s).
suffix = 'choi_preds.txt'

In [28]:
# NOTE(review): uses dev_dist (BBN/Onto use train_dist), and the recorded
# output lists a file under '../results/avgs_stds/' rather than under the
# base_results_path value assigned in the visible configuration cell.
compute_onoe_corr(dict(dev_dist), suffix)

../results/avgs_stds/onoe_choi_preds.txt
{'date': 0.0, 'day': 0.42, 'weekday': 0.0, 'event': 0.03, 'ceremony': 0.0, 'conference': 0.2, 'gathering': 0.0, 'meeting': 0.0, 'confluence': 0.0, 'person': 0.21, 'politician': 0.05, 'official': 0.14, 'policeman': 0.0, 'spokesman': 0.0, 'spokesperson': 0.0, 'statesman': 0.0, 'serviceman': 0.0, 'organization': 0.01, 'place': 0.03, 'government': 0.16, 'administration': 0.03, 'assembly': 0.0, 'committee': 0.0, 'legislature': 0.0, 'adult': 0.0, 'leader': 0.01, 'male': 0.0, 'man': 0.0, 'object': 0.0, 'document': 0.0, 'finding': 0.0, 'report': 0.2, 'military': 0.0, 'child': 0.0, 'relation': 0.0, 'son': 0.5, 'worker': 0.0, 'juvenile': 0.0, 'young': 0.0, 'organism': 0.0, 'concept': 0.0, 'reason': 0.0, 'athlete': 0.0, 'driver': 0.0, 'player': 0.03, 'professional': 0.03, 'contestant': 0.0, 'racer': 0.0, 'location': 0.0, 'city': 0.31, 'area': 0.03, 'municipality': 0.0, 'region': 0.0, 'country': 0.24, 'state': 0.02, 'performance': 0.0, 'show': 0.33, 'group'

{'../results/avgs_stds/onoe_choi_preds.txt': 0.039780592064976646}