In [1]:
import json
from tqdm import tqdm
import os
import numpy as np
import re
from collections import defaultdict

In [2]:
def load_dataset_and_extract_types(path):
    """Collect the ``y_str`` field of every record in a JSON-lines file.

    Args:
        path: Location of a text file holding one JSON object per line.

    Returns:
        A list with the ``y_str`` value of each line, in file order.
    """
    y_strs = []
    with open(path, 'r') as inp:
        # The whole file is read first, so tqdm iterates over a list of lines.
        for raw_line in tqdm(inp.readlines()):
            record = json.loads(raw_line)
            y_strs.append(record['y_str'])
    return y_strs

In [3]:
# Relative path of the directory that extract_files() lists to find result files.
# NOTE(review): the same assignment is repeated in the cell that defines the helpers.
base_results_path = '../official_results/predictions/'

In [4]:
def trimmed_stats(x, sampled = True):
    """Mean and standard deviation of ``x`` after dropping its extremes.

    The values are sorted and the smallest and the largest entries are
    discarded before the statistics are computed.

    Args:
        x: Array-like of numbers.
        sampled: When truthy the standard deviation uses ``ddof=1``,
            otherwise ``ddof=0``.

    Returns:
        A ``(mean, std)`` tuple computed on the trimmed values.
    """
    trimmed = np.sort(x)[1:-1]
    mean = trimmed.mean()
    if sampled:
        ddof = 1
    else:
        ddof = 0
    return mean, trimmed.std(ddof = ddof)

In [5]:
# Directory listed by extract_files() to find result files.
base_results_path = '../official_results/predictions/'
# NOTE(review): not referenced by any code visible in this notebook.
onoe_base_results_path = '../results/avgs_stds/'

def compute_f1(p, r):
    """Harmonic mean of precision ``p`` and recall ``r``.

    Returns 0 when ``p + r`` is zero, which avoids a division by zero.
    """
    total = p + r
    if not total:
        return 0
    return 2*p*r/total

def extract_files(suffix):
    """List the files in ``base_results_path`` whose name ends with ``suffix``.

    Args:
        suffix: Trailing part of the file name to match.

    Returns:
        The paths (directory joined with file name) of the matching entries,
        sorted by file name.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the reported rows stable across runs and machines.
    return [os.path.join(base_results_path, name)
            for name in sorted(os.listdir(base_results_path))
            if name.endswith(suffix)]

def extract_type(onoe, elems):
    """Return the type label held in the first field of a result row.

    Args:
        onoe: Selects how the label is cleaned (see Returns).
        elems: Fields of a result row; only ``elems[0]`` is read.

    Returns:
        When ``onoe`` is truthy, ``elems[0]`` with surrounding whitespace
        removed; otherwise ``elems[0]`` with every ``_<digits>`` occurrence
        removed.
    """
    label = elems[0]
    if onoe:
        return label.strip()
    return re.sub(r'_[0-9]+', '', label)

def extract_stats(types, suffix, files, onoe=False):
    """Aggregate per-type precision and recall for every result file.

    Each file is read as follows: the first line is a delimiter; every later
    line equal to it starts a new block (counted by ``model_i``); all other
    lines are split on tabs, with the type label, precision and recall in the
    first three fields. Within a block only the first row of a type is kept.

    Args:
        types: Collection of (hashable) type labels to keep; rows of other
            types are ignored.
        suffix: Unused; kept so existing callers keep working.
        files: Paths of the result files to read.
        onoe: Forwarded to ``extract_type``. When false, the values collected
            for a type across blocks are reduced to their trimmed mean.

    Returns:
        A dict mapping each file path to
        ``{'precision': {'mean', 'std'}, 'recall': {'mean', 'std'},
        'f1': {'mean'}}``, where mean/std are taken across types and rounded
        to two decimals. Empty files are reported and left out.
    """
    # `types` may be a list with thousands of labels and is probed once per
    # row, so use a hash-based lookup instead of a linear scan.
    wanted = frozenset(types)

    stats = {}
    for f in files:
        print(f)

        with open(f, 'r') as inp:
            f_lines = inp.readlines()

        if not f_lines:
            # No delimiter line and no rows: report it instead of failing
            # with an IndexError on f_lines[0].
            print('  (empty file, skipped)')
            continue

        ps = defaultdict(list)
        rs = defaultdict(list)
        # Last block in which each type was recorded (0 = never).
        update_i = defaultdict(int)

        delimiter = f_lines[0]
        model_i = 1
        for l in f_lines[1:]:
            if l == delimiter:
                model_i += 1
                continue

            elems = l.split('\t')
            typ = extract_type(onoe, elems)
            # Keep only the first row of a wanted type inside each block.
            if typ in wanted and update_i[typ] != model_i:
                ps[typ].append(float(elems[1]))
                rs[typ].append(float(elems[2]))
                update_i[typ] = model_i

        if not onoe:
            # One value per type: mean over blocks without the two extremes.
            ps = {k: trimmed_stats(v)[0] for k, v in ps.items()}
            rs = {k: trimmed_stats(v)[0] for k, v in rs.items()}

        p_values = list(ps.values())
        r_values = list(rs.values())
        average_p = round(np.mean(p_values), 2)
        p_std = round(np.std(p_values), 2)
        average_r = round(np.mean(r_values), 2)
        r_std = round(np.std(r_values), 2)

        # F1 is derived from the already rounded means, as before, so the
        # reported numbers do not change.
        stats[f] = {'precision': {'mean': average_p,
                                  'std': p_std},
                    'recall': {'mean': average_r,
                               'std': r_std},
                    'f1': {'mean': round(compute_f1(average_p, average_r), 2)}
                    }
    return stats

def compute_average_f1_for_types(types, suffix, onoe = False):
    """Print the precision/recall/F1 table for the files ending with ``suffix``.

    The matching files are found with ``extract_files``, summarised with
    ``extract_stats`` for the given ``types`` and rendered by ``print_stats``,
    whose return value is passed through.
    """
    return print_stats(extract_stats(types, suffix, extract_files(suffix), onoe))

In [6]:
def print_stats(stats):
    """Print a fixed-width table with one row per entry of ``stats``.

    Args:
        stats: Mapping from a file path to a dict with the keys
            ``'precision'`` and ``'recall'`` (each holding ``'mean'`` and
            ``'std'``) and ``'f1'`` (holding ``'mean'``). The row label is the
            last path component with ``'_test.txt'`` removed.
    """
    header_fmt = '{:>50} |{:^13}|{:^13}|{:^6}'
    row_fmt = '{:>50} |{:^6}|{:^6}|{:^6}|{:^6}|{:^6}'
    rule_fmt = '{:-<85}'

    # Two blank lines separate the table from the file names printed earlier.
    for _ in range(2):
        print()

    print(header_fmt.format('model', 'precision', 'recall', 'f1'))
    print(rule_fmt.format('{: ^44}'.format('')))
    print(row_fmt.format('', 'mean', 'std', 'mean', 'std', 'mean'))
    print(rule_fmt.format('{: ^10}'.format('')))

    for path, stat_dict in stats.items():
        model_name = path.split('/')[-1].replace('_test.txt', '')
        precision = stat_dict['precision']
        recall = stat_dict['recall']
        print(row_fmt.format(model_name,
                             precision['mean'], precision['std'],
                             recall['mean'], recall['std'],
                             stat_dict['f1']['mean']))


# FIGER

In [7]:
# Location of the FIGER ontology file (read in the next cell, one type label per line).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
types_path = '/datahdd/vmanuel/entity_typing_all_datasets/data/FIGER/ontology.txt'

In [8]:
# Read the ontology: one type label per line, newline characters removed.
with open(types_path, 'r') as inp:
    types = [line.replace('\n', '') for line in inp.readlines()]

# fathers: labels that appear, followed by '/', inside another label.
# sons: the labels that contain such a father.
fathers = []
sons = []

for t in types:
    for t2 in types:
        if t == t2 or t + '/' not in t2:
            continue
        if t not in fathers:
            fathers.append(t)
        if t2 not in sons:
            sons.append(t2)

In [11]:
# Number of FIGER labels collected as fathers.
len(fathers)

22

In [12]:
# Number of FIGER labels collected as sons.
len(sons)

79

In [157]:
# Evaluate the FIGER father types on every result file whose name ends with this suffix.
suffix = 'trained_on_figer_tested_on_figer_test.txt'

compute_average_f1_for_types(fathers, suffix)

../official_results/predictions/bert_ft_0_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/adapter_2_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/bert_ft_2_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/adapter_16_trained_on_figer_tested_on_figer_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
        bert_ft_0_trained_on_figer_tested_on_figer | 0.43 | 0.38 | 0.25 | 0.28 | 0.32 
        adapter_2_trained_on_figer_tested_on_figer | 0.69 | 0.33 | 0.71 | 0.31 | 0.7  
        bert_ft_2_trained_on_figer_tested_on_figer | 0.65 | 0.31 | 0.67 | 0.33 | 0.66 
       adapter_16_trained_on_figer_tested_on_figer | 0

In [158]:
# Evaluate the FIGER son types on every result file whose name ends with this suffix.
suffix = 'trained_on_figer_tested_on_figer_test.txt'

compute_average_f1_for_types(sons, suffix)

../official_results/predictions/bert_ft_0_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/adapter_2_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/bert_ft_2_trained_on_figer_tested_on_figer_test.txt
../official_results/predictions/adapter_16_trained_on_figer_tested_on_figer_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
        bert_ft_0_trained_on_figer_tested_on_figer | 0.24 | 0.34 | 0.16 | 0.25 | 0.19 
        adapter_2_trained_on_figer_tested_on_figer | 0.52 | 0.4  | 0.6  | 0.39 | 0.56 
        bert_ft_2_trained_on_figer_tested_on_figer | 0.48 | 0.4  | 0.58 | 0.4  | 0.53 
       adapter_16_trained_on_figer_tested_on_figer | 0

In [161]:
# FIGER father types on the files ending with 'figer_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'figer_preds.txt'

compute_average_f1_for_types(fathers, suffix, onoe=True)

../official_results/predictions/onoe_figer_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                              onoe_figer_preds.txt | 0.64 | 0.3  | 0.62 | 0.32 | 0.63 


In [162]:
# FIGER son types on the files ending with 'figer_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'figer_preds.txt'

compute_average_f1_for_types(sons, suffix, onoe=True)

../official_results/predictions/onoe_figer_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                              onoe_figer_preds.txt | 0.58 | 0.37 | 0.66 | 0.37 | 0.62 


# BBN

In [8]:
# Location of the BBN ontology file (read in the next cell, one type label per line).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
types_path = '/datahdd/vmanuel/entity_typing_all_datasets/data/BBN/BBN/ontology.txt'

In [11]:
# Read the ontology: one type label per line, newline characters removed.
with open(types_path, 'r') as inp:
    types = [l.replace('\n', '') for l in inp.readlines()]

# fathers: labels that are the path prefix of another label.
# sons: labels that extend a father with a further '/segment'.
fathers = []
sons = []

for t in types:
    for t2 in types:
        # Match on the path prefix (label + '/') rather than on a bare
        # substring, so labels that merely share leading characters
        # (e.g. '/A' and '/AB') are not treated as father and son.
        if t2.startswith(t + '/'):
            if t not in fathers:
                fathers.append(t)
            if t2 not in sons:
                sons.append(t2)

In [12]:
# Show the BBN labels collected as fathers.
fathers

['/CONTACT_INFO',
 '/EVENT',
 '/FACILITY',
 '/GPE',
 '/LOCATION',
 '/ORGANIZATION',
 '/PRODUCT',
 '/SUBSTANCE',
 '/WORK_OF_ART']

In [13]:
# Show the BBN labels collected as sons.
sons

['/CONTACT_INFO/url',
 '/EVENT/HURRICANE',
 '/EVENT/WAR',
 '/FACILITY/AIRPORT',
 '/FACILITY/ATTRACTION',
 '/FACILITY/BRIDGE',
 '/FACILITY/BUILDING',
 '/FACILITY/HIGHWAY_STREET',
 '/GPE/CITY',
 '/GPE/COUNTRY',
 '/GPE/STATE_PROVINCE',
 '/LOCATION/CONTINENT',
 '/LOCATION/LAKE_SEA_OCEAN',
 '/LOCATION/REGION',
 '/LOCATION/RIVER',
 '/ORGANIZATION/CORPORATION',
 '/ORGANIZATION/EDUCATIONAL',
 '/ORGANIZATION/GOVERNMENT',
 '/ORGANIZATION/HOSPITAL',
 '/ORGANIZATION/HOTEL',
 '/ORGANIZATION/MUSEUM',
 '/ORGANIZATION/POLITICAL',
 '/ORGANIZATION/RELIGIOUS',
 '/PRODUCT/VEHICLE',
 '/PRODUCT/WEAPON',
 '/SUBSTANCE/CHEMICAL',
 '/SUBSTANCE/DRUG',
 '/SUBSTANCE/FOOD',
 '/WORK_OF_ART/BOOK',
 '/WORK_OF_ART/PLAY',
 '/WORK_OF_ART/SONG']

In [62]:
# Evaluate the BBN father types on every result file whose name ends with this suffix.
suffix = 'trained_on_bbn_tested_on_bbn_test.txt'

compute_average_f1_for_types(fathers, suffix)

../official_results/predictions/adapter_2_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/adapter_16_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/bert_ft_2_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/bert_ft_0_trained_on_bbn_tested_on_bbn_test.txt


{'../official_results/predictions/adapter_2_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.45,
   'std': 0.33},
  'recall': {'mean': 0.65, 'std': 0.33},
  'f1': {'mean': 0.53}},
 '../official_results/predictions/adapter_16_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.46,
   'std': 0.33},
  'recall': {'mean': 0.66, 'std': 0.33},
  'f1': {'mean': 0.54}},
 '../official_results/predictions/bert_ft_2_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.44,
   'std': 0.33},
  'recall': {'mean': 0.64, 'std': 0.33},
  'f1': {'mean': 0.52}},
 '../official_results/predictions/bert_ft_0_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.43,
   'std': 0.32},
  'recall': {'mean': 0.31, 'std': 0.24},
  'f1': {'mean': 0.36}}}

In [63]:
# Same result files (suffix set in the previous cell), restricted to the BBN son types.
compute_average_f1_for_types(sons, suffix)

../official_results/predictions/adapter_2_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/adapter_16_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/bert_ft_2_trained_on_bbn_tested_on_bbn_test.txt
../official_results/predictions/bert_ft_0_trained_on_bbn_tested_on_bbn_test.txt


{'../official_results/predictions/adapter_2_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.43,
   'std': 0.35},
  'recall': {'mean': 0.48, 'std': 0.35},
  'f1': {'mean': 0.45}},
 '../official_results/predictions/adapter_16_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.45,
   'std': 0.31},
  'recall': {'mean': 0.52, 'std': 0.33},
  'f1': {'mean': 0.48}},
 '../official_results/predictions/bert_ft_2_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.45,
   'std': 0.33},
  'recall': {'mean': 0.53, 'std': 0.34},
  'f1': {'mean': 0.49}},
 '../official_results/predictions/bert_ft_0_trained_on_bbn_tested_on_bbn_test.txt': {'precision': {'mean': 0.3,
   'std': 0.31},
  'recall': {'mean': 0.11, 'std': 0.16},
  'f1': {'mean': 0.16}}}

In [64]:
# BBN father types on the files ending with 'BBN_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'BBN_preds.txt'
compute_average_f1_for_types(fathers, suffix, onoe = True)

../official_results/predictions/onoe_BBN_preds.txt


{'../official_results/predictions/onoe_BBN_preds.txt': {'precision': {'mean': 0.46,
   'std': 0.33},
  'recall': {'mean': 0.63, 'std': 0.34},
  'f1': {'mean': 0.53}}}

In [65]:
# BBN son types on the files ending with 'BBN_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'BBN_preds.txt'
compute_average_f1_for_types(sons, suffix, onoe = True)

../official_results/predictions/onoe_BBN_preds.txt


{'../official_results/predictions/onoe_BBN_preds.txt': {'precision': {'mean': 0.5,
   'std': 0.37},
  'recall': {'mean': 0.5, 'std': 0.35},
  'f1': {'mean': 0.5}}}

# Onto

In [66]:
# Location of the ontology file for the "Onto" dataset (read in the next cell, one type label per line).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
types_path = '/datahdd/vmanuel/entity_typing_all_datasets/data/ontology/onto_ontology.txt'

In [79]:
# Read the ontology: one type label per line, newline characters removed.
with open(types_path, 'r') as inp:
    types = [l.replace('\n', '') for l in inp.readlines()]

fathers = []
sons = []
nephews = []

# First pass: every label with a descendant is a candidate father and every
# label with an ancestor is a candidate son. The test uses the path prefix
# (label + '/') rather than a bare substring, so labels that merely share
# leading characters are not linked.
for t in types:
    for t2 in types:
        if t2.startswith(t + '/'):
            if t not in fathers:
                fathers.append(t)
            if t2 not in sons:
                sons.append(t2)

# Candidate sons nested below another candidate son form the third level.
for t in sons:
    for t2 in sons:
        if t2.startswith(t + '/'):
            nephews.append(t2)

# Keep only top-level labels in fathers ...
fathers = [t for t in fathers if t not in sons]

# ... and only second-level labels in sons.
sons = [t for t in sons if t not in nephews]

In [80]:
# Show the labels kept as fathers (those that are not also sons).
fathers

['/location', '/organization', '/other', '/person']

In [81]:
# Show the labels kept as sons (those that are not also nephews).
sons

['/location/celestial',
 '/location/city',
 '/location/country',
 '/location/geography',
 '/location/geograpy',
 '/location/park',
 '/location/structure',
 '/location/transit',
 '/organization/company',
 '/organization/education',
 '/organization/government',
 '/organization/military',
 '/organization/music',
 '/organization/political_party',
 '/organization/sports_league',
 '/organization/sports_team',
 '/organization/stock_exchange',
 '/organization/transit',
 '/other/art',
 '/other/award',
 '/other/body_part',
 '/other/currency',
 '/other/event',
 '/other/food',
 '/other/health',
 '/other/heritage',
 '/other/internet',
 '/other/language',
 '/other/legal',
 '/other/living_thing',
 '/other/product',
 '/other/religion',
 '/other/scientific',
 '/other/sports_and_leisure',
 '/other/supernatural',
 '/person/artist',
 '/person/athlete',
 '/person/coach',
 '/person/doctor',
 '/person/legal',
 '/person/military',
 '/person/political_figure',
 '/person/religious_leader',
 '/person/title']

In [82]:
# Show the labels nested below one of the sons.
nephews

['/location/geography/body_of_water',
 '/location/geography/island',
 '/location/geography/mountain',
 '/location/geograpy/island',
 '/location/structure/airport',
 '/location/structure/government',
 '/location/structure/hospital',
 '/location/structure/hotel',
 '/location/structure/restaurant',
 '/location/structure/sports_facility',
 '/location/structure/theater',
 '/location/transit/bridge',
 '/location/transit/railway',
 '/location/transit/road',
 '/organization/company/broadcast',
 '/organization/company/news',
 '/other/art/broadcast',
 '/other/art/film',
 '/other/art/music',
 '/other/art/stage',
 '/other/art/writing',
 '/other/event/accident',
 '/other/event/election',
 '/other/event/holiday',
 '/other/event/natural_disaster',
 '/other/event/protest',
 '/other/event/sports_event',
 '/other/event/violent_conflict',
 '/other/health/malady',
 '/other/health/treatment',
 '/other/language/programming_language',
 '/other/living_thing/animal',
 '/other/product/car',
 '/other/product/com

In [111]:
# Evaluate the father types on every result file whose name ends with this suffix.
suffix = 'trained_on_onto_tested_on_onto_test.txt'

compute_average_f1_for_types(fathers, suffix)

../official_results/predictions/adapter_2_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_0_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/adapter_16_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_2_trained_on_onto_tested_on_onto_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
          adapter_2_trained_on_onto_tested_on_onto | 0.71 | 0.12 | 0.72 | 0.08 | 0.71 
          bert_ft_0_trained_on_onto_tested_on_onto | 0.51 | 0.18 | 0.53 | 0.17 | 0.52 
         adapter_16_trained_on_onto_tested_on_onto | 0.75 | 0.13 | 0.76 | 0.08 | 0.75 
          bert_ft_2_trained_on_onto_tested_on_onto | 0.63 | 0.

In [112]:
# Evaluate the son types on every result file whose name ends with this suffix.
suffix = 'trained_on_onto_tested_on_onto_test.txt'

compute_average_f1_for_types(sons, suffix)

../official_results/predictions/adapter_2_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_0_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/adapter_16_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_2_trained_on_onto_tested_on_onto_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
          adapter_2_trained_on_onto_tested_on_onto | 0.08 | 0.19 | 0.07 | 0.19 | 0.07 
          bert_ft_0_trained_on_onto_tested_on_onto | 0.09 | 0.17 | 0.05 | 0.1  | 0.06 
         adapter_16_trained_on_onto_tested_on_onto | 0.1  | 0.21 | 0.06 | 0.17 | 0.08 
          bert_ft_2_trained_on_onto_tested_on_onto | 0.1  | 0.

In [113]:
# Evaluate the nephew types on every result file whose name ends with this suffix.
suffix = 'trained_on_onto_tested_on_onto_test.txt'

compute_average_f1_for_types(nephews, suffix)

../official_results/predictions/adapter_2_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_0_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/adapter_16_trained_on_onto_tested_on_onto_test.txt
../official_results/predictions/bert_ft_2_trained_on_onto_tested_on_onto_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
          adapter_2_trained_on_onto_tested_on_onto | 0.0  | 0.01 | 0.01 | 0.04 | 0.0  
          bert_ft_0_trained_on_onto_tested_on_onto | 0.03 | 0.1  | 0.01 | 0.04 | 0.01 
         adapter_16_trained_on_onto_tested_on_onto | 0.01 | 0.03 | 0.02 | 0.07 | 0.01 
          bert_ft_2_trained_on_onto_tested_on_onto | 0.02 | 0.

In [114]:
# Father types on the files ending with 'onto_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'onto_preds.txt'
compute_average_f1_for_types(fathers, suffix, onoe = True)

../official_results/predictions/onoe_onto_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                               onoe_onto_preds.txt | 0.64 | 0.16 | 0.7  | 0.07 | 0.67 


In [115]:
# Son types on the files ending with 'onto_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'onto_preds.txt'
compute_average_f1_for_types(sons, suffix, onoe = True)

../official_results/predictions/onoe_onto_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                               onoe_onto_preds.txt | 0.29 | 0.32 | 0.26 | 0.3  | 0.27 


In [116]:
# Nephew types on the files ending with 'onto_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'onto_preds.txt'
compute_average_f1_for_types(nephews, suffix, onoe = True)

../official_results/predictions/onoe_onto_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                               onoe_onto_preds.txt | 0.18 | 0.32 | 0.18 | 0.3  | 0.18 


# Choi

In [137]:
# Hand-written label groups for this dataset. Any label read from the types
# file that is in neither set is collected into ULTRAFINE in a later cell.
# COARSE: the coarse-grained labels.
COARSE = {'person', 'group', 'organization', 'location', 'entity', 'time', 'object', 'event', 'place'}
# FINE: the fine-grained labels.
FINE = {'accident', 'actor', 'agency', 'airline', 'airplane', 'airport', 'animal', 'architect', 'army', 'art',
        'artist', 'athlete', 'attack', 'author', 'award', 'biology', 'body_part', 'bridge', 'broadcast',
        'broadcast_station', 'building', 'car', 'cemetery', 'chemistry', 'city', 'coach', 'company', 'computer',
        'conflict', 'country', 'county', 'currency', 'degree', 'department', 'director', 'disease', 'doctor', 'drug',
        'education', 'election', 'engineer', 'ethnic_group', 'facility', 'film', 'finance', 'food', 'game', 'geography',
        'god', 'government', 'health', 'heritage', 'holiday', 'hospital', 'hotel', 'institution', 'instrument',
        'internet', 'island', 'language', 'law', 'lawyer', 'league', 'leisure', 'library', 'living_thing',
        'mass_transit', 'medicine', 'military', 'mobile_phone', 'monarch', 'mountain', 'music', 'musician',
        'music_school', 'natural_disaster', 'news', 'news_agency', 'park', 'planet', 'play', 'political_party',
        'politician', 'product', 'programming_language', 'protest', 'province', 'rail', 'railway', 'religion',
        'religious_leader', 'restaurant', 'road', 'scientific_method', 'ship', 'sign', 'society', 'software', 'soldier',
        'spacecraft', 'sport', 'stage', 'stock_exchange', 'structure', 'subway', 'team', 'television_channel',
        'television_network', 'television_program', 'theater', 'title', 'train', 'transit', 'transportation',
        'treatment', 'water', 'weapon', 'website', 'writing'}

In [140]:
# Location of the file listing all labels of this dataset (read in the next cell, one per line).
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
types_path = '/datahdd/vmanuel/entity_typing_all_datasets/data/ontology/types.txt'

In [143]:
# Read every label: one per line, newline characters removed.
with open(types_path, 'r') as inp:
    types = [line.replace('\n', '') for line in inp.readlines()]

# Ultra-fine labels are the ones listed in neither COARSE nor FINE.
ULTRAFINE = [t for t in types if not (t in COARSE or t in FINE)]

In [144]:
# Number of coarse labels.
len(COARSE)

9

In [145]:
# Number of fine labels.
len(FINE)

119

In [146]:
# Number of labels that are neither coarse nor fine.
len(ULTRAFINE)

10203

In [147]:
# Evaluate the COARSE labels on every result file whose name ends with this suffix.
suffix = 'trained_on_choi_tested_on_choi_test.txt'

compute_average_f1_for_types(COARSE, suffix)

../official_results/predictions/adapter_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/adapter_16_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_0_trained_on_choi_tested_on_choi_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
          adapter_2_trained_on_choi_tested_on_choi | 0.63 | 0.13 | 0.27 | 0.2  | 0.38 
         adapter_16_trained_on_choi_tested_on_choi | 0.64 | 0.13 | 0.24 | 0.2  | 0.35 
          bert_ft_2_trained_on_choi_tested_on_choi | 0.62 | 0.13 | 0.27 | 0.21 | 0.38 
          bert_ft_0_trained_on_choi_tested_on_choi | 0.35 | 0.

In [148]:
# Evaluate the FINE labels on every result file whose name ends with this suffix.
suffix = 'trained_on_choi_tested_on_choi_test.txt'

compute_average_f1_for_types(FINE, suffix)

../official_results/predictions/adapter_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/adapter_16_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_0_trained_on_choi_tested_on_choi_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
          adapter_2_trained_on_choi_tested_on_choi | 0.33 | 0.35 | 0.21 | 0.27 | 0.26 
         adapter_16_trained_on_choi_tested_on_choi | 0.36 | 0.36 | 0.23 | 0.28 | 0.28 
          bert_ft_2_trained_on_choi_tested_on_choi | 0.32 | 0.34 | 0.21 | 0.27 | 0.25 
          bert_ft_0_trained_on_choi_tested_on_choi | 0.24 | 0.

In [149]:
# Evaluate the ULTRAFINE labels on every result file whose name ends with this suffix.
suffix = 'trained_on_choi_tested_on_choi_test.txt'

compute_average_f1_for_types(ULTRAFINE, suffix)

../official_results/predictions/adapter_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/adapter_16_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_2_trained_on_choi_tested_on_choi_test.txt
../official_results/predictions/bert_ft_0_trained_on_choi_tested_on_choi_test.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
          adapter_2_trained_on_choi_tested_on_choi | 0.16 | 0.33 | 0.09 | 0.24 | 0.12 
         adapter_16_trained_on_choi_tested_on_choi | 0.17 | 0.34 | 0.1  | 0.25 | 0.13 
          bert_ft_2_trained_on_choi_tested_on_choi | 0.16 | 0.32 | 0.1  | 0.24 | 0.12 
          bert_ft_0_trained_on_choi_tested_on_choi | 0.05 | 0.

In [150]:
# COARSE labels on the files ending with 'choi_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'choi_preds.txt'

compute_average_f1_for_types(COARSE, suffix, onoe=True)

../official_results/predictions/onoe_choi_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                               onoe_choi_preds.txt | 0.57 | 0.36 | 0.03 | 0.05 | 0.06 


In [151]:
# FINE labels on the files ending with 'choi_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'choi_preds.txt'

compute_average_f1_for_types(FINE, suffix, onoe=True)

../official_results/predictions/onoe_choi_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                               onoe_choi_preds.txt | 0.32 | 0.42 | 0.1  | 0.18 | 0.15 


In [152]:
# ULTRAFINE labels on the files ending with 'choi_preds.txt'.
# onoe=True: labels are only whitespace-stripped and no trimmed mean is applied.
suffix = 'choi_preds.txt'

compute_average_f1_for_types(ULTRAFINE, suffix, onoe=True)

../official_results/predictions/onoe_choi_preds.txt


                                             model |  precision  |   recall    |  f1  
                                            -----------------------------------------
                                                   | mean | std  | mean | std  | mean 
          ---------------------------------------------------------------------------
                               onoe_choi_preds.txt | 0.16 | 0.35 | 0.11 | 0.27 | 0.13 
