In [1]:
from MTNCI import ShimaokaMTNCI
import torch
import sys 
sys.path.append('../figet-hyperbolic-space')
import figet


In [2]:
class argClass():
    """Attribute-style holder for the encoder hyper-parameters.

    Exposes ``emb_size``, ``char_emb_size``, ``positional_emb_size``,
    ``context_rnn_size``, ``attn_size``, ``mention_dropout`` and
    ``context_dropout`` as instance attributes.

    Parameters
    ----------
    args : mapping, object or None
        Source of the hyper-parameter values. Keys (for a mapping) or
        attributes (for any other object) that match one of the names above
        override the defaults; anything missing falls back to the default.
        ``None`` yields all defaults.
    """

    # Fallback values; these are the constants the constructor used to
    # hard-code regardless of what was passed in.
    _DEFAULTS = {
        'emb_size': 300,
        'char_emb_size': 50,
        'positional_emb_size': 25,
        'context_rnn_size': 200,
        'attn_size': 100,
        'mention_dropout': 0.5,
        'context_dropout': 0.5,
    }

    def __init__(self, args):
        # Previously ``args`` was accepted but ignored, so editing the dict
        # passed by the caller silently had no effect.
        if args is None:
            args = {}
        if hasattr(args, 'get'):
            lookup = args.get
        else:
            # Namespace-like objects (or anything else): read attributes.
            def lookup(name, default):
                return getattr(args, name, default)

        for name, default in self._DEFAULTS.items():
            setattr(self, name, lookup(name, default))
        
# Pick the compute device. `device` used to be bound only inside the CUDA
# branch, so the ShimaokaMTNCI(...) call below raised NameError on a
# CPU-only machine; fall back to the CPU instead.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Applied regardless of the device so that the CPU fallback creates tensors
# with the same default dtype (double) as the CUDA path always did.
torch.set_default_tensor_type(torch.DoubleTensor)

# Preprocessed dataset bundle; this notebook reads its 'vocabs' and 'test' entries.
lopez_data = torch.load('../figet-hyperbolic-space/data/prep/MTNCI-fair/data.pt')

# Encoder hyper-parameters, wrapped so they can be read as attributes.
args = {'emb_size': 300, 'char_emb_size': 50, 'positional_emb_size': 25, 'context_rnn_size':200,
        'attn_size': 100, 'mention_dropout' : 0.5, 'context_dropout': 0.5}
args = argClass(args)
vocabs = lopez_data['vocabs']

# Width passed as `input_d`: 2 * 200 + 300 + 50 = 750, which matches the
# first Linear layer (in_features=750) in the printed model.
SHIMAOKA_OUT = args.context_rnn_size * 2 + args.emb_size + args.char_emb_size

# One output head per target space, each with its own layer sizes.
out_spec = [{'manifold':'euclid', 'dim':[64, 10]},
            {'manifold':'poincare', 'dim':[128, 128, 10]}]

model = ShimaokaMTNCI(args, vocabs, device, 
                    input_d=SHIMAOKA_OUT,
                    out_spec = out_spec,
                    dims = [512, 512])

In [3]:
# Identifier of the training run whose checkpoint is evaluated here.
tensorboard_run_ID = 'shimaoka_MTNCI_fair_sampled'

model.set_checkpoint_path(checkpoint_path = '../source_files/checkpoints/{}'.format(tensorboard_run_ID))

# Deserialize onto the CPU first: without map_location, a checkpoint saved
# from GPU tensors cannot be loaded on a machine without CUDA.
# load_state_dict then copies the values into the model's existing
# parameters, wherever they live.
checkpoint = torch.load(model.checkpoint_path, map_location='cpu')

model.load_state_dict(checkpoint['model_state_dict'])

# Switch to evaluation mode; eval() returns the module, so its repr becomes
# this cell's displayed output.
model.eval()


ShimaokaMTNCI(
  (common_network): CommonLayer(
    (fully): ModuleList(
      (0): Linear(in_features=750, out_features=512, bias=True)
      (1): Linear(in_features=512, out_features=512, bias=True)
    )
    (bns): ModuleList(
      (0): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (dropout): Dropout(p=0.2, inplace=False)
    (leaky_relu): LeakyReLU(negative_slope=0.1)
  )
  (out_layers): ModuleList(
    (0): RegressionOutput(
      (out): ModuleList(
        (0): Linear(in_features=512, out_features=64, bias=True)
        (1): Linear(in_features=64, out_features=10, bias=True)
      )
      (dropout): Dropout(p=0.2, inplace=False)
      (leaky_relu): ReLU()
      (bns): ModuleList(
        (0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (1): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_r

In [4]:
def get_dataset(data, batch_size, key):
    """Return ``data[key]`` after setting its batch size.

    The selected object is configured in place through its
    ``set_batch_size`` method and that same object is returned.
    """
    selected = data[key]
    selected.set_batch_size(batch_size)
    return selected


# Test split, served in batches of 1024.
test = get_dataset(lopez_data, 1024, "test")

# Index-to-string tables for type ids and token ids.
type_idx2label = lopez_data['vocabs']['type'].idx2label
token_idx2label = lopez_data['vocabs']['token'].idx2label

# Flatten every id found in field 5 of each batch into one list of type names.
test_labels = []
for entry in test:
    for labels in entry[5]:
        test_labels.extend(type_idx2label[label.item()] for label in labels)

# One string per row of field 3: the row's tokens, each followed by a space
# (so every non-empty string ends with a trailing space). Id 0 is skipped,
# presumably because it is the padding index (TODO confirm).
test_entities = []
for entry in test:
    for entities in entry[3]:
        token_ids = (entity.item() for entity in entities)
        test_entities.append(
            ''.join(token_idx2label[token_id] + ' ' for token_id in token_ids if token_id != 0)
        )

test_data = {'data': test, 'labels': test_labels}


In [5]:
from DatasetManager import ShimaokaMTNCIDatasetManager as DatasetManager

# Forwarded unchanged to load_concept_embeddings below.
nickel = True
FILE_ID = '16_3'

# NOTE(review): absolute, machine-specific location; adjust when running elsewhere.
SOURCE_FILES_PATH = '/datahdd/vmanuel/MTNCI_datasets/source_files/'
EMBEDDING_PATH = SOURCE_FILES_PATH + 'embeddings/'

# Both embedding files share the "<embedding dir><FILE_ID>" prefix.
embedding_prefix = EMBEDDING_PATH + FILE_ID

PATH_TO_DISTRIBUTIONAL_EMBEDDING = embedding_prefix + 'final_tree_type2vec_MTNCI'

# NOTE(review): the suffix repeats '16_3', so the file name resolves to
# '16_316_3_nickel.pth'. The recorded run loaded the embeddings without
# error, so this is presumably the real file name (confirm).
PATH_TO_HYPERBOLIC_EMBEDDING = embedding_prefix + '16_3_nickel.pth'

CONCEPT_EMBEDDING_PATHS = [
    PATH_TO_DISTRIBUTIONAL_EMBEDDING,
    PATH_TO_HYPERBOLIC_EMBEDDING,
]

datasetManager = DatasetManager(FILE_ID)
datasetManager.set_device(device)
datasetManager.load_concept_embeddings(
    CONCEPT_EMBEDDING_PATHS=CONCEPT_EMBEDDING_PATHS,
    nickel=nickel,
)

-----------------------------
... loading concept embeddings ...
concept embeddings loaded in 0.05 seconds


In [6]:
from tqdm.notebook import tqdm

# Base name shared by the two result files.
NAME = 'shimaoka_MTNCI_fair_sampled'

results_path = 'results/excel_results/{}.txt'.format(NAME)
TSV_path = 'results/excel_results/export_{}.txt'.format(NAME)

# Tell the model where to write its results and which dataset manager to use.
model.set_results_paths(results_path=results_path, TSV_path=TSV_path)
model.set_dataset_manager(datasetManager)

# Only top-1 predictions are evaluated. In the recorded run this call took
# about an hour in total (see the progress bars in the output).
topn = [1]
model.type_prediction_on_test(topn, test_data, test_entities, test_labels)

 ...evaluating test predictions in distributional space... 
occurrence distributional


getting results for prediction 102760: 100%|██████████| 102760/102760 [16:49<00:00, 101.79it/s]
Writing results for Occurrence Level in distributional space, top 1: 100%|██████████| 122/122 [00:00<00:00, 55565.76it/s]


entity distributional


getting results for prediction 1162: 100%|██████████| 1162/1162 [00:11<00:00, 102.76it/s]
Writing results for Entity Level in distributional space, top 1: 100%|██████████| 118/118 [00:00<00:00, 50751.42it/s]


concept1 distributional


getting results for prediction 122: 100%|██████████| 122/122 [00:01<00:00, 100.99it/s]
Writing results for Concept Level (induce from occurrencies) in distributional space, top 1: 100%|██████████| 122/122 [00:00<00:00, 61157.53it/s]


concept2 distributional


getting results for prediction 118: 100%|██████████| 118/118 [00:01<00:00, 99.83it/s]
Writing results for Concept Level (induce from entities) in distributional space, top 1: 100%|██████████| 118/118 [00:00<00:00, 49334.92it/s]


 ...evaluating test predictions in hyperbolic space... 
occurrence hyperbolic


getting results for prediction 102760: 100%|██████████| 102760/102760 [46:32<00:00, 36.80it/s]
Writing results for Occurrence Level in hyperbolic space, top 1: 100%|██████████| 122/122 [00:00<00:00, 74300.14it/s]


entity hyperbolic


getting results for prediction 1162: 100%|██████████| 1162/1162 [00:31<00:00, 36.79it/s]
Writing results for Entity Level in hyperbolic space, top 1: 100%|██████████| 118/118 [00:00<00:00, 76840.22it/s]


concept1 hyperbolic


getting results for prediction 122: 100%|██████████| 122/122 [00:03<00:00, 36.87it/s]
Writing results for Concept Level (induce from occurrencies) in hyperbolic space, top 1: 100%|██████████| 122/122 [00:00<00:00, 76948.13it/s]


concept2 hyperbolic


getting results for prediction 118: 100%|██████████| 118/118 [00:03<00:00, 36.90it/s]
Writing results for Concept Level (induce from entities) in hyperbolic space, top 1: 100%|██████████| 118/118 [00:00<00:00, 56829.47it/s]


In [8]:
from collections import defaultdict

# Entity string -> set of labels it is paired with.
# NOTE(review): zip stops at the shorter list, so this pairing assumes
# test_entities and test_labels are aligned one-to-one (TODO confirm).
C_E = defaultdict(set)
for entity, label in zip(test_entities, test_labels):
    C_E[entity].add(label)

# Print every entity string that is paired with more than one distinct label.
for k, v in C_E.items():
    if len(v) <= 1:
        continue
    print(k)
    print(v)
print('done')

unk 
{'PublicTransitSystem', 'FictionalCharacter', 'Organisation', 'MilitaryUnit', 'River', 'MythologicalFigure', 'Arachnid', 'Fungus', 'Convention', 'Amphibian', 'Newspaper', 'Venue', 'VideoGame', 'ChemicalCompound', 'Species', 'City', 'Plant', 'Bird', 'EthnicGroup', 'Mollusca', 'Bridge', 'Drug', 'Crustacean', 'Surname', 'RadioStation', 'Animal', 'MilitaryStructure', 'Food', 'Fish', 'Holiday', 'Company', 'Village', 'Insect', 'Band', 'Settlement', 'Eukaryote', 'Mountain', 'Town', 'Film', 'Weapon', 'Island', 'Place', 'IceHockeyPlayer', 'Country', 'Mammal', 'Monarch', 'Website', 'Reptile'}
unk unk 
{'Species', 'City', 'Crustacean', 'Arachnid', 'Writer', 'Scientist', 'HistoricPlace', 'Food', 'Amphibian', 'Politician', 'OfficeHolder', 'Actor', 'Mollusca'}
unk springsit 
{'AdministrativeRegion', 'Place', 'Settlement'}
unk meuse 
{'Lake', 'BodyOfWater'}
done
