# RaTE: a Reproducible automatic Taxonomy Evaluation by Filling the Gap

**Input:** a taxonomy given as a list of (parent, child) concept pairs
**Output:** a score for the input taxonomy, computed from the predictions of a large masked language model

In [1]:
# from transformers import BertTokenizer, BertForMaskedLM, DistilBertForMaskedLM
from transformers import BertTokenizer, BertForMaskedLM
from transformers import pipeline
import nltk
# nltk.download('wordnet') // required for lemmatizer
# nltk.download('omw-1.4') // required for lemmatizer
from nltk.stem import WordNetLemmatizer
import torch
import pandas as pd

# All the hyperparameters to control the flow of RaTE

In [2]:
# parameters
# Number of predictions the language model is asked to return per query.
top_k = 10
# Path to the candidate taxonomy that is going to be evaluated.
eval_taxo_path = "taxos/HiExpan1.txt"
# Path to the file holding the query templates and their column names.
use_queries = "custom_queries/default_queries.txt"

In [3]:
# Query templates (with their {token} / [MASK] placeholders) and, in the same
# order, the result-column name that belongs to each template.
query_templates = []
column_names = []

with open(use_queries, "r") as fin:
    for line in fin:
        # Blank lines carry no template and cannot be unpacked into two
        # fields; lines starting with "#" are comments.
        if not line.strip() or line.startswith("#"):
            continue
        # Split on the LAST comma so that a template may itself contain commas;
        # whatever follows that comma is the column name.
        query, name = line.rsplit(",", 1)
        query_templates.append(query.strip())
        column_names.append(name.strip())

print(list(zip(query_templates, column_names)))

[('{token} [MASK]', '{token} [MASK]'), ('[MASK] {token}', '[MASK] {token}'), ('{token} is a [MASK]', 'is a'), ('{token} is an [MASK]', 'is an'), ('{token} is a kind of [MASK]', 'kind of'), ('{token} is a type of [MASK]', 'type of'), ('{token} is an example of [MASK]', 'example of'), ('[MASK] such as {token}', 'such as'), ('A [MASK] such as {token}', 'a such as'), ('An [MASK] such as {token}', 'an such as'), ('My favorite [MASK] is {token}', 'favorite')]


In [4]:
# Parallel lists: eval_parents[i] is the parent (hypernym) and eval_children[i]
# the child (hyponym) of the i-th concept pair in the taxonomy file.
eval_parents = []
eval_children = []

with open(eval_taxo_path, "r") as fin:
    for concept_pair in fin:
        # A blank line (e.g. at the end of the file) holds no pair and would
        # fail the two-field unpacking below.
        if not concept_pair.strip():
            continue
        parent, child = concept_pair.split(',')
        eval_parents.append(parent.strip())
        eval_children.append(child.strip())

In [6]:
# Maps a taxonomy entity to the list of alternative spellings that are also
# accepted as a correct prediction for it. File format, one entry per line:
#   entity: alternative 1, alternative 2, ...
alt_accepted_answers = {}

with open("results/acceptable_alternative_answers.txt", "r") as fin:
    for line in fin:
        # Blank lines hold no entry and cannot be unpacked into two fields.
        if not line.strip():
            continue
        # Only the first colon separates the entity from its alternatives.
        taxo_ent, alts = line.split(":", 1)
        # Drop empty items (e.g. from a trailing comma) so that an empty
        # string never counts as an accepted answer.
        alt_accepted_answers[taxo_ent.strip()] = [
            x.strip() for x in alts.split(",") if x.strip()
        ]
alt_accepted_answers

{'veggies': ['veggie', 'vegetable', 'vegetables'], 'dessert': ['desert']}

In [7]:
# Plain bert-base tokenizer with the stock vocabulary.
bert_base_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Second tokenizer instance that receives the additional custom tokens below.
bert_base_extended = BertTokenizer.from_pretrained('bert-base-uncased')

# Extra vocabulary entries, one per line of the configuration file.
not_in_bert_parent_lemmas = []

with open("model_configuration/add_tokenizer_vocabulary.txt", "r") as vocab_file:
    not_in_bert_parent_lemmas.extend(entry.strip() for entry in vocab_file)

# Register the extra entries with the extended tokenizer only.
bert_base_extended.add_tokens(not_in_bert_parent_lemmas)

31

In [8]:
# Pick the compute device: CUDA first, then MPS (Mac), otherwise the CPU.
device_name = 'cpu'
if torch.cuda.is_available():
    device_name = 'cuda'
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device_name = 'mps'
device = torch.device(device_name)

# load models
# model1a = BertForMaskedLM.from_pretrained('/content/drive/MyDrive/mlm_final_exps/entity_masking_train_test/checkpoint-117350')
# model1a.to(device)
# model1a.eval()
#
# model1b = BertForMaskedLM.from_pretrained('/content/drive/MyDrive/mlm_final_exps/entity_masking_train_test_only_one/final/checkpoint-58675')
# model1b.to(device)
# model1b.eval()
#
# model2a = BertForMaskedLM.from_pretrained('/content/drive/MyDrive/mlm_final_exps/random_masking_train_test/final/checkpoint-58675')
# model2a.to(device)
# model2a.eval()
#
# model2b = BertForMaskedLM.from_pretrained('/content/drive/MyDrive/mlm_final_exps/random_masking_train_only/checkpoint-82146')
# model2b.to(device)
# model2b.eval()
#
# model0a = BertForMaskedLM.from_pretrained('/content/drive/MyDrive/exp/yelp/out_random_masking-bert/checkpoint-80000')
# model0a.to(device)
# model0a.eval()
#
# model0b = DistilBertForMaskedLM.from_pretrained('/content/drive/MyDrive/exp/yelp/out-entity-masking/checkpoint-73675')
# model0b.to(device)
# model0b.eval()

In [9]:
# or, just use pipeline
# !pip install transformers
# load a single model or multiple

# Fill-mask pipeline for the m1a checkpoint, combined with the tokenizer that
# carries the additional custom tokens, running on the selected device.
unmasker1a = pipeline(
    'fill-mask',
    model="RaTE-Paper/yelp-m1a",
    tokenizer=bert_base_extended,
    device=device,
)
# unmasker1b = pipeline('fill-mask', tokenizer=bert_base_extended, model="RaTE-Paper/yelp-m1b", device=device)
# unmasker2a = pipeline('fill-mask', tokenizer=bert_base_extended, model="RaTE-Paper/yelp-m2a", device=device)
# unmasker2b = pipeline('fill-mask', tokenizer=bert_base_extended, model="RaTE-Paper/yelp-m2b", device=device)
#
# unmasker0a = pipeline('fill-mask', tokenizer='bert-base-uncased', model="RaTE-Paper/yelp-m0a", device=device)
# unmasker0b = pipeline('fill-mask', tokenizer='distilbert-base-uncased', model="RaTE-Paper/yelp-m0b", device=device)
#
# whole_word_unmasker = pipeline('fill-mask', model='bert-large-uncased-whole-word-masking', device=device)
# base_unmasker = pipeline('fill-mask', model='bert-base-uncased', device=device)

Downloading (…)lve/main/config.json:   0%|          | 0.00/728 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

In [10]:
lemmatizer = WordNetLemmatizer()

def lemmatize_a_word(word):
    """Lemmatize the last word of a (possibly multi-word) term as a noun.

    Only the final whitespace-separated word is lemmatized; the words in
    front of it are kept as they are.

    Args:
        word: the term to lemmatize, e.g. a single word or an n-gram.

    Returns:
        The term with its last word replaced by that word's noun lemma and
        all words joined by single spaces. Input that contains no word at all
        (empty or whitespace-only) is returned unchanged.
    """
    words = word.split()
    if not words:
        # Nothing to lemmatize; indexing the last word would raise IndexError.
        return word
    # Lemmatize the split-off word rather than the raw input, so surrounding
    # whitespace cannot hide the word from the lemmatizer.
    words[-1] = lemmatizer.lemmatize(words[-1], pos='n')
    return ' '.join(words)

# Customisable RaTE patterns
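Each query is a sentence pattern with two slots: `{token}` is replaced by a child concept (hyponym) of the taxonomy, and `[MASK]` is the position the language model has to fill. A taxonomy edge counts as supported by a pattern when the parent concept appears among the model's top predictions for `[MASK]`. The patterns below are the fixed set from the paper; custom patterns can instead be supplied through the query-template file.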

In [11]:
# paper page 5
def generate_queries_paper(token):
    """Return the eleven fixed query sentences for one term.

    The term is lemmatized first and then inserted into every pattern; each
    resulting sentence contains one [MASK] slot.

    Args:
        token: the term to build the queries for.

    Returns:
        A list of eleven query strings, in pattern order.
    """
    token = lemmatize_a_word(token)  # lemmatize to remove inflection

    patterns = (
        # bare co-occurrence
        " {token} [MASK] .",
        " [MASK] {token} .",
        # "is a / is an"
        " {token} is a [MASK] .",
        " {token} is an [MASK] .",
        # "kind / type / example of"
        " {token} is a kind of [MASK] .",
        " {token} is a type of [MASK] .",
        " {token} is an example of [MASK] .",
        # "such as"
        " [MASK] such as {token} .",
        " A [MASK] such as {token} .",
        " An [MASK] such as {token} .",
        # "favorite"
        " My favorite [MASK] is {token} .",
    )

    return [pattern.format(token=token) for pattern in patterns]

In [12]:
def generate_queries_from_template(token, templates=None):
    """Fill a term into every query template.

    Args:
        token: the term that replaces the "{token}" placeholder.
        templates: iterable of template strings to fill. When omitted, the
            module-level ``query_templates`` list is used.

    Returns:
        A list with one query per template, in template order; each query is
        the filled template followed by " .".
    """
    if templates is None:
        templates = query_templates

    return [template.replace("{token}", token) + " ." for template in templates]

In [13]:
from tqdm import tqdm

use_eval_models = [unmasker1a]


for predictor in use_eval_models:
    # predictions[child][column_name] -> list of candidate fillers for that
    # child and query.
    # NOTE: the dict is rebuilt for every model, so after the loop it holds
    # the predictions of the LAST model in use_eval_models only.
    predictions = {}

    for child in tqdm(eval_children):
        # A child listed under several parents yields the same queries; reuse
        # the stored result instead of querying the model again (assumes the
        # predictor returns the same output for the same query).
        if child in predictions:
            continue

        queries = generate_queries_from_template(child)
        # queries = generate_queries_paper(child)
        queries_unmasked = predictor(queries, top_k=top_k)

        # With exactly one query the fill-mask pipeline may hand back a flat
        # list of candidate dicts instead of one list per query; wrap it so
        # the per-query layout below always holds.
        if queries_unmasked and isinstance(queries_unmasked[0], dict):
            queries_unmasked = [queries_unmasked]

        predictions[child] = dict(zip(column_names, queries_unmasked))

100%|██████████| 524/524 [01:45<00:00,  4.95it/s]


In [20]:
def generate_result_frame(accept_noisy_answers=False):
    """Build the per-pair, per-query hit table.

    Reads the notebook-level ``eval_parents``, ``eval_children``,
    ``column_names``, ``predictions`` and ``alt_accepted_answers``.

    Args:
        accept_noisy_answers: when true, a prediction listed as an accepted
            alternative for the parent also counts as a hit.

    Returns:
        A DataFrame with the columns "hypernym" and "hyponym", one 0/1 column
        per query (1 when the parent, or an accepted alternative, is among the
        predictions for that child and query) and a "sum" column holding the
        row-wise total of the query columns.
    """
    frame = pd.DataFrame()
    frame["hypernym"] = eval_parents
    frame["hyponym"] = eval_children

    for column_name in tqdm(column_names):
        hits = []

        for parent, child in zip(eval_parents, eval_children):
            candidates = predictions[child][column_name]

            # Every string that counts as a correct answer for this parent.
            accepted = [parent]
            if accept_noisy_answers and parent in alt_accepted_answers:
                accepted = accepted + list(alt_accepted_answers[parent])

            found = any(
                candidate['token_str'] in accepted for candidate in candidates
            )
            hits.append(int(found))

        frame[column_name] = hits

    frame['sum'] = frame[column_names].sum(axis=1)
    return frame

df_result = generate_result_frame(accept_noisy_answers=True)
df_result

100%|██████████| 11/11 [00:00<00:00, 821.87it/s]


Unnamed: 0,hypernym,hyponym,{token} [MASK],[MASK] {token},is a,is an,kind of,type of,example of,such as,a such as,an such as,favorite,sum
0,seafood,mussel,0,1,0,0,1,1,1,1,1,0,1,7
1,seafood,clam,0,0,0,0,0,0,0,1,0,0,0,1
2,seafood,lobster,0,0,0,0,1,1,1,1,1,1,1,7
3,seafood,oyster,0,0,0,0,1,1,1,1,1,1,1,7
4,seafood,shrimp,0,0,0,0,1,1,1,1,1,1,1,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,music,jazz music,1,0,0,0,1,1,1,1,1,0,1,7
520,music,jazz,1,0,0,0,1,1,1,1,1,1,1,8
521,music,upbeat music,1,0,0,0,1,1,1,1,1,0,1,7
522,music,front desk,0,0,0,0,0,0,0,0,0,0,0,0


# Generate and display RaTE score

In [23]:
# Share of concept pairs for which at least one query produced the parent.
print("RaTE score:", int((df_result["sum"] > 0).sum()) / len(df_result))

RaTE score: 0.8454198473282443


In [None]:
# additional code for predicting a masked token in the absence of unmaskers
# ref: https://gist.github.com/yuchenlin/a2f42d3c4378ed7b83de65c7a2222eb2

# def predict_word(text, model, tokenizer, topn=10):
def predict_word(text, model, tokenizer, distil=False):
    """Rank the whole vocabulary as fillers for the first [MASK] in *text*.

    Args:
        text: sentence containing a "[MASK]" token. A leading "[CLS]" and a
            trailing "[SEP]" marker are optional; they are added when missing.
        model: masked language model. It is called as ``model(ids)`` when
            *distil* is true and as ``model(ids, token_type_ids=...)``
            otherwise; the first element of its output is used as the
            per-position vocabulary scores.
        tokenizer: tokenizer providing ``tokenize``, ``convert_tokens_to_ids``
            and ``convert_ids_to_tokens``.
        distil: set to true for models that take no ``token_type_ids``.

    Returns:
        A tuple ``(ranked, lookup)``: ``ranked`` is a list of
        ``(token, probability)`` pairs for every vocabulary entry, ordered by
        decreasing score, with probabilities rounded to four decimals;
        ``lookup`` is the same pairs as a dict.

    Raises:
        ValueError: if the tokenized text contains no "[MASK]" token.
    """
    # Prepare text: remove an existing [CLS] prefix / [SEP] suffix as whole
    # markers (str.lstrip/rstrip would strip individual characters and could
    # eat the start of the sentence or the end of a trailing "[MASK]").
    body = text.strip()
    if body.startswith('[CLS]'):
        body = body[len('[CLS]'):]
    if body.endswith('[SEP]'):
        body = body[:-len('[SEP]')]
    text = '[CLS] ' + body.strip() + ' [SEP]'

    # Tokenize input
    tokenized_text = tokenizer.tokenize(text)

    # Position of the first mask, i.e. the token to predict back
    try:
        masked_index = tokenized_text.index('[MASK]')
    except ValueError:
        raise ValueError("text must contain a [MASK] token") from None

    # Run on whatever device the model's parameters live on
    model_device = next(model.parameters()).device

    # Convert tokens to vocabulary indices and to a batch of size one
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    tokens_tensor = torch.tensor([indexed_tokens]).to(model_device)

    # Predict all tokens
    with torch.no_grad():
        if distil:
            outputs = model(tokens_tensor)
        else:
            # Single-sentence input: every token belongs to segment 0
            segments_tensors = torch.tensor([[0] * len(tokenized_text)]).to(model_device)
            outputs = model(tokens_tensor, token_type_ids=segments_tensors)
        logits = outputs[0]

    # Scores at the masked position, ranked from most to least likely
    mask_logits = logits[0, masked_index]
    predicted_inds = torch.argsort(-mask_logits)
    predicted_probs = [
        round(prob, 4)
        for prob in torch.softmax(mask_logits, 0)[predicted_inds].tolist()
    ]
    predicted_tokens = tokenizer.convert_ids_to_tokens(predicted_inds.tolist())

    ranked = list(zip(predicted_tokens, predicted_probs))
    return ranked, dict(ranked)