In [1]:
from scorer import GPTHypernymySuiteModel, HFLMScorer
import pandas as pd
import numpy as np
import os
from hypernymysuite.evaluation import all_evaluations
from tqdm import tqdm
import gensim
from leven import levenshtein
from nltk.corpus import wordnet as wn

In [2]:
def print_res_table(res, return_mean=False):
    """Format selected evaluation metrics as one space-separated row.

    Despite the name, nothing is printed: the row is returned as a string.

    The row holds, in order: ``ap_test_inv`` under ``'other'`` for the five
    ``siege_*`` entries, ``acc_test_inv`` for the three ``dir_*`` entries,
    ``rho_test_inv`` for ``cor_hyperlex``, and finally the mean of those nine
    values. Every value is rendered with two decimals and a decimal comma.

    Args:
        res: Nested mapping of evaluation results, indexed as described above.
        return_mean: When true, also return the unformatted mean.

    Returns:
        The formatted row, or ``(row, mean)`` when ``return_mean`` is true.
    """
    siege_sets = ('siege_bless', 'siege_eval', 'siege_leds', 'siege_shwartz', 'siege_weeds')
    direction_sets = ('dir_dbless', 'dir_wbless', 'dir_bibless')

    scores = [res[name]['other']['ap_test_inv'] for name in siege_sets]
    scores += [res[name]['acc_test_inv'] for name in direction_sets]
    scores.append(res['cor_hyperlex']['rho_test_inv'])

    # The mean covers the nine metrics only; it is appended afterwards as the last column.
    average = np.mean(scores)
    scores.append(average)

    row = ' '.join(f'{value:.2f}'.replace('.', ',') for value in scores)
    if return_mean:
        return row, average
    return row


In [3]:
# Build the scorer (from the project-local `scorer` module) for the model
# name and device given here.
# NOTE(review): the device is hard-coded to 'cuda'; presumably this cell needs
# a GPU to run — confirm how HFLMScorer treats the device argument.
model_name = 'gpt2-xl'
device = 'cuda'
scorer = HFLMScorer(model_name, device)

  "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "


In [4]:
# Directory whose files are read to build the vocabulary; the path is relative
# to the notebook's working directory.
eval_data_dir = 'data'

In [5]:
# Collect every word found in the `word1` / `word2` columns of the
# tab-separated files in `eval_data_dir`. `vocab` maps each word to 1 and
# starts with the '<OOV>' entry.
vocab = {'<OOV>': 1}
for file_name in os.listdir(eval_data_dir):
    file_path = os.path.join(eval_data_dir, file_name)
    df = pd.read_csv(file_path, sep='\t')
    # Per file, all `word1` values are inserted before the `word2` values.
    for column in ('word1', 'word2'):
        for w in df[column]:
            vocab[w] = 1

## HYPERNYMY PATTERNS

In [6]:
# Pattern templates keyed by a short name. Each value is a list holding one
# template string with the placeholders <hypo> and <hyper>.
# NOTE(review): the template strings are runtime data passed to the model, so
# their wording (e.g. "a example") is left untouched here.
PROMPTS = {
    'gen': ["<hyper> is more general than <hypo>"],
    'spec': ["<hypo> is more specific than <hyper>"],
    'type': ["<hypo> is a type of <hyper>"],
    # 'hyper1' is an unexpanded template with alternatives in parentheses;
    # 'hyper2'..'hyper5' spell out individual alternatives.
    'hyper1': ["<hypo> which is a (example|class|kind|. . . ) of <hyper>"],
    'hyper2': ["<hypo> which is a example of <hyper>"],
    'hyper3': ["<hypo> which is a class of <hyper>"],
    'hyper4': ["<hypo> which is a kind of <hyper>"],
    'hyper5': ["<hypo> which is a type of <hyper>"],
    # 'hyper6' is an unexpanded template; 'hyper7'..'hyper10' enumerate its
    # and/or x any/some combinations.
    'hyper6': ["<hypo> (and|or) (any|some) other <hyper>"],
    'hyper7': ["<hypo> and any other <hyper>"],
    'hyper8': ["<hypo> and some other <hyper>"],
    'hyper9': ["<hypo> or any other <hyper>"],
    'hyper10': ["<hypo> or some other <hyper>"],
    'hyper11': ["<hypo> which is called <hyper>"],
    'hyper12': ["<hypo> a special case of <hyper>"],
    'hyper13': ["<hypo> is an <hyper> that"],
    # 'hyper14' is an unexpanded template; 'hyper15'..'hyper22' enumerate its
    # unlike/like x most/all/any/other combinations (all in lower case, while
    # the template itself writes "Unlike" with a capital U).
    'hyper14': ["(Unlike|like) (most|all|any|other) <hyper>, <hypo>"],
    'hyper15': ["unlike most <hyper>, <hypo>"],
    'hyper16': ["unlike all <hyper>, <hypo>"],
    'hyper17': ["unlike any <hyper>, <hypo>"],
    'hyper18': ["unlike other <hyper>, <hypo>"],
    'hyper19': ["like most <hyper>, <hypo>"],
    'hyper20': ["like all <hyper>, <hypo>"],
    'hyper21': ["like any <hyper>, <hypo>"],
    'hyper22': ["like other <hyper>, <hypo>"],
    'hyper23': ["<hyper> including <hypo>"],
    'hyper24': ["such <hyper> as <hypo>"],
}

In [7]:
# Concatenate the 'hyper10' and 'hyper24' template lists into the pattern list
# handed to the model, and print it for the record.
patterns = PROMPTS['hyper10'] + PROMPTS['hyper24']
print(patterns)
# NOTE(review): GPTHypernymySuiteModel comes from the project-local `scorer`
# module; presumably it scores word pairs by filling <hypo>/<hyper> in each
# pattern — confirm there.
hs_model = GPTHypernymySuiteModel(scorer, patterns, vocab)

['<hypo> or some other <hyper>', 'such <hyper> as <hypo>']


In [11]:
# Run the hypernymysuite evaluations on the pattern model (several minutes,
# judging by the recorded progress bars), then show the selected metrics as a
# single formatted row via the cell's last expression.
res = all_evaluations(hs_model)
print_res_table(res)

100%|██████████| 53/53 [00:06<00:00,  7.61it/s]
100%|██████████| 53/53 [00:05<00:00,  9.19it/s]
100%|██████████| 53/53 [00:06<00:00,  7.79it/s]
100%|██████████| 53/53 [00:05<00:00,  9.36it/s]
100%|██████████| 53/53 [00:06<00:00,  7.65it/s]
100%|██████████| 53/53 [00:05<00:00,  9.36it/s]
100%|██████████| 42/42 [00:05<00:00,  7.49it/s]
100%|██████████| 42/42 [00:04<00:00,  8.91it/s]
100%|██████████| 42/42 [00:05<00:00,  7.38it/s]
100%|██████████| 42/42 [00:04<00:00,  8.91it/s]
100%|██████████| 68/68 [00:08<00:00,  7.90it/s]
100%|██████████| 68/68 [00:07<00:00,  9.63it/s]
100%|██████████| 452/452 [01:04<00:00,  7.06it/s]
100%|██████████| 452/452 [00:54<00:00,  8.28it/s]
100%|██████████| 87/87 [00:11<00:00,  7.26it/s]
100%|██████████| 87/87 [00:09<00:00,  8.70it/s]
100%|██████████| 421/421 [00:47<00:00,  8.79it/s]
100%|██████████| 421/421 [00:40<00:00, 10.29it/s]
100%|██████████| 53/53 [00:07<00:00,  7.51it/s]
100%|██████████| 53/53 [00:05<00:00,  8.91it/s]
100%|██████████| 1644/1644 [04:4

'0,53 0,37 0,86 0,47 0,89 0,96 0,75 0,71 0,62 0,68'

## COMBINED PATTERNS

In [7]:
# Load the pretrained fastText binary used to propose candidate words.
ft_model_name = 'cc.en.300.bin'
# `FastText.load_fasttext_format` is deprecated and no longer exists in
# gensim 4. `load_facebook_model` is its replacement and likewise returns a
# full model, so `ft.wv` keeps working. Fall back to the old loader on gensim
# releases that predate the new function.
if hasattr(gensim.models.fasttext, 'load_facebook_model'):
    ft = gensim.models.fasttext.load_facebook_model(ft_model_name)
else:
    ft = gensim.models.FastText.load_fasttext_format(ft_model_name)

  


In [8]:
# For every vocabulary word, fetch its 100 most similar fastText entries.
# Because this is a plain loop, `w2cands` holds the partial results if the
# cell is interrupted.
# NOTE(review): the recorded run was at ~8.7 it/s over 46973 words, i.e. well
# over an hour — consider caching the result to disk.
w2cands = {}
for w in tqdm(vocab):
    w2cands[w] = ft.wv.most_similar(w, topn=100)

  4%|▍         | 1921/46973 [03:31<1:26:02,  8.73it/s]

### Preprocessing candidates 1: drop candidates that overlap with the query word or are too close to it in edit distance

In [None]:
# Filter 1: drop candidates that are substrings or superstrings of the query
# word, and candidates whose Levenshtein distance to it is at most half the
# word's length.
# `most_similar` yields (word, similarity) pairs, so the candidate string is
# d[0]. Indexing d[1] (the float similarity) raised TypeError in the string
# tests below.
w2cands_f = {}
for w in tqdm(w2cands):
    w2cands_f[w] = [
        d for d in w2cands[w]
        if (d[0] not in w) and (w not in d[0]) and (levenshtein(w, d[0]) > (len(w) / 2))
    ][:100]

### Preprocessing candidates 2: drop candidates containing '.' or starting/ending with '-'

In [None]:
# Filter 2: drop candidates that contain a dot or start/end with a hyphen.
# Candidates are (word, similarity) pairs, so the string to inspect is d[0];
# d[1] is the float similarity and cannot be searched or indexed.
# startswith/endswith also stay safe on an empty candidate string.
for w in tqdm(w2cands_f):
    w2cands_f[w] = [
        d for d in w2cands_f[w]
        if '.' not in d[0] and not d[0].startswith('-') and not d[0].endswith('-')
    ][:100]

### Preprocessing candidates 3: keep only candidates that have at least one WordNet synset

In [None]:
# Filter 3: keep only candidates known to WordNet (at least one synset).
# Two fixes over the previous version:
#   * the closing parenthesis sat in the wrong place, `len(synsets(...) > 0)`,
#     which compared a list with an int and raised TypeError;
#   * candidates are (word, similarity) pairs, so the word to look up is d[0].
for w in tqdm(w2cands_f):
    w2cands_f[w] = [d for d in w2cands_f[w] if wn.synsets(d[0])][:100]

### Ranking candidates as co-hyponyms

In [None]:
# Second model that uses the single template '<hypo> or <hyper>'.
# Note that this rebinds `patterns`, which an earlier cell also assigns.
patterns = ['<hypo> or <hyper>']
cohypo_model = GPTHypernymySuiteModel(scorer, patterns, vocab)

In [None]:
# Score every (word, candidate) pair with the co-hyponym model and sort each
# word's candidates by descending score. Afterwards each entry of
# `w2cands_f[w]` is a [score, candidate] list.
#
# The previous version reset `w2cands_f` to an empty dict right before
# iterating over it, so the loop body never ran and the filtered candidates
# were lost. It also read d[1] (the similarity) instead of d[0] (the word)
# from the (word, similarity) pairs.
#
# Results go into a fresh dict that is rebound only at the end, so an
# interrupted run cannot leave `w2cands_f` holding a mix of the two formats.
# NOTE: the cell is not idempotent — after it has run, the entries are
# [score, word] and must not be fed through it again.
w2cands_ranked = {}
for w, cands in w2cands_f.items():
    cand_words = [d[0] for d in cands]
    if not cand_words:
        # Nothing survived filtering for this word; skip the model call.
        w2cands_ranked[w] = []
        continue
    scores = cohypo_model.predict_many([w] * len(cand_words), cand_words)
    w2cands_ranked[w] = sorted(
        ([score, cand] for score, cand in zip(scores, cand_words)),
        key=lambda x: -x[0],
    )
w2cands_f = w2cands_ranked

In [None]:
# Keep, for every word, the second element (index 1) of each of its first
# `topk` entries in `w2cands_f`.
topk = 3
word2cohypos = {}
for w, ranked in w2cands_f.items():
    word2cohypos[w] = [d[1] for d in ranked[:topk]]
word2cohypos