In [1]:
import jsonlines
import json
import os

from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the examples from the ConceptNet test.json file. The context manager
# guarantees the file handle is closed once parsing finishes (a bare
# `open(...)` passed straight to json.load is never explicitly closed).
with open('../../../data/ConceptNet/test.json', 'r') as fin:
    data = json.load(fin)

In [3]:
# Index every loaded example by its 'uid' so later cells can look an example
# up directly; tqdm reports progress while the mapping is built.
uid_example_map = {example['uid']: example for example in tqdm(data)}

  0%|          | 0/161169 [00:00<?, ?it/s]

100%|██████████| 161169/161169 [00:00<00:00, 1835602.38it/s]


In [4]:
# Read the collection of uids stored in madeof_ids.json; the export loops
# keep only prediction lines whose uid appears in it.
with open('madeof_ids.json', 'r') as ids_file:
    valid_ids = json.loads(ids_file.read())

In [11]:
# Models whose `<name>_ConceptNet_zeroshot` prediction files are exported by
# the next cell, listed one per line and grouped by family.
model_names = [
    # BERT
    'bert-base-uncased',
    'bert-large-uncased',
    # RoBERTa
    'roberta-base',
    'roberta-large',
    # ALBERT v1
    'albert-base-v1',
    'albert-large-v1',
    'albert-xlarge-v1',
    # ALBERT v2
    'albert-base-v2',
    'albert-large-v2',
    'albert-xlarge-v2',
    # GPT-Neo / GPT-J
    'gpt-neo-125m',
    'gpt-neo-1.3B',
    'gpt-neo-2.7B',
    'gpt-j-6b',
]

In [12]:
# For every model in `model_names`, read its zero-shot prediction file, keep
# only the lines whose uid is in `valid_ids`, and write the selected fields
# (stop-word-filtered predictions and metrics) to
# results/<model>_madeof_predictions.jsonl, one JSON object per line.

# Hoisted out of the loops: a set gives constant-time membership tests, whereas
# `uid in valid_ids` would rescan the loaded JSON container for every line.
valid_id_set = set(valid_ids)

# open(..., 'w') does not create missing parent directories; make sure the
# output folder exists before any prediction file is parsed.
os.makedirs('results', exist_ok=True)

for model_name in model_names:
    model_path = os.path.join('../../../results/ConceptNet', model_name+'_ConceptNet_zeroshot')
    pred_path = os.path.join(model_path, 'pred_ConceptNet_test.jsonl')

    results = []
    with jsonlines.open(pred_path) as fin:
        for line in fin.iter():
            uid = line['uid']
            if uid not in valid_id_set:
                continue

            # Subject and relation id come from the test examples, not from
            # the prediction line; look the example up once.
            example = uid_example_map[uid]
            results.append({
                'uid': uid,
                'subj': example['subj'],
                'rel_id': example['rel_id'],
                'label_text': line['label_text'],
                'top_100_text': line['top_100_text_remove_stopwords'],
                'mrr': line['mrr_remove_stopwords'],
                'hits@1': line['hits@1_remove_stopwords'],
                'hits@10': line['hits@10_remove_stopwords'],
                'hits@100': line['hits@100_remove_stopwords'],
            })

    out_path = os.path.join('results', model_name+'_madeof_predictions.jsonl')
    with open(out_path, 'w') as fout:
        # Report how many predictions were kept for this model.
        print(model_name, len(results))
        for result in results:
            fout.write(json.dumps(result) + '\n')

bert-base-uncased 98
bert-large-uncased 98
roberta-base 98
roberta-large 98
albert-base-v1 98
albert-large-v1 98
albert-xlarge-v1 98
albert-base-v2 98
albert-large-v2 98
albert-xlarge-v2 98
gpt-neo-125m 98
gpt-neo-1.3B 98
gpt-neo-2.7B 98
gpt-j-6b 98


In [5]:
# Chat models whose `<name>_ConceptNet_zeroshot` prediction files are
# exported by the next cell.
model_names = [
    'gpt-3.5-turbo-0125',
    'gpt-4-0125-preview',
]

In [6]:
# For every model in `model_names`, read its zero-shot prediction file, keep
# only the lines whose uid is in `valid_ids`, and write the selected fields
# to results/<model>_madeof_predictions.jsonl, one JSON object per line.
# Only the top-5 predictions and hits@1 are read from these prediction lines.

# Hoisted out of the loops: a set gives constant-time membership tests, whereas
# `uid in valid_ids` would rescan the loaded JSON container for every line.
valid_id_set = set(valid_ids)

# open(..., 'w') does not create missing parent directories; make sure the
# output folder exists before any prediction file is parsed.
os.makedirs('results', exist_ok=True)

for model_name in model_names:
    model_path = os.path.join('../../../results/ConceptNet', model_name+'_ConceptNet_zeroshot')
    pred_path = os.path.join(model_path, 'pred_ConceptNet_test.jsonl')

    results = []
    with jsonlines.open(pred_path) as fin:
        for line in fin.iter():
            uid = line['uid']
            if uid not in valid_id_set:
                continue

            # Subject and relation id come from the test examples, not from
            # the prediction line; look the example up once.
            example = uid_example_map[uid]
            results.append({
                'uid': uid,
                'subj': example['subj'],
                'rel_id': example['rel_id'],
                'label_text': line['label_text'],
                'top_5_text': line['top_5_text_remove_stopwords'],
                'hits@1': line['hits@1_remove_stopwords'],
            })

    out_path = os.path.join('results', model_name+'_madeof_predictions.jsonl')
    with open(out_path, 'w') as fout:
        # Report how many predictions were kept for this model.
        print(model_name, len(results))
        for result in results:
            fout.write(json.dumps(result) + '\n')

gpt-3.5-turbo-0125 52
gpt-4-0125-preview 52


In [15]:
# Models whose `<name>_ConceptNet_prompt_tuning` prediction files are
# exported by the next cell.
model_names = [
    'bert-base-uncased',
    'bert-large-uncased',
    'gpt-neo-125m',
    'gpt-j-6b',
]

In [16]:
# For every model in `model_names`, read its prompt-tuning prediction file,
# keep only the lines whose uid is in `valid_ids`, and write the selected
# fields (stop-word-filtered predictions and metrics) to
# results/<model>_prompt_tuning_madeof_predictions.jsonl, one JSON object per
# line.

# Hoisted out of the loops: a set gives constant-time membership tests, whereas
# `uid in valid_ids` would rescan the loaded JSON container for every line.
valid_id_set = set(valid_ids)

# open(..., 'w') does not create missing parent directories; make sure the
# output folder exists before any prediction file is parsed.
os.makedirs('results', exist_ok=True)

for model_name in model_names:
    model_path = os.path.join('../../../results/ConceptNet', model_name+'_ConceptNet_prompt_tuning')
    pred_path = os.path.join(model_path, 'pred_ConceptNet_test.jsonl')

    results = []
    with jsonlines.open(pred_path) as fin:
        for line in fin.iter():
            uid = line['uid']
            if uid not in valid_id_set:
                continue

            # Subject and relation id come from the test examples, not from
            # the prediction line; look the example up once.
            example = uid_example_map[uid]
            results.append({
                'uid': uid,
                'subj': example['subj'],
                'rel_id': example['rel_id'],
                'label_text': line['label_text'],
                'top_100_text': line['top_100_text_remove_stopwords'],
                'mrr': line['mrr_remove_stopwords'],
                'hits@1': line['hits@1_remove_stopwords'],
                'hits@10': line['hits@10_remove_stopwords'],
                'hits@100': line['hits@100_remove_stopwords'],
            })

    out_path = os.path.join('results', model_name+'_prompt_tuning_madeof_predictions.jsonl')
    with open(out_path, 'w') as fout:
        # Report how many predictions were kept for this model.
        print(model_name, len(results))
        for result in results:
            fout.write(json.dumps(result) + '\n')

bert-base-uncased 98
bert-large-uncased 98
gpt-neo-125m 98
gpt-j-6b 98
