In [None]:
# Install the Hugging Face `transformers` package into the runtime.
# NOTE(review): the version is not pinned; the recorded output shows 4.29.2 being installed.
!pip install transformers

from google.colab import drive
import sys

# Unmount any existing Drive mount first, then mount Google Drive at /content/drive.
drive.flush_and_unmount()
drive.mount('/content/drive')
# Put the dataset2022-main folder on the module search path so its modules can be imported.
sys.path.append('/content/drive/MyDrive/dataset2022-main')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.15.1 tokenizers-0.13.3 transformers-4.29.2
Drive not mou

In [None]:
import argparse
import json
import string
from pathlib import Path
from typing import Dict, List, Union

import numpy as np
import pandas as pd


def read_lm_kbc_jsonl(file_path: Union[str, Path]) -> List[Dict]:
    """
    Reads a LM-KBC jsonl file and returns a list of dictionaries.

    The file is decoded as UTF-8 and lines that contain only whitespace
    (for example a trailing empty line) are skipped instead of being
    handed to the JSON parser.

    Args:
        file_path: path to the jsonl file

    Returns:
        list of dictionaries, each possibly has the following keys:
        - "SubjectEntity": str
        - "Relation": str
        - "ObjectEntities":
            None or List[List[str]] (can be omitted for the test input)

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    rows = []
    # Explicit encoding: entity names may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to be UTF-8.
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # A blank line is not a record; json.loads would raise on it.
                continue
            rows.append(json.loads(line))
    return rows


def read_lm_kbc_jsonl_to_df(file_path: Union[str, Path]) -> pd.DataFrame:
    """
    Load a LM-KBC jsonl file as a pandas DataFrame.

    Each record returned by `read_lm_kbc_jsonl` becomes one row.

    Args:
        file_path: path to the jsonl file

    Returns:
        DataFrame built from the list of parsed records
    """
    return pd.DataFrame(read_lm_kbc_jsonl(file_path))


def write_lm_kbc_jsonl(data: List[Dict], file_path: str):
    """
    Serialize records to a LM-KBC jsonl file, one JSON document per line.

    The target file is opened in write mode, so existing content is replaced.

    Args:
        data: list of dictionaries, each possibly has the following keys:
            - "SubjectEntity": str
            - "Relation": str
            - "ObjectEntities":
                None or List[List[str]] (can be omitted for the test input)
        file_path: path to the jsonl file
    """
    with open(file_path, 'w') as out_file:
        # Records are encoded lazily, one at a time, as they are written.
        out_file.writelines(json.dumps(record) + '\n' for record in data)

# def create_prompt(subject_entity: str, relation: str, mask_token: str) -> str:
#     prompt = f"{subject_entity}, {relation}, {mask_token}."
#     return prompt


# One cloze template per supported relation; `{subject_entity}` and
# `{mask_token}` are filled in by `create_prompt`.
_PROMPT_TEMPLATES = {
    "CountryBordersWithCountry":
        "{subject_entity} shares border with {mask_token}.",
    "CountryOfficialLanguage":
        "The official language of {subject_entity} is {mask_token}.",
    "StateSharesBorderState":
        "{subject_entity} shares border with {mask_token} state.",
    "RiverBasinsCountry":
        "{subject_entity} river basins in {mask_token}.",
    "ChemicalCompoundElement":
        "{subject_entity} consists of {mask_token}, which is an element.",
    "PersonLanguage":
        "{subject_entity} speaks in {mask_token}.",
    "PersonProfession":
        "{subject_entity} is a {mask_token} by profession.",
    "PersonInstrument":
        "{subject_entity} plays {mask_token}, which is an instrument.",
    "PersonEmployer":
        "{subject_entity} is an employer at {mask_token}, which is a company.",
    "PersonPlaceOfDeath":
        "{subject_entity} died at {mask_token}.",
    "PersonCauseOfDeath":
        "{subject_entity} died due to {mask_token}.",
    "CompanyParentOrganization":
        "The parent organization of {subject_entity} is {mask_token}.",
}


def create_prompt(subject_entity: str, relation: str, mask_token: str) -> str:
    """
    Build the fill-mask prompt for a subject entity and relation.

    Args:
        subject_entity: text inserted as the subject of the sentence
        relation: relation name used to select the sentence template
        mask_token: mask placeholder inserted where the object belongs

    Returns:
        The filled-in template for a known relation; for any other relation
        the prompt is just the mask token itself.
    """
    template = _PROMPT_TEMPLATES.get(relation)
    if template is None:
        return mask_token
    return template.format(subject_entity=subject_entity, mask_token=mask_token)

def clean_object(obj: str) -> Union[str, None]:
    """
    Normalize an object string for comparison.

    Every character listed in `string.punctuation` is removed, the result is
    lower-cased and surrounding whitespace is stripped.

    Returns:
        None for a falsy input (None or an empty string), otherwise the
        normalized string (which may itself be empty).
    """
    if not obj:
        return None

    # Translation table that deletes all ASCII punctuation in one pass.
    drop_punctuation = str.maketrans("", "", string.punctuation)
    without_punctuation = obj.translate(drop_punctuation)
    return without_punctuation.lower().strip()


def is_none_gts(gts: List[List[str]]) -> bool:
    """
    Tell whether there is no ground truth object at all.

    Returns:
        True when `gts` is falsy (e.g. None or an empty list), else False.
    """
    if gts:
        return False
    return True


def is_none_preds(preds: List[str]) -> bool:
    """
    Checks if the prediction object is none (with relaxing rules).

    The predictions count as "none" when:
        - `preds` is None or empty, or
        - `preds` has exactly one element and that element is None, a NaN
          float, or one of the strings "", "none", "null" (case-insensitive).

    Args:
        preds: collection of predictions (list or set)

    Returns:
        True if the predictions should be treated as "nothing predicted".
    """
    if preds is None or len(preds) == 0:
        return True
    if len(preds) != 1:
        return False

    # next(iter(...)) works for lists and for sets alike.
    only = next(iter(preds))
    if only is None:
        return True
    # Test NaN by value, not identity: `x is np.nan` misses float("nan")
    # and NaNs that come out of arithmetic or parsing.
    if isinstance(only, float) and np.isnan(only):
        return True
    # Only strings can spell a "none" marker; other types are real values.
    return isinstance(only, str) and only.lower() in {"", "none", "null"}


def true_positives(preds: List[str], gts: List[List[str]]) -> int:
    """
    Count the ground truth objects that were hit by at least one prediction.

    Each ground truth object is given as a list of aliases; it counts as
    found when any prediction equals one of those aliases. A ground truth
    object contributes at most 1, however many predictions match it.

    Args:
        preds: list of normalized predictions
        gts: list of ground truth objects (lists of normalized aliases)

    Returns:
        true_positives: int
    """
    # isdisjoint is False exactly when some prediction is among the aliases.
    return sum(not set(aliases).isdisjoint(preds) for aliases in gts)


def precision(preds: List[str], gts: List[List[str]]) -> float:
    """
    Calculates the precision of the predictions
    for a given pair of subject and relation.

    Args:
        preds: list of predictions
        gts: list of ground truth objects

    Returns:
        precision: float in [0.0, 1.0]
            - 1.0 when nothing is predicted, irrespective of the ground truth
            - 0.0 when something is predicted but the ground truth is none
            - otherwise true positives / number of predictions, capped at 1.0
    """

    # Nothing predicted: nothing can be wrong, so precision is 1.
    if is_none_preds(preds):
        return 1.0

    # Past this point there is at least one real prediction, so an empty
    # ground truth means every prediction is wrong.
    if is_none_gts(gts):
        return 0.0

    try:
        # Capped because true_positives counts matched ground truth objects,
        # and one prediction may match several of them.
        return min(true_positives(preds, gts) / len(preds), 1.0)
    except TypeError:
        # Malformed (e.g. non-iterable or unhashable) inputs score zero.
        return 0.0


def recall(preds: List[str], gts: List[List[str]]) -> float:
    """
    Compute recall for one subject/relation pair.

    Args:
        preds: list of predictions
        gts: list of ground truth objects

    Returns:
        recall: float
            1.0 when the ground truth is none (even if something was
            predicted), otherwise matched ground truth objects divided by
            the number of ground truth objects; 0.0 if that computation
            raises a TypeError.
    """
    # Edge case: with no ground truth there is nothing that could be missed.
    if is_none_gts(gts):
        return 1.0

    try:
        hits = true_positives(preds, gts)
        return hits / len(gts)
    except TypeError:
        return 0.0


def f1_score(p: float, r: float) -> float:
    """
    Combine precision and recall into the F1-score (their harmonic mean)
    for a given pair of subject and relation.

    Args:
        p: precision
        r: recall

    Returns:
        f1_score: float; 0.0 when the division by `p + r` raises
        ZeroDivisionError.
    """
    numerator = 2 * p * r
    denominator = p + r
    try:
        score = numerator / denominator
    except ZeroDivisionError:
        score = 0.0
    return score


def rows_to_dict(rows: List[Dict]) -> Dict:
    """
    Build a lookup from (subject entity, relation) to the row's objects.

    If several rows share the same key, the last one wins.

    Args:
        rows: ground truth or prediction rows; each must provide
            "SubjectEntity", "Relation" and "ObjectEntities"

    Returns:
        dict keyed by (SubjectEntity, Relation) tuples
    """
    indexed = {}
    for row in rows:
        key = (row["SubjectEntity"], row["Relation"])
        indexed[key] = row["ObjectEntities"]
    return indexed


def evaluate_per_sr_pair(predictions_fp, ground_truth_fp) \
        -> List[Dict[str, float]]:
    """
    Scores the predictions against the ground truth, one result per
    (subject entity, relation) pair found in the ground truth file.

    Objects on both sides are normalized with `clean_object` before they are
    compared; duplicate predictions collapse after normalization. A pair that
    is missing from the predictions, or whose "ObjectEntities" is None on
    either side, is treated as having no objects.

    Args:
        predictions_fp: path to the predictions jsonl file
        ground_truth_fp: path to the ground truth jsonl file

    Returns:
        list of dicts with keys "SubjectEntity", "Relation", "p", "r", "f1",
        sorted by relation and then by subject entity
    """
    pred_dict = rows_to_dict(read_lm_kbc_jsonl(predictions_fp))
    gt_dict = rows_to_dict(read_lm_kbc_jsonl(ground_truth_fp))

    results = []

    for (subj, rel), gt_objects in gt_dict.items():
        # "ObjectEntities" may be None; `or []` treats that as "no objects"
        # instead of raising TypeError when iterating.
        gts = [
            [clean_object(alias) for alias in aliases]
            for aliases in (gt_objects or [])
        ]

        # Same guard for predictions: the pair may be absent or hold None.
        raw_preds = pred_dict.get((subj, rel)) or []
        preds = list({clean_object(obj) for obj in raw_preds})

        # calculate the scores
        p = precision(preds, gts)
        r = recall(preds, gts)
        f1 = f1_score(p, r)

        results.append({
            "SubjectEntity": subj,
            "Relation": rel,
            "p": p,
            "r": r,
            "f1": f1
        })

    return sorted(results, key=lambda x: (x["Relation"], x["SubjectEntity"]))


def combine_scores_per_relation(scores_per_sr: List[Dict[str, float]]) -> dict:
    """
    Average the per-pair scores within each relation.

    Args:
        scores_per_sr: one dict per (subject, relation) pair, each providing
            "Relation", "p", "r" and "f1"

    Returns:
        dict mapping each relation to {"p", "r", "f1"}, where each value is
        the arithmetic mean over that relation's pairs; relations appear in
        the order they are first encountered
    """
    metrics = ("p", "r", "f1")

    # Group the metric triples by relation.
    grouped = {}
    for entry in scores_per_sr:
        grouped.setdefault(entry["Relation"], []).append(
            {metric: entry[metric] for metric in metrics}
        )

    # Reduce each group to its per-metric mean.
    averaged = {}
    for relation, members in grouped.items():
        count = len(members)
        averaged[relation] = {
            metric: sum([member[metric] for member in members]) / count
            for metric in metrics
        }
    return averaged

In [None]:
import json
from transformers import pipeline, AutoTokenizer, logging
from tqdm import tqdm
from collections import defaultdict

# `logging` here is the transformers logging module imported above, not the stdlib one.
logging.set_verbosity_error()  # avoid irritating transformers warnings

class Args:
    """Run configuration: data locations, candidate models and pipeline task."""

    # Development set that is probed and used as ground truth for scoring.
    dev_data = '/content/drive/MyDrive/dataset2022-main/data/dev.jsonl'
    # File the predictions are written to (and read back from) for scoring.
    dev_pred = '/content/drive/MyDrive/dataset2022-main/data/dev_pred.jsonl'
    # Model identifiers to compare. Every entry needs its trailing comma:
    # adjacent string literals are silently concatenated into one bogus name.
    model_names = [
        'mnaylor/mega-base-wikitext',
        'SRDdev/HingMaskedLM',
        'icelab/spacebert',
        'Hiecheol/bert-base-cased-wikitext2',
        'Lianglab/PharmBERT-cased',
        'soBeauty/bert-base-multilingual-cased-Confusion-mlm-20230603',
        'jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-18',
        'KooAI/KooBERT',
        'xyma/PROP-wiki',
        'globuslabs/ScholarBERT',
        'yorko/scibert_scivocab_uncased_long_4096',
        'adsabs/astroBERT',
        'kornosk/bert-political-election2020-twitter-mlm',
        'DunnBC22/bert-base-uncased-Masked_Language_Model-US_Economic_News_Articles',
        'albert-xxlarge-v2',
        'lexlms/legal-longformer-large',
        'law-ai/InLegalBERT',
        'climatebert/distilroberta-base-climate-f',
        'ahmedrachid/FinancialBERT',
        'bert-large-cased-whole-word-masking',
        'microsoft/BiomedVLP-CXR-BERT-general',
        'emilyalsentzer/Bio_ClinicalBERT',
        'bert-base-multilingual-cased',
        'bert-large-cased',
        'recobo/chemical-bert-uncased',
        'cardiffnlp/twitter-roberta-base-sep2022',
    ]
    # Pipeline task name passed to transformers.pipeline.
    task = 'fill-mask'

args = Args()

# Minimum fill-mask score a candidate token needs to be kept as a prediction.
SCORE_THRESHOLD = 0.5

# The development data is the same for every model, so read it once up front.
dev_data = read_lm_kbc_jsonl(args.dev_data)

# Best (model, F1) seen so far per relation; a relation for which no model
# scores above 0 keeps the default entry with model None.
best_model_per_relation = defaultdict(lambda: {'model': None, 'f1': 0})

for model_name in args.model_names:
    print(f"Processing with model: {model_name}")

    # Use the model's own tokenizer; fall back to 'bert-large-cased' ONLY when
    # loading it fails. The fallback load must stay inside the except branch,
    # otherwise it replaces every successfully loaded tokenizer.
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    except OSError:
        print(f"Tokenizer for model {model_name} not found. Using 'bert-large-cased' tokenizer instead.")
        tokenizer = AutoTokenizer.from_pretrained('bert-large-cased')
    mask_token = tokenizer.mask_token

    # Initialize pipeline
    pipe = pipeline(args.task, model=model_name, tokenizer=tokenizer)

    # Prompts depend on the tokenizer's mask token, so build them per model.
    dev_prompts = [
        create_prompt(item["SubjectEntity"], item["Relation"], mask_token)
        for item in dev_data
    ]

    # Probe the model with the development prompts.
    # NOTE(review): the recorded runs show this bar finishing instantly, which
    # suggests the pipeline call completes before tqdm starts iterating.
    dev_outputs = list(
        tqdm(pipe(dev_prompts, batch_size=16), total=len(dev_prompts))
    )

    # Keep only candidate tokens scoring above the threshold.
    dev_predictions = []
    for row, prompt, output in zip(dev_data, dev_prompts, dev_outputs):
        dev_predictions.append({
            "SubjectEntity": row["SubjectEntity"],
            "Relation": row["Relation"],
            "Prompt": prompt,
            "ObjectEntities": [
                seq["token_str"] for seq in output
                if seq["score"] > SCORE_THRESHOLD
            ],
        })

    # The prediction file is overwritten for every model; it only serves as
    # the input of the evaluation step below.
    write_lm_kbc_jsonl(dev_predictions, args.dev_pred)

    # Evaluate the predictions
    scores_per_sr_pair = evaluate_per_sr_pair(args.dev_pred, args.dev_data)
    scores_per_relation = combine_scores_per_relation(scores_per_sr_pair)

    print('Model: ', model_name)
    print(scores_per_relation)

    # Update best model for each relation (strictly better F1 only, so the
    # earliest model wins ties).
    for relation, scores in scores_per_relation.items():
        if scores['f1'] > best_model_per_relation[relation]['f1']:
            best_model_per_relation[relation] = {'model': model_name, 'f1': scores['f1']}

# Print the model with the highest F1 score for each relation
for relation, model_info in best_model_per_relation.items():
    print(f"For relation '{relation}', the model with the highest F1 score is {model_info['model']} with F1 score {model_info['f1']}")

# Average of the per-relation best F1 scores; skipped when nothing was scored
# so an empty model list or empty dev set does not end in ZeroDivisionError.
if best_model_per_relation:
    average_f1_score = sum(model_f1['f1'] for model_f1 in best_model_per_relation.values()) / len(best_model_per_relation)
    print(f"Average F1 score across all relations: {average_f1_score}")

Processing with model: mnaylor/mega-base-wikitext


Downloading (…)okenizer_config.json:   0%|          | 0.00/346 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/29.3M [00:00<?, ?B/s]

100%|██████████| 594/594 [00:00<00:00, 422130.90it/s]

Model:  mnaylor/mega-base-wikitext
{'ChemicalCompoundElement': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 0.18, 'r': 0.68, 'f1': 0.12}, 'CountryBordersWithCountry': {'p': 0.3829787234042553, 'r': 0.02127659574468085, 'f1': 0.0}, 'CountryOfficialLanguage': {'p': 0.10638297872340426, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 0.0, 'r': 0.5, 'f1': 0.0}, 'PersonEmployer': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 0.0, 'r': 0.3, 'f1': 0.0}, 'PersonLanguage': {'p': 0.02, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 0.84, 'r': 0.5, 'f1': 0.4}, 'PersonProfession': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 0.38, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 0.72, 'r': 0.0, 'f1': 0.0}}
Processing with model: SRDdev/HingMaskedLM





Downloading (…)okenizer_config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/89.2k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/49.6k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/251k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/957 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

100%|██████████| 594/594 [00:00<00:00, 123754.05it/s]

Model:  SRDdev/HingMaskedLM
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 1.0, 'r': 0.68, 'f1': 0.68}, 'CountryBordersWithCountry': {'p': 1.0, 'r': 0.02127659574468085, 'f1': 0.02127659574468085}, 'CountryOfficialLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.3, 'f1': 0.3}, 'PersonLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonProfession': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 1.0, 'r': 0.0, 'f1': 0.0}}
Processing with model: icelab/spacebert





Downloading (…)okenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/549 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

100%|██████████| 594/594 [00:00<00:00, 129457.86it/s]


Model:  icelab/spacebert
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 1.0, 'r': 0.68, 'f1': 0.68}, 'CountryBordersWithCountry': {'p': 1.0, 'r': 0.02127659574468085, 'f1': 0.02127659574468085}, 'CountryOfficialLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.3, 'f1': 0.3}, 'PersonLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonProfession': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 0.96, 'r': 0.0, 'f1': 0.0}}
Processing with model: Hiecheol/bert-base-cased-wikitext2
Tokenizer for model Hiecheol/bert-base-cased-wikitext2 not found. Using 'bert-large-cased' tokenizer instead.


100%|██████████| 594/594 [00:00<00:00, 965665.34it/s]


Model:  Hiecheol/bert-base-cased-wikitext2
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 1.0, 'r': 0.68, 'f1': 0.68}, 'CountryBordersWithCountry': {'p': 1.0, 'r': 0.02127659574468085, 'f1': 0.02127659574468085}, 'CountryOfficialLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.3, 'f1': 0.3}, 'PersonLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonProfession': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 1.0, 'r': 0.0, 'f1': 0.0}}
Processing with model: Lianglab/PharmBERT-cased


100%|██████████| 594/594 [00:00<00:00, 1025696.41it/s]


Model:  Lianglab/PharmBERT-cased
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.01, 'f1': 0.013333333333333332}, 'CompanyParentOrganization': {'p': 0.52, 'r': 0.68, 'f1': 0.38}, 'CountryBordersWithCountry': {'p': 1.0, 'r': 0.05927051671732522, 'f1': 0.0702127659574468}, 'CountryOfficialLanguage': {'p': 0.8297872340425532, 'r': 0.21453900709219856, 'f1': 0.23191489361702128}, 'PersonCauseOfDeath': {'p': 0.86, 'r': 0.5, 'f1': 0.4}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.31085714285714283, 'f1': 0.31642857142857145}, 'PersonLanguage': {'p': 0.9, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 0.44, 'r': 0.5, 'f1': 0.2}, 'PersonProfession': {'p': 0.7, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 0.56, 'r': 0.04, 'f1': 0.04}, 'StateSharesBorderState': {'p': 0.98, 'r': 0.0, 'f1': 0.0}}
Processing with model: soBeauty/bert-base-multilingual-cased-Confusion-mlm-20230603
Tokenizer for model soBeauty/bert-base-multilingual-cased-Confusion-m

Downloading pytorch_model.bin:   0%|          | 0.00/712M [00:00<?, ?B/s]

100%|██████████| 594/594 [00:00<00:00, 1096091.76it/s]


Model:  soBeauty/bert-base-multilingual-cased-Confusion-mlm-20230603
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 1.0, 'r': 0.68, 'f1': 0.68}, 'CountryBordersWithCountry': {'p': 1.0, 'r': 0.02127659574468085, 'f1': 0.02127659574468085}, 'CountryOfficialLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.3, 'f1': 0.3}, 'PersonLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonProfession': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 1.0, 'r': 0.0, 'f1': 0.0}}
Processing with model: jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-18


100%|██████████| 594/594 [00:00<00:00, 226471.83it/s]


Model:  jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-18
{'ChemicalCompoundElement': {'p': 0.6, 'r': 0.034999999999999996, 'f1': 0.05133333333333333}, 'CompanyParentOrganization': {'p': 0.98, 'r': 0.68, 'f1': 0.66}, 'CountryBordersWithCountry': {'p': 0.9361702127659575, 'r': 0.11464876730834175, 'f1': 0.16898160940714135}, 'CountryOfficialLanguage': {'p': 0.8297872340425532, 'r': 0.5421985815602837, 'f1': 0.5785460992907802}, 'PersonCauseOfDeath': {'p': 0.6, 'r': 0.55, 'f1': 0.3133333333333333}, 'PersonEmployer': {'p': 0.86, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.3308571428571429, 'f1': 0.3364285714285714}, 'PersonLanguage': {'p': 0.96, 'r': 0.11166666666666666, 'f1': 0.12466666666666666}, 'PersonPlaceOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonProfession': {'p': 0.96, 'r': 0.052140756302521005, 'f1': 0.07028944911297852}, 'RiverBasinsCountry': {'p': 0.94, 'r': 0.12433333333333335, 'f1': 0.14496969696969697}, 'StateSharesBorde

100%|██████████| 594/594 [00:00<00:00, 522309.55it/s]


Model:  KooAI/KooBERT
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 0.8, 'r': 0.68, 'f1': 0.62}, 'CountryBordersWithCountry': {'p': 0.1276595744680851, 'r': 0.02127659574468085, 'f1': 0.0}, 'CountryOfficialLanguage': {'p': 0.9574468085106383, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 0.9, 'r': 0.5, 'f1': 0.44}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 0.98, 'r': 0.3, 'f1': 0.3}, 'PersonLanguage': {'p': 0.68, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 0.98, 'r': 0.5, 'f1': 0.48}, 'PersonProfession': {'p': 0.8, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 0.92, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 0.06, 'r': 0.0, 'f1': 0.0}}
Processing with model: xyma/PROP-wiki


100%|██████████| 594/594 [00:00<00:00, 198677.56it/s]


Model:  xyma/PROP-wiki
{'ChemicalCompoundElement': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 1.0, 'r': 0.68, 'f1': 0.68}, 'CountryBordersWithCountry': {'p': 1.0, 'r': 0.02127659574468085, 'f1': 0.02127659574468085}, 'CountryOfficialLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonEmployer': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 1.0, 'r': 0.3, 'f1': 0.3}, 'PersonLanguage': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 1.0, 'r': 0.5, 'f1': 0.5}, 'PersonProfession': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 1.0, 'r': 0.0, 'f1': 0.0}}
Processing with model: globuslabs/ScholarBERT


100%|██████████| 594/594 [00:00<00:00, 964917.34it/s]

Model:  globuslabs/ScholarBERT
{'ChemicalCompoundElement': {'p': 0.0, 'r': 0.0, 'f1': 0.0}, 'CompanyParentOrganization': {'p': 0.02, 'r': 0.68, 'f1': 0.0}, 'CountryBordersWithCountry': {'p': 0.0, 'r': 0.02127659574468085, 'f1': 0.0}, 'CountryOfficialLanguage': {'p': 0.02127659574468085, 'r': 0.0, 'f1': 0.0}, 'PersonCauseOfDeath': {'p': 0.34, 'r': 0.5, 'f1': 0.18}, 'PersonEmployer': {'p': 0.18, 'r': 0.0, 'f1': 0.0}, 'PersonInstrument': {'p': 0.06, 'r': 0.3, 'f1': 0.04}, 'PersonLanguage': {'p': 0.04, 'r': 0.0, 'f1': 0.0}, 'PersonPlaceOfDeath': {'p': 0.3, 'r': 0.5, 'f1': 0.14}, 'PersonProfession': {'p': 1.0, 'r': 0.0, 'f1': 0.0}, 'RiverBasinsCountry': {'p': 0.02, 'r': 0.0, 'f1': 0.0}, 'StateSharesBorderState': {'p': 0.94, 'r': 0.0, 'f1': 0.0}}
For relation 'ChemicalCompoundElement', the model with the highest F1 score is jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-18 with F1 score 0.05133333333333333
For relation 'CompanyParentOrganization', the model wi




In [None]:
# For relation 'CompanyParentOrganization', the model with the highest F1 score is bert-large-cased-whole-word-masking with F1 score 0.68
# For relation 'CountryBordersWithCountry', the model with the highest F1 score is bert-large-cased-whole-word-masking with F1 score 0.1954407294832827
# For relation 'CountryOfficialLanguage', the model with the highest F1 score is bert-large-cased-whole-word-masking with F1 score 0.7767730496453902
# For relation 'PersonCauseOfDeath', the model with the highest F1 score is emilyalsentzer/Bio_ClinicalBERT with F1 score 0.5
# For relation 'PersonEmployer', the model with the highest F1 score is bert-large-cased-whole-word-masking with F1 score 0.013333333333333332
# For relation 'PersonInstrument', the model with the highest F1 score is bert-large-cased with F1 score 0.34
# For relation 'PersonLanguage', the model with the highest F1 score is emilyalsentzer/Bio_ClinicalBERT with F1 score 0.5579999999999999
# For relation 'PersonPlaceOfDeath', the model with the highest F1 score is bert-large-cased-whole-word-masking with F1 score 0.5
# For relation 'PersonProfession', the model with the highest F1 score is None with F1 score 0
# For relation 'RiverBasinsCountry', the model with the highest F1 score is bert-large-cased with F1 score 0.3813333333333333
# For relation 'StateSharesBorderState', the model with the highest F1 score is None with F1 score 0
# Average F1 score across all relations: 0.3436844815940