In [1]:
# Root folders used by every cell below; both end with a slash because
# later cells build file paths by plain string concatenation.
# NOTE(review): Kaggle normally mounts datasets under lowercase
# "/kaggle/input/" and Linux paths are case-sensitive -- confirm that this
# capitalised directory really exists in the runtime.
input_path = "/kaggle/Input/"
output_path = "/kaggle/working/"

In [2]:
import pandas as pd
import numpy as np
import re
import pickle
import os
import csv
import math

In [3]:
# Load the pickled collection of sampled document IDs.
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
ids_pickle_path = input_path + "Data/100_random_ids.pkl"
with open(ids_pickle_path, "rb") as handle:
    sampled_ids = pickle.load(handle)

## merged workflow

In [90]:
def calculate_f1(df_recall,df_precision,names=None,ids=None):
    """Average the per-document F1 score for every category column.

    For each category the per-document F1 is ``2 * r * p / (r + p)``, where
    ``r`` and ``p`` are looked up by document ID in ``df_recall`` and
    ``df_precision``. Documents whose ``r + p`` is not greater than zero
    contribute 0. The per-document scores are summed, divided by the number
    of IDs and rounded to three decimals.

    Parameters
    ----------
    df_recall, df_precision : pandas.DataFrame
        Frames indexed by document ID with one column per category.
    names : iterable of str, optional
        Category columns to score. Defaults to the notebook-level
        ``name_list`` so existing two-argument calls keep working.
    ids : iterable, optional
        Document IDs to average over. Defaults to the notebook-level
        ``sampled_ids``.

    Returns
    -------
    dict
        Maps each category name to its mean F1 (float rounded to 3 places).

    Raises
    ------
    ValueError
        If there are no IDs to average over.
    """
    # Backward compatibility: earlier calls relied on these globals implicitly.
    if names is None:
        names = name_list
    if ids is None:
        ids = sampled_ids
    ids = list(ids)
    if not ids:
        raise ValueError("calculate_f1 needs at least one document ID to average over")

    f1_dict = {}
    for name in names:
        recall_col = df_recall[name]
        precision_col = df_precision[name]
        total = 0.0
        for doc_id in ids:
            # Explicit label-based lookup (never positional).
            recall = recall_col.loc[doc_id]
            precision = precision_col.loc[doc_id]
            denominator = recall + precision
            # A zero (or NaN) denominator means the F1 is undefined; count it as 0.
            if denominator > 0:
                total += float(2 * (recall * precision) / denominator)
        f1_dict[name] = round(total / len(ids), 3)
    return f1_dict

In [233]:
MODELS={"name":["SciBERT","SciNCL","SPECTER","SciBERT-based","SciNCL-based","SPECTER-based"]}

# Categories scored with partial-match credit (name_list1) and with exact
# set overlap (name_list2).
name_list1=["Task","Method","Dataset"]
name_list2=["Domain","Language"]
# Kept as a notebook-level name because calculate_f1 may read it from global scope.
name_list = name_list1 + name_list2


def _partial_match_recall(predicted, gold):
    """Score predicted phrases against gold phrases with partial credit.

    Every (predicted, gold) pair adds 1 when the strings are equal, or
    ``shorter_length / longer_length`` when one string contains the other.
    A predicted phrase that matched no gold phrase instead adds, for each of
    its space-separated tokens, the share of gold tokens equal to it.
    The accumulated score is divided by the number of gold phrases.
    """
    gold_tokens = [token for phrase in gold for token in phrase.split(' ')]
    score = 0
    for pred in predicted:
        matched = False
        for ref in gold:
            if pred == ref:
                score += 1
                matched = True
            elif pred in ref or ref in pred:
                score += min(len(pred), len(ref)) / max(len(pred), len(ref))
                matched = True
        if not matched:
            for token in pred.split(' '):
                score += gold_tokens.count(token) / len(gold_tokens)
    return score / len(gold)


def _exact_match_recall(predicted, gold):
    """Return the fraction of distinct gold items that also appear in predicted."""
    gold_set = set(gold)
    return len(set(predicted).intersection(gold_set)) / len(gold_set)


# Inputs that do not depend on the model are loaded once, outside the loop.
# NOTE: pickle.load can execute arbitrary code -- only use it on trusted files.
with open(input_path+"Data/100_random_ids.pkl", "rb") as file:
    sampled_ids=pickle.load(file)

# load the labeled data results
data_labeled=pd.read_csv(input_path+"Eval/keyphrases_100_labeled.csv")

for model_name in MODELS["name"]:
    print("--------------------"+str(model_name)+"--------------------")
    # load and process the sampled dataset
    with open(input_path+"Eval/keyphrases_"+model_name+".pkl", "rb") as file:
        keyphrases=pickle.load(file)
    sampled_keyphrases = {key: value for key, value in keyphrases.items() if key in sampled_ids}
    df_sampled = pd.DataFrame.from_dict(sampled_keyphrases, orient='index').rename(
        columns={"TSK": "Task", "MTD": "Method","DST":"Dataset","DOM":"Domain","LAN":"Language"})
    df_sampled.insert(0, "ID", list(sampled_keyphrases.keys()))
    df_sampled=df_sampled.sort_values('ID').reset_index(drop=True)

    # evaluate the model
    # NOTE(review): row i of the ID-sorted predictions is compared with row i
    # of the labeled CSV -- this assumes the CSV rows are in the same ID
    # order; confirm against the file.
    recall_rows = []
    for i in range(len(df_sampled)):
        row = {}
        for j in name_list1:
            row[j] = _partial_match_recall(df_sampled.at[i, j],
                                           data_labeled.at[i, j].split(', '))
        for j in name_list2:
            row[j] = _exact_match_recall(df_sampled.at[i, j],
                                         data_labeled.at[i, j].split(', '))
        recall_rows.append(row)
    # Build the frame in one go instead of chained ``df[col][row] = value``
    # assignment, which is not guaranteed to write into the frame.
    df_recall = pd.DataFrame(recall_rows, columns=name_list)
    df_recall.index=df_sampled["ID"]

    # load the precision data that is generated together with the post-processed data in the 06 code
    with open(input_path+"Eval/precisions_"+model_name+".pkl", "rb") as file:
        micro_precisions=pickle.load(file)
    df_precision = pd.DataFrame.from_dict(micro_precisions, orient='index')
    # NOTE(review): columns are renamed purely by position -- assumes the
    # pickled precision columns are already in name_list order.
    df_precision = df_precision.rename(columns={old_name: new_name for old_name, new_name in zip(df_precision.columns, name_list)})
    df_precision = df_precision.loc[sampled_ids].sort_index()

    # calculate the F1 scores
    print("micro_F1:")
    f1=calculate_f1(df_recall,df_precision)
    print(f1)
    print("macro_F1:")
    print(round(sum(f1.values())/len(f1),3))

--------------------SciBERT--------------------
micro_F1:
{'Task': 0.67, 'Method': 0.681, 'Dataset': 0.676, 'Domain': 0.67, 'Language': 0.55}
macro_F1:
0.649
--------------------SciNCL--------------------
micro_F1:
{'Task': 0.63, 'Method': 0.643, 'Dataset': 0.487, 'Domain': 0.663, 'Language': 0.588}
macro_F1:
0.602
--------------------SPECTER--------------------
micro_F1:
{'Task': 0.636, 'Method': 0.655, 'Dataset': 0.521, 'Domain': 0.667, 'Language': 0.59}
macro_F1:
0.614
--------------------SciBERT-based--------------------
micro_F1:
{'Task': 0.791, 'Method': 0.795, 'Dataset': 0.746, 'Domain': 0.758, 'Language': 0.652}
macro_F1:
0.748
--------------------SciNCL-based--------------------
micro_F1:
{'Task': 0.772, 'Method': 0.748, 'Dataset': 0.654, 'Domain': 0.77, 'Language': 0.645}
macro_F1:
0.718
--------------------SPECTER-based--------------------
micro_F1:
{'Task': 0.777, 'Method': 0.779, 'Dataset': 0.711, 'Domain': 0.754, 'Language': 0.646}
macro_F1:
0.733
