In [None]:
import pandas as pd
from tqdm.std import tqdm
import json
import os

## load data

In [None]:
# Directory (relative to the working directory) whose entries are listed and
# read with pd.read_csv in the following cells.
file_path = 'covid-19/emb_rank/'

In [None]:
# os.listdir() returns entries in arbitrary order and also yields
# sub-directories / hidden entries (e.g. ".ipynb_checkpoints", ".DS_Store")
# that pd.read_csv cannot parse. Keep only regular, non-hidden files and sort
# them so the load order is reproducible across runs and machines.
filename_list = sorted(
    name
    for name in os.listdir(file_path)
    if not name.startswith(".") and os.path.isfile(os.path.join(file_path, name))
)

In [None]:
# Show the file names that the next cell loads.
filename_list

In [None]:
# Read every listed file into its own DataFrame, then stack them row-wise
# into a single frame with a fresh 0..n-1 index.
df_list = [
    pd.read_csv(os.path.join(file_path, csv_name)) for csv_name in filename_list
]

covid_df = pd.concat(df_list, ignore_index=True)

In [None]:
# Preview the concatenated frame.
covid_df

In [None]:
## keep only the columns used by the evaluation below
valid_columns = ["Query", "GoldName", "GoldID", "standardConcept", "conceptId"]
df = covid_df[valid_columns]

In [None]:
# Reassign instead of filling in place: `df` was derived from `covid_df` by
# column selection, so an inplace fill can raise SettingWithCopyWarning and
# makes the cell depend on hidden mutation. Missing values become the literal
# string "null".
df = df.fillna("null")

In [None]:
# Inspect the selected columns after missing values were replaced by "null".
df

In [None]:
# Keep only rows whose GoldID is not the "Mapping_Not_Found" marker.
has_gold_mapping = df["GoldID"] != "Mapping_Not_Found"
df = df[has_gold_mapping]

In [None]:
# Inspect the rows left after removing "Mapping_Not_Found" gold IDs.
df

In [None]:
# Group rows by gold ID; the next cell uses each group's key and size to
# classify the rows by mapping type.
df_group = df.groupby("GoldID")

In [None]:
# Split the row labels of `df` into three buckets according to their gold ID:
#   * m_vs_one   - the gold ID is shared by more than one row
#   * one_vs_m   - a single row whose gold ID contains "||" (several IDs)
#   * one_vs_one - a single row with a single gold ID
m_vs_one_index, one_vs_m_index, one_vs_one_index = [], [], []
for gold_id, row_labels in df_group.groups.items():
    if len(row_labels) > 1:
        bucket = m_vs_one_index
    elif "||" in gold_id:
        bucket = one_vs_m_index
    else:
        bucket = one_vs_one_index
    bucket.extend(row_labels)

## M vs One

In [None]:
# Rows whose index label was collected in the M-vs-one bucket.
m_vs_one_df = df.loc[df.index.isin(m_vs_one_index)]

In [None]:
# Rows whose gold ID is shared by more than one row.
m_vs_one_df

## One vs M

In [None]:
# Rows whose index label was collected in the one-vs-M bucket.
one_vs_m_df = df.loc[df.index.isin(one_vs_m_index)]

In [None]:
# Rows with a unique gold ID that contains "||".
one_vs_m_df

## one vs one

In [None]:
# Rows whose index label was collected in the one-vs-one bucket.
one_vs_one_df = df.loc[df.index.isin(one_vs_one_index)]

In [None]:
# Rows with a unique gold ID that does not contain "||".
one_vs_one_df

In [None]:
## extracting goldId and predId
def extract_GoldIdAndPredId(df):
    """Collect the gold and predicted concept IDs of every row in ``df``.

    Args:
        df: Frame-like object exposing ``itertuples()`` whose rows carry a
            ``GoldID`` attribute (a string; several IDs are joined by
            ``"||"``) and a ``conceptId`` attribute (the predicted ID).

    Returns:
        A tuple ``(goldIds, predIds)`` of two parallel lists with one entry
        per row. ``goldIds[i]`` is the list of gold IDs of row ``i`` and
        ``predIds[i]`` is a one-element list with the predicted ID. Every ID
        is returned as a stripped string so that gold and predicted IDs that
        denote the same concept compare equal.
    """

    def _as_id(value):
        # Gold IDs come out of str.split() and are always strings, whereas
        # conceptId may have been parsed by pandas as int or as float
        # (e.g. 123.0 when the column contained missing values). Without a
        # common textual form the set intersections used by the metric
        # functions would never match.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return str(value).strip()

    goldIds = []
    predIds = []
    for row in df.itertuples():
        goldIds.append([_as_id(part) for part in row.GoldID.split("||")])
        predIds.append([_as_id(row.conceptId)])
    return goldIds, predIds

In [None]:
def calculate_acc_at_k(goldId, predId, k):
    """Return the share of examples with a gold ID among the first ``k`` predictions.

    Args:
        goldId: Sequence of gold-ID collections, one per example.
        predId: Sequence of predicted-ID lists, parallel to ``goldId``.
        k: Number of leading predictions inspected per example.

    Returns:
        Number of examples with at least one hit divided by ``len(goldId)``,
        or 0 when ``goldId`` is empty (same convention as the zero-denominator
        guards in the precision/recall helpers, instead of raising
        ZeroDivisionError).
    """
    if len(goldId) == 0:
        return 0
    correct = sum(
        1
        for gold, pred in zip(goldId, predId)
        if any(p in gold for p in pred[:k])
    )
    return correct / len(goldId)


def calculate_precision_recall_f1(goldId, predId):
    """Compute precision, recall and F1 from counts pooled over all examples.

    Args:
        goldId: Sequence of gold-ID lists, one per example.
        predId: Sequence of predicted-ID lists, parallel to ``goldId``.

    Returns:
        ``(precision, recall, f1)``. Hits are the sizes of the per-example
        set intersections; precision divides them by the total number of
        predicted IDs, recall by the total number of gold IDs. Any ratio
        whose denominator is not positive is reported as 0.
    """
    pairs = list(zip(goldId, predId))
    hits = sum(len(set(gold) & set(pred)) for gold, pred in pairs)
    n_predicted = sum(len(pred) for _, pred in pairs)
    n_gold = sum(len(gold) for gold, _ in pairs)

    if n_predicted > 0:
        precision = hits / n_predicted
    else:
        precision = 0

    if n_gold > 0:
        recall = hits / n_gold
    else:
        recall = 0

    if (precision + recall) > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0

    return precision, recall, f1


def calculate_precision_recall_at_k(goldId, predId, k):
    """Compute pooled precision and recall using only the first ``k`` predictions.

    Args:
        goldId: Sequence of gold-ID lists, one per example.
        predId: Sequence of predicted-ID lists, parallel to ``goldId``.
        k: Number of leading predictions kept per example (``pred[:k]``).

    Returns:
        ``(precision, recall)``. Hits are the sizes of the set intersections
        between each gold list and its truncated prediction list; precision
        divides by the summed ``min(len(pred[:k]), k)``, recall by the total
        number of gold IDs. A ratio with a non-positive denominator is 0.
    """

    def _ratio(numerator, denominator):
        # Same guard as the un-truncated metric: no division by zero.
        return numerator / denominator if denominator > 0 else 0

    truncated = [(gold, pred[:k]) for gold, pred in zip(goldId, predId)]

    hits = sum(len(set(gold) & set(top_k)) for gold, top_k in truncated)
    n_predicted = sum(min(len(top_k), k) for _, top_k in truncated)
    n_gold = sum(len(gold) for gold, _ in truncated)

    return _ratio(hits, n_predicted), _ratio(hits, n_gold)

## results of One vs One

In [None]:
# Build the gold / predicted ID lists for the one-vs-one subset.
goldIds, predIds = extract_GoldIdAndPredId(one_vs_one_df)

In [None]:
# Score the goldIds / predIds produced by the previous cell and show the
# three metrics as a dict.
p, r, f1 = calculate_precision_recall_f1(goldIds, predIds)
total_acc = {'precision': p, 'recall': r, 'f1': f1}

total_acc

## result M vs one

In [None]:
# Build the gold / predicted ID lists for the M-vs-one subset.
goldIds, predIds = extract_GoldIdAndPredId(m_vs_one_df)

In [None]:
# Score the goldIds / predIds produced by the previous cell and show the
# three metrics as a dict.
p, r, f1 = calculate_precision_recall_f1(goldIds, predIds)
total_acc = {'precision': p, 'recall': r, 'f1': f1}

total_acc

## result one vs M

In [None]:
# Build the gold / predicted ID lists for the one-vs-M subset.
goldIds, predIds = extract_GoldIdAndPredId(one_vs_m_df)

In [None]:
# Score the goldIds / predIds produced by the previous cell and show the
# three metrics as a dict.
p, r, f1 = calculate_precision_recall_f1(goldIds, predIds)
total_acc = {'precision': p, 'recall': r, 'f1': f1}

total_acc