# NEL

---

In [None]:
# Task identifier; interpolated into the gold-standard and tool-result file paths used below.
nlp_task = 'nel'

## Load processed gold standard data

In [None]:
!ls ../tool_results/spacy_entity_linker/spacy_entitylinker_sm.csv


../tool_results/spacy_entity_linker/spacy_entitylinker_sm.csv


In [None]:
import pandas as pd
import requests

# Path to the processed gold standard file for the current task (derived from nlp_task).
task_gold_standard_path = f"../gold_standard/processed/{nlp_task}.csv"

# Load the processed NEL gold standard data.
# The CSV also carries its saved index as an 'Unnamed: 0' column (visible in the output below).
gs = pd.read_csv(task_gold_standard_path)

gs

Unnamed: 0,id,sample,entity,qid
0,19990213001379A,ACFT WAS TAXIING FOR TAKE OFF WHEN IT LOST CON...,"['ACFT', None, None]","['Q11436', None, None]"
1,19800217031649I,"AFTER TAKEOFF, ENGINE QUIT. WING FUEL TANK SUM...","['TAKEOFF', None, None]","['Q854248', None, None]"
2,19800217031649I,"AFTER TAKEOFF, ENGINE QUIT. WING FUEL TANK SUM...","['ENGINE', None, None]","['Q743004', None, None]"
3,19800217031649I,"AFTER TAKEOFF, ENGINE QUIT. WING FUEL TANK SUM...","['WING', None, None]","['Q161358', None, None]"
4,19800217031649I,"AFTER TAKEOFF, ENGINE QUIT. WING FUEL TANK SUM...","['FUEL TANK', 'TANK', None]","['Q1411232', 'Q1047832', None]"
...,...,...,...,...
505,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,"['PILOT', None, None]","['Q2095549', None, None]"
506,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,"['OIL FILLER CAP', 'CAP', None]","[None, 'Q2488579', None]"
507,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,"['OIL', None, None]","['Q42962', None, None]"
508,20030620012809I,(-23) PILOT FAILED TO ASSURE THE OIL FILLER CA...,"['OIL', None, None]","['Q42962', None, None]"


## Load processed outputs of the tools to be evaluated

In [None]:
def load_df(nlp_task, tool):
    """
    Read the processed output of one tool for one NLP task.

    Parameters:
    - nlp_task: Task name; selects the sub-directory under ../tool_results/PROCESSED/.
    - tool: Tool name; the file read is '<tool>.csv' inside that directory.

    Returns:
    - The CSV content as a DataFrame, exactly as pandas parsed it (no column
      renaming or other standardization is applied).
    """
    file_name = tool + '.csv'
    directory = f"../tool_results/PROCESSED/{nlp_task}/"
    return pd.read_csv(directory + file_name)

# Load the output of each tool to be evaluated.
# load_df reads ../tool_results/PROCESSED/<nlp_task>/<tool>.csv, so one such file must exist per tool listed here.
tools = ["spacy"]
df_tools = { tool: load_df(nlp_task, tool)   for tool in tools }

Visualize some samples for each tool

In [None]:
# Preview the first four rows of every loaded tool output, labelled with the tool name.
for tool in tools:
    print(tool)
    display(df_tools[tool].head(4))

spacy


Unnamed: 0,id,sample,qid,entity,description
0,19760606015529A,SUFFICIENT OPPORTUNITY EXISTED TO RELEASE WHEN...,193538,Opportunity,NASA Mars rover
1,19760606015529A,SUFFICIENT OPPORTUNITY EXISTED TO RELEASE WHEN...,3785514,High,song by Lighthouse Family
2,19780111000459A,ACFT DISPATCHER HARRASSMENT OF PILOT. PILOT FO...,67935434,United States Army Combat Fitness Test,Physical fitness test for the United States Army
3,19780111000459A,ACFT DISPATCHER HARRASSMENT OF PILOT. PILOT FO...,2044212,PILOT,historic programming language


## Evaluation

In [None]:
import os

def qid_semantic_similarity(q1, q2, similarity_type, timeout=30):
    """
    Ask the KGTK similarity API how similar two Wikidata items are.

    Example: qid_semantic_similarity('1875633', '42501', "class")

    Parameters:
    - q1, q2: Wikidata ids, either the bare number ('11436', 11436) or the
      full id ('Q11436'); a single leading 'Q' is tolerated so ids are never
      sent as 'QQ11436'.
    - similarity_type: Value forwarded as the `similarity_type` query
      parameter (this notebook uses "jc" and "class").
    - timeout: Seconds to wait for the server before giving up (default 30),
      so one stalled request cannot hang a long pairing run.

    Returns:
    - The decoded JSON response when the call succeeds and the payload has no
      "error" key.
    - {"similarity": -1, "q1": q1, "q2": q2} (q1/q2 as passed in) when the
      request fails, the status is not 200, the body is not valid JSON, or
      the payload contains an "error" key.
    """
    failure = {"similarity": -1, "q1": q1, "q2": q2}

    def as_qid(value):
        # Accept both '11436' and 'Q11436' and always send 'Q11436'.
        text = str(value).strip()
        if text[:1].upper() == "Q":
            text = text[1:]
        return f"Q{text}"

    try:
        # Let requests build and encode the query string instead of formatting the URL by hand.
        response = requests.get(
            "https://kgtk.isi.edu/similarity_api",
            params={
                "q1": as_qid(q1),
                "q2": as_qid(q2),
                "similarity_type": similarity_type,
            },
            timeout=timeout,
        )
    except requests.RequestException:
        # Connection problems and timeouts get the same sentinel as API-level errors.
        return failure

    if response.status_code != 200:
        return failure

    try:
        response_json = response.json()
    except ValueError:
        # Body was not valid JSON (requests' JSON decode error subclasses ValueError).
        return failure

    if "error" in response_json:
        return failure
    return response_json
    
def is_similar_entity(label1, label2):
    """
    Check whether two entity labels share at least one word.

    Labels are lower-cased and split on whitespace; the labels are considered
    similar as soon as one word appears in both.

    NOTE: a later cell of this notebook defines another `is_similar_entity`
    with a different signature (entity, list of entities). Once that cell has
    run, it replaces this function for every caller.

    Parameters:
    - label1: The first entity label as a string.
    - label2: The second entity label as a string.

    Returns:
    - True if the labels have at least one word in common, otherwise False.
      Non-string labels (e.g. None or NaN from a DataFrame) never match.
    """
    # Missing values read from a CSV arrive as None/NaN, which have no .lower().
    if not isinstance(label1, str) or not isinstance(label2, str):
        return False

    # Tokenize labels into sets of lower-cased words
    words_label1 = set(label1.lower().split())
    words_label2 = set(label2.lower().split())

    # Similar when the two word sets overlap
    return not words_label1.isdisjoint(words_label2)


def pairing_of_entities(gs, tools_df):
    """
    Pair every gold-standard row with every tool row that has the same id.

    No similarity API call is made: the two QID similarity columns are
    placeholders and are always None in the result. Use
    pairing_of_entities_with_sim to have them filled in.

    Parameters:
    - gs: Gold standard DataFrame; columns 'id', 'entity' and 'qid' are read.
    - tools_df: Tool output DataFrame; columns 'id', 'entity' and 'qid' are read.

    Returns:
    - A DataFrame with one row per (gold row, tool row) pair sharing an id,
      with columns id, entity_gs, qid_gs, entity_tool, qid_tool, entity_sim
      (result of is_similar_entity), qid_sim_jc and qid_sim_class.
      Pairs are ordered by gold-standard row, then by tool row.
    """
    # Group the tool rows by id once so each gold row only visits its own candidates
    # instead of scanning the whole tool frame.
    tool_rows_by_id = {tool_id: rows for tool_id, rows in tools_df.groupby('id', sort=False)}

    results = []
    for _, gs_row in gs.iterrows():
        matching_rows = tool_rows_by_id.get(gs_row['id'])
        if matching_rows is None:
            continue

        for _, tool_row in matching_rows.iterrows():
            results.append({
                'id': gs_row['id'],
                'entity_gs': gs_row['entity'],
                'qid_gs': gs_row['qid'],
                'entity_tool': tool_row['entity'],
                'qid_tool': tool_row['qid'],
                # entity lexical similarity flag
                'entity_sim': is_similar_entity(gs_row['entity'], tool_row['entity']),
                # Placeholders: the similarity API is deliberately not queried here.
                'qid_sim_jc': None,
                'qid_sim_class': None
            })

    return pd.DataFrame(results)


def evaluate_entity_similarity(gs, tools_df):
    """
    Placeholder: only announces that the evaluation starts.

    Neither argument is used and nothing is returned.
    """
    message = "Evaluating entity similarity..."
    print(message)
    
def pairing_of_entities_with_sim(gs, tools_df, output_file='results.csv'):
    """
    Pair gold-standard and tool rows sharing an id and score each pair.

    For every pair the lexical flag from is_similar_entity and the "jc" and
    "class" similarities from qid_semantic_similarity are computed. Each pair
    is appended to `output_file` as soon as it is ready, so an interrupted run
    keeps the rows already computed.

    Parameters:
    - gs: Gold standard DataFrame; columns 'id', 'entity' and 'qid' are read.
    - tools_df: Tool output DataFrame; columns 'id', 'entity' and 'qid' are read.
    - output_file: CSV path rows are appended to. The header is written only
      when the file does not exist yet or is empty.

    Returns:
    - A DataFrame with the rows produced by this call (columns id, entity_gs,
      qid_gs, entity_tool, qid_tool, entity_sim, qid_sim_jc, qid_sim_class).
    """
    # A header is needed for a new file and also for one that exists but is still empty.
    write_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0

    # Group the tool rows by id once instead of scanning the whole frame for every gold row.
    tool_rows_by_id = {tool_id: rows for tool_id, rows in tools_df.groupby('id', sort=False)}

    # The same (qid, qid) pair shows up repeatedly; remember successful answers
    # so each one costs a single network round trip.
    similarity_cache = {}

    def cached_similarity(qid_gs, qid_tool, similarity_type):
        key = (qid_gs, qid_tool, similarity_type)
        if key in similarity_cache:
            return similarity_cache[key]
        similarity = qid_semantic_similarity(qid_gs, qid_tool, similarity_type)['similarity']
        # -1 is the failure sentinel; do not cache it so the pair is retried next time.
        if similarity != -1:
            similarity_cache[key] = similarity
        return similarity

    results = []
    for _, gs_row in gs.iterrows():
        matching_rows = tool_rows_by_id.get(gs_row['id'])
        if matching_rows is None:
            continue

        for _, tool_row in matching_rows.iterrows():
            row_to_write = {
                'id': gs_row['id'],
                'entity_gs': gs_row['entity'],
                'qid_gs': gs_row['qid'],
                'entity_tool': tool_row['entity'],
                'qid_tool': tool_row['qid'],
                # entity lexical similarity flag
                'entity_sim': is_similar_entity(gs_row['entity'], tool_row['entity']),
                # qid semantic similarity scores
                'qid_sim_jc': cached_similarity(gs_row['qid'], tool_row['qid'], "jc"),
                'qid_sim_class': cached_similarity(gs_row['qid'], tool_row['qid'], "class")
            }
            results.append(row_to_write)

            # Append immediately (checkpoint); only the first write may carry the header.
            pd.DataFrame([row_to_write]).to_csv(output_file, mode='a', header=write_header, index=False)
            write_header = False

    # Convert all accumulated results to a DataFrame
    return pd.DataFrame(results)

import os, json

def call_semantic_similarity(input_file, url):
    """
    Upload a file to a similarity service and return its answer as a DataFrame.

    Parameters:
    - input_file: Path of the file to upload; sent as multipart field 'file'
      under its base name.
    - url: Endpoint that receives the POST request.

    Returns:
    - A DataFrame built from the decoded response.
    """
    file_name = os.path.basename(input_file)

    # Keep the upload inside a `with` block so the file handle is always closed,
    # even when the request raises.
    with open(input_file, mode='rb') as upload:
        files = {
            'file': (file_name, upload, 'application/octet-stream')
        }

        print(files)
        resp = requests.post(url, files=files, params={'similarity_types': "[class, jc]"})

    # The body is decoded twice: resp.json() and then json.loads on its result.
    # Presumably the service returns a JSON-encoded string - TODO confirm.
    s = json.loads(resp.json())
    return pd.DataFrame(s)


- Pairing of entity links between the gold standard and a tool

In [None]:
# output_file = "../evaluations/quantitative/nel/spacy_pairing.csv"
# pairing = pairing_of_entities_with_sim(gs, df_tools['spacy'], output_file)

- Check a pairing for a specific sample

In [None]:
# # pick a sample to display
# sample_id = '19800217031649I'
# sample_text = gs[gs.id == sample_id]['sample'].values[0]
# pairing_to_sample = pairing[pairing.id == sample_id]

# # print results for a sample
# print(f"Results for sample ID: {sample_id}")
# print(f"Text: {sample_text}")
# display(pairing_to_sample)

# # print only rows where they have same entity name 
# print(f"Show only results for same entity names for sample ID: {sample_id}")
# results_same_qid = pairing_to_sample[['id','entity_gs','entity_tool', 'qid_gs','qid_tool' ]][pairing_to_sample.entity_sim]
# display(results_same_qid)

In [None]:
# Spot-check of the "jc" similarity for a few QID pairs.
# Ids are passed without the leading 'Q'; qid_semantic_similarity adds it.
# Author's notes on ids:
#TAKEOFF	854248	
#PREFLIGHT	2108354	
# Q11436
# Q11436
# Q832489
print(qid_semantic_similarity('11436', '320599', "jc")['similarity'])
print(qid_semantic_similarity('11436', '832489', "jc")['similarity'])
# Same id on both sides: the output below shows 1.0 for this pair.
print(qid_semantic_similarity('216197', '216197', "jc")['similarity'])

0.6996778267508654
0.7331227847702766
1.0


- Pairing with the same `Qid`

In [None]:
# same_ids = pairing[pairing.qid_tool == pairing.qid_gs]
# print(f"total: {same_ids.shape[0]}")
# display(same_ids)
# # save to file 
# same_ids.to_csv(f"../evaluations/quantitative/{nlp_task}/{tool}_same_qid.csv", index=False)

In [None]:
# SIM_API = 'https://kgtk.isi.edu/similarity_api'
# df = call_semantic_similarity('test_file.csv', SIM_API)
# df

In [None]:
# [Continue from here]

To calculate precision and recall, where `gs` is the ground truth and `tool` holds the answers produced by a tool or method, we first need to define these metrics in the context of this entity similarity task:

- **Precision**: Of all the entities identified by `tool`, how many were correctly identified as per `gs`? This is calculated as the number of true positives (TP) divided by the number of true positives and false positives (TP + FP).

- **Recall**: Of all the relevant entities present in `gs`, how many were identified by `tool`? This is calculated as the number of true positives (TP) divided by the number of true positives and false negatives (TP + FN).

Here's a step-by-step approach:

1. **True Positives (TP)**: Entities in `tool` that match entities in `gs` for the same `id` (check `is_similar_entity` function to understand the match criteria).
2. **False Positives (FP)**: Entities in `tool` that do not match any entity in `gs` for the same `id`.
3. **False Negatives (FN)**: Entities in `gs` that do not match any entity in `tool` for the same `id`.

In [None]:
def is_similar_entity(gs_entity, tool_entities):
    """
    Check if the gs_entity is similar to any of the entities in the tool_entities list.
    An entity is considered similar if it's a substring of any entity in the list, or vice versa
    (the comparison is case-sensitive).

    NOTE: this redefines the two-label `is_similar_entity(label1, label2)` from the
    Evaluation cell. After this cell runs, the pairing functions that call
    `is_similar_entity(entity, entity)` use this substring check instead of the
    word-overlap check.

    Parameters:
    - gs_entity: The entity from the gs DataFrame.
    - tool_entities: A list of entities from the df_tool DataFrame for a given id.
      A single string is treated as a one-element list rather than being
      compared character by character.

    Returns:
    - True if similar entity is found, False otherwise. Non-string values
      (None, NaN) on either side never match.
    """
    if not isinstance(gs_entity, str):
        return False

    # Iterating a bare string would test its individual characters.
    if isinstance(tool_entities, str):
        tool_entities = [tool_entities]

    return any(
        isinstance(tool_entity, str)
        and (gs_entity in tool_entity or tool_entity in gs_entity)
        for tool_entity in tool_entities
    )

def check_entity_similarity(gs, df_tool):
    """
    For each row in the gs DataFrame, check if the entity is similar to any entity in the df_tool,
    considering only rows with the same id.

    The entity column is looked up as 'entities' and, when that column is absent,
    as 'entity' (the name used by the frames loaded in this notebook). The lookup
    is done independently for each DataFrame.

    Parameters:
    - gs: The ground truth DataFrame with an 'id' column and an entity column.
    - df_tool: The tool DataFrame with an 'id' column and an entity column.

    Returns:
    - A copy of gs with an additional 'Similarity' column indicating if a similar
      entity was found in df_tool. The DataFrame passed in is left unchanged.
    """
    def entity_column(df):
        return 'entities' if 'entities' in df.columns else 'entity'

    gs_col = entity_column(gs)
    tool_col = entity_column(df_tool)

    # Collect the tool entities per id once instead of filtering df_tool for every gs row.
    # Rows with a missing id are left out: they cannot equal any gs id.
    tool_entities_by_id = {}
    for tool_id, tool_entity in zip(df_tool['id'], df_tool[tool_col]):
        if pd.notna(tool_id):
            tool_entities_by_id.setdefault(tool_id, []).append(tool_entity)

    similarities = [
        is_similar_entity(gs_entity, tool_entities_by_id.get(gs_id, []))
        for gs_id, gs_entity in zip(gs['id'], gs[gs_col])
    ]

    # assign() returns a new frame, so re-running the cell never alters the caller's gs.
    return gs.assign(Similarity=similarities)


def calculate_precision_recall_f1(gs, df_tool):
    """
    Calculate precision, recall and F1 from an entity comparison between gs (ground truth)
    and df_tool (answers).

    Two entities match when they belong to rows with the same id and one is a
    substring of the other (case-sensitive). Non-string entities (None, NaN)
    never match anything.

    The entity column is looked up as 'entities' and, when that column is absent,
    as 'entity' (the name used by the frames loaded in this notebook). The lookup
    is done independently for each DataFrame.

    Parameters:
    - gs: DataFrame with an 'id' column and an entity column representing the ground truth.
    - df_tool: DataFrame with an 'id' column and an entity column representing the tool's answers.

    Returns:
    - A tuple (precision, recall, f1_score). Each value is 0 when its denominator is 0.
    """
    def entity_column(df):
        return 'entities' if 'entities' in df.columns else 'entity'

    def entities_by_id(df):
        # One pass over the frame; rows with a missing id are left out because
        # they cannot equal any id on the other side.
        grouped = {}
        for row_id, entity in zip(df['id'], df[entity_column(df)]):
            if pd.notna(row_id):
                grouped.setdefault(row_id, []).append(entity)
        return grouped

    def matches_any(entity, candidates):
        if not isinstance(entity, str):
            return False
        return any(
            isinstance(candidate, str) and (entity in candidate or candidate in entity)
            for candidate in candidates
        )

    gs_entities_by_id = entities_by_id(gs)
    tool_entities_by_id = entities_by_id(df_tool)

    TP = 0  # True Positives: gs rows with a matching tool entity
    FP = 0  # False Positives: tool rows without a matching gs entity
    FN = 0  # False Negatives: gs rows without a matching tool entity

    # Check for True Positives and False Negatives by iterating over gs
    for gs_id, gs_entity in zip(gs['id'], gs[entity_column(gs)]):
        if matches_any(gs_entity, tool_entities_by_id.get(gs_id, [])):
            TP += 1
        else:
            FN += 1

    # Check for False Positives by iterating over df_tool
    for tool_id, tool_entity in zip(df_tool['id'], df_tool[entity_column(df_tool)]):
        if not matches_any(tool_entity, gs_entities_by_id.get(tool_id, [])):
            FP += 1

    # NOTE(review): TP counts gs rows while FP counts tool rows, so precision mixes
    # the two row populations - confirm this is the intended definition.
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0

    # Calculating the F1 score
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    return precision, recall, f1_score


def evaluate_nlr(gs, df_tools, tools):
    """
    Evaluate tools by calculating precision, recall, and F1 score, and return a sorted and rounded DataFrame.

    Parameters:
    - gs: The ground truth DataFrame, passed unchanged to calculate_precision_recall_f1.
    - df_tools: A dictionary with one DataFrame per tool, keyed by tool name.
    - tools: A list of tool names corresponding to keys in df_tools.

    Returns:
    - A DataFrame with columns 'Tool', 'Precision', 'Recall' and 'F1 Score', one row per tool,
      sorted by F1 score in descending order and rounded to 2 decimal places.
      An empty `tools` list yields an empty DataFrame that still has these columns.
    """
    columns = ['Tool', 'Precision', 'Recall', 'F1 Score']

    results = []
    for tool in tools:
        precision, recall, f1_score = calculate_precision_recall_f1(gs, df_tools[tool])
        results.append({
            'Tool': tool,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1_score
        })

    # Naming the columns explicitly keeps 'F1 Score' available for sorting
    # even when no tool was evaluated.
    results_df = pd.DataFrame(results, columns=columns)
    results_df_sorted = results_df.sort_values(by='F1 Score', ascending=False)

    return results_df_sorted.round(2)

In [None]:

# results_df_sorted = evaluate_nlr(gs, df_tools, tools)
# results_df_sorted

Show some matches 

In [None]:
# check_entity_similarity(gs, df_tools['spacy'])