In [1]:
import json
from pathlib import Path

from nervaluate import Evaluator
from spacy import displacy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from IPython.display import Markdown, display
def printmd(string):
    """Render ``string`` as Markdown in the notebook's output area."""
    rendered = Markdown(string)
    display(rendered)

In [3]:
# Directory holding the input data, given as a relative path.
base_path = Path("../../../data")
# JSONL file read by the loading cell below.
data_path = base_path.joinpath("main_3_per_cluster_download.cba617d8-a055-4622-97a3-c194a148cbed.jsonl")

In [4]:
# Echo the configured path so the cell output shows which file will be loaded.
data_path

PosixPath('../../../data/main_3_per_cluster_download.cba617d8-a055-4622-97a3-c194a148cbed.jsonl')

In [5]:
# Load the export: one JSON record per line.
# - An explicit encoding keeps decoding independent of the platform default.
# - Iterating the file handle avoids materialising every raw line first.
# - Blank lines (e.g. a trailing newline) are skipped rather than crashing json.loads.
with open(data_path, encoding="utf-8") as f:
    lines = [json.loads(raw) for raw in f if raw.strip()]

# Session whose annotations serve as the reference when comparing sessions.
gold_standard_session = "main_3_per_cluster-Kameron"
# Sessions that are compared against the gold-standard session.
other_sessions = ["main_3_per_cluster-Sampath", "main_3_per_cluster-Nikita"]

# Span labels handed to the evaluator as its tag set.
all_labels = ['base', 'change_direction', 'type_of', 'aspect_changing']

In [6]:
# Strict-match agreement of every other session against the gold session.

# Only spans carrying all of these keys are passed to the evaluator.
span_keys = ("start", "end", "label")

# The gold subset does not depend on the session being compared, so it is
# filtered, sorted and converted to span lists once instead of per iteration.
gold_session_subset = sorted(
    (x for x in lines if x["_session_id"] == gold_standard_session),
    key=lambda x: x["original_md_text"],
)
gold_texts = [x["original_md_text"] for x in gold_session_subset]
gold_spans = [[a for a in x["spans"] if all(k in a for k in span_keys)] for x in gold_session_subset]

for other_session in other_sessions:
    # Sorting both subsets by text aligns the records pairwise.
    other_session_subset = sorted(
        (x for x in lines if x["_session_id"] == other_session),
        key=lambda x: x["original_md_text"],
    )

    # Both sessions must cover exactly the same texts, otherwise the
    # positional pairing of span lists below would be meaningless.
    assert [x["original_md_text"] for x in other_session_subset] == gold_texts, (
        f"{other_session} and {gold_standard_session} do not cover the same texts"
    )

    other_spans = [[a for a in x["spans"] if all(k in a for k in span_keys)] for x in other_session_subset]

    evaluator = Evaluator(gold_spans, other_spans, tags=all_labels)

    results, results_per_tag = evaluator.evaluate()

    print(results["strict"])

{'correct': 1489, 'incorrect': 339, 'partial': 0, 'missed': 693, 'spurious': 257, 'possible': 2521, 'actual': 2085, 'precision': 0.7141486810551558, 'recall': 0.5906386354621183, 'f1': 0.6465479808944855}
{'correct': 1367, 'incorrect': 442, 'partial': 0, 'missed': 712, 'spurious': 225, 'possible': 2521, 'actual': 2034, 'precision': 0.6720747295968534, 'recall': 0.542245140817136, 'f1': 0.6002195389681668}


In [7]:
# For every other session, render the samples with the lowest strict F1
# against the gold session, showing gold and other annotations one after another.

# Only spans carrying all of these keys are scored and rendered.
span_keys = ("start", "end", "label")
# Number of lowest-scoring samples rendered per session.
n_worst = 5

# The gold subset is the same for every compared session, so build it once.
gold_session_subset = sorted(
    (x for x in lines if x["_session_id"] == gold_standard_session),
    key=lambda x: x["original_md_text"],
)
gold_texts = [x["original_md_text"] for x in gold_session_subset]

for other_session in other_sessions:
    # Sorting both subsets by text aligns the records pairwise.
    other_session_subset = sorted(
        (x for x in lines if x["_session_id"] == other_session),
        key=lambda x: x["original_md_text"],
    )

    assert [x["original_md_text"] for x in other_session_subset] == gold_texts, (
        f"{other_session} and {gold_standard_session} do not cover the same texts"
    )

    samples = []

    for other, gold in zip(other_session_subset, gold_session_subset):
        assert other["original_md_text"] == gold["original_md_text"], ("A", other["original_md_text"], "B", gold["original_md_text"])

        sample = {
            "other_spans": [a for a in other["spans"] if all(k in a for k in span_keys)],
            "gold_spans": [a for a in gold["spans"] if all(k in a for k in span_keys)],
            "text": other["original_md_text"],
        }

        if not sample["gold_spans"] and not sample["other_spans"]:
            # Neither annotator marked anything: that is full agreement, so it
            # must not rank among the worst samples.
            # NOTE(review): the evaluator appears to report f1 == 0 when there
            # is nothing to score, which would push these samples to the top
            # of the "worst" list - confirm against the nervaluate version in use.
            sample["f1"] = 1.0
        else:
            # Score this single document on its own (one-element document lists).
            evaluator = Evaluator([sample["gold_spans"]], [sample["other_spans"]], tags=all_labels)

            results, results_per_tag = evaluator.evaluate()

            sample["f1"] = results["strict"]["f1"]

        samples.append(sample)

    # Ascending sort puts the lowest-F1 (worst agreement) samples first.
    samples.sort(key=lambda x: x["f1"])

    printmd(f"## {other_session}")

    for sample in samples[:n_worst]:
        for heading, ents_key in (("### GOLD tags", "gold_spans"), ("### SILVER tags", "other_spans")):
            printmd(heading)
            manual_doc = {
                "ents": sample[ents_key],
                "text": sample["text"],
            }
            displacy.render(manual_doc, style="ent", manual=True, jupyter=True)

    
    

## main_3_per_cluster-Sampath

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

## main_3_per_cluster-Nikita

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags

### GOLD tags

### SILVER tags