## Annotation Visualizations

### Imports & Initializations 

In [5]:
import json
import os
import re

In [6]:
def extract_texts(json_data):
    """Collect the raw text of every task, in task order.

    Reads ``json_data["tasks"]`` and returns a list holding each task's
    ``task["data"]["text"]`` value.
    """
    texts = []
    for entry in json_data["tasks"]:
        texts.append(entry["data"]["text"])
    return texts

def extract_predictions(json_data):
    """Summarise the predictions attached to every task.

    Returns one list per task (in task order). Each entry in that list
    describes one prediction as a dict with:

    * ``"model_version"`` - copied from the prediction.
    * ``"results"`` - one ``{"labels": ..., "text": ...}`` dict per item of
      the prediction's ``"result"`` list, taken from that item's ``"value"``.
    """
    return [
        [
            {
                "model_version": pred["model_version"],
                "results": [
                    {
                        "labels": span["value"]["labels"],
                        "text": span["value"]["text"],
                    }
                    for span in pred["result"]
                ],
            }
            for pred in task["predictions"]
        ]
        for task in json_data["tasks"]
    ]

def extract_annotations(json_data):
    """Summarise the human annotations attached to every task.

    Returns one list per task (in task order). Each entry in that list
    describes one annotation as ``{"results": [...]}``, where every element
    is a ``{"labels": ..., "text": ...}`` dict built from the ``"value"`` of
    one item in the annotation's ``"result"`` list.
    """
    per_task = []
    for task in json_data["tasks"]:
        summaries = [
            {
                "results": [
                    {
                        "labels": item["value"]["labels"],
                        "text": item["value"]["text"],
                    }
                    for item in annotation["result"]
                ]
            }
            for annotation in task["annotations"]
        ]
        per_task.append(summaries)
    return per_task




In [7]:
# Name of the sub-directory of "data" whose annotation exports are loaded.
trial_type = "solo"
annotation_path = os.path.join("data", trial_type, "annotations")
# Parsed JSON export per participant, keyed by the id taken from the file name.
annotations = {}

for file in os.listdir(annotation_path):
    if file.endswith("_annotations.json"):
        # JSON is UTF-8 by specification; passing the encoding explicitly keeps
        # the load independent of the platform's locale default.
        with open(os.path.join(annotation_path, file), "r", encoding="utf-8") as f:
            data = json.load(f)
            # The id is the second underscore-separated field of the file name.
            participant_id = file.split("_")[1]
            annotations[participant_id] = data

### Accuracies

In [8]:
def tokenize(text):
    """Split *text* on whitespace and record where each token sits.

    Returns a ``(tokens, char_indices)`` pair. ``char_indices[i]`` is the
    ``(start, end)`` span of ``tokens[i]`` with ``end`` exclusive, so
    ``text[start:end] == tokens[i]``.

    The spans are read from the actual match positions rather than assumed
    from a single-space separator, so leading whitespace, repeated spaces,
    tabs and newlines do not shift the offsets of later tokens.
    """
    tokens = []
    char_indices = []
    # r"\S+" yields the same tokens as str.split() but also gives real offsets.
    for match in re.finditer(r"\S+", text):
        tokens.append(match.group())
        char_indices.append(match.span())
    return tokens, char_indices

def align_labels_with_tokens(char_indices, labels_range):
    """Assign a label to every token from labelled character ranges.

    ``char_indices`` is a list of ``(start, end)`` token spans and
    ``labels_range`` an iterable of ``(start, end, label)`` triples. The
    result has one entry per token: the label of a range that overlaps the
    token, or ``'O'`` when no range does. When several ranges overlap the same
    token, the one that comes last in ``labels_range`` wins.

    Spans are compared as half-open intervals (``end`` exclusive), matching the
    spans ``tokenize`` produces. A range that merely touches a token boundary
    (``end == tok_start`` or ``start == tok_end``) therefore does not label
    the neighbouring token.
    NOTE(review): assumes the labelled ranges are also end-exclusive character
    offsets - confirm against the annotation export format.
    """
    # Start with every token unlabelled.
    labels = ['O'] * len(char_indices)

    for start, end, label in labels_range:
        for i, (tok_start, tok_end) in enumerate(char_indices):
            # Strict comparisons: sharing only a boundary is not an overlap.
            if start < tok_end and tok_start < end:
                labels[i] = label
    return labels

def calculate_accuracy(task):
    """Return the fraction of tokens whose predicted and annotated labels agree.

    The task text is tokenized with ``tokenize``. Every result of every
    prediction, and of every annotation, contributes a
    ``(start, end, first label)`` range, and both sets of ranges are projected
    onto the tokens with ``align_labels_with_tokens``. The two label sequences
    are printed (followed by a blank line) and then compared position by
    position.

    Returns ``0`` when the text yields no tokens.
    """
    def _labelled_ranges(entries):
        # Flatten every result of every entry into (start, end, first label).
        ranges = []
        for entry in entries:
            for res in entry["result"]:
                ranges.append(
                    (res["value"]["start"], res["value"]["end"], res["value"]["labels"][0])
                )
        return ranges

    tokens, offsets = tokenize(task["data"]["text"])

    predicted_ranges = _labelled_ranges(task["predictions"])
    annotated_ranges = _labelled_ranges(task["annotations"])

    predicted = align_labels_with_tokens(offsets, predicted_ranges)
    annotated = align_labels_with_tokens(offsets, annotated_ranges)

    # Show both label sequences for side-by-side inspection.
    print(predicted)
    print(annotated)
    print()

    matches = 0
    for guess, truth in zip(predicted, annotated):
        if guess == truth:
            matches += 1

    token_count = len(tokens)
    if token_count > 0:
        return matches / token_count
    return 0


# Print the per-task token accuracy for every loaded participant.
for participant_id, json_data in annotations.items():
    # One accuracy value per task, in task order.
    accuracies = list(map(calculate_accuracy, json_data["tasks"]))

    # Task numbers are reported starting from 1.
    for i, accuracy in enumerate(accuracies, start=1):
        print(f"Task {i}: Accuracy = {accuracy:.2%}")


['Person', 'O', 'Person', 'O', 'O', 'O', 'O', 'Person', 'O', 'O', 'O', 'O']
['Organization', 'O', 'Location', 'O', 'O', 'O', 'O', 'Location', 'O', 'O', 'O', 'O']

['Organization', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Organization', 'O', 'O', 'Organization', 'O', 'O', 'O', 'O', 'O', 'O']
['Location', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Location', 'O', 'O', 'Organization', 'Organization', 'Organization', 'O', 'O', 'O', 'O']

['O', 'Organization', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
['O', 'Location', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Location', 'O']

['Organization', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Organization', 'O', 'Location', 'Location', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',