In [None]:
from pathlib import Path
import os

The manual annotations were produced by dragging the documents into one of three folders, so we have to do some funny filesystem things to interpret them...

In [None]:
def get_rankings(base_path):
    """Map each annotated file to the name of the rank folder that contains it.

    Only sub-directories of ``base_path`` whose name starts with ``1``, ``2``
    or ``3`` are considered. They are visited in ascending order of the
    integer before the first ``_`` in their name (ties broken alphabetically).

    Args:
        base_path: Directory that contains the rank folders.

    Returns:
        dict mapping a file name (as listed inside a rank folder) to the name
        of that folder. If the same file name occurs in several rank folders,
        the folder visited last wins.

    Raises:
        ValueError: If a matching directory's name does not start with an
            integer followed by ``_`` (or consist of an integer only).
        OSError: If ``base_path`` or one of the rank folders cannot be listed.
    """
    # Filter to directories *before* sorting: a stray file such as "1.txt"
    # must not reach the int() conversion in the sort key.
    rank_folders = [
        name for name in os.listdir(base_path)
        if name[:1] in {'1', '2', '3'}
        and os.path.isdir(os.path.join(base_path, name))
    ]
    # Numeric prefix first, full name as tie-breaker, so the visiting order
    # (and therefore which folder wins for duplicate files) is deterministic.
    rank_folders.sort(key=lambda name: (int(name.split('_')[0]), name))

    rankings = {}
    for folder in rank_folders:
        for file in os.listdir(os.path.join(base_path, folder)):
            rankings[file] = folder
    return rankings

In [None]:
import json


def add_annotations(goal_path, rankings, annotator_name, overwrite=False):
    """Store an annotator's rankings in the per-document JSON files.

    For every file name in ``rankings`` the JSON file with the same stem is
    looked up in ``goal_path`` and the ranking is stored in it under the key
    ``annotator_name``.

    Args:
        goal_path: Directory holding one ``<stem>.json`` file per document.
        rankings: Mapping from annotated file name to its ranking value.
        annotator_name: JSON key under which the ranking is stored.
        overwrite: If False (default), a value already stored under
            ``annotator_name`` is left untouched.

    Documents without a JSON file in ``goal_path`` are reported with a
    printed message and skipped; no file is created for them.
    """
    for file, ranking in rankings.items():
        # find corresponding json file in goal_path
        json_file = os.path.join(goal_path, Path(file).stem + '.json')
        if not os.path.exists(json_file):
            print(f"File {json_file} does not exist.")
            # Skip: there is no document to annotate, and writing here would
            # either fail (no data loaded yet) or dump the previous
            # document's content into a new file.
            continue

        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if annotator_name in data and not overwrite:
            # Existing annotation is kept; nothing changed, so no rewrite.
            continue

        data[annotator_name] = ranking
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

In [None]:
# Annotation folders per annotator. Each listed directory is expected to
# contain the rank sub-folders the documents were dragged into.
# NOTE(review): these are absolute paths on one machine; adjust the two roots
# below when running elsewhere.
_ANNOTATED_ROOT = '/home/brunobrocai/Data/MoWiKo/Paper-themKorp/_annotated_'
_RQTR_ANNO_ROOT = '/home/brunobrocai/Code/Paper-themKorpus/RQTR/_anno_'

rankings_folders_expert1 = [
    f'{_ANNOTATED_ROOT}/{folder_name}'
    for folder_name in (
        'annotateSelection-expert1',
        'annotationBaseline2-expert1',
        'annotationMarcel-expert1',
        'annotateInfoAkt-expert1',
    )
]

# NOTE(review): two of expert 2's folders live under the RQTR code checkout
# and carry no annotator suffix — confirm they really belong to expert 2.
rankings_folders_expert2 = [
    f'{_ANNOTATED_ROOT}/annotateSelection-expert2',
    f'{_RQTR_ANNO_ROOT}/annotationBaseline2',
    f'{_RQTR_ANNO_ROOT}/annotationCollocationPMI1',
    f'{_ANNOTATED_ROOT}/annotateAI-expert2',
]

rankings_folders_student1 = [
    f'{_ANNOTATED_ROOT}/{folder_name}'
    for folder_name in (
        'annotateSelection-student1',
        'annotationRegex-student1',
    )
]

rankings_folders_student2 = [f'{_ANNOTATED_ROOT}/annotationRegex-student2']

In [None]:
def get_rankings_dirlist(dirlist):
    """Collect the rankings of several annotation directories into one dict.

    Args:
        dirlist: Iterable of base paths, each passed to ``get_rankings``.

    Returns:
        dict combining the per-directory results. Directories are processed
        in the given order, so for a file name that appears in more than one
        directory the entry from the later directory replaces the earlier one.
    """
    merged = {}
    for directory in dirlist:
        for file_name, folder in get_rankings(directory).items():
            merged[file_name] = folder
    return merged

# Build one file -> rank-folder mapping per annotator, each from that
# annotator's own folder list (expert 2 previously read expert 1's folders).
rankings_expert1 = get_rankings_dirlist(rankings_folders_expert1)
rankings_expert2 = get_rankings_dirlist(rankings_folders_expert2)
rankings_student1 = get_rankings_dirlist(rankings_folders_student1)
rankings_student2 = get_rankings_dirlist(rankings_folders_student2)

In [None]:
# Write every annotator's rankings into the JSON files in 'final_corpus'.
# Expert annotations replace any value already stored under their key;
# student annotations are only added where no value exists yet.
annotation_jobs = (
    (rankings_expert2, 'expert_annotator_2', True),
    (rankings_expert1, 'expert_annotator_1', True),
    (rankings_student1, 'student_annotator_1', False),
    (rankings_student2, 'student_annotator_2', False),
)
for job_rankings, job_annotator, job_overwrite in annotation_jobs:
    add_annotations(
        'final_corpus',
        job_rankings,
        job_annotator,
        overwrite=job_overwrite
    )