# Merge annotations
This notebook merges all annotation chunks into a single JSON file.

### 0. Import libraries and load data

In [None]:
import os
import re
import json 

# Directory that holds the annotation JSON files. The trailing slash matters:
# file paths in this notebook are built by appending the file name directly
# to this string.
ANNOTATIONS_PATH = "../../data/annotations/"

In [None]:
# Collect every annotation chunk file ("annotations_<...>.json") found in
# ANNOTATIONS_PATH, except the one that is loaded on its own further down.
regex_pattern = r"^annotations_.*\.json$"
annotation_chunks = []

# os.listdir returns names in arbitrary order; sorting makes the chunk order
# (and therefore anything that depends on "first seen") reproducible.
for filename in sorted(os.listdir(ANNOTATIONS_PATH)):
    # Compare with `==`: `filename in "<string>"` is a substring test and
    # would also skip any name that merely occurs inside that string.
    if filename == "annotations_0_610.json":
        continue

    if re.match(regex_pattern, filename):
        with open(f"{ANNOTATIONS_PATH}{filename}", "r") as f:
            annotation_chunks.append(json.load(f))

### 1. Analyze results of the first trial

In [None]:
# Three independent, initially empty accumulators for painting ids, one per
# category reported by the annotation chunks.
(
    paintings_ids_unprocessed,
    paintings_ids_to_check,
    paintings_ids_wo_objects,
) = ([], [], [])

In [None]:
# Pool the id lists of every loaded chunk into the three overall lists
# (in-place list concatenation; duplicates are not removed here).
for chunk in annotation_chunks:
    paintings_ids_unprocessed += chunk["paintings_ids_unprocessed"]
    paintings_ids_to_check += chunk["paintings_ids_to_check"]
    paintings_ids_wo_objects += chunk["paintings_ids_wo_objects"]

In [None]:
# Distinct painting ids listed as unprocessed or as "to check".
# NOTE(review): `indices` is not read by any cell visible here; it is kept in
# case other code relies on it.
indices = list(set(paintings_ids_unprocessed + paintings_ids_to_check))

# Counts are taken over sets so that an id listed more than once is counted once.
print(f"The number of paintings for which the annotator did not work correctly: {len(set(paintings_ids_unprocessed))}")
print(f"The number of paintings for which the judge did not work correctly: {len(set(paintings_ids_to_check))}")
print(f"The number of paintings without objects: {len(set(paintings_ids_wo_objects))}")

### 2. Analyze results of the second trial
The paintings for which the annotator or the judge initially failed were processed again.

In [None]:
# Load the second-trial file and derive the id sets of the final state.
with open(f"{ANNOTATIONS_PATH}annotations_0_610.json", "r") as second_trial_file:
    second_trial_annotations = json.load(second_trial_file)

# Unprocessed / to-check ids come from the second trial only.
paintings_ids_unprocessed2 = set(second_trial_annotations["paintings_ids_unprocessed"])
paintings_ids_to_check2 = set(second_trial_annotations["paintings_ids_to_check"])

# Paintings without objects are the union of what both trials reported.
paintings_ids_wo_objects2 = (
    set(paintings_ids_wo_objects)
    | set(second_trial_annotations["paintings_ids_wo_objects"])
)

In [None]:
# Treat the second-trial results as one more chunk, placed after all others.
annotation_chunks.append(second_trial_annotations)

# Flatten the "annotations" lists of all chunks, preserving chunk order.
all_annotations = [
    entry
    for chunk in annotation_chunks
    for entry in chunk["annotations"]
]

# Keep only the first annotation seen for each painting id.
# NOTE(review): because the second-trial chunk comes last, an earlier entry
# with the same id takes precedence over it — confirm this is intended.
seen_ids = set()
unique_annotations = []
for entry in all_annotations:
    key = entry.get("painting_id")
    if key in seen_ids:
        continue
    seen_ids.add(key)
    unique_annotations.append(entry)

In [None]:
# Sanity check: annotated + unprocessed + object-free paintings must add up
# to the expected total. The "to check" set is not part of this sum.
# Presumably 12078 is the total number of paintings in the dataset — TODO confirm.
EXPECTED_TOTAL = 12078
accounted_for = (
    len(unique_annotations)
    + len(paintings_ids_unprocessed2)
    + len(paintings_ids_wo_objects2)
)
# Include both numbers in the message so a failure is diagnosable at a glance.
assert accounted_for == EXPECTED_TOTAL, (
    f"Expected {EXPECTED_TOTAL} paintings in total, but {accounted_for} are accounted for"
)

print(f"The final number of paintings for which the annotator did not work correctly: {len(paintings_ids_unprocessed2)}")
print(f"The final number of paintings for which the judge did not work correctly: {len(paintings_ids_to_check2)}")
print(f"The final number of paintings without objects: {len(paintings_ids_wo_objects2)}")
print(f"The final number of annotated paintings: {len(unique_annotations)}")

### 3. Store results

In [None]:
# Write the de-duplicated annotations as one pretty-printed JSON file into
# the annotations directory.
output_path = f"{ANNOTATIONS_PATH}unfiltered_annotations.json"
with open(output_path, "w") as output_file:
    json.dump(unique_annotations, output_file, indent=4)