In [12]:
import json

In [16]:
# Load the JSON files
def load_json(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # JSON exchanged between systems is UTF-8 encoded. Without an explicit
    # encoding, open() uses the platform locale, which can fail or mis-decode
    # non-ASCII text (e.g. on Windows code pages).
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Extract annotations from the JSON data
def extract_annotations(data, is_soham=False):
    """Build an ``{image_id: label}`` mapping from one annotator's export.

    Two record layouts are supported:

    * default: the file name is read from ``item['file_upload']`` and the
      label from ``item['annotations'][0]['result'][0]['value']['choices'][0]``;
    * ``is_soham=True``: the file name is read from ``item['image']`` and the
      label from ``item['choice']``.

    The image ID is the part of the file name after the last ``'-'`` and
    before the first ``'.'`` that follows it, e.g. ``"img_540"`` from
    ``"e2495568-img_540.jpg"``.

    Args:
        data: Iterable of record dicts in one of the layouts above.
        is_soham: Select the flat ``image`` / ``choice`` layout.

    Returns:
        Dict mapping image ID to the chosen label. If an image ID occurs
        more than once, the last record wins.
    """
    def _image_id(path):
        # Keep what follows the last '-' and cut at the first '.'.
        tail = path.split('-')[-1]
        return tail.split('.')[0]

    labels_by_image = {}
    for record in data:
        if not is_soham:
            key = _image_id(record['file_upload'])
            first_result = record['annotations'][0]['result'][0]
            label = first_result['value']['choices'][0]
        else:
            key = _image_id(record['image'])
            label = record['choice']
        labels_by_image[key] = label
    return labels_by_image

# Combine annotations from all annotators
def combine_annotations(annotators_data):
    """Tally, per image, how many annotators chose each label.

    Args:
        annotators_data: Dict mapping an annotator name to that annotator's
            ``{image_id: label}`` dict. The annotator names are not used.

    Returns:
        Dict mapping image ID to ``{'Trucks': count, 'No Trucks': count}``.

    Raises:
        KeyError: If a label is neither ``'Trucks'`` nor ``'No Trucks'``.
    """
    tally = {}
    for labels in annotators_data.values():
        for image_id, choice in labels.items():
            # First sighting of an image starts both label counters at zero.
            per_image = tally.setdefault(image_id, {'Trucks': 0, 'No Trucks': 0})
            per_image[choice] = per_image[choice] + 1
    return tally

def fleiss_kappa(combined_annotations, num_annotators):
    """Compute Fleiss' Kappa for a set of per-image label counts.

    Args:
        combined_annotations: Dict mapping image ID to a
            ``{category: count}`` dict, where ``count`` is the number of
            annotators who chose that category for the image.
        num_annotators: Number of ratings every image received. Each image's
            counts must sum to exactly this value.

    Returns:
        The kappa statistic ``(P_a - P_e) / (1 - P_e)`` as a float. When every
        rating across all images falls into a single category, chance
        agreement is 1 and kappa is undefined; ``float('nan')`` is returned
        in that case.

    Raises:
        ValueError: If there are no images, fewer than two annotators, or an
            image whose counts do not sum to ``num_annotators``.
    """
    N = len(combined_annotations)  # Number of images
    if N == 0:
        raise ValueError("Fleiss' Kappa needs at least one annotated image")
    if num_annotators < 2:
        raise ValueError("Fleiss' Kappa needs at least two annotators")

    total_agreement = 0   # sum over images and categories of n_ij * (n_ij - 1)
    category_totals = {}  # ratings per category, summed over all images
    for image_id, counts in combined_annotations.items():
        # The P_a formula below assumes a fixed number of ratings per image;
        # a missing or duplicated rating would silently skew the statistic.
        ratings = sum(counts.values())
        if ratings != num_annotators:
            raise ValueError(
                f"image {image_id!r} has {ratings} ratings, "
                f"expected {num_annotators}"
            )
        for category, count in counts.items():
            total_agreement += count * (count - 1)
            category_totals[category] = category_totals.get(category, 0) + count

    # P_a (observed agreement)
    P_a = total_agreement / (N * num_annotators * (num_annotators - 1))

    # P_e (expected agreement by chance)
    total_assignments = sum(category_totals.values())
    if max(category_totals.values()) == total_assignments:
        # Only one category was ever used: P_e == 1, so the ratio is 0/0.
        return float('nan')
    P_e = sum((category_total / total_assignments) ** 2 for category_total in category_totals.values())

    # Calculate Fleiss' Kappa
    return (P_a - P_e) / (1 - P_e)

In [17]:
# Laksh's export: records carry the file name under "file_upload"
laksh_data = load_json('CV_laksh.json')
laksh_annotations = extract_annotations(laksh_data)

# Soham's export uses the flat "image" / "choice" layout
soham_data = load_json('CV_soham.json')
soham_annotations = extract_annotations(soham_data, is_soham=True)

# Tanish's export: same layout as Laksh's
tanish_data = load_json('CV_tanish.json')
tanish_annotations = extract_annotations(tanish_data)

# One {image_id: label} dict per line, in the order Laksh, Soham, Tanish
print(laksh_annotations, soham_annotations, tanish_annotations, sep='\n')

{'img_540': 'Trucks', 'img_541': 'Trucks', 'img_543': 'No Trucks', 'img_544': 'No Trucks', 'img_545': 'No Trucks', 'img_546': 'Trucks', 'img_547': 'Trucks', 'img_548': 'No Trucks', 'img_549': 'Trucks', 'img_550': 'No Trucks', 'img_551': 'Trucks', 'img_553': 'Trucks', 'img_554': 'No Trucks', 'img_555': 'No Trucks', 'img_556': 'No Trucks', 'img_557': 'No Trucks', 'img_558': 'No Trucks', 'img_559': 'No Trucks'}
{'img_540': 'Trucks', 'img_541': 'No Trucks', 'img_543': 'No Trucks', 'img_544': 'No Trucks', 'img_545': 'No Trucks', 'img_546': 'No Trucks', 'img_547': 'Trucks', 'img_548': 'No Trucks', 'img_549': 'Trucks', 'img_550': 'No Trucks', 'img_551': 'No Trucks', 'img_553': 'Trucks', 'img_554': 'No Trucks', 'img_555': 'No Trucks', 'img_556': 'No Trucks', 'img_557': 'No Trucks', 'img_558': 'No Trucks', 'img_559': 'Trucks'}
{'img_540': 'Trucks', 'img_541': 'Trucks', 'img_543': 'No Trucks', 'img_544': 'No Trucks', 'img_545': 'No Trucks', 'img_546': 'Trucks', 'img_547': 'Trucks', 'img_548': 'N

In [18]:
# Combine annotations: annotator name -> that annotator's {image_id: label} dict
annotators_data = {
    'Laksh': laksh_annotations,
    'Soham': soham_annotations,
    'Tanish': tanish_annotations
}
combined_annotations = combine_annotations(annotators_data)

# Calculating Fleiss' Kappa
# Take the rater count from the mapping itself so it cannot drift out of
# sync when an annotator is added to or removed from annotators_data.
num_annotators = len(annotators_data)
kappa = fleiss_kappa(combined_annotations, num_annotators)

print(f"Fleiss' Kappa: {kappa:.4f}")

Fleiss' Kappa: 0.5325
