In [61]:
import os

import numpy as np
import pandas as pd
import json

from sklearn.metrics import cohen_kappa_score
import krippendorff as kd

In [62]:
version = "v5"

In [74]:
path = os.path.join("/", "Users", "alexandra", "Nextcloud-HTW", "SHARED", "SurfaceAI", "data", "mapillary_images", "training", f"{version}_c0", "metadata")
files = [
    "annotations_a1.csv",
    "annotations_a2.csv",
    "annotations_a3.csv",
]

nostreet_values = ["(mainly) no street visible", "surface / smoothness not visible"]
unsure_revise_value = "unsure - please revise"

In [75]:
df = pd.DataFrame()
for file in files:
    df = pd.concat([df, pd.read_csv(os.path.join(path, file))])

In [76]:
df["image_id"] = df.image.apply(
lambda x: str.split(
    x, "https://freemove.f4.htw-berlin.de/data/local-files/?d=v5/"
)[1]).apply(lambda x: int(str.split(x, ".jpg")[0]))

In [77]:
df.image_id.nunique()

180

In [78]:
len(df[df["nostreet"] == "unsure - please revise"].image_id.unique())

12

12 images were at least one annotator said "please revise".

In [81]:
df[~df.nostreet.isna()].image_id.nunique()

61

Another 49 images excluded by at least one annotator.

In [83]:
df.loc[~df.nostreet.isna(), "surface"] = "nostreet"
df.loc[~df.nostreet.isna(), "smoothness"] = "nostreet"

In [84]:
nostreet_ids = df[~df.nostreet.isna()].image_id.unique()
df = df[~df.image_id.isin(nostreet_ids)]

In [85]:
df.image_id.nunique()

119

In [89]:
# ensure every annotator has the same image count
df.annotator.value_counts()

annotator
1    119
5    119
4    119
Name: count, dtype: int64

In [90]:
# compare predictions 
image_id_counts = df.groupby(["image_id"]).size()
image_ids = image_id_counts[image_id_counts == 3].index
grouped_surface = df[df.image_id.isin(image_ids) ].groupby(["image_id", "surface"]).size()

# same surface rating 
round(100* len(grouped_surface[grouped_surface == 3]) / len(grouped_surface), 2)

91.94

In [91]:
# same smoothness rating
grouped_smoothness = df.groupby(["image_id", "smoothness"]).size()
round(100* len(grouped_smoothness[grouped_smoothness == 3]) / len(grouped_smoothness), 2)

31.49

In [93]:
# quality to rank
df["smoothness_int"] = df.smoothness.apply(lambda x: 
                                           0 if x == "ver_bad" 
                                           else 1 if x == "bad" 
                                           else 2 if x == "intermediate" 
                                           else 3 if x == "good" 
                                           else 2.5 if x == "nostreet"
                                           else 4)

In [95]:
rater1 = df[(df.image_id.isin(image_ids)) & (df.annotator == annotator1)].sort_values(by=["image_id"])
rater2 = df[(df.image_id.isin(image_ids) )& (df.annotator == annotator2)].sort_values(by=["image_id"])
rater3 = df[(df.image_id.isin(image_ids) )& (df.annotator == annotator3)].sort_values(by=["image_id"])
krippendorfs_alpha_surf = kd.alpha(np.array([
                rater1.surface.tolist(), 
                rater2.surface.tolist(),
                rater3.surface.tolist()]), level_of_measurement='nominal')
krippendorfs_alpha_smooth = kd.alpha(np.array([
                rater1.smoothness.tolist(), 
                rater2.smoothness.tolist(),
                rater3.smoothness.tolist()]), level_of_measurement='nominal')
krippendorfs_alpha_smooth_int = kd.alpha(np.array([
                rater1.smoothness_int.tolist(), 
                rater2.smoothness_int.tolist(),
                rater3.smoothness_int.tolist()]), level_of_measurement='ordinal')

print(f"Krippendorfs alpha for surface: {round(krippendorfs_alpha_surf, 2)}")
print(f"Krippendorfs alpha for smoothness: {round(krippendorfs_alpha_smooth, 2)}")
print(f"Krippendorfs alpha for smoothness as ordinal: {round(krippendorfs_alpha_smooth_int, 2)}")

Krippendorfs alpha for surface: 0.96
Krippendorfs alpha for smoothness: 0.52
Krippendorfs alpha for smoothness as ordinal: 0.74
