In [1]:
import pandas as pd

In [3]:

def clean_smoothness(metadata):
    """Collapse raw OSM smoothness tags into a reduced set of smoothness labels.

    Leading/trailing whitespace is stripped from "smoothness" first. The
    cleaned labels are then written to the column "smoothness_clean":

    * "horrible", "very_horrible", "impassable" -> "very_bad"
    * "perfect", "very_good" -> "excellent"
    * every other value is carried over unchanged

    The merged label is spelled "very_bad" (with an underscore) so that it
    coincides with the native "very_bad" tag, which passes through unchanged.
    Spelling it differently would yield two separate classes for the same
    label and break equality comparisons against "very_bad" labels.

    Args:
        metadata (df): dataframe with image metadata, including column
            "smoothness". The dataframe is modified in place.

    Returns:
        df: the same dataframe with cleaned smoothness column "smoothness_clean"
    """
    # Target label -> raw tags that are merged into it.
    merged_labels = {
        "very_bad": ["horrible", "very_horrible", "impassable"],
        "excellent": ["perfect", "very_good"],
    }

    metadata["smoothness"] = metadata["smoothness"].str.strip()

    cleaned = metadata["smoothness"]
    for target_label, raw_tags in merged_labels.items():
        cleaned = cleaned.replace(raw_tags, target_label)
    metadata["smoothness_clean"] = cleaned
    return metadata


def clean_surface(metadata):
    """Map raw OSM surface tags onto a reduced set of surface classes.

    Whitespace around the values in "surface" is stripped first. The merged
    classes are then written to the column "surface_clean"; tags that belong
    to none of the groups below are carried over unchanged.

    Args:
        metadata (df): dataframe with image metadata, including column "surface".
            The dataframe is modified in place.

    Returns:
        df: the same dataframe with cleaned surface column "surface_clean"
    """
    # (raw tags, merged class) pairs, applied one after another.
    surface_groups = (
        (
            [
                "compacted",
                "gravel",
                "ground",
                "fine_gravel",
                "dirt",
                "grass",
                "earth",
                "sand",
            ],
            "unpaved",
        ),
        (["cobblestone", "unhewn_cobblestone"], "sett"),
        (["concrete:plates", "concrete:lanes"], "concrete"),
    )

    metadata["surface"] = metadata["surface"].str.strip()

    merged = metadata["surface"]
    for raw_tags, surface_class in surface_groups:
        merged = merged.replace(raw_tags, surface_class)
    metadata["surface_clean"] = merged
    return metadata

In [2]:
# Combined annotation table of dataset version v12
# (an earlier run read the V6 export from the same training/ folder).
annotations = pd.read_csv(
    filepath_or_buffer="/Users/alexandra/Nextcloud-HTW/SHARED/SurfaceAI/data/mapillary_images/training/v12/metadata/annotations_combined.csv",
    index_col=False,
)

In [5]:
# Number of annotated images per (surface, smoothness) combination.
annotations.groupby(by=["surface", "smoothness"]).size()

surface        smoothness  
asphalt        bad              123
               excellent        783
               good            1334
               intermediate     623
concrete       bad               54
               excellent        189
               good             340
               intermediate     249
paving_stones  bad               31
               excellent        342
               good             693
               intermediate     197
sett           bad              518
               good              99
               intermediate     659
unpaved        bad              336
               intermediate     305
               very_bad         158
dtype: int64

In [5]:
# Read the v5 and v4 image-selection metadata and stack them into one table.
metadata, metadata2 = (
    pd.read_csv(selection_csv, index_col=False)
    for selection_csv in (
        "/Users/alexandra/Documents/GitHub/dataset_creation/data/v5/train_image_selection_metadata.csv",
        "/Users/alexandra/Documents/GitHub/dataset_creation/data/v4/v4_train_image_selection_metadata.csv",
    )
)
metadata_full = pd.concat([metadata, metadata2], ignore_index=True)

In [6]:

# Keep only the metadata rows whose image id occurs in the annotations.
is_annotated = metadata_full["id"].isin(annotations["image_id"])
metadata = metadata_full[is_annotated]

In [7]:
# Columns to aggregate by: `grouping_level` names the annotation label column(s),
# `grouping_level_` the matching "*_clean" OSM column(s).
grouping_level, grouping_level_ = ["surface"], ["surface_clean"]

In [8]:
# Class sizes of the annotated dataset, grouped by the annotation label(s).
# (original_class_counts / remaining_class_counts were previously read from
# CSV files under data/v5 at this point; that code is no longer used here.)
annotation_sizes = annotations.groupby(by=grouping_level).size()
annotated_class_counts = pd.DataFrame(annotation_sizes)

In [9]:
# Put the annotated labels next to the cleaned OSM labels of the same image
# (inner join on the image id, so only images present in both tables remain).
annotation_labels = annotations[["image_id", "surface", "smoothness"]].set_index("image_id")
osm_labels = metadata[["id", "surface_clean", "smoothness_clean"]].set_index("id")
df = annotation_labels.join(osm_labels, how="inner", rsuffix='_osm')

In [10]:
# Count, per OSM class, the images whose annotated surface differs from the OSM surface.
surface_mismatch = df["surface"] != df["surface_clean"]
incorrect_surface = pd.DataFrame(
    df[surface_mismatch].groupby(grouping_level_).size()
)

In [11]:
# Count the images whose annotation agrees with the OSM tag. The surface must
# always match; the smoothness must match too unless grouping is by surface only.
agrees_with_osm = df["surface"] == df["surface_clean"]
if grouping_level != ["surface"]:
    agrees_with_osm = agrees_with_osm & (df["smoothness"] == df["smoothness_clean"])
label_as_osm = pd.DataFrame(df[agrees_with_osm].groupby(grouping_level).size())

In [12]:
# Class sizes of the full (v5 + v4) image selection, grouped by the cleaned OSM tag(s).
selection_sizes = metadata_full.groupby(by=grouping_level_).size()
original_class_counts = pd.DataFrame(selection_sizes)

In [13]:
# Class counts stored in the "remaining classes after V12" file.
remaining_class_counts = pd.read_csv(
    filepath_or_buffer="/Users/alexandra/Documents/GitHub/dataset_creation/data/v5/remaining_classes_after_V12.csv",
    index_col=False,
)

In [14]:
# Sum the remaining counts per group at the current grouping level.
remaining_by_class = remaining_class_counts.groupby(by=grouping_level_)
remaining_class_counts = remaining_by_class.sum()

In [15]:
# Give the single count column of each frame a descriptive name before joining.
for counts_frame, counts_label in (
    (annotated_class_counts, "ds_counts"),
    (original_class_counts, "orig_counts"),
    (label_as_osm, "label_as_osm"),
    (incorrect_surface, "incorrect_surface"),
):
    counts_frame.columns = [counts_label]

In [16]:
# OSM tag counts for Germany.
osm_tag_counts = pd.read_csv(
    filepath_or_buffer="/Users/alexandra/Nextcloud-HTW/SHARED/SurfaceAI/data/OSM/osm_tag_counts_germany.csv"
)

In [17]:
# Remove surrounding whitespace from both tag columns.
for tag_column in ("surface", "smoothness"):
    osm_tag_counts[tag_column] = osm_tag_counts[tag_column].str.strip()

In [18]:
# Add the "surface_clean" and "smoothness_clean" columns (surface first, then smoothness).
osm_tag_counts = clean_smoothness(clean_surface(osm_tag_counts))

In [19]:
# Drop the raw tag columns (and the cleaned smoothness when grouping by surface
# only), then sum the remaining columns per group.
drop_col = ["surface", "smoothness"]
if grouping_level == ["surface"]:
    drop_col.append("smoothness_clean")

osm_tag_counts = (
    osm_tag_counts
    .drop(columns=drop_col)
    .groupby(grouping_level_)
    .sum()
)

In [20]:
# Share of each group in the total OSM tag count, in percent with two decimals.
osm_ct_total = sum(osm_tag_counts["ct"])
osm_tag_counts["OSM_share"] = round(osm_tag_counts["ct"] / osm_ct_total * 100, 2)

In [21]:
# Assemble one table per class: start from the original class counts and
# left-join every other count frame onto it; classes missing from a frame get 0.
cts = original_class_counts
for class_counts in (
    remaining_class_counts,
    annotated_class_counts,
    incorrect_surface,
    label_as_osm,
):
    cts = cts.join(class_counts, on=grouping_level_, how="left")
cts = cts.join(osm_tag_counts, how="left").fillna(0)

In [22]:
# Derived per-class statistics. All ratio columns below are percentages.

# orig_counts minus remaining_counts.
cts["annotated_counts"] = cts["orig_counts"] - cts["remaining_counts"]
# The same, minus the images whose annotated surface differs from the OSM surface.
cts["annotated_counts_only_correct_surface"] = (
    cts["annotated_counts"] - cts["incorrect_surface"]
)

# Scale to percent first and round afterwards: rounding the fraction and then
# multiplying by 100 reintroduces binary floating-point noise into the stored
# (and exported) value.
cts["annotation_ratio"] = (cts["ds_counts"] / cts["annotated_counts"] * 100).round(1)

# Share of images labelled exactly as tagged in OSM, relative to three denominators.
cts["correct_OSM/ds_counts"] = (cts["label_as_osm"] / cts["ds_counts"] * 100).round()
cts["correct_OSM/annotation_counts"] = (
    cts["label_as_osm"] / cts["annotated_counts"] * 100
).round(2)
cts["correct_OSM/annotation_counts_only_correct_surface"] = (
    cts["label_as_osm"] / cts["annotated_counts_only_correct_surface"] * 100
).round(2)

# The two shares above divided by OSM_share.
cts["times_better"] = (
    cts["correct_OSM/annotation_counts"] / cts["OSM_share"]
).round(2)
cts["times_better_correct_surface"] = (
    cts["correct_OSM/annotation_counts_only_correct_surface"] / cts["OSM_share"]
).round(2)

In [23]:
# Overview table: counts, annotation ratio and agreement with OSM per class.
overview_columns = [
    "ds_counts",
    "annotated_counts",
    "annotated_counts_only_correct_surface",
    "annotation_ratio",
    "OSM_share",
    "label_as_osm",
    "correct_OSM/ds_counts",
    "correct_OSM/annotation_counts",
    "correct_OSM/annotation_counts_only_correct_surface",
]
cts.loc[:, overview_columns]

Unnamed: 0_level_0,ds_counts,annotated_counts,annotated_counts_only_correct_surface,annotation_ratio,OSM_share,label_as_osm,correct_OSM/ds_counts,correct_OSM/annotation_counts,correct_OSM/annotation_counts_only_correct_surface
surface_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
asphalt,2864,4353,3342,65.8,54.43,1951,68.0,44.82,58.38
concrete,832,2101,1465,39.6,2.33,613,74.0,29.18,41.84
paving_stones,1263,5188,4328,24.3,15.77,982,78.0,18.93,22.69
sett,1276,3559,3239,35.9,4.04,999,78.0,28.07,30.84
unpaved,799,1571,1252,50.9,22.19,575,72.0,36.6,45.93


In [81]:
# OSM share next to the agreement shares and their ratio to the OSM share.
improvement_columns = [
    "OSM_share",
    "correct_OSM/annotation_counts",
    "correct_OSM/annotation_counts_only_correct_surface",
    "times_better",
    "times_better_correct_surface",
]
cts.loc[:, improvement_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,OSM_share,correct_OSM/annotation_counts,correct_OSM/annotation_counts_only_correct_surface,times_better,times_better_correct_surface
surface_clean,smoothness_clean,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
asphalt,bad,0.69,6.16,8.0,8.93,11.59
asphalt,excellent,20.31,41.29,47.94,2.03,2.36
asphalt,good,29.29,42.58,51.16,1.45,1.75
asphalt,intermediate,4.05,15.4,22.08,3.8,5.45
concrete,bad,0.23,6.38,7.69,27.74,33.43
concrete,excellent,0.37,18.93,35.51,51.16,95.97
concrete,good,0.94,18.15,22.44,19.31,23.87
concrete,intermediate,0.79,20.59,29.01,26.06,36.72
paving_stones,bad,0.22,1.41,1.66,6.41,7.55
paving_stones,excellent,2.98,11.3,13.69,3.79,4.59


In [227]:
# Smallest "times_better" value over all classes.
cts["times_better"].min()

1.45

In [231]:
# Mean "times_better" value over all classes.
cts["times_better"].mean()

11.248333333333333

In [84]:
# Median "times_better" value over all classes.
cts["times_better"].median()

6.1

In [229]:
# Largest "times_better" value over all classes.
cts["times_better"].max()

51.16

In [82]:
# Mean "times_better_correct_surface" value over all classes.
cts["times_better_correct_surface"].mean()

15.841666666666665

In [83]:
# Median "times_better_correct_surface" value over all classes.
cts["times_better_correct_surface"].median()

7.365

In [49]:
# Persist the assembled statistics table.
# NOTE(review): the file name says "V9" while this notebook reads the v12
# annotations and remaining_classes_after_V12.csv - confirm the target name.
cts.to_csv(
    path_or_buf="/Users/alexandra/Documents/GitHub/dataset_creation/data/v5/V9_analysis.csv"
)

In [53]:
# How many annotations have a value in "nostreet" (True) versus none (False).
has_nostreet_value = annotations["nostreet"].notna()
has_nostreet_value.value_counts()

nostreet
False    6725
True     2654
Name: count, dtype: int64