In [77]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score
import os
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import shutil

### Manually reviewed images from the classified_images_add_on folder (= v4b, with adapted labeling scheme)

In [145]:
# --- Locations, all relative to the notebook's working directory ---

# Root folder of the road-scenery experiment and its metadata subfolder.
classified_images_path = os.path.join("..", "data", "road_scenery_experiment")
data_metadata_path = os.path.join(classified_images_path, "metadata")

# V1_0 dataset root (the roads CSV is read from here) and its "s_1024" image folder.
roads_metadata_path = os.path.join("..", "data", "V1_0")
focus_data_path = os.path.join(roads_metadata_path, "s_1024")
# rails_metadata_path = os.path.join("..", "data", "training", "V12", "metadata")

# V12 folders that are searched as a fallback when an image is not in focus_data_path.
no_focus_1_data_path = os.path.join("..", "data", "training", "V12", "annotated", "no_street")
no_focus_2_data_path = os.path.join("..", "data", "training", "V12", "annotated", "not_recognizable")

# --- Output folders written by this notebook ---
path_b = os.path.join(classified_images_path, "classified_images_add_on_b")
path_c = os.path.join(classified_images_path, "classified_images_add_on_c")
path_test = os.path.join(classified_images_path, "test_classified_images")

# Create the output folders up front; existing folders are left untouched.
for _output_dir in (path_b, path_c, path_test):
    os.makedirs(_output_dir, exist_ok=True)

In [94]:
# File name of the V1_0 annotation table (one row per image, including its road type);
# it is read from roads_metadata_path.
roads_file_name = "v1_0_incl_roadtype.csv"
# rails_file_name = "annotations_combined.csv"

In [117]:
# Load the V1_0 annotations and add a string-typed "image_id" column, so ids can be
# compared with the string ids used in the rest of the notebook.
roads = (
    pd.read_csv(os.path.join(roads_metadata_path, roads_file_name))
    .assign(image_id=lambda frame: frame["mapillary_image_id"].astype("string"))
)
# rails = pd.read_csv(os.path.join(rails_metadata_path, rails_file_name))
roads

Unnamed: 0,mapillary_image_id,user_id,user_name,captured_at,longitude,latitude,train,surface_type,surface_quality,roadtype,image_id
0,1000927687276860,103606985215406,dsmm,1655735677353,13.287805,51.502597,True,concrete,intermediate,road(car),1000927687276860
1,1001349260556407,102468835332129,ber319,1659015876793,14.426536,52.217757,True,unpaved,bad,path (unspecified),1001349260556407
2,1001419624308587,100232345559807,carlheinz,1698249177500,13.457938,52.512914,True,asphalt,good,road(car),1001419624308587
3,1001712924422413,100232345559807,carlheinz,1699107239000,13.386874,52.564970,True,concrete,excellent,path (unspecified),1001712924422413
4,1001853784055707,102627028648372,kartonage,1652268883000,13.389880,52.546260,True,asphalt,excellent,road(car),1001853784055707
...,...,...,...,...,...,...,...,...,...,...,...
9117,435956961754271,103080845264750,teddy73,1657610360090,9.227063,49.181487,False,asphalt,good,road / path,435956961754271
9118,152523294419317,103080845264750,teddy73,1681130193000,9.179631,49.120123,False,asphalt,excellent,road / path,152523294419317
9119,303366287999275,100562292188203,changchun1,1612977717350,9.172729,49.106565,False,unpaved,bad,road / path,303366287999275
9120,563555448482947,100562292188203,changchun1,1654801255232,9.176029,49.098283,False,asphalt,excellent,road / path,563555448482947


In [139]:
# Number of V1_0 images per annotated road type.
images_per_roadtype = roads.groupby("roadtype")["image_id"].count()
images_per_roadtype

roadtype
cycleway                632
cycleway (Hochbord)     108
cycleway (lane)           3
nature path              11
path (unspecified)     2569
pedestrian              820
pedestrian area          12
road / path             613
road(car)              3787
sidewalk                173
Name: image_id, dtype: int64

In [96]:
def count_images_per_version(images, known_ids=None):
    """Print how many of ``images`` are V1_0 ids and how many are not.

    An id is counted as V1_0 when it is contained in ``known_ids``; every other
    id is reported under V12.

    Parameters
    ----------
    images : iterable of str
        Image ids to classify. Consumed in a single pass.
    known_ids : iterable of str, optional
        Ids regarded as V1_0. Defaults to the ``image_id`` column of the
        module-level ``roads`` frame.

    Returns
    -------
    None
        The two counts are printed, not returned.
    """
    if known_ids is None:
        known_ids = roads["image_id"]
    # Build the lookup set once, so each membership test is O(1) rather than a
    # scan over the whole id column.
    v1_0_ids = set(known_ids)

    n_v1_0 = 0
    n_v12 = 0
    for image_id in images:
        if image_id in v1_0_ids:
            n_v1_0 += 1
        else:
            n_v12 += 1

    print(f"V1_0: {n_v1_0}")
    print(f"V12 : {n_v12}")

def copy_images(images, folder_0, folder_1, search_dirs=None, destination_root=None):
    """Copy manually classified images into ``<destination_root>/<folder_0>/<folder_1>``.

    Each id is matched against the file names (extension stripped) found below
    the search directories. The directories are consulted in order and the first
    one that contains a match wins; within one directory the last match in sorted
    walk order is used. The matched file itself is copied, so an image does not
    need to carry a ``.jpg`` extension.

    Parameters
    ----------
    images : iterable of str
        Image ids (file names without extension) to copy.
    folder_0 : str
        First-level folder name below the destination root.
    folder_1 : str
        Second-level folder name below ``folder_0``.
    search_dirs : sequence of str, optional
        Directories searched recursively (symlinks are followed), in priority
        order. Defaults to ``focus_data_path``, ``no_focus_1_data_path`` and
        ``no_focus_2_data_path``.
    destination_root : str, optional
        Root folder of the copies. Defaults to ``path_b``.

    Returns
    -------
    None
        Ids without a matching file are reported on stdout; a final
        "images copied." line is always printed.
    """
    if search_dirs is None:
        search_dirs = (focus_data_path, no_focus_1_data_path, no_focus_2_data_path)
    if destination_root is None:
        destination_root = path_b

    images = list(images)
    wanted = set(images)

    # Index the directory trees once, instead of re-walking them for every image.
    located = {}
    for directory in search_dirs:
        if len(located) == len(wanted):
            break  # everything already found in a higher-priority directory
        matches = {}
        for root, _, fnames in sorted(os.walk(directory, followlinks=True)):
            for fname in sorted(fnames):
                stem = os.path.splitext(fname)[0]
                # Ids found in an earlier directory keep that location.
                if stem in wanted and stem not in located:
                    matches[stem] = os.path.join(root, fname)
        located.update(matches)

    destination_folder_path = os.path.join(destination_root, folder_0, folder_1)
    for image_id in images:
        source = located.get(image_id)
        if source is None:
            print(f"image not found: {image_id}")
            continue
        # Created lazily so no empty folder appears when nothing was found.
        os.makedirs(destination_folder_path, exist_ok=True)
        shutil.copy(source, destination_folder_path)
    print("images copied.")

#### 1_1_rails_on_road

In [97]:
# Manually reviewed image ids for this class.
images = ['221161713149925', '169832331809758', '253370653147368']
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_1_road', '1_1_rails_on_road'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 2
V12 : 1
images copied.


#### 1_1_road_general

In [98]:
images = [
    '116226134367703',
    '848854242505441',
    '918908606618366',
    '950024289091801',
    '1050942202320681',
    '344993797586136',
    '1829362023906325',
    '183985896927301',
    '204079172180318',
    '266120208588107',
    '266949115758514',
    '279256010593903',
    '300945445072687',
    '451534342810972',
    '519968362346653',
    '753555371993719',
    '921669361831221',
    '3908271332733799',
    '3987674981309635',
    '173780227985752',
    '173948484732553',
    '196735465622056',
    '218718489705578',
    '286196716535769',
    '477515223457749',
    '582003383203689',
    '614095730069612',
    '1106452096555277',
    '2868254156774123',
    '215445400383578',
    '477720423548792',
    '1154482918347377',
    '3482275121874721',
    '796521804600796',
    '1541644216575062',
    '320173627129723',
    '1025839491875203',
    
]
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_1_road', '1_1_road_general'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 23
V12 : 14
images copied.


#### 1_2_cycleway

In [99]:
# Manually reviewed image ids for this class.
images = [
    '972054073520951', '978835482856480', '1215094608930318',
    '480909003363888', '301607931491044', '2790059851304330',
    '510299086656980', '2249463938545633', '1359774084884097',
]
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_2_bicycle', '1_2_cycleway'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 8
V12 : 1
images copied.


#### 1_2_lane

In [100]:
# Manually reviewed image ids for this class.
images = [
    '624988475969800', '989857672036153', '3279640285599984',
    '753310445346502', '1407324080073515',
]
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_2_bicycle', '1_2_lane'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 5
V12 : 0
images copied.


#### 1_3_footway

In [101]:
images = [
    '1839147786254721',
    '136629909310612',
    '245602324194279',
    '419574676987277',
    '481582810113525',
    '991428678263320',
    '197494732097614',
    '286671793116773',
    '299291965157820',
    '326816338874331',
    '499839418126899',
    '196798938945460',
    '221553542747918',
    '869315276953721',
    '473831378230203',
    '174612247947003',
    '290847905877203',
    '378034133460413',
    '499677204516456',
    '535107560838134',
    '926212044609318',
    '945279133269517',
    '1103271800178577',
    '1148622429040835',
    '319481369561010',
    '812781093559893',
    '147832250559631',

]
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_3_pedestrian', '1_3_footway'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 24
V12 : 3
images copied.


#### 1_3_railway_platform

In [102]:
# Manually reviewed image ids for this class (a single image).
images = ['484658582793742']
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_3_pedestrian', '1_3_railway_platform'

# Report the V1_0 / V12 split, then copy the image into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 0
V12 : 1
images copied.


#### 1_4_path_unspecified

In [103]:
images = [
    '1026867184724352',
    '2592796111026066',
    '2761012494163934',
    '2854159114839880',
    '442186417698926',
    '487555790173627',
    '218000296514945',
    '219498599981563',
    '222864689372166',
    '244895674067208',
    '487991572804462',
    '506508233820143',
    '463596594870396',
    '124060540040970',
    '223335419220306',
    '241411111107395',
    '263417145477211',
    '328372752084666',
    '406182564813130',
    '517956436289045',
    '737581974277337',
    '747947762539432',
    '1075801136278537',
    '1946275238856744',
    '2804168709834725',
    '115272391257642',
    '116688777432358',
    '150418520289088',
    '159666206102483',
    '293602322216891',
    '294585238740754',
    '298866831687977',
    '302893904569363',
    '367072628017330',
    '379871647104999',
    '446304400560943',
    '489200056203229',
    '499910875082102',
    '529816658428785',
    '900838930761517',
    '1283390972210717',
    '4412362475462200',
    '214962446826304',
    '231778258745160',
    '332147712156247',
    '369922577728324',
    '387727523300875',
    '502981427947417',
    '510760020525622',
    '532060294833202',
    '968744757200696',
    '972874383554080',
    '477090190196309',
    '291853759271655',
    '295433112116798',
    '295969965405543',
    '312308850494391',
    '390634795785055',
    '767159560840267',
    '775730993067142',
    '115596197541937',
    '147155400663559',
    '149580297072234',
    '161085605952805',
    '177035940937162',
    '211837607078863',
    '229997005171631',
    '297364688463362',
    '336639397809408',
    '465828381146658',
    '527611928400376',
    '533989854277826',
    '748644372493436',
    '854499018469277',
    '862926044294045',
    '917164212396751',
    '963070594462537',
    '1101994970276453',
    '1208204933182414',
    '1403630246682750',
    '1424818291203908',
    '1426382634372444',
    '3460673977544242',
    '3497809510452245',
    '3607652592671079',
    '4301645763220600',
    '148645153931760',
    '193172985988756',
    '326695868884464',
    '440440181191674',
    '450855000019304',
    '518554816231216',
    '698649858502682',
    '814950900279217',
    '949587752510420',
    '979924692751693',
    '4022491164502097',
    '180217147289257',
    '472687841714738',
    '789418778880343',
    '1580934648981725',
    '2792191921093322',

]
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '1_4_path', '1_4_path_unspecified'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 80
V12 : 22
images copied.


#### 2_1_all

In [104]:
images = [
    '799077210983910',
    '1375697869572806',
    '388685622320862',
    '614378786172794',
    '753276885302923',
    '2892226361050163',
    '5462830377092634',
    '150287681206019',
    '213893441055859',
    '217046179960564',
    '219182906374670',
    '220854309805609',
    '221613619297854',
    '241075517817065',
    '281343288218409',
    '290615755884615',
    '290647182725960',
    '292308855860665',
    '293772545568960',
    '471160640609447',
    '626801255067300',
    '679336479935005',
    '694568368405325',
    '698056955233999',
    '761178301250563',
    '774084846620088',
    '894431858639782',
    '908532619931153',
    '933077960872514',
    '2806083429705411',
    '4042645752457954',
    '6273449889374724',
    '275700997640876',
    '297267685295531',
    '467515047872343',
    '522383155432702',
    '561243495263912',
    '726159688466990',
    '793180855795498',
    '180689270633134',
    '196310018979669',
    '308681570756945',
    '314571826842492',
    '461836511711416',
    '484206869516268',
    '563614909181287',
    '583781422600969',
    '749419355731155',
    '826858871257367',
    '1055988138140399',
    '1380571488982401',
    '4473506466007149',
    '232730755314875',
    '705360803771160',
    '176078024400410',
    '258242096028720',
    '319083883015178',
    '475570117291326',
    '513751499808441',
    '527181768456836',
    '1109077039584930',
    '1366664490507089',
    '3921022881358538',
    '4118228834905630',

]
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '2_1_no_focus_no_street', '2_1_all'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 30
V12 : 34
images copied.


#### 2_1_surface_covered

In [105]:
# Manually reviewed image ids for this class.
images = ['1034099547123945', '1390553837972341', '3454581384778856']
# Two-level destination folder: <folder_0>/<folder_1>.
folder_0, folder_1 = '2_1_no_focus_no_street', '2_1_surface_covered'

# Report the V1_0 / V12 split, then copy the images into the class folder.
count_images_per_version(images)
copy_images(images, folder_0, folder_1)

V1_0: 0
V12 : 3
images copied.


### Remaining V1_0 images sorted by road type annotation in Label Studio

In [108]:
# CSV exports holding the road-scenery annotations; they are read from data_metadata_path.
annotated_scenery_files = ['annotations_scenery_v3.csv', 'annotations_scenery_v4a.csv', 'annotations_scenery_v4b.csv']

In [110]:
# Read every annotation export and stack them with a single concat. Growing a
# frame inside a loop copies all previous rows on each iteration, and starting
# from an empty DataFrame triggers pandas' empty-entry FutureWarning.
annot = pd.concat(
    [pd.read_csv(os.path.join(data_metadata_path, file)) for file in annotated_scenery_files],
    ignore_index=True,
)
# Occurrences per image_id; a count above 1 would mean an image was annotated in
# more than one export.
annot.set_index('image_id').index.value_counts()

image_id
1002484893825364    1
1205849419855089    1
123319973327041     1
122799386488232     1
1226425001093665    1
                   ..
321890382776443     1
320675572788752     1
320672532916325     1
319113172920647     1
933077960872514     1
Name: count, Length: 6597, dtype: int64

In [115]:
# Keep only the id and the scenery label, with the id as string so it compares
# equal to roads["image_id"]. Converting on the selection in one expression avoids
# assigning into a column slice, which raises SettingWithCopyWarning.
annot = annot[["image_id", "road_scenery"]].astype({"image_id": "string"})

In [147]:
print(roads.shape)
# V1_0 images without a scenery annotation so far, indexed by their string id.
is_annotated = roads["image_id"].isin(annot["image_id"])
remaining = roads.loc[~is_annotated].set_index("image_id")
# Number of such images per road type.
remaining.groupby("roadtype")["mapillary_image_id"].count()

(9122, 11)


roadtype
cycleway               271
cycleway (Hochbord)     12
path (unspecified)     576
pedestrian             554
pedestrian area          2
road / path            613
road(car)              954
sidewalk               149
Name: mapillary_image_id, dtype: int64

In [135]:
# Road types whose not-yet-annotated images are sorted into one folder per type.
selected_roadtypes = (
    'cycleway',
    'cycleway (Hochbord)',
    'cycleway (lane)',
    'pedestrian',
    'sidewalk',
)

# Walk the V1_0 image tree and copy each matching image to <path_c>/<roadtype>/.
for folder, _, file_names in sorted(os.walk(focus_data_path, followlinks=True)):
    for file_name in sorted(file_names):
        stem = os.path.splitext(file_name)[0]
        if stem not in remaining.index:
            continue  # image is already annotated or not part of V1_0
        roadtype = remaining.loc[stem]["roadtype"]
        if roadtype not in selected_roadtypes:
            continue
        target_dir = os.path.join(path_c, roadtype)
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy(os.path.join(folder, file_name), target_dir)

print("images copied.")

images copied.


#### Remove test images from classified_images_add_on_c (into test_classified_images)

In [151]:
# Move every image whose metadata has train == False out of the path_c tree into
# the mirrored sub-folder below path_test. A plain copy would leave the test
# images in path_c as well, contradicting the "images deleted." report below.
# sorted() materialises the whole walk first, so moving files while iterating is safe.
for root, _, fnames in sorted(os.walk(path_c, followlinks=True)):
    # The destination mirrors the folder layout below path_c.
    destination_folder_path = os.path.join(path_test, os.path.relpath(root, path_c))
    for fname in sorted(fnames):
        image_id = os.path.splitext(fname)[0]
        if image_id not in remaining.index:
            continue
        if remaining.loc[image_id]["train"]:
            continue  # training image: stays in path_c
        os.makedirs(destination_folder_path, exist_ok=True)
        # Pass an explicit target file path: with a directory as target,
        # shutil.move raises if a file of that name already exists there
        # (e.g. left over from an earlier run that only copied).
        shutil.move(
            os.path.join(root, fname),
            os.path.join(destination_folder_path, fname),
        )

print("images deleted.")

images deleted.
