In [70]:
import os
import pandas as pd
import geopandas as gpd
import requests
import json

output_file = os.path.join("res/dataset_PS/centrum_west_small/backprojected/object_locations.csv")

'''Get labels from Project Sidewalk'''

base_url = 'https://sidewalk-amsterdam.cs.washington.edu/v2/access/attributesWithLabels?' + \
'lat1={}&lng1={}&lat2={}&lng2={}' 
whole = (52.303, 4.8, 52.425, 5.05)
#centrum_west = (52.364925, 4.87444, 52.388692, 4.90641)
coords = whole
url = base_url.format(*coords)
local_dump = url.replace('/', '|')
try:
    project_sidewalk_labels = json.load(open(local_dump, 'r'))
except Exception as e:
    print("Couldn't load local dump")
    project_sidewalk_labels = requests.get(url.format(*coords)).json()
    json.dump(project_sidewalk_labels, open(local_dump, 'w'))

od_labels_path = pd.read_csv(output_file)
# Convert od_labels_path to a geopandas dataframe
od_labels_df = gpd.GeoDataFrame(od_labels_path, geometry=gpd.points_from_xy(od_labels_path.lon, od_labels_path.lat))
print('Number of od_labels: {}'.format(len(od_labels_df)))

# For fairness, remove duplicates from od_labels_df based on pano_id
od_labels_df = od_labels_df.drop_duplicates(subset=['pano_id'])
print('Number of od_labels after removing duplicates: {}'.format(len(od_labels_df)))

Number of od_labels: 1940
Number of od_labels after removing duplicates: 970


In [72]:
ps_labels_df = gpd.GeoDataFrame.from_features(project_sidewalk_labels['features'])
print('Number of labels before filtering: {}'.format(len(ps_labels_df)))
ps_labels_df = ps_labels_df[ps_labels_df['label_type'] == 'Obstacle']
print('Number of labels after filtering: {}'.format(len(ps_labels_df)))
# Filter ps_labels so that it only contains labels where "gsv_panorama_id" is in the list of pano_ids in od_labels
ps_labels_df = ps_labels_df[ps_labels_df["gsv_panorama_id"].isin(od_labels_df["pano_id"])]
print('Number of PS labels after filtering per panos: {}'.format(len(ps_labels_df)))

# create a circle buffer around the points in ps_labels_df
buffer_distance = 0.000106 # meters    
gpd_ps_labels_df = gpd.GeoDataFrame(geometry=ps_labels_df.geometry.buffer(buffer_distance))

od_labels_df = od_labels_df.reset_index(drop=True)
gpd_ps_labels_df = gpd_ps_labels_df.reset_index(drop=True)

# join the two dataframes based on the points that intersect the buffered points in ps_labels_buffered
joined_df = gpd.sjoin(od_labels_df, gpd_ps_labels_df, predicate='within')

print(len(od_labels_df))
print(len(gpd_ps_labels_df))
print(len(joined_df))



Number of labels before filtering: 10616
Number of labels after filtering: 1728
Number of PS labels after filtering per panos: 1460
970
1460
874


In [None]:
# Visualize the joined dataframe
import folium

hmap = folium.Map(location=[52.3676, 4.90], zoom_start=12, tiles='stamentoner',)

# Visualize pd_labels_df
for i in range(len(ps_labels_df)):
    folium.CircleMarker(location=[row["geometry"].y, row["geometry"].x],
                        radius=1,
                        color='red',
                        fill=True,
                        fill_color='red',
                        fill_opacity=0.5).add_to(hmap)
    
# Visualize od_labels_df

In [73]:
# Calculate the number of correct and incorrect predictions
num_correct = len(joined_df)
num_incorrect = len(od_labels_df) - num_correct

# Calculate accuracy
accuracy = num_correct / len(od_labels_df)

# Calculate precision and recall
precision = num_correct / len(ps_labels_df)
recall = num_correct / len(od_labels_df)

# Create a pandas dataframe to store the results
metrics_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall]})

# Print the results
print(metrics_df)

   Accuracy  Precision    Recall
0  0.901031    0.59863  0.901031
