In [110]:
from shapely.geometry import Point, Polygon
import pandas as pd
import geopandas as gpd
import pyproj
from shapely.ops import transform
from shapely import wkt
import shapely
import pickle

## Load data

In [111]:
# # Connect to blobfuse folder
os.system('sudo blobfuse /home/azureuser/cloudfiles/code/blobfuse/sidewalk --tmp-path=/mnt/resource/blobfusetmp --config-file=/home/azureuser/cloudfiles/code/blobfuse/fuse_connection_sidewalk.cfg -o attr_timeout=3600 -o entry_timeout=3600 -o negative_timeout=3600 -o allow_other -o nonempty')
os.system('sudo blobfuse /home/azureuser/cloudfiles/code/blobfuse/ovl --tmp-path=/mnt/resource/blobfusetmp --config-file=/home/azureuser/cloudfiles/code/blobfuse/fuse_connection_ovl.cfg -o attr_timeout=3600 -o entry_timeout=3600 -o negative_timeout=3600 -o allow_other -o nonempty')

0

In [112]:
# Load polygons as put out by T2N
raw_polygons = gpd.read_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/output-Polygons-22-05-2024_18.shp")

In [123]:
# Filter for crosswalks polygons
CW_polygons = raw_polygons[raw_polygons['f_type'] == 'crosswalk']
CW_polygons = CW_polygons.drop(columns='f_type')

In [143]:
# Set CRS
CRS = "epsg:4326"

We only want the crosswalks that are in the area we are interested in. T2N has generated crosswalks for a larger area so we filter for the correct areas.

In [115]:
# Get polygon of area (as created in 1a)
GW_polygon = pd.read_csv("../data/polygons/Grachtengordel West polygon.csv")

# Separate the polygon string
pol_string = GW_polygon.loc[0][0]

# Set polygon to correct format
GW_polygon = wkt.loads(pol_string)

  pol_string = GW_polygon.loc[0][0]


In [124]:
# Only keep crosswalk polygons that fall within the bouds of the area
indexes = []

for CW in CW_polygons.itertuples():
    if not (GW_polygon.intersects(CW[1])):
        indexes.append(CW[0])

# Drop rows based on index
CW_polygons = CW_polygons.drop(index=indexes)
CW_polygons = CW_polygons.reset_index(drop=True)

In [81]:
# Save the filtered polygons
CW_polygons_path = "/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/CW polygons"

CW_polygons.to_file(CW_polygons_path)

In [128]:
# Retrieve Project Sidewalk gdf as created in 1a
PSW_og = pd.read_csv("../data/PSW dfs/Grachtengordel CW PSW")

In [129]:
# Drop the unnecessary columns from the project sidewalk dataframe
PSW = PSW_og.copy(deep=True)

# Set correct format polygons
PSW['geometry'] = PSW['geometry'].apply(wkt.loads)

# Drop columns 
PSW = PSW.drop(['Unnamed: 0','attribute_id', 'osm_street_id', 'severity', 'is_temporary', 'image_capture_date', 'agree_count', 'disagree_count', 'notsure_count', 'label_tags'], axis=1)

# Save as gdf
PSW_gdf = gpd.GeoDataFrame(PSW, geometry='geometry')

## Transformation
We need to transform the PSW points to the same format as the crosswalk polygons so we can compare them.

In [131]:
project = pyproj.Transformer.from_proj(
    pyproj.Proj(init='epsg:28992'), # source coordinate system
    pyproj.Proj(init='epsg:4326')) # destination coordinate system

# Function to transform the geometry of a gdf
def apply_projection(geometry):
    transformed_geometry = transform(project.transform, geometry)
    return transformed_geometry

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


As some PSW points do not overlap with the crosswalk they are supposed to tag, we change the points to bigger polygons. This will make sure we do not miss any true positives because of problems with the validation data.

In [134]:
# Copy the original PSW data twice
PSW_point = PSW_gdf.copy()
PSW_radius = PSW_gdf.copy()

# Buffer the original PSW to create a circle instead of a point
PSW_radius['geometry'] = PSW_radius['geometry'].buffer(4)

# Apply the projection to both gdfs to transform the coordinates
PSW_point['geometry'] = PSW_point['geometry'].apply(apply_projection)
PSW_radius['geometry'] = PSW_radius['geometry'].apply(apply_projection)

In [88]:
# Save the PSW points and the PSW circles for visual analysis in QGIS
point_path = "/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/PSW point.shp"
radius_path = "/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/PSW radius.shp"

PSW_point.to_file(point_path)
PSW_radius.to_file(radius_path)

## Validation

For the validation we need to consider four cases:
- **True Positive**: crosswalk polygon that contains a PSW point.
- **False Positive**: crosswalk polygon that does not contain a PSW point.
- **True Negative**: no crosswalk polygon and no PSW point.
- **False Negative**: PSW point that does not overlap with a crosswalk polygon.

The first validation will be based on whether a project sidewalk point is contained fully in one of the polygons.

In [163]:
# Compute true positives and false positives
def validate(CW_gdf, PSW_df):

    CW_list = list(CW_gdf['geometry'])

    TP = []
    TP_index = []
    FP = []
    FP_index = []
    FN = []
    FN_index = []


    # List to keep track of PSW points that have a match
    PSW_matches = []

    # Loop over all crosswalks
    for i in range(0, len(CW_list)):

        polygon = CW_list[i]

        polygon_matches = []

        for annotation in PSW_df.itertuples():
            PSW_polygon = annotation[1]
            PSW_index = annotation[0]

            if polygon.intersects(PSW_polygon):
                polygon_matches.append(PSW_index)

                if PSW_index not in PSW_matches:
                    PSW_matches.append(PSW_index)
        
        if len(polygon_matches) > 0:
            TP.append([polygon, polygon_matches])
            TP_index.append(i)

        else:
            #print("adding to FP")
            FP.append(polygon)
            FP_index.append(i)

    for i in range(0, len(PSW_df)):
        if i not in list(PSW_matches):
            FN.append(PSW_df.iloc[i][0])
            FN_index.append(i)
    
    # Print results
    print("TP: ", len(TP))
    print("FP: ", len(FP))
    print("FN: ", len(FN))
    print("\n")
    print("all PSW crosswalks: ", len(PSW_radius))
    print("PSW crosswalks identified: ", len(PSW_matches))
    print("total polygons T2N: ", len(FP) + len(TP))
    print("\n")

    precision = len(TP) / (len(TP) + len(FP))
    recall = len(TP) / (len(TP) + len(FN))

    print("Precision: ", precision)
    print("Recall: ", recall)
    print("\n")

    # Create gdfs of polygons
    TP_pols = []
    for i in TP_index:
        TP_pols.append(CW_polygons.iloc[i]['geometry'])

    FP_pols = []
    for i in FP_index:
        FP_pols.append(CW_polygons.iloc[i]['geometry'])

    TP_pols_df = pd.DataFrame({'geometry': TP_pols})
    TP_pols_gdf = gpd.GeoDataFrame(TP_pols_df, geometry='geometry')
    TP_pols_gdf = TP_pols_gdf.set_crs(CRS)

    FP_pols_df = pd.DataFrame({'geometry': FP_pols})
    FP_pols_gdf = gpd.GeoDataFrame(FP_pols_df, geometry='geometry')
    FP_pols_gdf = FP_pols_gdf.set_crs(CRS)

    FN_radius_df = pd.DataFrame({'geometry': FN})
    FN_radius_gdf = gpd.GeoDataFrame(FN_radius_df, geometry='geometry')
    FN_radius_gdf = FN_radius_gdf.set_crs(CRS)
  

    return TP_pols_gdf, FP_pols_gdf, FN_radius_gdf, PSW_matches

In [164]:
TP_pols_gdf, FP_pols_gdf, FN_radius_gdf, PSW_matches = validate(CW_polygons, PSW_radius)

TP:  39
FP:  26
FN:  23


all PSW crosswalks:  67
PSW crosswalks identified:  44
total polygons T2N:  65


Precision:  0.6
Recall:  0.6290322580645161




  FN.append(PSW_df.iloc[i][0])


In [93]:
# Save TP, FP and FN polygons
TP_path = "/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/TP polygons radius.shp"
FP_path = "/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/FP polygons radius.shp"
FN_path = "/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/T2N output/Grachtengordel West/FN polygons radius.shp"

TP_pols_gdf.to_file(TP_path)
FP_pols_gdf.to_file(FP_path)
FN_radius_gdf.to_file(FN_path)