In [2]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import pyproj
from shapely.ops import transform

## Load data

In [1]:
# Connect to blobfuse folder
os.system('sudo blobfuse /home/azureuser/cloudfiles/code/blobfuse/sidewalk --tmp-path=/mnt/resource/blobfusetmp --config-file=/home/azureuser/cloudfiles/code/blobfuse/fuse_connection_sidewalk.cfg -o attr_timeout=3600 -o entry_timeout=3600 -o negative_timeout=3600 -o allow_other -o nonempty')

0

In [4]:
# Load polygons
polygons = gpd.read_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/22 output/output-Polygons-29-04-2024_17.shp")

In [5]:
# Filter crosswalks polygons
CW_polygons = polygons[polygons['f_type'] == 'crosswalk']

We only want the crosswalks that are in Osdorp-Midden. We filter for those.

In [6]:
# Get Osdorp polygon
Os = pd.read_csv("../data/polygons/polygon Osdorp-Midden")

# Change geometry column to geometry type
Os['geometry'] = gpd.GeoSeries.from_wkt(Os['geometry'])

# Save only polygon as variable
Os_pol = Os.loc[0][0]

  Os_pol = Os.loc[0][0]


In [7]:
# Only keep crosswalk polygons that fall within the bouds of Osdorp-Midden
indexes = []

for CW in CW_polygons.itertuples():
    if not Os_pol.contains(CW[2]):
        indexes.append(CW[0])

# Drop rows based on index
CW_polygons = CW_polygons.drop(index=indexes)

In [8]:
# CW_polygons.to_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/22 output/CW_polygons_22")

In [9]:
# Load CW polygons
CW_pols_20 = gpd.read_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/20 output/CW_polygons_20.shp")
CW_pols_21 = gpd.read_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/21 output/CW_polygons_21.shp")

# Load PSW data
PSW = gpd.read_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/CW_psw_points.shp")

## Transformation
We need to transform the PSW points to the same format as the crosswalk polygons so we can compare them.

In [10]:
project = pyproj.Transformer.from_proj(
    pyproj.Proj(init='epsg:4326'), # source coordinate system
    pyproj.Proj(init='epsg:28992')) # destination coordinate system

def apply_projection(geometry):
    # Your projection transformation code here
    transformed_geometry = transform(project.transform, geometry)
    return transformed_geometry

  in_crs_string = _prepare_from_proj_string(in_crs_string)
  in_crs_string = _prepare_from_proj_string(in_crs_string)


As some PSW points do not overlap with the crosswalk they are supposed to tag, we change the points to bigger polygons. This will make sure we do not miss any true positives because of problems with the validation data.

In [11]:
# Make circle from point
PSW_circle = PSW.copy()
PSW_circle['geometry'] = PSW_circle['geometry'].apply(apply_projection)
PSW_circle['geometry'] = PSW_circle['geometry'].buffer(0.00003)
# PSW_circle.to_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/PSW_WGS_radius.shp")

## Validation

For the validation we need to consider four cases:
- **True Positive**: crosswalk polygon that contains a PSW point.
- **False Positive**: crosswalk polygon that does not contain a PSW point.
- **True Negative**: no crosswalk polygon and no PSW point.
- **False Negative**: PSW point that does not overlap with a crosswalk polygon.

The first validation will be based on whether a project sidewalk point is contained fully in one of the polygons.

In [12]:
# Compute true positives and false positives
def validate(CW_df, PSW_df):
    TP = []
    TP_index = []
    FP = []
    FP_index = []
    FN = []
    FN_index = []

    # List to keep track of PSW points that have a match
    PSW_matches = []

    # Loop over all crosswalks
    for CW in CW_df.itertuples():
        CW_matches = []

        # Loop over all psw points
        for point in PSW_df.itertuples():

            # Check if any of the points is in the crosswalk polygon
            if CW[2].intersects(point[2]):
                CW_matches.append(point[2])
                
                # Add PSW point to matches if it is not already in there
                if point[2] not in PSW_matches:
                    PSW_matches.append(point[2])

        # If a crosswalk has one or more matches, append to true positives
        if len(CW_matches) != 0:
            TP.append([CW[2], CW_matches])
            TP_index.append(CW[0])

        # If a crosswalk has no matches, append to false positives
        if len(CW_matches) == 0:
            FP.append(CW[2])
            FP_index.append(CW[0])

    for point in PSW_df.itertuples():
        if point[2] not in PSW_matches:
            FN.append(point[2])
            FN_index.append(point[0])
    
    return TP, FP, FN, TP_index, FP_index, FN_index, PSW_matches

In [13]:
TP_20, FP_20, FN_20, TP_index_20, FP_index_20, FN_index_20, PSW_matches_20 = validate(CW_pols_20, PSW_circle)
TP_21, FP_21, FN_21, TP_index_21, FP_index_21, FN_index_21, PSW_matches_21 = validate(CW_pols_21, PSW_circle)

# Create dataframes
CW_TP_20 = CW_pols_20.iloc[TP_index_20, :]
CW_FP_20 = CW_pols_20.iloc[FP_index_20, :]
PSW_FN_20 = PSW_og.iloc[FN_index_20, :]
CW_TP_21 = CW_pols_21.iloc[TP_index_21, :]
CW_FP_21 = CW_pols_21.iloc[FP_index_21, :]
PSW_FN_21 = PSW_og.iloc[FN_index_21, :]

print("Total crosswalks PSW: ", len(PSW))
print("-------------------------------------")
print("Results for zoom level 20")
print("Total polygons T2N: ", len(CW_pols_20))
print("True positives: ", len(TP_20))
print("False positives: ", len(FP_20))
print("False negatives: ", len(FN_20))
print("PSW matches: ", len(PSW_matches_20))

print("-------------------------------------")
print("Results for zoom level 21")
print("Total polygons T2N: ", len(CW_pols_21))
print("True positives: ", len(TP_21))
print("False positives: ", len(FP_21))
print("False negatives: ", len(FN_21))
print("PSW matches: ", len(PSW_matches_21))


  return lib.intersects(a, b, **kwargs)


NameError: name 'PSW_og' is not defined

In [None]:
# Compute accuracy, precision, and recall
def metrics(TP, FP, FN, zoom):
    precision = len(TP) / (len(TP) + len(FP))
    recall = len(TP) / (len(TP) + len(FN))

    print("Zoom level "+zoom+":")
    print("Precision: ", precision)
    print("Recall: ", recall)

    return precision, recall

In [None]:
prec_20, rec_20 = metrics(TP_20, FP_20, FN_20, "20")
print("--------------------------------------------")
prec_21, rec_21 = metrics(TP_21, FP_21, FN_21, "21")

Zoom level 20:
Precision:  0.75
Recall:  0.75
--------------------------------------------
Zoom level 21:
Precision:  0.7327586206896551
Recall:  0.8947368421052632


In [None]:
# CW_TP_21.to_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/21 output/CW_TP_radius_21.shp")
# CW_FP_21.to_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/21 output/CW_FP_radius_21.shp")
# PSW_FN_21.to_file("/home/azureuser/cloudfiles/code/blobfuse/sidewalk/processed_data/crossings_project/tiles/Osdorp complete/21 output/PSW_WSG_FN_radius_21.shp")