## imports

In [1]:
import os
os.environ['USE_PYGEOS'] = '0'
import json
import uuid
import geopandas as gpd
from geopandas import sjoin_nearest
import requests
import pandas as pd
from shapely.geometry import Point, shape , Polygon, LineString ,MultiPolygon
from tqdm import tqdm
import numpy as np
import time
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
import ORS      # import from Ors module folder
from ORS.client import RoutingClient
from ORS.route import Route
from matplotlib.lines import Line2D
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Folders used throughout the notebook, given relative to the notebook's location:
# raw inputs are read from the first, routing results are written to the second.
data_folder_path = os.path.join(os.pardir, os.pardir, 'data', '01_input_data')
routing_output_folder_path = os.path.join(
    os.pardir, os.pardir, 'data', '02_intermediate_output', 'routing'
)

In [152]:
# Read the two input layers from the data folder:
# the young-population grid (already created in QGIS) and the OSM schools layer.
young_data, schools_osm = (
    gpd.read_file(os.path.join(data_folder_path, layer_file))
    for layer_file in ('young_population_grid.gpkg', 'schools_osm.gpkg')
)


## preprocess data for routing

In [153]:
# Keep only the OSM features tagged amenity == 'school'
schools_osm = schools_osm[schools_osm['amenity'] == 'school']

# Replace every grid cell geometry by its centroid; the centroid is the routing origin
young_data['geometry'] = young_data['geometry'].centroid

# Attach the nearest school to every origin point. The distance is stored in
# 'distance_to_school' and is computed here, i.e. before the reprojection below,
# so it is expressed in the units of the layers' original CRS.
# NOTE(review): presumably a projected CRS in metres (the sample values are ~7000) — confirm.
joined_young_schools = gpd.sjoin_nearest(young_data, schools_osm, distance_col="distance_to_school",
                                         lsuffix="young", rsuffix="school")

# Reproject the joined origins to EPSG:4326 (lon/lat)
joined_young_schools = joined_young_schools.to_crs(epsg=4326)

# Reproject a copy of the schools' id + geometry to EPSG:4326 as well
schools_osm_geometry = schools_osm[['osm_id', 'geometry']].copy()
schools_osm_geometry = schools_osm_geometry.to_crs(epsg=4326)

# Add the matched school's geometry as 'geometry_school' (left join on 'osm_id',
# so every origin row is kept).
# NOTE(review): if 'osm_id' is not unique in the schools layer this merge duplicates rows — verify.
route_df = joined_young_schools.merge(schools_osm_geometry, on='osm_id', how='left', suffixes=('', '_school'))

# Preview the first rows of the routing table
route_df.head()

Unnamed: 0,Young mean,Id,sum,geometry,index_school,osm_id,amenity,distance_to_school,geometry_school
0,0.004387,0,0.001152,POINT (-43.79522 -22.90463),701,823942258,school,7382.502411,POINT (-43.72337 -22.90078)
1,0.004881,1,0.008887,POINT (-43.79527 -22.90586),701,823942258,school,7396.860207,POINT (-43.72337 -22.90078)
2,0.006552,2,0.00057,POINT (-43.79516 -22.90699),701,823942258,school,7395.964279,POINT (-43.72337 -22.90078)
3,0.005898,3,0.001244,POINT (-43.79352 -22.90309),701,823942258,school,7200.536924,POINT (-43.72337 -22.90078)
4,0.005199,4,0.028538,POINT (-43.79408 -22.90414),701,823942258,school,7262.701492,POINT (-43.72337 -22.90078)


## Defining avoid polygons for routes

In [None]:
# Read the avoid-area polygons (GeoJSON) into a plain dict
geojson_file_path = os.path.join(data_folder_path, 'dispute_area.geojson')

# GeoJSON is UTF-8 by specification; state it explicitly so the read does not
# depend on the platform's default text encoding.
with open(geojson_file_path, 'r', encoding='utf-8') as f:
    geojson_data = json.load(f)

In [3]:
# Collect the coordinate arrays of every polygon in the avoid-area file.
# A Polygon feature contributes its coordinates as one entry; a MultiPolygon
# feature contributes one entry per member polygon. Other geometry types are ignored.
coordinates = []

for feature in geojson_data['features']:
    feature_geometry = feature['geometry']
    geometry_type = feature_geometry['type']
    if geometry_type == 'MultiPolygon':
        coordinates.extend(feature_geometry['coordinates'])
    elif geometry_type == 'Polygon':
        coordinates.append(feature_geometry['coordinates'])

# GeoJSON-style MultiPolygon that is passed to ORS as the "avoid_polygons" option
avoid_polygons = {"coordinates": coordinates, "type": "MultiPolygon"}

In [4]:
# Build a shapely MultiPolygon of the avoid areas; it is used later to test
# which shortest routes cross an avoid area.
polygons = []
for feature in geojson_data['features']:
    geometry = feature['geometry']
    geometry_kind = geometry['type']
    if geometry_kind == 'Polygon':
        polygons.append(shape(geometry))
    elif geometry_kind == 'MultiPolygon':
        # Split the multi-geometry into its member polygons
        polygons.extend(
            shape({'type': 'Polygon', 'coordinates': member_coordinates})
            for member_coordinates in geometry['coordinates']
        )

avoid_multipolygon = MultiPolygon(polygons)

## ORS route functions

Check that the local ORS build is running: http://localhost:8081/ors/v2/health

In [3]:
# Base URL of the locally running OpenRouteService instance.
# The literal must not carry leading/trailing whitespace: it is handed to the
# client as-is and a stray space would become part of the request URL.
ors_url = "http://localhost:8081/ors"
# Routing profile used for every request in this notebook
profile = "foot-walking"
ors_client = RoutingClient(base_url=ors_url)

In [157]:
def make_default_ors_request(coord1, coord2, csv_factor, avoid_polygons=None, *,
                             preference="shortest", csv_column="crime_index"):
    """
    Build the parameter dict for one OpenRouteService directions request.

    Parameters:
        coord1, coord2: start and end points; any objects exposing ``.x`` and ``.y``.
        csv_factor: weight given to the CSV column in the routing
            (the notebook uses values from 0.0 to 1.0).
        avoid_polygons: optional GeoJSON-like geometry dict; when given it is
            added unchanged under ``options["avoid_polygons"]``.
        preference: route preference sent to ORS, e.g. "shortest" or "fastest".
        csv_column: name of the column in the CSV file used for the weighting.

    Returns:
        dict ready to be passed as request parameters.
    """
    # The request expects plain (x, y) tuples, not point objects
    start = (coord1.x, coord1.y)
    end = (coord2.x, coord2.y)

    options = {
        "avoid_features": ["ferries"],
        "profile_params": {
            "weightings": {
                "csv_factor": csv_factor,
                "csv_column": csv_column,
            }
        },
    }
    # Only send avoid polygons when the caller provided some
    if avoid_polygons is not None:
        options["avoid_polygons"] = avoid_polygons

    return {
        "instructions": False,
        "preference": preference,
        "elevation": False,
        "continue_straight": True,
        "extra_info": ["csv"],  # ask for the per-segment CSV values in the response
        "options": options,
        "coordinates": [start, end],
    }


def fetch_route_data(coord1, coord2, csv_factor, avoid_polygons=None):
    """
    Request one route from the ORS client and return its data.

    Parameters:
        coord1, coord2: start and end points of the route.
        csv_factor: weight of the CSV column in the routing.
        avoid_polygons: optional geometry dict of areas to avoid.

    Returns:
        (df, route): the first route of the response as a DataFrame and as a
        route object, or (None, None) when the request or its parsing failed.
        Failures are logged as warnings instead of being raised, so a batch
        run continues with the next origin.
    """
    import logging  # local import: keeps this block self-contained

    try:
        # Prepare the API request parameters
        request_params = make_default_ors_request(coord1, coord2, csv_factor, avoid_polygons)

        # Make the API call and take the first route of the response
        response = ors_client.request(params=request_params, profile=profile, format="geojson")
        route = response.routes[0]

        return route.as_dataframe(), route

    except Exception as exc:
        # Deliberately best-effort: report the failure, then signal "no route"
        logging.getLogger(__name__).warning(
            "ORS route request failed (%s -> %s, csv_factor=%s): %r",
            coord1, coord2, csv_factor, exc,
        )
        return None, None


In [6]:
def ors(gdf, csv_factor, avoid_polygons=None):
    """
    Calculate route information from each origin to its assigned school.

    Parameters:
        gdf: table with one row per origin; the columns 'geometry' (origin
            point) and 'geometry_school' (school point) are read. The input
            is not modified.
        csv_factor: weight of the CSV column in the routing.
        avoid_polygons: optional geometry dict of areas to avoid.

    Returns:
        GeoDataFrame: a copy of ``gdf`` with the added columns
        'route_geometry', 'route_distance' (route distance / 1000),
        'route_duration' (route duration / 60), 'route_csv_mean' and
        'route_csv_max'. Rows whose route could not be fetched hold missing
        values in all five columns.
    """
    route_geometries = []
    route_distances = []
    route_durations = []
    route_csv_means = []
    route_csv_maxes = []

    for _, row in tqdm(gdf.iterrows(), total=gdf.shape[0]):
        df, normal_route = fetch_route_data(row['geometry'], row['geometry_school'], csv_factor, avoid_polygons)

        if df is None or normal_route is None:
            # No route: append placeholders so the lists stay aligned with the rows
            route_geometries.append(None)
            route_distances.append(None)
            route_durations.append(None)
            route_csv_means.append(None)
            route_csv_maxes.append(None)
            continue

        route_geometries.append(normal_route.geometry)
        route_distances.append(normal_route.distance)  # metres, converted below
        route_durations.append(normal_route.duration)  # seconds, converted below
        route_csv_means.append(df['csv'].mean())       # mean CSV value along the route
        route_csv_maxes.append(df['csv'].max())        # maximum CSV value along the route

    # Work on a real copy: wrapping the input in GeoDataFrame(...) alone does not
    # guarantee one, and the caller's frame must not gain the route columns.
    new_gdf = gpd.GeoDataFrame(gdf.copy())
    new_gdf['route_geometry'] = route_geometries
    new_gdf['route_distance'] = [None if d is None else d / 1000 for d in route_distances]  # metres -> kilometres
    new_gdf['route_duration'] = [None if d is None else d / 60 for d in route_durations]    # seconds -> minutes
    new_gdf['route_csv_mean'] = route_csv_means
    new_gdf['route_csv_max'] = route_csv_maxes

    return new_gdf

In [159]:
def save_allroute(input_df, output_filename):
    """
    Save the successfully routed lines of ``input_df`` as a GeoPackage.

    Parameters:
        input_df: table holding the columns 'route_geometry', 'Id', 'osm_id',
            'route_distance', 'route_duration', 'route_csv_max' and
            'route_csv_mean'.
        output_filename: path of the GeoPackage to write; its parent folder is
            created when missing.

    Returns:
        GeoDataFrame: the saved rows, with the route as 'geometry'. Rows
        without a route geometry are dropped. The CRS is assigned as
        EPSG:4326 (assigned, not reprojected).
    """
    route_columns = ['route_geometry', 'Id', 'osm_id', 'route_distance',
                     'route_duration', 'route_csv_max', 'route_csv_mean']

    # Column selection + rename both return new objects, so input_df stays untouched
    routes = input_df[route_columns].rename(columns={'route_geometry': 'geometry'})
    # Rows without a route cannot be written as features
    routes = routes.dropna(subset=['geometry'])

    routes = gpd.GeoDataFrame(routes, geometry='geometry')
    # Label the coordinates as lon/lat; allow_override keeps this working even
    # when the geometries already carry a CRS.
    routes = routes.set_crs("EPSG:4326", allow_override=True)

    # Make sure the target folder exists before writing
    output_folder = os.path.dirname(output_filename)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)
    routes.to_file(output_filename, driver='GPKG')

    return routes

Run the route requests for the shortest and the safest routes and save the resulting data frames

In [162]:
# Shortest routes: the CSV column carries no weight and no area is avoided
# (avoid_polygons is left at its default of None)
short_df = ors(route_df, csv_factor=0.0)

100%|██████████| 200/200 [00:14<00:00, 13.61it/s]


In [163]:
# Write the shortest-route lines to a GeoPackage and keep the saved table
file_path = os.path.join(routing_output_folder_path, 'Short_df_lines.gpkg')
short_df_lines = save_allroute(input_df=short_df, output_filename=file_path)

In [164]:
# Flag every shortest route against the avoid area.
# As written, the value is 0 when the route intersects `avoid_multipolygon`
# and 1 when it does not.
# NOTE(review): this is the opposite of what the column name suggests — confirm
# which encoding the downstream analysis expects before changing it.
short_df_lines['intersects_polygon'] = short_df_lines['geometry'].apply(lambda x: 0 if avoid_multipolygon.intersects(x) else 1)


In [165]:
# Safest routes: full weight on the CSV column, with the avoid areas excluded
safe_df = ors(
    route_df,
    csv_factor=1.0,
    avoid_polygons=avoid_polygons,
)

  0%|          | 0/200 [00:00<?, ?it/s]

100%|██████████| 200/200 [00:24<00:00,  8.19it/s]


In [166]:
# Write the safest-route lines to a GeoPackage and keep the saved table
file_path = os.path.join(routing_output_folder_path, 'safe_df_lines.gpkg')
safe_df_lines = save_allroute(input_df=safe_df, output_filename=file_path)

## Filter out IDs whose short and safe routes do not start and end at the same points

In [175]:

# Work on copies so the saved route tables stay untouched
filtered_short_df = short_df_lines.copy()
filtered_safe_df = safe_df_lines.copy()

# Index the first geometry of every Id once, instead of re-scanning both
# tables for every row.
short_geometry_by_id = {}
for route_id, route_geometry in zip(filtered_short_df['Id'], filtered_short_df['geometry']):
    short_geometry_by_id.setdefault(route_id, route_geometry)

safe_geometry_by_id = {}
for route_id, route_geometry in zip(filtered_safe_df['Id'], filtered_safe_df['geometry']):
    safe_geometry_by_id.setdefault(route_id, route_geometry)

# Compare the end points of the short and the safe route of every Id
start_match_by_id = {}
end_match_by_id = {}
for origin_id, short_geometry in tqdm(short_geometry_by_id.items(), total=len(short_geometry_by_id),
                                      desc='Filtering safe routes'):
    if origin_id not in safe_geometry_by_id:
        # No safe route for this Id, hence no row in filtered_safe_df to flag
        continue
    safe_geometry = safe_geometry_by_id[origin_id]

    if short_geometry is None:
        start_match_by_id[origin_id] = end_match_by_id[origin_id] = 'No Short Geometry'
    elif safe_geometry is None:
        start_match_by_id[origin_id] = end_match_by_id[origin_id] = 'No Safe Geometry'
    else:
        # First / last vertex must be identical for the two routes to be comparable
        if safe_geometry.coords[0] != short_geometry.coords[0]:
            start_match_by_id[origin_id] = 'Mismatch'
        if safe_geometry.coords[-1] != short_geometry.coords[-1]:
            end_match_by_id[origin_id] = 'Mismatch'

# Indicator columns: '' means nothing was flagged for that row
filtered_safe_df['start_match'] = [start_match_by_id.get(route_id, '') for route_id in filtered_safe_df['Id']]
filtered_safe_df['end_match'] = [end_match_by_id.get(route_id, '') for route_id in filtered_safe_df['Id']]

# Keep the Ids whose start and end both match
endpoints_match = (filtered_safe_df['start_match'] != 'Mismatch') & (filtered_safe_df['end_match'] != 'Mismatch')
cleaned_safe_ids = filtered_safe_df.loc[endpoints_match, 'Id']
cleaned_short_ids = filtered_short_df.loc[filtered_short_df['Id'].isin(cleaned_safe_ids), 'Id']

# Filter the route tables down to the kept Ids
filtered_safe_df = filtered_safe_df[filtered_safe_df['Id'].isin(cleaned_safe_ids)]
filtered_short_df = filtered_short_df[filtered_short_df['Id'].isin(cleaned_short_ids)]

# Origins to route again later: only Ids that have BOTH a short and a safe route,
# so the later per-Id comparison always finds its counterpart.
filtered_merged_data = route_df[route_df['Id'].isin(cleaned_short_ids)]


Filtering safe routes:   0%|          | 0/132 [00:00<?, ?it/s]

Filtering safe routes: 100%|██████████| 132/132 [00:01<00:00, 97.46it/s] 


## Get processed data for each route

This function calculates similarity ratios, filters inaccessible routes, categorizes route durations and distances, computes percentage increases, and calculates increases in crime mean and max.

In [176]:
def share_ratio(geometry1, geometry2):
    """
    Return the percentage of ``geometry1``'s length that it shares with ``geometry2``.

    The result is 0.0 when the geometries do not intersect, and also when
    ``geometry1`` has zero length (no ratio can be formed in that case).
    """
    base_length = geometry1.length
    if not base_length:
        # Guard against dividing by zero for a degenerate (zero-length) geometry
        return 0.0

    intersection = geometry1.intersection(geometry2)
    if intersection.is_empty:
        return 0.0
    return (intersection.length / base_length) * 100
    

def get_processed_data(csv, merged_data, short_df, avoid_polygons, max_duration_increase_pct=None):
    """
    Route with CSV weight ``csv`` and compare the result with the shortest routes.

    Parameters:
        csv: CSV weight (csv_factor) used for the weighted ("safe") routes.
        merged_data: origin/school table passed to ``ors``.
        short_df: shortest routes, one row per 'Id', with the columns
            'geometry', 'route_duration', 'route_distance', 'route_csv_max',
            'route_csv_mean' and 'intersects_polygon'.
        avoid_polygons: geometry dict of areas to avoid (or None).
        max_duration_increase_pct: optional outlier threshold; when given, only
            rows whose duration increase is below it are kept. The default
            None keeps every row.

    Returns:
        (merged_df, safe_df): the per-Id comparison table (inner join of short
        and safe routes, rows without both durations removed) and the safe
        routes with an added 'similarity_ratio' column.
    """
    # Request the weighted routes for every origin/school pair
    safe_df = ors(merged_data, csv_factor=csv, avoid_polygons=avoid_polygons)

    # First short-route geometry per Id, looked up in O(1) below
    short_geometry_by_id = {}
    for route_id, route_geometry in zip(short_df['Id'], short_df['geometry']):
        short_geometry_by_id.setdefault(route_id, route_geometry)

    # One ratio per safe_df row, computed in safe_df's own row order so the
    # column assignment below is aligned by construction.
    similarity_ratios = []
    id_geometry_pairs = zip(safe_df['Id'], safe_df['route_geometry'])
    for route_id, safe_geometry in tqdm(id_geometry_pairs, total=len(safe_df), desc='Calculating similarity ratios'):
        short_geometry = short_geometry_by_id.get(route_id)
        if safe_geometry is None or short_geometry is None:
            # One of the two routes is missing: no ratio can be computed
            similarity_ratios.append(float('nan'))
        else:
            similarity_ratios.append(share_ratio(short_geometry, safe_geometry))

    safe_df['similarity_ratio'] = similarity_ratios

    # Merge short and safe metrics per Id (inner join).
    # NOTE(review): 'total_population' must exist in merged_data — confirm it is
    # added to the routing table before this function is called.
    merged_df = pd.merge(short_df[['Id', 'route_duration', 'route_distance', 'route_csv_max', 'route_csv_mean', 'intersects_polygon']],
                         safe_df[['Id', 'route_duration', 'route_distance', 'route_csv_max', 'route_csv_mean', 'similarity_ratio', 'total_population']],
                         on='Id', suffixes=('_short', '_safe'))

    # Duration classes; right=False makes every bin closed on the left, e.g. [0, 10)
    duration_edges = [0, 10, 20, 30, 40, 50, float('inf')]
    duration_labels = ['0-10', '10-20', '20-30', '30-40', '40-50', '50+']
    merged_df['duration_bins_short'] = pd.cut(merged_df['route_duration_short'], bins=duration_edges, labels=duration_labels, right=False)
    merged_df['duration_bins_safe'] = pd.cut(merged_df['route_duration_safe'], bins=duration_edges, labels=duration_labels, right=False)

    # Distance classes, same left-closed convention
    distance_edges = [0, 1, 2, 3, 4, 5, float('inf')]
    distance_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5+']
    merged_df['distance_bins_safe'] = pd.cut(merged_df['route_distance_safe'], bins=distance_edges, labels=distance_labels, right=False)
    merged_df['distance_bins_short'] = pd.cut(merged_df['route_distance_short'], bins=distance_edges, labels=distance_labels, right=False)

    # Percentage increase of the safe route relative to the short route
    merged_df['percentage_increase_dur'] = ((merged_df['route_duration_safe'] - merged_df['route_duration_short']) / merged_df['route_duration_short']) * 100
    merged_df['percentage_increase_dis'] = ((merged_df['route_distance_safe'] - merged_df['route_distance_short']) / merged_df['route_distance_short']) * 100

    # Optional outlier filter. The original expression computed this selection
    # but discarded it, so filtering is opt-in to keep existing outputs unchanged.
    if max_duration_increase_pct is not None:
        merged_df = merged_df[merged_df['percentage_increase_dur'] < max_duration_increase_pct]

    # Relative change of the mean / max CSV value along the route.
    # A short-route value of 0 yields inf (or NaN) here.
    merged_df['increase_ratio_mean'] = ((merged_df['route_csv_mean_safe'] - merged_df['route_csv_mean_short']) / merged_df['route_csv_mean_short'] * 100)
    merged_df['increase_ratio_max'] = ((merged_df['route_csv_max_safe'] - merged_df['route_csv_max_short']) / merged_df['route_csv_max_short'] * 100)

    # Keep only Ids for which both routes were found
    merged_df = merged_df.dropna(subset=['route_duration_short', 'route_duration_safe'])

    return merged_df, safe_df


In [182]:
# Run the comparison for the CSV weights 0.1, 0.2, ..., 1.0
csv_values = [tenth / 10 for tenth in range(1, 11)]

for csv in csv_values:
    processed_data, safe_df = get_processed_data(
        csv, filtered_merged_data, filtered_short_df, avoid_polygons
    )

    # One comparison table per weight
    csv_filename = os.path.join(routing_output_folder_path, f'route_data_{csv}.csv')
    processed_data.to_csv(csv_filename, index=False)

    # Optional: also store the safe routes of this weight as a GeoPackage
    # gpkg_filename = os.path.join(routing_output_folder_path, f'safe_df_csv_{csv}.gpkg')
    # save_allroute(safe_df, gpkg_filename)

## Save route segments of every route

In [180]:
def ors_df(gdf, csv_factor, avoid_polygons=None):
    """
    Fetch the route of every origin and save its route table as a CSV file.

    Parameters:
        gdf: table with the columns 'geometry' (origin point),
            'geometry_school' (school point) and 'Id'. The input is not modified.
        csv_factor: weight of the CSV column in the routing; it also names the
            output sub-folder ``csv_factor_<csv_factor>``.
        avoid_polygons: optional geometry dict of areas to avoid.

    Each successfully fetched route is written to
    ``<routing output folder>/csv_factor_<csv_factor>/<Id>.csv``; origins
    without a route are skipped. Nothing is returned.
    """
    os.makedirs(routing_output_folder_path, exist_ok=True)

    # Sub-folder for this weight; the path does not change between rows
    dir_name = os.path.join(routing_output_folder_path, f'csv_factor_{csv_factor}')

    for _, row in gdf.iterrows():
        # Fetch route data; only the route table is needed here
        df, _ = fetch_route_data(row['geometry'], row['geometry_school'], csv_factor, avoid_polygons)
        if df is None:
            continue

        # Created on the first successful route, so a run without routes leaves no empty folder
        os.makedirs(dir_name, exist_ok=True)

        # One file per origin, named after the row's Id
        file_name = os.path.join(dir_name, f"{row['Id']}.csv")
        df.to_csv(file_name, index=False)


In [181]:
# Save the per-route tables of the shortest routes
# (no CSV weight; avoid_polygons left at its default of None)
ors_df(filtered_merged_data, csv_factor=0.0)

In [None]:

# Save the per-route tables of the safest routes (full CSV weight, avoid areas excluded)
ors_df(
    filtered_merged_data,
    csv_factor=1.0,
    avoid_polygons=avoid_polygons,
)