# Proposed Roads Processing and Raster Export

This notebook processes a collection of line vectors into raster files with the 'speed' attribute used as the raster pixel value.

In [68]:
import os, sys
from datetime import date

import common_rasterio_ops as rast_ops

import re
import numpy as np
from numpy import random

import rasterio
from rasterio import features, transform
from rasterio.transform import Affine
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.io import MemoryFile

import pandas as pd
import geopandas as gpd

import shapely
from shapely.geometry import shape, Point, LineString, MultiLineString

sys.path.append('../../src/')

from gostrocks.src.GOSTRocks.misc import tPrint

### Setup

File paths

In [6]:
geo_pth = r'P:\PAK\GEO'
data_dir = r'../../data'

rast_dir = r'rast_inputs'
vect_in_dir = r'vect_inputs'
vect_out_dir = r'vect_out'

rd_dir = r'roads'
dest_dir = r'destinations'
speed_dir = r'speed'
fric_dir = r'friction'
acc_dir = r'access'

Date

In [71]:
today = date.today().strftime("%y%m%d")

Projections

In [7]:
# change this to whatever the desired metric output projection is -- this notebook assumes you want outputs in meters
dest_crs = 'EPSG:32642'

Load in KP as clipping object

In [8]:
kp = gpd.read_file(os.path.join(geo_pth,r'Boundaries/OCHA/pak_admbnda_adm1_ocha_pco_gaul_20181218.shp'))
kp = kp[kp['ADM1_EN'] == 'Khyber Pakhtunkhwa']
kp = kp.to_crs(dest_crs)

# Buffer the polygon by 20km so we take in nearby markets and roads that may be used
kp.geometry = kp.buffer(20000)

### Important file names

In [11]:
prop_tab_raw = r'Proposed_raw//needed_proposed_roads_data_211017.csv'
pgr = r'Proposed_raw//KP_RoadforUpgrade_Correct_211014'
prop_geo_raw = pgr +'.gpkg'

In [12]:
prop_geo_clean = pgr + '_clean.shp'

### Pre-processing

Tabular

In [13]:
prop_tab = pd.read_csv(os.path.join(data_dir,rd_dir,prop_tab_raw),na_values='')

In [14]:
prop_tab[::20]

Unnamed: 0.1,Unnamed: 0,Road_ID,Road_Name,Improvement Description,Spatial data missing?,Krishna Comments,District,Lat_Start,Lon_Start,Lat_End,...,Avg_Altitude,Avg_Slope,Terrain,Pavement_Type,Current_Road_Cond,Current_Surface,Current_Road_Class,Upgrade_Road_Cond,Upgrade_Surface,Upgrade_Road_Class
0,0,1,Green Lasht - Lot Oweer Road (via Barum-Shahbr...,,,,Upper Chitral,,,,...,2478.186498,21.519165,Hills,Shingle,Poor,Earthen,Collector Road,Very Good,Asphaltic,Collector Road
20,20,21,Hangu Shahukhel Raisan Road,,,,Hangu,,,,...,781.454841,4.910391,Plains,TST,Poor,Paved,District Road,Very Good,Asphaltic,District Road
40,40,41,Palosa Sar to Ghundi Mirkhan khel UC Ghundi Mi...,,"Coordinates submitted, reviewing",Need to check the road (Coordinates are not in...,Karak,,,,...,,,,,Poor,Gravel,,Very Good,Asphaltic,District Road
60,60,61,Ama Khel to Darra Bain Road,,"Coordinates submitted, reviewing",Need to check the road,Tank,32.3066,70.50648,,...,,,Plains,TST,Poor,Gravel,,Very Good,Asphaltic,District Road
80,80,81,Sirajudin Madrassa to Bar Kambarkhel,,,,Khuber,,,,...,683.422647,5.233356,Plains,,Poor,Paved,District Road,Very Good,Asphaltic,District Road


In [15]:
na_fill_dict = {
    'Current_Road_Cond' : 'Poor',
    'Current_Surface' : 'Unpaved',
    'Current_Road_Class' : 'Access Road',
    'Upgrade_Road_Cond' : 'Very Good',
    'Upgrade_Surface' : 'Asphaltic',
    'Upgrade_Road_Class' : 'District Road'
}

In [16]:
prop_tab = prop_tab.fillna(na_fill_dict)

In [17]:
prop_tab.to_csv(os.path.join(data_dir,rd_dir,r'Proposed_raw//needed_proposed_roads_data_211004_nafill.csv'))

In [18]:
prop_tab.columns

Index(['Unnamed: 0', 'Road_ID', 'Road_Name', 'Improvement Description',
       'Spatial data missing?', 'Krishna Comments', 'District', 'Lat_Start',
       'Lon_Start', 'Lat_End', 'Lon_End', 'Current_Road_Class_Raw',
       'Road_Actual_Length_km', 'Avg_Altitude', 'Avg_Slope', 'Terrain',
       'Pavement_Type', 'Current_Road_Cond', 'Current_Surface',
       'Current_Road_Class', 'Upgrade_Road_Cond', 'Upgrade_Surface',
       'Upgrade_Road_Class'],
      dtype='object')

In [19]:
prop_tab = prop_tab[['Road_ID','Road_Name','District','Current_Road_Cond','Current_Surface','Current_Road_Class','Upgrade_Road_Cond','Upgrade_Surface','Upgrade_Road_Class']]
prop_tab.rename({'Road_ID' : 'SN'},axis=1,inplace=True)

In [20]:
# prop_tab.to_csv(os.path.join(data_dir,rd_dir,r'Proposed_raw//Clean_proposed_roads_attributes_211017.csv'))

Geo

In [21]:
pre_prop = gpd.read_file(os.path.join(data_dir,rd_dir,prop_geo_raw),driver="GPKG").to_crs(dest_crs)

In [22]:
pre_prop = pre_prop[pre_prop['SN'].isna() == False]

In [23]:
pre_prop = pre_prop[['SN','geometry']]

In [24]:
# pre_prop.to_file(os.path.join(data_dir,rd_dir,prop_geo_clean))

**Stop**</br>At this stage go to ArcGIS Pro and "Add Surface Information" from a DEM to the cleaned roads. Add Z_Mean, S_Length, and Avg_Slope to the road lines.
Hacky yes, but this is very painful to do in Python and very simple to do there. Load the data back in below.
</br> Once you are done continue on in this notebook.

## Load in and process roads vectors

Note: need to add elevation data and slope to proposed roads prior to this step using ArcGIS's Add Surface Info

In [25]:
prop_tab = pd.read_csv(os.path.join(data_dir,rd_dir,r'Proposed_raw//Clean_proposed_roads_attributes_211017.csv'))

In [26]:
# load back in your roads data after joining surface information in ArcGIS
prop_raw = gpd.read_file(os.path.join(data_dir,rd_dir,prop_geo_clean)).to_crs(dest_crs)

In [27]:
prop_raw.head(2)

Unnamed: 0,SN,Z_Mean,SLength,Avg_Slope,geometry
0,7,1394.454701,24512.9283,16.396382,"LINESTRING (754400.310 3941840.318, 754400.683..."
1,7,1460.468933,338.217335,6.488805,"LINESTRING (752244.146 3969417.479, 752227.927..."


Each road has multiple segments which are of different lengths and slopes. To get the most accurate value for each road we must weight segment's values by their length when combining

Calculate length-weighted slope and elevation by road

In [28]:
# calculate road segment weight (based on length as part of road's total length

rds_length = prop_raw.groupby('SN')['SLength'].sum().rename('total_length',axis=1)
prop_raw = pd.merge(prop_raw,rds_length,how='left',on='SN')
prop_raw['length_wt'] = prop_raw['SLength'] / prop_raw['total_length']

In [29]:
# calculate weighted elev and slope per road segment

prop_raw['Z_Mean'] = prop_raw.Z_Mean * prop_raw.length_wt
prop_raw['Avg_Slope'] = prop_raw.Avg_Slope * prop_raw.length_wt

In [30]:
# calculate weighted average elevation and slope per road, slim down to one data frame, join in other tabular info

prop_slim = prop_raw.groupby('SN')[['Z_Mean','Avg_Slope']].sum()
prop_slim = pd.merge(prop_slim,prop_tab,how='left',on='SN')
prop_slim.SN = prop_slim.SN.astype(int)

Dissolve the results by SN

In [31]:
# the agg function just works on the road lengths
prop_d = prop_raw.dissolve(by='SN',aggfunc='sum').reset_index()

Consolidate the road lines on their common IDs and then join the resulting geometries to the slimmed down proposed roads DF

In [32]:
prop_raw = gpd.GeoDataFrame(pd.merge(prop_slim,prop_d[['SN','geometry']],how='left',on='SN'))

In [33]:
prop_raw[::15]

Unnamed: 0,SN,Z_Mean,Avg_Slope,Road_Name,District,Current_Road_Cond,Current_Surface,Current_Road_Class,Upgrade_Road_Cond,Upgrade_Surface,Upgrade_Road_Class,geometry
0,1,2276.075397,25.214012,Green Lasht - Lot Oweer Road (via Barum-Shahbr...,Upper Chitral,Poor,Earthen,Collector Road,Very Good,Asphaltic,Collector Road,"MULTILINESTRING ((772711.858 4003961.232, 7727..."
15,17,937.098895,26.903658,Khanano Dheri to Dadam Road,Torghar,Poor,Gravel,District Road,Very Good,Asphaltic,District Road,"MULTILINESTRING ((842269.655 3817473.455, 8422..."
30,41,837.345029,3.53407,Palosa Sar to Ghundi Mirkhan khel UC Ghundi Mi...,Karak,Poor,Gravel,Access Road,Very Good,Asphaltic,District Road,"MULTILINESTRING ((710636.516 3670889.511, 7106..."
45,72,896.729189,7.154232,Jar Mula Kalay Naraza Road,Bajaur,Poor,Gravel,Access Road,Very Good,Asphaltic,Access Road,"MULTILINESTRING ((738975.433 3851825.310, 7389..."


## Populate speeds for the roads based on their type, status, pavement, and location
First align the data model with the existing roads data model

In [34]:
# create a temporary copy to iterate over
prop_rds = prop_raw.copy()

In [35]:
prop_rds.dtypes

SN                       int32
Z_Mean                 float64
Avg_Slope              float64
Road_Name               object
District                object
Current_Road_Cond       object
Current_Surface         object
Current_Road_Class      object
Upgrade_Road_Cond       object
Upgrade_Surface         object
Upgrade_Road_Class      object
geometry              geometry
dtype: object

Terrain

In [36]:
# narrowing range to 8/16 based on satellite imagery observation
prop_rds['Terrain'] = pd.cut(prop_rds['Avg_Slope'], [-np.inf, 8, 16, np.inf], 
                           labels = ['Plains', 'Hills', 'Mountains']) # change labels here

Conditions -- leftover

In [37]:
prop_rds.Current_Road_Class = prop_rds.Current_Road_Class.replace('Village road','Collector Road')
prop_rds.Current_Road_Class = prop_rds.Current_Road_Class.fillna('Collector Road')

In [38]:
road_condition_filter = [prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('Collector'),
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('Collector'),
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('Collector')]

In [39]:
prop_rds.columns

Index(['SN', 'Z_Mean', 'Avg_Slope', 'Road_Name', 'District',
       'Current_Road_Cond', 'Current_Surface', 'Current_Road_Class',
       'Upgrade_Road_Cond', 'Upgrade_Surface', 'Upgrade_Road_Class',
       'geometry', 'Terrain'],
      dtype='object')

### Calculate Speeds

Base speeds from road type and terrain

In [40]:
terrain_current_class_filter = [prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Current_Road_Class'].str.contains('Collector'),
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Current_Road_Class'].str.contains('Collector'),
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Current_Road_Class'].str.contains('Collector')]
                        
terrain_upgrade_class_filter = [prop_rds['Terrain'].str.contains('Plains') & prop_rds['Upgrade_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Upgrade_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Upgrade_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Plains') & prop_rds['Upgrade_Road_Class'].str.contains('Collector'),
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Upgrade_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Upgrade_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Upgrade_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Hills') & prop_rds['Upgrade_Road_Class'].str.contains('Collector'),
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Upgrade_Road_Class'].str.contains('Provincial'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Upgrade_Road_Class'].str.contains('District'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Upgrade_Road_Class'].str.contains('Access'),    
    prop_rds['Terrain'].str.contains('Mountains') & prop_rds['Upgrade_Road_Class'].str.contains('Collector')]
                        

In [41]:
# Corresponding list of speeds, with line breaks by terrain type for editing and readability
speeds_lst = [80,50,40,20,\
              60,40,30,15,\
              40,30,20,10]

In [42]:
# initial dry speeds to calculate msn speeds
prop_rds['current_base_speed'] = np.select(terrain_current_class_filter,speeds_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road
prop_rds['upgrade_base_speed'] = np.select(terrain_upgrade_class_filter,speeds_lst,default=0.5)

**Modify based on condition + season**

Define a filter based on surface + condition that we'll use to assign speed modifiers in each season. Note that we add an `Asphaltic` surface type unique to proposed roads because this was specified by the client. We assume it's very good and has minimal speed reductions even under difficult conditions

In [43]:
current_class_filter = [prop_rds['Current_Surface'].str.contains('Earthen') & prop_rds['Current_Road_Cond'].str.contains('Good'),    
                        prop_rds['Current_Surface'].str.contains('Earthen') & prop_rds['Current_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Current_Surface'].str.contains('Earthen') & prop_rds['Current_Road_Cond'].str.contains('Poor'),
                        prop_rds['Current_Surface'].str.contains('Gravel') & prop_rds['Current_Road_Cond'].str.contains('Good'),    
                        prop_rds['Current_Surface'].str.contains('Gravel') & prop_rds['Current_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Current_Surface'].str.contains('Gravel') & prop_rds['Current_Road_Cond'].str.contains('Poor'),
                        prop_rds['Current_Surface'].str.contains('Paved') & prop_rds['Current_Road_Cond'].str.contains('Good'),    
                        prop_rds['Current_Surface'].str.contains('Paved') & prop_rds['Current_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Current_Surface'].str.contains('Paved') & prop_rds['Current_Road_Cond'].str.contains('Poor'),
                        prop_rds['Current_Surface'].str.contains('Asphaltic') & prop_rds['Current_Road_Cond'].str.contains('Good'),    
                        prop_rds['Current_Surface'].str.contains('Asphaltic') & prop_rds['Current_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Current_Surface'].str.contains('Asphaltic') & prop_rds['Current_Road_Cond'].str.contains('Poor')]

upgrade_class_filter = [prop_rds['Upgrade_Surface'].str.contains('Earthen') & prop_rds['Upgrade_Road_Cond'].str.contains('Good'),    
                        prop_rds['Upgrade_Surface'].str.contains('Earthen') & prop_rds['Upgrade_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Upgrade_Surface'].str.contains('Earthen') & prop_rds['Upgrade_Road_Cond'].str.contains('Poor'),
                        prop_rds['Upgrade_Surface'].str.contains('Gravel') & prop_rds['Upgrade_Road_Cond'].str.contains('Good'),    
                        prop_rds['Upgrade_Surface'].str.contains('Gravel') & prop_rds['Upgrade_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Upgrade_Surface'].str.contains('Gravel') & prop_rds['Upgrade_Road_Cond'].str.contains('Poor'),
                        prop_rds['Upgrade_Surface'].str.contains('Paved') & prop_rds['Upgrade_Road_Cond'].str.contains('Good'),    
                        prop_rds['Upgrade_Surface'].str.contains('Paved') & prop_rds['Upgrade_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Upgrade_Surface'].str.contains('Paved') & prop_rds['Upgrade_Road_Cond'].str.contains('Poor'),
                        prop_rds['Upgrade_Surface'].str.contains('Asphaltic') & prop_rds['Upgrade_Road_Cond'].str.contains('Good'),    
                        prop_rds['Upgrade_Surface'].str.contains('Asphaltic') & prop_rds['Upgrade_Road_Cond'].str.contains('Fair'),    
                        prop_rds['Upgrade_Surface'].str.contains('Asphaltic') & prop_rds['Upgrade_Road_Cond'].str.contains('Poor')]

Dry season

In [44]:
# Corresponding list of dry season speed modifiers, with line breaks by terrain type for editing and readability

dry_mods_lst = [ 0.6, 0.5, 0.4,\
                 0.75, 0.6, 0.5,\
                 1, 0.8, 0.6, 
                 1, 1, 0.8]

prop_rds['current_dry_mod'] = np.select(current_class_filter,dry_mods_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road
prop_rds['upgrade_dry_mod'] = np.select(upgrade_class_filter,dry_mods_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road

prop_rds['current_dry_speed'] = prop_rds.current_dry_mod * prop_rds.current_base_speed
prop_rds['upgrade_dry_speed'] = prop_rds.upgrade_dry_mod * prop_rds.upgrade_base_speed

In [45]:
# check that the last values are not null (indicating all speeds were successfully calculated)
prop_rds.sort_values('current_dry_speed').current_dry_speed.tail()

26    30.0
51    30.0
52    30.0
53    30.0
23    30.0
Name: current_dry_speed, dtype: float64

In [46]:
# check that the last values are not null (indicating all speeds were successfully calculated)
prop_rds.sort_values('upgrade_dry_speed').upgrade_dry_speed.tail()

32    50.0
36    50.0
37    50.0
39    50.0
58    50.0
Name: upgrade_dry_speed, dtype: float64

Monsoon

In [47]:
# Corresponding list of monsoon speed modifiers, with line breaks by terrain type for editing and readability

msn_mods_lst = [ 0.4, 0.3, 0.2,\
                 0.7, 0.5, 0.4,\
                 0.9, 0.75, 0.5,
                 1, 0.9, 0.75]

prop_rds['current_msn_mod'] = np.select(current_class_filter,msn_mods_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road
prop_rds['upgrade_msn_mod'] = np.select(upgrade_class_filter,msn_mods_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road

prop_rds['current_msn_speed'] = prop_rds.current_msn_mod * prop_rds.current_base_speed
prop_rds['upgrade_msn_speed'] = prop_rds.upgrade_msn_mod * prop_rds.upgrade_base_speed

Winter

In [48]:
# Corresponding list of monsoon speed modifiers, with line breaks by terrain type for editing and readability

winter_mods_lst = [ 0.4, 0.3, 0.2,\
                    0.7, 0.5, 0.4,\
                    1, 0.75, 0.5,
                    1, 0.9, 0.75]

prop_rds['current_winter_mod'] = np.select(current_class_filter,winter_mods_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road
prop_rds['upgrade_winter_mod'] = np.select(upgrade_class_filter,winter_mods_lst,default=0.5) # very low default as path speeds will be re-calculated separately using terrain down the road

In [49]:
# Define which winter mods to apply to which road based on their terrain

winter_cutoff_lst = [prop_rds['Terrain'] == 'Plains', \
                     prop_rds['Terrain'] == 'Hills', \
                     prop_rds['Terrain'] == 'Mountains']

In [50]:
winter_current_mod_revised =  [ prop_rds['current_dry_speed'], \
                       (prop_rds['current_dry_speed'] * (prop_rds['current_winter_mod'] + (1 - prop_rds['current_winter_mod']) /2) ), \
                       (prop_rds['current_base_speed'] * prop_rds['current_winter_mod'])]

winter_upgrade_mod_revised =  [ prop_rds['upgrade_dry_speed'], \
                       (prop_rds['upgrade_dry_speed'] * (prop_rds['upgrade_winter_mod'] + (1 - prop_rds['upgrade_winter_mod']) /2) ), \
                       (prop_rds['upgrade_base_speed'] * prop_rds['upgrade_winter_mod'])]

In [51]:
prop_rds['current_winter_speed'] = np.select(winter_cutoff_lst, winter_current_mod_revised, default=prop_rds['current_dry_speed'])
prop_rds['upgrade_winter_speed'] = np.select(winter_cutoff_lst, winter_upgrade_mod_revised, default=prop_rds['upgrade_dry_speed'])

In [52]:
# check that the last values are not null (indicating all speeds were successfully calculated)
prop_rds.sort_values('current_winter_speed').current_winter_speed.tail()

53    30.0
25    30.0
26    30.0
22    30.0
23    30.0
Name: current_winter_speed, dtype: float64

In [53]:
# check that the last values are not null (indicating all speeds were successfully calculated)
prop_rds.sort_values('upgrade_dry_speed').upgrade_dry_speed.tail()

32    50.0
36    50.0
37    50.0
39    50.0
58    50.0
Name: upgrade_dry_speed, dtype: float64

Check

In [54]:
test = prop_rds[['SN','Current_Road_Cond','Current_Surface','Current_Road_Class',\
              'Upgrade_Road_Cond','Upgrade_Surface','Upgrade_Road_Class',\
              'Terrain',
              'current_dry_mod','current_msn_mod','current_winter_mod',
              'current_base_speed','current_dry_speed','current_msn_speed','current_winter_speed',\
              'upgrade_base_speed','upgrade_dry_speed','upgrade_msn_speed','upgrade_winter_speed']]

In [55]:
# if all results are 0 or positive this means dry speeds are >= than winter speeds, as they should be
test['spd_dif'] = test['current_dry_speed'] - test['current_winter_speed']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test['spd_dif'] = test['current_dry_speed'] - test['current_winter_speed']


### Calculate the distance to the Nth nearest destination of each type from the end point of the line

This is necessary to clip the later friction and access surface outputs -- otherwise they will be enormous and difficult to work with

In [56]:
# adapted from: https://automating-gis-processes.github.io/site/notebooks/L3/nearest-neighbor-faster.html
# more info here https://towardsdatascience.com/using-scikit-learns-binary-trees-to-efficiently-find-latitude-and-longitude-neighbors-909979bd929b

from sklearn.neighbors import BallTree

def get_nth_nearest(src_points, candidates, k_neighbors=5):
    """Find nearest K neighbors for all source points from a set of candidate points, return Kth neighbor. If smaller than K, return nearest."""

    # Create tree from the candidate points
    tree = BallTree(candidates, leaf_size=15, metric='haversine')

    # Find closest points and distances
    
    if len(candidates) < k_neighbors:
        distances, indices = tree.query(src_points, k=1)
    else:
        distances, indices = tree.query(src_points, k=k_neighbors)

    # Transpose to get distances and indices into arrays
    distances = distances.transpose()
    indices = indices.transpose()

    # Get closest indices and distances (i.e. array at index 0)
    # note: for the second closest points, you would take index 1, etc.
    # Robert -- setting to the 5th nearest feature as an arbitrary cutoff
    
    if len(candidates) < k_neighbors:
        closest = indices[0]
        closest_dist = distances[0]
    else:
        closest = indices[(k_neighbors - 1)]
        closest_dist = distances[(k_neighbors - 1)]

    # Return indices and distances
    return (closest, closest_dist)

def nearest_neighbors(left_gdf, right_gdf, return_dist=False, k_neighbors=5):
    """
    For each point in left_gdf, find closest point in right GeoDataFrame and return them.

    NOTICE: Assumes that the input Points are in WGS84 projection (lat/lon).
    """

    left_geom_col = left_gdf.geometry.name
    right_geom_col = right_gdf.geometry.name

    # Ensure that index in right gdf is formed of sequential numbers
    right = right_gdf.copy().reset_index(drop=True)

    # Parse coordinates from points and insert them into a numpy array as RADIANS
    left_radians = np.array(left_gdf[left_geom_col].apply(lambda geom: (geom.y * np.pi / 180, geom.x * np.pi / 180)).to_list())
    right_radians = np.array(right[right_geom_col].apply(lambda geom: (geom.y * np.pi / 180, geom.x * np.pi / 180)).to_list())

    # Find the nearest points
    # -----------------------
    # closest ==> index in right_gdf that corresponds to the closest point
    # dist ==> distance between the nearest neighbors (in meters)

    closest, dist = get_nth_nearest(src_points=left_radians, candidates=right_radians, k_neighbors=k_neighbors)

    # Return points from right GeoDataFrame that are closest to points in left GeoDataFrame
    closest_points = right.loc[closest]

    # Ensure that the index corresponds the one in left_gdf
    closest_points = closest_points.reset_index(drop=True)

    # Add distance if requested
    if return_dist:
        # Convert to meters from radians
        earth_radius = 6371000  # meters
        closest_points['distance'] = dist * earth_radius

    return closest_points

# from https://gis.stackexchange.com/questions/378806/multi-part-geometries-do-not-provide-a-coordinate-sequence-error-when-extracti
# only works on LineStrings, not MultiLineStrings

def add_start_end_nodes_to_gdf(gdf):
    
# This function adds start and end nodes to geodataframe

    gdf['start_node'] = None
    gdf['end_node'] = None

    for index, row in gdf.iterrows():
        coords = [(coords) for coords in list(row['geometry'].coords)]
        start_node, end_node = [coords[i] for i in (0, -1)]
        gdf.at[index, 'start_node'] = start_node
        gdf.at[index, 'end_node'] = end_node


Set up a dict of destinations we can loop over

In [58]:
dest_fils = {
    re.findall(r'KP_(.*?).gpkg',fil)[0]: gpd.clip(gpd.read_file(os.path.join(data_dir,dest_dir,'reference',fil))\
                                                      .reset_index(drop=True)\
                                                      .set_crs(4326).to_crs(dest_crs),\
                                                  kp) \
    for fil in os.listdir(os.path.join(data_dir,dest_dir,'reference')) if fil.endswith(".gpkg")
}

In [59]:
dest_fils.keys()

dict_keys(['District_HQs', 'education_allboys', 'education_allgirls', 'education_boys', 'education_boys_high', 'education_boys_middle', 'education_boys_primary', 'education_girls', 'education_girls_high', 'education_girls_middle', 'education_girls_primary', 'education_high', 'education_middle', 'education_primary', 'health_family', 'health_pharmacies', 'health_primary', 'health_private', 'health_public', 'health_secondary', 'health_tertiary', 'markets_All', 'markets_Central', 'Provincial_HQ'])

Calculate distance from each line segment to the Nth destination point (or nearest if < N features)

In [60]:
# sort notebook

prop_rds = prop_rds.sort_values('SN').reset_index(drop=True)

# create a temporary df we'll use to perform calculations

prop_rds_pts = prop_rds.copy()

# create single LineString objects so we can calculate their endpoints and measure from those
# NOTE this is hacky and problematic if the multilinestrings are really long
# TO DO: find the center point of the Linestring instead

prop_rds_pts.geometry = pd.Series([i[0]  if i.type == 'MultiLineString' else i for i in prop_rds_pts.geometry])
prop_rds_pts = prop_rds_pts.set_geometry('geometry')

# calculate their start / end points

add_start_end_nodes_to_gdf(prop_rds_pts)

# make the end point the geometry

prop_rds_pts['pt_geometry'] = pd.Series([Point(i) for i in prop_rds_pts.end_node])
prop_rds_pts = prop_rds_pts.set_geometry('pt_geometry')

# calculate the distance to the Nth feature on the prop_rds GDF using this pts GDF. 
# N =5 for most features, 2 for very dispersed features like markets and HQs. You may wish to vary these parameters based on your context

for key, gdf in dest_fils.items():
    
    if key in ['District_HQs','markets_All','markets_Central']:
    # pad the distance calculated by the road's length and an extra half kilometer to ensure no part of the road is ever excluded
    
        prop_rds[f'{key}_nth_nearest_distance'] = (nearest_neighbors(prop_rds_pts.to_crs(4326),\
                                                                     gdf.to_crs(4326),\
                                                                     return_dist=True,
                                                                      k_neighbors=2)['distance'] + prop_rds_pts['geometry'].length + 500).astype(int)
    else: 
    # pad the distance calculated by the road's length and an extra half kilometer to ensure no part of the road is ever excluded
    
        prop_rds[f'{key}_nth_nearest_distance'] = (nearest_neighbors(prop_rds_pts.to_crs(4326),\
                                                                     gdf.to_crs(4326),\
                                                                     return_dist=True,
                                                                      k_neighbors=5)['distance'] + prop_rds_pts['geometry'].length + 500).astype(int)

# add start and end nodes in case they're useful later

prop_rds.start_node = prop_rds_pts.start_node.astype(str)
prop_rds.end_node = prop_rds_pts.end_node.astype(str)


  super(GeoDataFrame, self).__setattr__(attr, val)


In [61]:
prop_rds.columns

Index(['SN', 'Z_Mean', 'Avg_Slope', 'Road_Name', 'District',
       'Current_Road_Cond', 'Current_Surface', 'Current_Road_Class',
       'Upgrade_Road_Cond', 'Upgrade_Surface', 'Upgrade_Road_Class',
       'geometry', 'Terrain', 'current_base_speed', 'upgrade_base_speed',
       'current_dry_mod', 'upgrade_dry_mod', 'current_dry_speed',
       'upgrade_dry_speed', 'current_msn_mod', 'upgrade_msn_mod',
       'current_msn_speed', 'upgrade_msn_speed', 'current_winter_mod',
       'upgrade_winter_mod', 'current_winter_speed', 'upgrade_winter_speed',
       'District_HQs_nth_nearest_distance',
       'education_allboys_nth_nearest_distance',
       'education_allgirls_nth_nearest_distance',
       'education_boys_nth_nearest_distance',
       'education_boys_high_nth_nearest_distance',
       'education_boys_middle_nth_nearest_distance',
       'education_boys_primary_nth_nearest_distance',
       'education_girls_nth_nearest_distance',
       'education_girls_high_nth_nearest_distance',
 

In [63]:
prop_rds[prop_rds['SN'] == 60]['health_secondary_nth_nearest_distance']

38    46582
Name: health_secondary_nth_nearest_distance, dtype: int32

#### Export

In [66]:
prop_rds.Terrain = prop_rds.Terrain.astype(str)

In [67]:
# prop_rds.to_file(os.path.join(data_dir,rd_dir,r'Proposed_final//Proposed_roads_processed_211021_second_dist.gpkg'),driver="GPKG")

In [72]:
prop_rds.to_file(os.path.join(data_dir,rd_dir,f'Proposed_final//Proposed_roads_processed_{today}_test.gpkg'),driver="GPKG")

#### Manually merge fifth and second nearest distance columns

Hacky solution to the problem that the 5th nearest distance is way too far for a few features -- Distritc HQs and all types of markets. This pointlessly extends processing time. TO DO make this flexibility part of the core code above.

In [142]:
pr_second = gpd.read_file(os.path.join(data_dir,rd_dir,r'Proposed_final//Proposed_roads_processed_211021_second_dist.gpkg'),driver="GPKG")
pr_fifth = gpd.read_file(os.path.join(data_dir,rd_dir,r'Proposed_final//Proposed_roads_processed_211014.gpkg'),driver="GPKG")

In [126]:
dif_cols = ['District_HQs_fifth_nearest_distance','markets_All_fifth_nearest_distance','markets_Central_fifth_nearest_distance']

In [139]:
# pr_second.columns

In [140]:
pr_fifth.drop(dif_cols,axis=1,inplace=True)

In [134]:
pr_final = pd.merge(pr_fifth,pr_second[['SN','District_HQs_fifth_nearest_distance','markets_All_fifth_nearest_distance','markets_Central_fifth_nearest_distance']],how='left',on='SN')

In [141]:
pr_final.to_file(os.path.join(data_dir,rd_dir,r'Proposed_final//Proposed_roads_processed_211021.gpkg'),driver="GPKG")