In [1]:
#Load libraries

import pandas as pd
import numpy as np
import sys
import os
import io
from configobj import ConfigObj

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
from functions.roads import download_roads, roads_to_points
from functions.utils import database

In [33]:
config_path = "/home/jovyan/shared/rural_planner_refactoring/config_files/config_pe"

parser = ConfigObj(config_path)

sql_path = parser['sql_path']
country_folder = parser['country_folder']

road_types = dict(parser['path_finder_roads_params']['road_types'])
output_path = parser['path_finder_roads_params']['output_path']
output_filename = parser['path_finder_roads_params']['output_filename']
radius = int(parser['path_finder_roads_params']['max_road_radius'])
distance = int(parser['path_finder_roads_params']['points_road_distance'])
threshold_distance = int(parser['path_finder_roads_params']['threshold_distance'])
penalty = int(parser['path_finder_roads_params']['penalty'])

schema = parser['path_finder_roads_params']['schema']
roads_table_intermediate = parser['path_finder_roads_params']['roads_table']
roads_table = parser['path_finder_roads_params']['roads_table_dump']
table_roads_points = parser['path_finder_roads_params']['roads_points_table']
table_intersections = parser['path_finder_roads_params']['table_intersections']
table_clusters = parser['clustering_params']['output_table']
table_towers = parser['transport_by_tower_params']['table_infrastructure']
auxiliary_table = parser['path_finder_roads_params']['auxiliary_table']
table_cluster_points = parser['path_finder_roads_params']['table_cluster_points']
table_nodes_roads = parser['path_finder_roads_params']['table_nodes_roads']
table_clusters_links = parser['path_finder_roads_params']['table_clusters_links']
table_edges_roads = parser['path_finder_roads_params']['table_edges_roads']
table_node_replacement_map = parser['path_finder_roads_params']['table_node_replacement_map']
table_cluster_node_map = parser['path_finder_roads_params']['table_cluster_node_map']

In [3]:
## Download national and departamental roads if not downloded, unzip, read SHP and upload to DB
roads = pd.DataFrame()
for road_type in (road_types.keys()):
    roads_n = download_roads(parser, road_type)
    roads = roads.append(roads_n)

roads.reset_index(inplace=True)
roads['road_id'] = roads.index

with database(parser) as db:
    roads.to_sql(roads_table_intermediate, con=db, if_exists = 'replace', schema = schema, index = False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


AttributeError: 'GeoDataFrame' object has no attribute 'replace_index'

In [15]:
#Create the road table turning multilinestring to linestring.
#This is needed to have a homogeneuous data type throughout all roads data set.
#It requires having the roads data set loaded into the database

query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_geom.sql'
query_path_dump = sql_path + '/' + country_folder + '/' + 'path_finder_roads_dump_lines.sql'

with open(query_path) as file, open(query_path_dump) as file_dump, database(parser) as db:
    query = file.read()
    query_formatted = query.format(schema = schema, table_roads = roads_table_intermediate)
    db.execute(query_formatted)
    
    query = file_dump.read()
    query_formatted = query.format(schema = schema,
                                   table_roads = roads_table_intermediate,
                                   table_roads_linestring = roads_table)
    db.execute(query_formatted)

In [None]:
#Create a table with points instead of linestring separated by 1km. Remove duplicates and upload to DB (creating geometry indexes)
#Takes less than 1.5 hours
roads_points = roads_to_points(parser, distance)
roads_points = roads_points.drop_duplicates()

query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_points_idx.sql'

with open(query_path) as file, database(parser) as db:
    roads_points.to_sql(table_roads_points, con=engine, if_exists = 'replace', schema = schema, index = False)
    query = file.read()
    query_formatted = query.format(schema = schema,
                                   table_roads_points = table_roads_points)
    db.execute(query_formatted)

In [None]:
#Create the table with the intersections between roads

query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_intersections.sql'

with open(query_path) as file, database(parser) as db:
    query = file.read()
    query_formatted = query.format(schema = schema,
                                   table_roads_points = table_roads_points,
                                   table_intersections = table_intersections)
    db.execute(query_formatted)

In [None]:
#Create auxiliary table with all points to connect by fiber or terminal nodes (all cluster centroids + towers)
#Assign to them nearest road point 
#Create road node table based on near clusters' weight, add intersections and borders
#Create final node table output

query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_clusters.sql'
query_path_assignation = sql_path + '/' + country_folder + '/' + 'path_finder_roads_nearest_cluster.sql'
query_path_roads = sql_path + '/' + country_folder + '/' + 'path_finder_roads_cluster_nodes.sql'
query_path_borders = sql_path + '/' + country_folder + '/' + 'path_finder_roads_borders.sql'
query_path_final = sql_path + '/' + country_folder + '/' + 'path_finder_roads_final_node_table.sql'

with open(query_path) as file, open(query_path_assignation) as file_assignation, open(query_path_roads) as file_roads, open(query_path_borders) as file_borders, open(query_path_final) as file_final, database(parser) as db:
    query = file.read()
    query_formatted = query.format(schema = schema,
                                   table_clusters = table_clusters,
                                   auxiliary_table = auxiliary_table,
                                   table_towers = table_towers)
    db.execute(query_formatted)
    
    query = file_assignation.read()
    query_formatted = query.format(schema = schema,
                                   table_cluster_points = table_cluster_points,
                                   table_roads_points = table_roads_points,
                                   auxiliary_table = auxiliary_table,
                                   radius = radius)
    db.execute(query_formatted)
    
    query = file_roads.read()
    
    query_formatted = query.format(schema = schema,
                                   table_cluster_points = table_cluster_points,
                                   table_roads_points = table_roads_points,
                                   table_clusters_links = table_clusters_links,
                                   auxiliary_table = auxiliary_table,
                                   penalty = penalty,
                                   threshold_distance = threshold_distance,
                                   table_nodes_roads = table_nodes_roads,
                                   table_intersections = table_intersections)
    db.execute(query_formatted)
    
    query = file_final.read()
    
    query_formatted = query.format(schema = schema,
                                   table_cluster_points = table_cluster_points,
                                   table_intersections = table_intersections,
                                   table_roads_points = table_roads_points,
                                   table_clusters_links = table_clusters_links,
                                   table_nodes_roads = table_nodes_roads)
    db.execute(query_formatted)

In [None]:
#Import node table

query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_import_nodes.sql'

with open(query_path) as file, database(parser) as db:
    query = file.read()
    query_formatted = query.format(schema = schema,
                                   table_nodes_roads = table_nodes_roads)
    df_nodes = pd.read_sql_query(query_formatted, db)
    
# Create edge table by linking each node to the immidiately precedent and following nodes within the same stretch (one edge per direction)
df_edges_reverse = pd.DataFrame(columns = ['node_1', 'node_2', 'weight'])

df_edges.node_1 = df_nodes.node_id
df_edges.node_2 = df_nodes.groupby('stretch_id').shift(-1)['node_id']
df_edges.weight =  abs(df_nodes.groupby('stretch_id').shift(-1)['division'] - df_nodes['division'])*df_nodes['stretch_length']/1000

df_edges_reverse.node_1 = df_nodes.groupby('stretch_id').shift(-1)['node_id']
df_edges_reverse.node_2 = df_nodes.node_id
df_edges_reverse.weight =  abs(df_nodes['division'] - df_nodes.groupby('stretch_id').shift(-1)['division'] )*df_nodes['stretch_length']/1000

df_edges = df_edges.append(df_edges_reverse)
df_edges = df_edges.ix[(~np.isnan(df_edges['node_2'])& ~np.isnan(df_edges['node_1']))]

#Import intersections and append to edges table

query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_import_intersections.sql'

with open(query_path) as file, database(parser) as db:
    query = file.read()
    query_formatted = query.format(schema = schema,
                                   table_intersections = table_intersections,
                                   table_nodes_roads = table_nodes_roads)
    df_intersections = pd.read_sql_query(query_formatted, db)

df_edges = df_edges.append(df_intersections)

df_edges['node_1'] = df_edges['node_1'].astype('int64')
df_edges['node_2'] = df_edges['node_2'].astype('int64')

In [None]:
#Remove duplicate edges and merge them in one single edge (we keep the one with maximum weight)

##Code to simplify intersections
#Select half of the edges with a weight smaller than a threshold (this threshold has to be the same as the used in the intersection definition/2)
#Take into account only one direction
threshold = 0.5/2/10

#We create a mapping between nodes that we want to unify (node_1 --> node_2). To clean complex dependencies (1-->2, 2-->3, 3-->7 needs to become (1-->2, 1-->3, 1-->7))
df_edges_zero_weight = df_edges.loc[(abs(df_edges['weight']) <= threshold) & (df_edges['node_1'] < df_edges['node_2'])].reset_index().drop('index', axis = 1)

df_edges_zero_weight.drop(columns='weight', inplace=True)
d1 = OrderedDict({row['node_2']:row['node_1'] for _,row in df_edges_zero_weight.iterrows()})

while any(x in list(d1.values()) for x in list(d1.keys())):
    for k, v in (d1.items()):
        if (v in d1.keys()):
            d1[k] = d1[v]
            
df_edges.node_1 = df_edges.node_1.map(d1).fillna(df_edges['node_1'])
df_edges.node_2 = df_edges.node_2.map(d1).fillna(df_edges['node_2'])

#Remove the edges of weight zero and remove duplicates, keeping the ones with higher weight
df_edges = df_edges.ix[abs(df_edges['weight']) >= threshold].sort_values(by = ['node_1', 'weight'], ascending = [True, False]).drop_duplicates(['node_1', 'node_2'])

#Replace nodes that have been altered and update the weight of replacement nodes with the sum of all the nodes that it is replacing
df_nodes_update = df_nodes.copy(deep=True)
df_nodes_update.node_id = df_nodes_update.node_id.map(d1).fillna(df_nodes_update['node_id'])
df_nodes_update = df_nodes_update.sort_values('node_id').groupby('node_id').agg({'stretch_id': lambda x: x.iloc[0],
                                                               'division': lambda x: x.iloc[0],
                                                               'score': lambda x: x.iloc[0],
                                                               'cluster_weight':sum,
                                                               'node_weight':sum,
                                                               'stretch_length': lambda x: x.iloc[0],
                                                                'geom': lambda x: x.iloc[0]}).reset_index('node_id')

# Upload to database and update nodes in node cluster map table
query_path = sql_path + '/' + country_folder + '/' + 'path_finder_roads_update_node_map.sql'

with open(query_path) as file, database(parser) as db:
    
    df_nodes_update.to_sql(table_nodes_roads, con=db, if_exists = 'replace', schema = schema, index = False)
    df_edges_zero_weight.to_sql(table_node_replacement_map, con=db, if_exists = 'replace', schema = schema, index = False)
    
    query = file.read()
    query_formatted = query.format(schema = schema,
                                   table_node_replacement_map = table_node_replacement_map,
                                   table_cluster_node_map = table_cluster_node_map,
                                   auxiliary_table = auxiliary_table,
                                   table_nodes_roads = table_nodes_roads)
    db.execute(query_formatted)     

In [None]:
#Export edge table
with database(parser) as db:
    df_edges.to_sql(table_edges_roads, con=engine, if_exists = 'replace', schema = schema, index = False)