# Graph Data

This notebook organizes the input information and generates the data needed to create a graph for a territory.

#### The input information contains:
1. The first part is two shapefiles that contain the links and nodes of the street network for 79 territories of Colombia. This information was downloaded from the OpenStreetMap database created by Geoff Boeing.
2. The second part is a shapefile that contains all the blocks of Colombia, which are going to be the origins of the graph.
3. The last part is a shapefile that contains all the equipments in Colombia, which are going to be the destinations of the graph.

<center><img src="img/Input_information_sample.png" alt="files" style="width:400px"></center>

**Note:** The script changes the coordinate system of all the files to epsg:32618 for internal processing, but at the end all the files are exported in epsg:4326.

# Import of libraries

In [None]:
import osmnx as ox
import networkx as nx
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely as sh
import os

import utils.analysis as an

# Program

In [4]:
pd.set_option('future.no_silent_downcasting', True)

In [None]:
# --- Inputs shared by every territory -------------------------------------

# Territory names: the file has no header row, so the names end up in column 0
l_names = pd.read_csv('../data/input/table/mpios_names.txt', header=None)

# Equipments (the future graph destinations), reprojected to the working CRS
equipments = gpd.read_file(
    '../data/output/shape/colombia_equipments_ID/colombia_equipments_ID.shp'
).to_crs('epsg:32618')

# CSIMBOL codes to keep, grouped by equipment category
# (the trailing comment is the 4-digit prefix shared by each group)
health_list = ['021001', '021002', '021003', '021004', '021011']              # 0210
sport_list = ['020601', '020602', '020603', '020604', '020605', '020606']     # 0206
education_list = ['020903', '020905']                                         # 0209
financial_list = ['020501', '020502']                                         # 0205
cultural_list = ['020701', '020702', '020706', '020709', '020710']            # 0207
parks_list = ['021101', '021102', '021104', '021105']                         # 0211
codes_list = [
    *health_list,
    *sport_list,
    *education_list,
    *financial_list,
    *cultural_list,
    *parks_list,
]

# Keep only the equipments whose code is in codes_list
is_selected = equipments['CSIMBOL'].isin(codes_list)
equipments = equipments.loc[is_selected].reset_index(drop=True)

# Urban perimeters and the blocks of the whole country, both in the working CRS
perimeters = gpd.read_file(
    '../data/input/shape/Colombia_urban_perimeters_name_pop/Colombia_urban_perimeters_name_pop.shp'
).to_crs('epsg:32618')
all_blocks = gpd.read_file(
    '../data/input/shape/MANZANA/MGN_URB_MANZANA.shp'
).to_crs('epsg:32618')

# Population table: only the block code and its population are needed
DANE_data = pd.read_csv('../data/input/table/DANE_2018_personas_manz.txt', low_memory=False)
pop = DANE_data[['MANZ_CCNCT', 'poblacion']]

# Speed table (';'-separated), stored with its rows in reverse order
speed_data = (
    pd.read_csv('../data/input/table/slope_velocity_data.csv', sep=';')
    .sort_index(ascending=False)
    .reset_index(drop=True)
)

def _buffer_mean_elevation(points, nodes, nearest, radius=100):
    """Return one elevation value per geometry in ``points``.

    For every point the value is the mean ``elevation`` of the network
    ``nodes`` that fall within ``radius`` CRS units of the point (metres in
    epsg:32618). When no node lies inside the buffer, the ``elevation`` stored
    in ``nearest`` for the row whose ``geometry_from`` centroid equals the
    point is used instead.

    Parameters
    ----------
    points : GeoDataFrame whose active geometry holds the points to evaluate.
    nodes : GeoDataFrame of network nodes with an ``elevation`` column.
    nearest : table returned by ``an.ckdnearest`` for ``points``; it must have
        the columns ``geometry_from`` and ``elevation``.
    radius : buffer radius in CRS units.

    Returns
    -------
    Series aligned with ``points`` holding the elevation of each point.
    """
    def _elevation(geom):
        # Evaluate the spatial predicate once per point and reuse the mask
        inside = nodes.within(geom.buffer(radius))
        if inside.any():
            return nodes.loc[inside, 'elevation'].mean()
        # Fallback: elevation of the nearest node found for this point
        same_point = nearest['geometry_from'].centroid == geom
        return nearest.loc[same_point, 'elevation'].values[0]

    return points.geometry.apply(_elevation)


# l_names is a single-column DataFrame (read with header=None). Iterating the
# DataFrame itself yields its column labels, so iterate the first column to
# get the territory names.
for m in l_names.iloc[:, 0]:

    # Street network of the territory, reprojected to the working CRS
    edges = gpd.read_file(f'../data/input/shape/edges/{m}_edges.shp')
    edges = edges.to_crs('epsg:32618')
    nodes = gpd.read_file(f'../data/input/shape/nodes/{m}_nodes.shp')
    nodes['osmid'] = nodes['osmid'].astype(int).astype(str)
    nodes = nodes.rename(columns={'osmid':'ID'})
    nodes = nodes.to_crs('epsg:32618')

    ## Get perimeter from the network
    # Attach to each edge the perimeter rows that it intersects
    join = edges.sjoin(perimeters, how='inner', predicate='intersects')
    # Merge every perimeter touched by the network into a single geometry
    filtered_perimeters = perimeters[perimeters['ID_mpio'].isin(join['ID_mpio'].unique())]
    perimeter = gpd.GeoDataFrame(geometry=[filtered_perimeters.union_all()], crs=filtered_perimeters.crs)
    # Give a name and an ID to the perimeter; both are parsed from the
    # territory name, split on '-' (first token = name, last token = ID)
    perimeter['ID_p'] = m.split('-')[-1]
    perimeter['Name'] = m.split('-')[0]
    # Keep the selection: the original expression discarded its result
    perimeter = perimeter[['ID_p', 'Name', 'geometry']]

    ## Get blocks by perimeter
    join = all_blocks.sjoin(perimeter,how="inner",predicate='intersects')
    blocks = join.drop(['index_right','ID_p','Name'],axis=1)

    ## Get destinations by perimeter
    join = equipments.sjoin(perimeter,how="inner",predicate='intersects')
    destinations = join.drop(['index_right','ID_p','Name'],axis=1)

    ## Create blocks' centroids
    # Dissolve by block code so that each code yields exactly one centroid
    blocksDissolved = blocks.dissolve('MANZ_CCNCT')
    centroids = blocksDissolved.centroid
    # Convert the GeoSeries into a GeoDataFrame; the block code becomes 'ID'
    origins = gpd.GeoDataFrame(geometry=gpd.GeoSeries(centroids))
    origins.index.name = 'ID'
    origins = origins.reset_index()

    ## Find the nearest network node to each origin
    nearest_origin = an.ckdnearest(origins,nodes)

    ## Create a link that connects each origin with its nearest node
    nearest_origin = nearest_origin.rename(columns={'elevation_to':'elevation'})
    # NOTE(review): the meaning of the positional False is defined in
    # utils.analysis.create_links - confirm there
    origins_links = an.create_links(nearest_origin, False, origins.crs)

    ## Get origins elevation by buffer
    origins['elevation'] = _buffer_mean_elevation(origins, nodes, nearest_origin)

    ## Get origins links grade
    origins_links = an.get_links_grade(origins_links, origins, nodes)

    ## Find the nearest network node to each destination
    nearest_destination = an.ckdnearest(destinations,nodes)

    ## Create a link that connects each destination with its nearest node
    nearest_destination = nearest_destination.rename(columns={'elevation_to':'elevation'})
    destinations_links = an.create_links(nearest_destination, False, destinations.crs)

    ## Get destinations elevation by buffer
    destinations['elevation'] = _buffer_mean_elevation(destinations, nodes, nearest_destination)

    ## Get destinations links grade
    destinations_links = an.get_links_grade(destinations_links, destinations, nodes)

    ## Assign a walking speed to every link
    edges = an.assign_walk_speed_Naismith_Langmuir(edges, 'km/h')
    origins_links = an.assign_walk_speed_Naismith_Langmuir(origins_links, 'km/h')
    destinations_links = an.assign_walk_speed_Naismith_Langmuir(destinations_links, 'km/h')

    # Time [min] needed to walk through each link:
    # speed [km/h] * 1000 / 60 -> [m/min] (assumes 'length' is in metres - TODO confirm)
    edges['weight'] = edges['length']/(edges['speed']*1000/60)
    origins_links['weight'] = origins_links['length']/(origins_links['speed']*1000/60)
    destinations_links['weight'] = destinations_links['length']/(destinations_links['speed']*1000/60)

    # Back to geographic coordinates for the exported files
    edges = edges.to_crs('epsg:4326')
    nodes = nodes.to_crs('epsg:4326')
    origins = origins.to_crs('epsg:4326')
    origins_links = origins_links.to_crs('epsg:4326')
    destinations = destinations.to_crs('epsg:4326')
    destinations_links = destinations_links.to_crs('epsg:4326')
    perimeter = perimeter.to_crs('epsg:4326')
    blocks = blocks.to_crs('epsg:4326')

    # Arrange each layer with the helpers from utils.analysis
    nodes = an.organize_nodes(nodes)
    edges = an.organize_edges(edges)
    origins = an.organize_origins(origins)
    origins_links = an.organize_origins_links(origins_links)
    destinations = an.organize_destinations(destinations,blocks,origins)
    destinations_links = an.organize_destinations_links(destinations_links,destinations)

    # Add the population of each block to its origin
    # NOTE(review): the join matches origins' 'ID' against 'MANZ_CCNCT'; it
    # presumably relies on both having the same dtype and on 'MANZ_CCNCT'
    # being unique in pop - verify against the input tables
    origins = origins.set_index('ID').join(pop.set_index('MANZ_CCNCT'))
    origins = origins.reset_index()
    origins = origins.rename(columns={'poblacion':'pop'})
    origins = origins[['ID','coord_X','coord_Y','coord_Z','Type','pop','geometry']]
    # Change the NaN values for 0.0 (e.g. origins without a population record)
    origins = origins.fillna(0.0)

    # Full graph: network nodes/edges plus origins, destinations and their links
    graph_nodes = pd.concat([nodes,origins,destinations]).reset_index(drop=True)
    graph_links = pd.concat([edges,origins_links,destinations_links]).reset_index(drop=True)

    # Export every layer of the territory
    edges.to_file(f'../data/output/shape/edges/{m}_edges.shp')
    nodes.to_file(f'../data/output/shape/nodes/{m}_nodes.shp')
    origins.to_file(f'../data/output/shape/origins/{m}_origins.shp')
    origins_links.to_file(f'../data/output/shape/origins_links/{m}_origins_links.shp')
    destinations.to_file(f'../data/output/shape/destinations_/{m}_destinations.shp')
    destinations_links.to_file(f'../data/output/shape/destinations_links_/{m}_destinations_links.shp')
    perimeter.to_file(f'../data/output/shape/perimeters/{m}_perimeter.shp')
    blocks.to_file(f'../data/output/shape/blocks/{m}_blocks.shp')

    graph_nodes.to_file(f'../data/output/shape/graphs_/nodes/{m}_graph_nodes.shp')
    graph_links.to_file(f'../data/output/shape/graphs_/links/{m}_graph_links.shp')