# Download and visualize urban data

In [None]:
import sys
sys.path.append('..')
import urbanpy as up

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from tqdm.auto import tqdm

In [None]:
# Register tqdm with pandas so that `progress_apply` (used for the routing
# step later in this notebook) is available and shows a progress bar.
tqdm.pandas()

## Downloading data for different cities and plotting

In [None]:
# Download one boundary layer per city. The optional second argument selects
# which search result to keep (0-based), since the first hit is not always
# the polygon we want.
ba = up.download.nominatim_osm('Buenos Aires, Argentina') # expected_position is 0 by default
quito = up.download.nominatim_osm('Quito, Ecuador', 1) # second result
lima = up.download.nominatim_osm('Lima, Peru', 2) # third result

In [None]:
# Draw each downloaded boundary on its own figure, in the order
# Lima, Quito, Buenos Aires, with lon/lat axis labels and the city as title.
for boundary, city_name in (
    (lima, 'Lima'),
    (quito, 'Quito'),
    (ba, 'Buenos Aires'),
):
    boundary.plot()
    plt.xlabel('lon')
    plt.ylabel('lat')
    plt.title(city_name)
    plt.show()

Lima is still missing the Callao polygon, so let's download it as well.

In [None]:
# Fetch the Callao boundary (second search result) so it can be merged
# with the Lima layer below.
callao = up.download.nominatim_osm('Callao, Peru', 1)

### Merging layers

In [None]:
# Combine the Lima and Callao downloads into a single layer; from here on
# `lima` refers to the merged result.
lima = up.geom.merge_geom_downloads([lima, callao])

### Downloading population data

In [None]:
# National population datasets from HDX, one per country
# (Peru, Ecuador, Argentina). The resource paths are split across lines only
# for readability; the resulting strings are unchanged.
pop_per = up.download.hdx_dataset(
    '4e74db39-87f1-4383-9255-eaf8ebceb0c9'
    '/resource/317f1c39-8417-4bde-a076-99bd37feefce'
    '/download/population_per_2018-10-01.csv.zip'
)
pop_ecu = up.download.hdx_dataset(
    '58c3ac3f-febd-4222-8969-59c0fe0e7a0d'
    '/resource/c05a3c81-a78c-4e6c-ac05-de1316d4ba12'
    '/download/population_ecu_2018-10-01.csv.zip'
)
pop_arg = up.download.hdx_dataset(
    '6cf49080-1226-4eda-8700-a0093cbdfe4d'
    '/resource/5737d87f-e17f-4c82-b1bd-d589ed631318'
    '/download/population_arg_2018-10-01.csv.zip'
)

In [None]:
# Preview the first rows of the Peru population table.
pop_per.head()

### Conversion to Point geometries and hexagons

We have the lat/lon coordinates, but they are only in numerical format; we need them as geometries to perform spatial operations on the hexagon grid.

First, let's filter the national population to our city bounds

In [None]:
# Restrict each national population table to its city's layer.
# All three calls run (Lima, Quito, Buenos Aires) before the names are bound.
pop_lima, pop_quito, pop_ba = (
    up.geom.filter_population(national_pop, city_layer)
    for national_pop, city_layer in (
        (pop_per, lima),
        (pop_ecu, quito),
        (pop_arg, ba),
    )
)

Applying filter_population gives us the set of points within our city's bounds and a Point geometry to work with hexagons

In [None]:
# Preview the first rows of the filtered Lima population points.
pop_lima.head()

### Removing unnecessary features

We don't need the San Lorenzo island. By providing a bounding box we can remove it

In [None]:
# Bounding box around San Lorenzo island; population points inside it are
# dropped. NOTE(review): the values look like
# [lat_min, lat_max, lon_min, lon_max] -- confirm the expected order
# against remove_features.
san_lorenzo_bbox = [-12.2, -12, -77.3, -77.17]
pop_lima = up.geom.remove_features(pop_lima, san_lorenzo_bbox)

Let's generate the hexagon grid using Uber's H3

In [None]:
# Build one hexagon grid per city; the first argument is the resolution
# (Lima 8, Quito 10, Buenos Aires 9).
hex_lima, hex_quito, hex_ba = (
    up.geom.gen_hexagons(resolution, city_layer)
    for resolution, city_layer in (
        (8, lima),
        (10, quito),
        (9, ba),
    )
)

Notice the effect of the resolution parameter (higher resolution values generate smaller hexagons)

In [None]:
# Show the (rows, columns) shape of each grid, one per line, to compare how
# many hexagons each resolution produced.
print(hex_lima.shape, hex_quito.shape, hex_ba.shape, sep='\n')

### Merging a layer

We now have both the population point geometries and the hexagons, so let's get the population per hexagon

In [None]:
# Aggregate the population points into their hexagons for every city, using
# the same join settings each time: inner join on intersection, summing
# `population_2020` and averaging `population_2015`. The (grid, points)
# pairs are captured before any grid name is rebound, and each call gets
# its own `agg` dict.
hex_lima, hex_quito, hex_ba = (
    up.geom.merge_shape_hex(
        hex_grid,
        pop_points,
        how='inner',
        op='intersects',
        agg={'population_2020': 'sum', 'population_2015': 'mean'},
    )
    for hex_grid, pop_points in (
        (hex_lima, pop_lima),
        (hex_quito, pop_quito),
        (hex_ba, pop_ba),
    )
)

We've used an inner join to keep only the points that intersect a hexagon (in this case, a within operation gives similar results). We provide a dictionary detailing how to aggregate our fields of interest <br>

Let's visualize the maps

In [None]:
# Choropleth of the 2020 population per hexagon, one figure per city
# (Lima, Quito, Buenos Aires); hexagons without a value are drawn in grey.
for hex_grid, city_name in (
    (hex_lima, 'Lima'),
    (hex_quito, 'Quito'),
    (hex_ba, 'Buenos Aires'),
):
    hex_grid.plot(
        column='population_2020',
        legend=True,
        cmap='viridis',
        figsize=(10, 5),
        missing_kwds={'color': 'grey'},
    )
    plt.xlabel('lon')
    plt.ylabel('lat')
    plt.title(city_name)
    plt.show()

## Calculate trip distance and duration from each hexagon to the closest food facility

In [None]:
# Store each hexagon's centroid coordinates as plain columns so they can be
# fed to the nearest-neighbour search.
hex_centroids = hex_lima.geometry.centroid
hex_lima['lat'] = hex_centroids.y
hex_lima['lon'] = hex_centroids.x

In [None]:
# Download food-related points of interest within the bounding box
# (`total_bounds`) of the merged Lima layer.
fs = up.download.overpass_pois(bounds=lima.total_bounds, facilities='food')

In [None]:
# Quick visual check of where the downloaded food facilities are located.
fs.plot()

Nearest-neighbor search to find the closest food facility for each hexagon

In [None]:
# For every hexagon centroid, find the closest food facility. Both inputs use
# the same [lat, lon] column order.
# NOTE(review): haversine implementations (e.g. scikit-learn's) normally
# expect coordinates in radians, while these columns hold degrees -- confirm
# whether nn_search converts internally.
coord_columns = ['lat', 'lon']
dist_up, ind_up = up.utils.nn_search(
    tree_features=fs[coord_columns].values,
    query_features=hex_lima[coord_columns].values,
    metric='haversine',
)

In [None]:
# Keep, for each hexagon, the index of its nearest food facility; the
# routing step uses it positionally via `fs.iloc[...]`.
hex_lima['nearest_food_facility_ix'] = ind_up

Use OSRM routing server to find walking distance and duration 

In [None]:
# Start the OSRM routing server for the 'peru' dataset. It must be running
# before the routing cell below and is shut down afterwards with
# `up.routing.stop_osrm_server()`.
up.routing.start_osrm_server('peru')

In [None]:
# Walking trip distance and duration from each hexagon to its nearest
# food facility.
def _walking_route_to_nearest_food(row):
    """Route on foot from the row's hexagon centroid to its nearest food facility."""
    return up.routing.osrm_route(
        origin=row.geometry.centroid,
        destination=fs.iloc[row['nearest_food_facility_ix']]['geometry'],
        profile='walking',
    )


# `result_type='expand'` spreads each routing result over the two target
# columns (distance, duration).
hex_lima[['dist_nn_ff_walk', 'dur_nn_ff_walk']] = hex_lima.progress_apply(
    _walking_route_to_nearest_food,
    axis=1,
    result_type='expand',
)

In [None]:
# Rescale the routing outputs to friendlier units.
hex_lima['dist_nn_ff_walk'] = hex_lima['dist_nn_ff_walk'].div(1000)  # meters -> kilometers
hex_lima['dur_nn_ff_walk'] = hex_lima['dur_nn_ff_walk'].div(60)  # seconds -> minutes

In [None]:
# Routing is done: shut down the OSRM server started earlier.
up.routing.stop_osrm_server()

## Generate interactive maps

In [None]:
# Interactive choropleth of the 2020 population per hexagon.
# NOTE(review): the result is only assigned here and `map_figure` is
# reassigned further down, so nothing is rendered by this cell -- presumably
# the returned figure needs to be displayed explicitly; confirm.
map_figure = up.plotting.choropleth_map(hex_lima, 'population_2020', title='Estimated Population - 2020')

Bin durations into labelled intervals to improve map visualization

In [None]:
# Derive bin edges and matching labels from the walking durations
# (in minutes); both are passed to `pd.cut` in the next cell.
custom_bins, custom_labels = up.utils.create_duration_labels(hex_lima['dur_nn_ff_walk'])

In [None]:
# Assign every hexagon to its labelled walking-duration bin.
hex_lima['dur_nn_ff_walk_bins'] = pd.cut(hex_lima['dur_nn_ff_walk'], bins=custom_bins, labels=custom_labels)

You can customize your plot using the same parameters accepted by the plotly.express choropleth map function

In [None]:
# Interactive choropleth of the binned walking duration to the nearest food
# facility. The extra keyword arguments fix the colour sequence, keep the
# bins in their defined order and relabel the legend.
duration_bins_column = 'dur_nn_ff_walk_bins'
map_figure = up.plotting.choropleth_map(
    hex_lima,
    duration_bins_column,
    title='Acceso a instalaciones de venta de alimento',
    color_discrete_sequence=px.colors.sequential.Plasma_r,
    category_orders={duration_bins_column: custom_labels},
    labels={duration_bins_column: 'Duración (en minutos)'},
)