# Urbanpy model for Para

Start date: 2022-02-09
Make sure the necessary packages are installed from the `pipenv` file created for this work.

To do this, go to the local directory for this project, and in your terminal run:

`pipenv install`

Then, activate the environment by running 

`pipenv shell`

**Note**: You may need to run `brew install gdal` in terminal and then `pip install urbanpy` in this notebook to get code below to run. Issue submitted about installation difficulties [here](https://github.com/EL-BID/urbanpy/issues/18).

In [None]:
import warnings

# Silence every warning for the rest of the session so cell output stays readable.
# NOTE(review): this also hides pandas/geopandas warnings raised by later cells — confirm that is intended.
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt

# Default size applied to every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (10, 10)

# Only needed when git cloning the urbanpy repo
# import sys
# sys.path.append('..')

import urbanpy as up
import geopandas as gpd
import numpy as np
import pandas as pd
import plotly.express as px

from tqdm.notebook import tqdm

# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()

from pandarallel import pandarallel

# Initialise pandarallel with 8 workers and a progress bar; `parallel_apply`
# is used further down for the geometry and routing computations.
pandarallel.initialize(progress_bar=True, nb_workers=8)

## Read Para's hexagons

In [None]:
# Load the hexagon table for Para; the first CSV column becomes the index.
para_hexs = pd.read_csv(
    filepath_or_buffer="outputs/29112023_para_hexs_final.csv",
    index_col=0,
)

In [None]:
# Preview the first rows of the hexagon table.
para_hexs.head()

In [None]:
# (rows, columns) of the hexagon table.
para_hexs.shape

In [None]:
# List the available columns (later cells use "hex" and "population_2020").
para_hexs.columns

In [None]:
import h3
from shapely.geometry import Polygon

In [None]:
# Sanity check on one H3 index: build a shapely Polygon from its boundary.
# NOTE(review): geo_json=True presumably returns (lng, lat) vertices in a closed
# ring, which is the x/y order shapely expects — confirm against the h3 docs.
Polygon(h3.h3_to_geo_boundary("888062d73dfffff", geo_json=True))

In [None]:
# Display the hexagon table before the geometry column is added.
para_hexs

In [None]:
%%timeit
# Get polygon from h3 index
para_hexs["geometry"] = para_hexs["hex"].apply(
    lambda x: Polygon(h3.h3_to_geo_boundary(x, geo_json=True))
)

In [None]:
%%timeit
# Get polygon from h3 index
para_hexs["geometry"] = para_hexs["hex"].parallel_apply(
    lambda x: Polygon(h3.h3_to_geo_boundary(x, geo_json=True))
)

In [None]:
# Promote the table to a GeoDataFrame (uses the "geometry" column built above)
# and tag it with CRS code 4326.
para_hexs = gpd.GeoDataFrame(
    data=para_hexs,
    crs="4326",
)

## Visualize the population data

In [None]:
# Interactive choropleth of the 2020 population per hexagon.
# The index is reset before plotting, as for the other choropleth maps in
# this notebook.
fig = up.plotting.choropleth_map(
    para_hexs.reset_index(drop=True),
    "population_2020",
    title="Para Population - 2020",
    zoom=8,
    color_continuous_scale="Viridis",
    opacity=0.8,
    labels={"population_2020": "Pop. 2020"},
)

# Leave room only for the title and hide hexagon outlines.
fig.update_layout(margin={"l": 0, "r": 0, "t": 30, "b": 0})
fig.update_traces(marker_line_width=0)
fig.show()

## Urban accessibility - import high quality school data

These data are stored in a Parquet file named `brazil_schools_census_edu_metrics.parquet` (in the `outputs/` folder) that we need to read.

In [None]:
# Load the Brazil-wide schools table as a GeoDataFrame.
# Later cells use its geometry plus the "abbrev_state", "admin_category",
# "service_restriction" and "education_level" columns.
br_schools = gpd.read_parquet("outputs/brazil_schools_census_edu_metrics.parquet")

In [None]:
# (rows, columns) of the full schools table, before any filtering.
br_schools.shape

In [None]:
# Preview the first two schools.
br_schools.head(2)

## Subset schools

Criteria:

1. Only consider schools in Para
1. Only consider Public schools
1. Only consider schools that are active
1. Only consider schools from the level Ensino Fundamental (6 - 14 years old)

In [None]:
# Create the filters: one boolean mask per subset criterion listed above.
state_filter = br_schools["abbrev_state"] == "PA"  # schools in Para
admin_filter = br_schools["admin_category"] == "Pública"  # public schools
service_filter = (
    br_schools["service_restriction"]
    == "ESCOLA EM FUNCIONAMENTO E SEM RESTRIÇÃO DE ATENDIMENTO"
)  # active schools
# Schools offering Ensino Fundamental.
# regex=False: the pattern is a plain substring, not a regular expression.
# na=False: a missing education level counts as "no match" instead of
# producing NaN, so the mask is strictly boolean.
level_filter = br_schools["education_level"].str.contains(
    "Ensino Fundamental", regex=False, na=False
)

In [None]:
# Apply the filters: keep only schools that satisfy all four criteria.
# .copy() makes the subset an independent frame, so the columns added to it
# later ("lat", "lon") are not chained assignments on a slice of br_schools.
filtered_schools = br_schools[
    state_filter & admin_filter & service_filter & level_filter
].copy()

In [None]:
# Small report: how many schools survived the filters, in absolute numbers
# and as a share of the full table.
total_schools = len(br_schools)
selected_schools = len(filtered_schools)
selected_share = selected_schools / total_schools * 100

print(f"Total number of schools: {total_schools}")
print(f"Number of schools selected: {selected_schools}")
print(f"Percentage of the total: {selected_share:.2f}%")

## Evaluate Accessibility

In [None]:
# Split each school's point geometry into explicit coordinate columns
# (y -> "lat", x -> "lon") for the nearest-neighbour search.
school_geometries = filtered_schools.geometry
filtered_schools["lat"] = school_geometries.y
filtered_schools["lon"] = school_geometries.x

In [None]:
# Drop schools with a missing coordinate so they cannot enter the
# nearest-neighbour search. The search and the routing cells address schools
# by position (`.values` / `.iloc`), so this must run before both.
filtered_schools = filtered_schools.dropna(subset=["lat", "lon"])

In [None]:
# Hexagon centroids, computed once and split into coordinate columns
# (y -> "lat", x -> "lon").
hex_centroids = para_hexs.geometry.centroid
para_hexs["lat"] = hex_centroids.y
para_hexs["lon"] = hex_centroids.x

Get the nearest school to each hexagon's centroid.

In [None]:
# Coordinate arrays in (lat, lon) column order.
school_coords = filtered_schools[["lat", "lon"]].values  # search-tree points: the schools
centroid_coords = para_hexs[["lat", "lon"]].values  # query points: one centroid per hexagon

# For every hexagon centroid, find the nearest school under the haversine metric.
# NOTE(review): the units of dist_up depend on how nn_search handles the
# haversine metric — confirm before interpreting the distances.
dist_up, ind_up = up.utils.nn_search(
    tree_features=school_coords,
    query_features=centroid_coords,
    metric="haversine",
)

This adds two new columns holding, for each hexagon, the index of its closest school and the distance to it.

In [None]:
# Index of the closest school for each hexagon. The routing cells look it up
# with `filtered_schools.iloc[...]`, i.e. it is treated as a row position in
# filtered_schools (the array the search tree was built from).
para_hexs["closest_school_id"] = ind_up
# Distance to that school as returned by nn_search.
# NOTE(review): units are not established in this notebook — confirm.
para_hexs["closest_school_dist"] = dist_up

## Download data needed for Para

In [None]:
!cd ~/data/osrm && wget https://download.geofabrik.de/south-america/brazil/norte-latest.osm.pbf

## Start the OSRM server

In [None]:
# Download unix_download.sh file from github repo into the installed urbanpy
# package's routing folder.
# NOTE(review): the target path is hard-coded to a `.env` virtualenv on
# Python 3.11, relative to the current working directory — adjust it if your
# environment differs.
!cd .env/lib/python3.11/site-packages/urbanpy/routing/ && wget https://raw.githubusercontent.com/EL-BID/urbanpy/master/urbanpy/routing/unix_download.sh

In [None]:
# NOTE(review): the line below is not valid Python, so running this cell failed
# with a SyntaxError (unless `wget` had been registered as an IPython alias).
# `$2` and `$3` are shell positional parameters that are undefined here, so it
# looks like the download line of unix_download.sh copied in for reference —
# confirm. Kept as a comment so the cell no longer breaks "Run All".
# wget https://download.geofabrik.de/$3/$2-latest.osm.pbf

In [None]:
# Start the OSRM routing server with the "foot" profile for the "norte" data.
# The matching stop_osrm_server call later in the notebook takes the same
# three arguments.
# NOTE(review): "south-america_brazil" is presumably used to build a local
# path/name for the manually downloaded extract — confirm against urbanpy.
up.routing.start_osrm_server("norte", "south-america_brazil", "foot")

Then we can do our distance and duration calculations

In [None]:
# School geometries, looked up by row position inside the routing call below.
school_points = filtered_schools["geometry"]

# Route from every hexagon centroid to its closest school on the running
# (foot profile) OSRM server. result_type="expand" spreads each returned
# pair over columns 0 and 1 of the resulting frame.
distance_duration_para_by_foot = para_hexs.parallel_apply(
    lambda hex_row: up.routing.osrm_route(
        origin=hex_row.geometry.centroid,
        destination=school_points.iloc[hex_row["closest_school_id"]],
    ),
    axis=1,
    result_type="expand",
)

## Inspect results

In [None]:
# Column 0 holds the route distance, column 1 the route duration.
foot_distance_m = distance_duration_para_by_foot[0]
foot_duration_s = distance_duration_para_by_foot[1]

# Store them on the hexagon table in friendlier units.
para_hexs["distance_to_school_km_by_foot"] = foot_distance_m / 1000  # meters to kilometers
para_hexs["duration_to_school_min_by_foot"] = foot_duration_s / 60  # seconds to minutes

In [None]:
# Once we have finished with the OSRM server we stop it.
# The arguments mirror the start_osrm_server call for the "foot" profile.
up.routing.stop_osrm_server("norte", "south-america_brazil", "foot")

## Create map for travel times by foot to nearest schools

In [None]:
# Summary statistics of the walking time to the nearest school (minutes).
para_hexs["duration_to_school_min_by_foot"].describe()

In [None]:
# Continuous choropleth of the walking time to the nearest school.
# Reset index is needed to avoid an error with plotly choropleth_map
fig = up.plotting.choropleth_map(
    para_hexs.reset_index(drop=True),
    "duration_to_school_min_by_foot",
    title="Para Estimated travel times to school by foot",
    zoom=8,
    color_continuous_scale="Plasma_r",
    opacity=0.6,
    # The key must be the plotted column name, otherwise the label is ignored.
    labels={"duration_to_school_min_by_foot": "Duration (min)"},
)

# Leave room only for the title and hide hexagon outlines.
fig.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),
)
fig.update_traces(marker_line_width=0.0)
fig.show()

## Making map with bins of duration

First get default categories

In [None]:
custom_bins, custom_labels = up.utils.create_duration_labels(
    hex_para["duration_to_school_min"]
)
print(custom_bins)
print(custom_labels)

Then convert from numerical to categorical

In [None]:
hex_para["duration_column_categories"] = pd.cut(
    hex_para["duration_to_school_min"], bins=custom_bins, labels=custom_labels
)

Then plot

In [None]:
map_figure = up.plotting.choropleth_map(
    hex_para.reset_index(drop=True),
    "duration_column_categories",
    zoom=5,
    opacity=0.6,
    title=state + ". Estimated travel times to school by foot",
    color_discrete_sequence=px.colors.sequential.Plasma_r,
    category_orders={"duration_column_categories": custom_labels},
    labels={"duration_column_categories": "Duration (minutes)"},
)

map_figure.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),
)
map_figure.update_traces(marker_line_width=0.0)
map_figure.show()

In [None]:
# Start the OSRM routing server again, this time with the "car" profile, to
# compute driving distances and durations.
up.routing.start_osrm_server("norte", "south-america_brazil", "car")

In [None]:
# School geometries, looked up by row position inside the routing call below.
school_points = filtered_schools["geometry"]

# Route from every hexagon centroid to its closest school on the running
# (car profile) OSRM server. result_type="expand" spreads each returned
# pair over columns 0 and 1 of the resulting frame.
distance_duration_para_by_car = para_hexs.parallel_apply(
    lambda hex_row: up.routing.osrm_route(
        origin=hex_row.geometry.centroid,
        destination=school_points.iloc[hex_row["closest_school_id"]],
    ),
    axis=1,
    result_type="expand",
)

In [None]:
# Column 0 holds the route distance, column 1 the route duration.
car_distance_m = distance_duration_para_by_car[0]
car_duration_s = distance_duration_para_by_car[1]

# Store them on the hexagon table in friendlier units.
para_hexs["distance_to_school_km_by_car"] = car_distance_m / 1000  # meters to kilometers
para_hexs["duration_to_school_min_by_car"] = car_duration_s / 60  # seconds to minutes

In [None]:
# Once we have finished with the OSRM server we stop it.
# The arguments mirror the start_osrm_server call for the "car" profile.
up.routing.stop_osrm_server("norte", "south-america_brazil", "car")