In [None]:
import urbanpy as up
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as ctx
import rioxarray
import os
from tqdm.auto import tqdm
from shapely.geometry import Polygon, MultiPolygon
from pandarallel import pandarallel
import pandas as pd

tqdm.pandas()

pandarallel.initialize(progress_bar=True)

In [None]:
amazon_limits = gpd.read_parquet("outputs/amazonas_clean.parquet")

In [None]:
country_boundaries = gpd.read_file("inputs/Cartographic Boundary Files/LAC/level 0")

In [None]:
country_boundaries.head()

In [None]:
country_boundaries["ADM0_PCODE"].unique()

In [None]:
# Two-letter ADM0 codes of the countries kept for the analysis
countries_ADM0CODE = ["BO", "BR", "CO", "EC", "GY", "PE", "SR", "VE"]
# Boolean mask over the boundary rows whose code is in the list above
is_amazon_country = country_boundaries["ADM0_PCODE"].isin(countries_ADM0CODE)
amzn_countries = country_boundaries[is_amazon_country]

In [None]:
# Draw the Amazon boundary as an outline and overlay the selected countries,
# colored by their ADM0 code.
ax = amazon_limits.plot(facecolor="none", edgecolor="black", linewidth=0.7)
amzn_countries.plot("ADM0_PCODE", ax=ax, alpha=0.5)
# The basemap is requested in the CRS of the Amazon limits layer
ctx.add_basemap(ax, crs=amazon_limits.crs.to_string())
ax.set_title("Amazon Countries")
ax.set_axis_off()

In [None]:
amzn_countries.to_parquet("outputs/amazon_countries.parquet")

## WorldPop - Age and sex structures

- Resolution: 100 m (each grid cell is approximately 100 m × 100 m)
- Year: 2020
- Classes: 5-year age groups + <1 year
- Version: Constrained


# Raster based version of population data download


In [None]:
# Root folder of the WorldPop rasters. Files are later looked up as
# {worldpop_data}/{country}/{country}_{gender}_{age}_{year}.tif
worldpop_data = "inputs/WorldPop"
# Lowercase three-letter country codes used in the raster file names
countries = ["per", "col", "guy", "sur", "ven", "bol", "ecu", "bra"]
age_groups = [5, 10, 15]  # 5-9, 10-14, 15-19
# Gender codes used in the raster file names
genders = ["m", "f"]

In [None]:
def merge_rio_hex(
    hexs: gpd.GeoDataFrame,
    clip_geometries: gpd.GeoDataFrame,
    rio_filename: str,
    data_name: str,
    agg: str,
    band: int = 1,
) -> gpd.GeoDataFrame:
    """
    Aggregate the cells of a raster band into hexagons.

    The raster is clipped to ``clip_geometries``, every non-masked cell is
    turned into a point, and the points are aggregated per hexagon with
    ``up.geom.merge_shape_hex``.

    Parameters
    ----------
    hexs : gpd.GeoDataFrame
        GeoDataFrame with hexagons. Its CRS must match the raster CRS.
    clip_geometries : gpd.GeoDataFrame
        Geometries handed to ``rio.clip`` to restrict the raster before it is
        converted to points. They are interpreted in the raster CRS.
    rio_filename : str
        Filename of the raster data.
    data_name : str
        Name of the column that holds the raster values in the result.
    agg : str
        Aggregation method to use for ``data_name`` (e.g. ``"sum"``).
    band : int, default 1
        Raster band to read.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame with the merged data.

    Raises
    ------
    AssertionError
        If the CRS of ``hexs`` and of the raster differ.
    """
    # The context manager closes the raster file handle as soon as the values
    # have been materialised in pandas, instead of leaving it open per call.
    with rioxarray.open_rasterio(rio_filename, masked=True) as rds:
        raster_crs = rds.rio.crs
        assert hexs.crs.to_string() == raster_crs.to_string(), "CRS do not match"
        clipped = rds.rio.clip(clip_geometries, raster_crs)
        rio_data = clipped.sel(band=band).drop_vars("band")
        # Long format: one row per raster cell with its x, y and value
        rio_data_df = rio_data.to_pandas().unstack().reset_index()

    rio_data_df.columns = ["x", "y", data_name]
    rio_data_df = rio_data_df.dropna()  # Drop NaN values for faster processing
    rio_data_gdf = gpd.GeoDataFrame(
        rio_data_df,
        geometry=gpd.points_from_xy(rio_data_df.x, rio_data_df.y),
        crs=raster_crs.to_string(),
    )
    agg_dict = {data_name: agg}
    return up.geom.merge_shape_hex(hexs=hexs, shape=rio_data_gdf, agg=agg_dict)

In [None]:
# Sort both code lists in place so that the zip() in the limits/hexagons loop
# pairs each two-letter ADM0 code with its three-letter country code. This
# relies on both lists having the same alphabetical order; the next cell
# displays them for a manual check.
countries_ADM0CODE.sort()
countries.sort()

In [None]:
# Check if order is correct
countries_ADM0CODE, countries

In [None]:
# Get limits and hexagons for every country.
# Both are cached as parquet under outputs/{country}/ and reloaded when present,
# so hexagons are only generated when no cached file exists.
countries_geodata = {}
for adm0code, country in tqdm(
    zip(countries_ADM0CODE, countries), total=len(countries_ADM0CODE)
):
    print(f"Processing {country}")

    country_path = f"outputs/{country}"
    os.makedirs(country_path, exist_ok=True)

    # Country boundary: cached file if available, otherwise filtered by ADM0 code
    limits_fn = os.path.join(country_path, "limits.parquet")
    if os.path.exists(limits_fn):
        country_limits = gpd.read_parquet(limits_fn)
    else:
        country_limits = amzn_countries[amzn_countries["ADM0_PCODE"] == adm0code]
        country_limits.to_parquet(limits_fn)

    # Resolution-7 hexagons covering the country boundary
    hexs_fn = os.path.join(country_path, "hexs.parquet")
    if os.path.exists(hexs_fn):
        country_hexs = gpd.read_parquet(hexs_fn)
    else:
        country_hexs = up.geom.gen_hexagons(resolution=7, city=country_limits)
        country_hexs.to_parquet(hexs_fn)

    print("Number of hexagons:", country_hexs.shape[0])

    # Keyed by the three-letter country code
    countries_geodata[country] = {
        "limits": country_limits,
        "hexs": country_hexs,
    }

In [None]:
# Plot hexagons and limits for all countries.
# Disabled with `if False:`; switch to True to render the 2x4 grid
# (one panel per entry in countries_geodata).
if False:
    fig, axs = plt.subplots(2, 4, figsize=(20, 10))
    axs = axs.flatten()

    for i, (country, data) in enumerate(countries_geodata.items()):
        limits = data["limits"]
        hexs = data["hexs"]
        limits.plot(facecolor="none", edgecolor="black", linewidth=0.7, ax=axs[i])
        hexs.plot(ax=axs[i], alpha=0.5)
        ctx.add_basemap(axs[i], crs=limits.crs.to_string())
        axs[i].set_title(country)
        axs[i].set_axis_off()

    plt.tight_layout()
    plt.show()

In [None]:
# Remove galapagos from ecuador if it exists.
# The check looks for any limits geometry inside the box x in [-100, -82.5],
# y in [-5, 2]; if none is found the islands are treated as already removed.
if countries_geodata["ecu"]["limits"].cx[-100:-82.5, -5:2].shape[0] != 0:
    ecu_limits = countries_geodata["ecu"]["limits"].copy()
    ecu_hexs = countries_geodata["ecu"]["hexs"].copy()
    # Explode the MultiPolygon into separate polygons
    ecu_limits = ecu_limits.explode()
    # Keep only polygons and hexagons inside the box x in [-82.5, -75],
    # y in [-5, 2], which drops the galapagos islands
    ecu_limits_clipped = ecu_limits.cx[-82.5:-75, -5:2]
    ecu_hexs_clipped = ecu_hexs.cx[-82.5:-75, -5:2]
    # Replace the in-memory limits and hexagons with the filtered versions
    countries_geodata["ecu"]["limits"] = ecu_limits_clipped
    countries_geodata["ecu"]["hexs"] = ecu_hexs_clipped
    # Save new limits and hexagons (overwrites the cached parquet files)
    ecu_limits_clipped.to_parquet("outputs/ecu/limits.parquet")
    ecu_hexs_clipped.to_parquet("outputs/ecu/hexs.parquet")
else:
    print("Galapagos islands already removed")

In [None]:
# Build one row per (country, gender, age group) raster so each file can be
# converted to hexagons independently.
year = "2020"
filenames = []
for country, gender, age in (
    (c, g, a) for c in countries for g in genders for a in age_groups
):
    fn = f"{worldpop_data}/{country}/{country}_{gender}_{age}_{year}.tif"
    # Fail early when an expected raster is missing on disk
    assert os.path.exists(fn), f"{fn} does not exist"
    filenames.append([country, gender, age, fn])

print("Number of files", len(filenames))

df = pd.DataFrame(filenames, columns=["country", "gender", "age", "filename"])
df.head()

In [None]:
def raster_hex_in_parallel(row):
    """
    Aggregate one population raster into its country's hexagons, with caching.

    Parameters
    ----------
    row : mapping with "country" and "filename" keys
        One row of the raster index: the country code and the raster path.

    Returns
    -------
    pd.DataFrame
        Single-column frame named after the raster file (without extension),
        read from outputs/{country}/population/ when the parquet already
        exists, otherwise computed with ``merge_rio_hex`` and written there.
    """
    country_code = row["country"]
    raster_fn = row["filename"]
    # File name without directory or extension, e.g. ".../per_m_5_2020.tif" -> "per_m_5_2020"
    col_name = raster_fn.split("/")[-1].split(".")[0]

    pop_path = os.path.join(f"outputs/{country_code}", "population")
    os.makedirs(pop_path, exist_ok=True)
    pop_fn = os.path.join(pop_path, f"{col_name}.parquet")

    if not os.path.exists(pop_fn):
        country_data = countries_geodata[country_code]
        processed_hex = merge_rio_hex(
            hexs=country_data["hexs"],
            clip_geometries=country_data["limits"].geometry,
            rio_filename=raster_fn,
            data_name=col_name,
            agg="sum",
            band=1,
        )
        # Geometry is dropped before caching; only attribute columns are stored
        processed_hex.drop("geometry", axis=1).to_parquet(pop_fn)
        return processed_hex[col_name].to_frame()

    return pd.read_parquet(pop_fn)[col_name].to_frame()

In [None]:
# Create empty population column to store the data
df["population"] = None

In [None]:
def process_country(country: str, parallel: bool = True):
    """
    Build (or load) the hexagon population table for one country.

    The result is cached at outputs/{country}_hex_population.parquet and stored
    in ``countries_geodata[country]["hexs_population"]``.

    Parameters
    ----------
    country : str
        Three-letter country code; must be a key of ``countries_geodata`` and
        appear in the ``country`` column of the global ``df`` raster index.
    parallel : bool, default True
        Use pandarallel's ``parallel_apply``. When False, rows are processed
        sequentially, with a tqdm progress bar when ``progress_apply`` is
        available.

    Returns
    -------
    None
    """
    output_fn = f"outputs/{country}_hex_population.parquet"
    if os.path.exists(output_fn):
        country_hex_population = gpd.read_parquet(output_fn)
    else:
        # Copy so the new column is written to an independent frame rather
        # than to a filtered slice of the global `df`.
        country_df = df[df["country"] == country].copy()

        # Expensive process
        if parallel:
            apply_rows = country_df.parallel_apply
        else:
            # `progress_apply` only exists after tqdm.pandas() was registered;
            # fall back to plain apply otherwise. Errors raised while
            # processing a raster now propagate instead of silently
            # triggering a second full run.
            apply_rows = getattr(country_df, "progress_apply", country_df.apply)
        country_df["population"] = apply_rows(raster_hex_in_parallel, axis=1)

        # Concatenate all the results column-wise, hexagons first
        processed_hexs_list = [countries_geodata[country]["hexs"]]
        processed_hexs_list.extend(country_df["population"].tolist())
        country_hex_population = pd.concat(processed_hexs_list, axis=1)

        # Save to disk
        country_hex_population.to_parquet(output_fn)

    # Store the data for easy access
    countries_geodata[country]["hexs_population"] = country_hex_population

In [None]:
process_country("guy")

In [None]:
process_country("sur")

In [None]:
process_country("ecu", parallel=False)

In [None]:
process_country("ven", parallel=False)

In [None]:
process_country("bol", parallel=False)

In [None]:
process_country("col", parallel=False)

In [None]:
process_country("per", parallel=False)

In [None]:
# Out of memory error
# process_country("bra", parallel=False)

In [None]:
def plot_country_hexs(country: str):
    """
    Plot choropleths of populated hexagons for each (gender, age group) pair.

    One row of panels per entry in ``genders`` and one column per entry in
    ``age_groups``; each panel shows the country limits, the hexagons whose
    population column is greater than zero, and a basemap.

    Parameters
    ----------
    country : str
        Three-letter country code; ``countries_geodata[country]`` must contain
        "limits" and "hexs_population".
    """
    n_rows, n_cols = len(genders), len(age_groups)
    # squeeze=False keeps `axs` two-dimensional even for a single row/column,
    # so the grid follows the configured lists instead of a fixed 2x3 layout.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(20, 10), squeeze=False)

    limits = countries_geodata[country]["limits"]
    hexs_population = countries_geodata[country]["hexs_population"]

    for i, gender in enumerate(genders):
        for j, age in enumerate(age_groups):
            col_name = f"{country}_{gender}_{age}_{year}"
            ax = axs[i, j]
            limits.plot(facecolor="none", edgecolor="black", linewidth=0.7, ax=ax)
            hexs_population.query(f"{col_name} > 0").plot(
                col_name, ax=ax, alpha=0.5, legend=True
            )
            ctx.add_basemap(ax, crs=limits.crs.to_string())
            ax.set_title(f"{country} - {gender} - {age} to {age+4} years old")
            ax.set_axis_off()

    plt.tight_layout()
    plt.show()

In [None]:
# Disabled: switch to True to render the population maps for every processed country
if False:
    for country_code in ["sur", "guy", "ecu", "ven", "bol", "col", "per"]:
        plot_country_hexs(country_code)

In [None]:
# Add male + female totals per age group and the school-age aggregates.
# Brazil is skipped here; its table is loaded from a separate file later.
for country in countries:
    if country == "bra":
        continue
    country_hex = countries_geodata[country]["hexs_population"]
    for age_start in (5, 10, 15):
        gender_cols = [
            f"{country}_m_{age_start}_2020",
            f"{country}_f_{age_start}_2020",
        ]
        country_hex[f"{country}_t_{age_start}_2020"] = country_hex[gender_cols].sum(
            axis=1
        )
    # Primary-school population: ages 5-9
    country_hex["pob_primaria"] = country_hex[f"{country}_t_5_2020"]
    # Secondary-school population: ages 10-14 plus 15-19
    secondary_cols = [f"{country}_t_10_2020", f"{country}_t_15_2020"]
    country_hex["pob_secundaria"] = country_hex[secondary_cols].sum(axis=1)
    country_hex["country"] = country

In [None]:
# Brazil's hexagon population table is produced outside this notebook.
# The location can be overridden with the BR_POP_HEX_PATH environment variable
# so the notebook is not tied to a single machine; the default keeps the
# original path.
br_pop_hex_path = os.environ.get(
    "BR_POP_HEX_PATH",
    "/Users/claudio/Documents/edu-brazil/outputs/br_pop_hex_7.parquet",
)
br_pop_hex_7 = gpd.read_parquet(br_pop_hex_path)
countries_geodata["bra"]["hexs_population"] = br_pop_hex_7

In [None]:
br_pop_hex_7.head()

In [None]:
amazonas_hexs_7 = gpd.read_parquet("outputs/amazonas_hexs_7.parquet")

In [None]:
# Flag, for every country, the hexagons whose id appears in the Amazon hexagon set.
# The column is added in place to each stored "hexs_population" table.
for country in countries:
    print(country)
    country_hex = countries_geodata[country]["hexs_population"]
    country_hex["amazonian_region"] = country_hex["hex"].isin(amazonas_hexs_7["hex"])

In [None]:
country_hex.head()

In [None]:
complete_schools = gpd.read_parquet("outputs/complete_schools.parquet")

In [None]:
# Derive the country code from the first three lowercase letters of "Pais".
# NOTE(review): this only matches the codes in `countries` if the names in
# "Pais" start with those letters (e.g. "Brasil" -> "bra") - confirm.
complete_schools["country"] = complete_schools["Pais"].str.lower().str[:3]

In [None]:
complete_schools.head()

In [None]:
complete_schools["lat"] = complete_schools.geometry.y
complete_schools["lon"] = complete_schools.geometry.x

In [None]:
complete_schools[complete_schools["lat"].isna()]["Pais"].unique()

In [None]:
complete_schools_dropna = complete_schools.dropna(subset=["lat", "lon"])

In [None]:
# Split the located schools by education level (rows where the level column equals 1).
# NOTE(review): "EduNivelSecundariaTotal" is treated as a 0/1 flag here - confirm
# it is not a count.
is_primary = complete_schools_dropna["EduNivelPrimaria"] == 1
is_secondary = complete_schools_dropna["EduNivelSecundariaTotal"] == 1
primary_schools = complete_schools_dropna[is_primary]
secondary_schools = complete_schools_dropna[is_secondary]

In [None]:
# Remove hexagons without school-age population (primary + secondary must be > 0)
for country in countries:
    print(country)
    country_hex = countries_geodata[country]["hexs_population"]
    print(
        "before", country_hex.shape, countries_geodata[country]["hexs_population"].shape
    )
    # Both age groups are added here; the previous version added pob_primaria
    # to itself, which dropped hexagons that only had secondary-age population.
    has_population = (country_hex["pob_primaria"] + country_hex["pob_secundaria"]) > 0
    # Copy so later cells can add columns without writing into a slice
    country_hex = country_hex[has_population].copy()
    countries_geodata[country]["hexs_population"] = country_hex
    print(
        "after", country_hex.shape, countries_geodata[country]["hexs_population"].shape
    )

In [None]:
# Distance calculation for primary schools.
# Also adds the centroid "lat"/"lon" columns, which the secondary-school cell reuses.
for country in countries:
    print(country)
    country_hex = countries_geodata[country]["hexs_population"]
    # Get lat, lon from centroids
    country_hex["lat"] = country_hex.geometry.centroid.y
    country_hex["lon"] = country_hex.geometry.centroid.x

    dist_up, ind_up = up.utils.nn_search(
        # These are the primary schools for the country
        tree_features=primary_schools[primary_schools["country"] == country][
            ["lat", "lon"]
        ].values,
        # Query points are the centroids of the hexagons kept for the country
        query_features=country_hex[["lat", "lon"]].values,
        metric="haversine",
    )
    # NOTE(review): ind_up presumably holds positions within the
    # country-filtered primary_schools array, not school identifiers - confirm.
    country_hex["closest_primary_school_id"] = ind_up
    country_hex["closest_primary_school_dist"] = dist_up
    countries_geodata[country]["hexs_population"] = country_hex

In [None]:
# Distance calculation for secondary schools.
# Relies on the "lat"/"lon" centroid columns added by the primary-school cell.
for country in countries:
    print(country)
    country_hex = countries_geodata[country]["hexs_population"]

    dist_up, ind_up = up.utils.nn_search(
        # These are the secondary schools for the country
        tree_features=secondary_schools[secondary_schools["country"] == country][
            ["lat", "lon"]
        ].values,
        # Query points are the centroids of the hexagons kept for the country
        query_features=country_hex[["lat", "lon"]].values,
        metric="haversine",
    )
    # NOTE(review): ind_up presumably holds positions within the
    # country-filtered secondary_schools array, not school identifiers - confirm.
    country_hex["closest_secondary_school_id"] = ind_up
    country_hex["closest_secondary_school_dist"] = dist_up

    countries_geodata[country]["hexs_population"] = country_hex

In [None]:
# Filter only in amazonia: keep the hexagons flagged as "amazonian_region"
# and store them under a separate "hexs_amazonia" key per country.
for country in countries:
    print(country)
    country_hex = countries_geodata[country]["hexs_population"]
    countries_geodata[country]["hexs_amazonia"] = country_hex[
        country_hex["amazonian_region"]
    ]

In [None]:
# Add each hexagon's share of its country's primary and secondary population,
# then rebuild "hexs_amazonia" so it includes the new weight columns.
# The shares are computed over all kept hexagons of the country, not only
# the Amazonian ones.
for country in countries:
    print(country)
    country_hex = countries_geodata[country]["hexs_population"]
    country_hex["pob_primaria_peso"] = (
        country_hex["pob_primaria"] / country_hex["pob_primaria"].sum()
    )
    country_hex["pob_secundaria_peso"] = (
        country_hex["pob_secundaria"] / country_hex["pob_secundaria"].sum()
    )
    countries_geodata[country]["hexs_amazonia"] = country_hex[
        country_hex["amazonian_region"]
    ]

In [None]:
selected_vars = [
    "country",
    "hex",
    "pob_primaria",
    "pob_secundaria",
    "closest_primary_school_id",
    "closest_primary_school_dist",
    "closest_secondary_school_id",
    "closest_secondary_school_dist",
    "lat",
    "lon",
    "geometry",
]

In [None]:
# Preview the selected columns. `country` is whatever value the last executed
# loop left behind, so the country shown depends on execution order.
countries_geodata[country]["hexs_amazonia"][selected_vars].head()

In [None]:
# Export Peru's Amazonian hexagons as an ESRI Shapefile.
# NOTE(review): Shapefile field names are limited to 10 characters, so the
# longer names in `selected_vars` are expected to be truncated on write - confirm.
countries_geodata["per"]["hexs_amazonia"][selected_vars].to_file(
    "outputs/per/hexs_amazonia.shp", driver="ESRI Shapefile"
)

In [None]:
# Write each country's Amazonian hexagons as parquet and GeoJSON.
# Both files are skipped when the parquet file already exists.
for country in countries:
    output_pq_file = f"outputs/{country}_hexs_amazonia.parquet"
    output_geojson_file = f"outputs/{country}_hexs_amazonia.geojson"
    if os.path.exists(output_pq_file):
        print(f"{output_pq_file} exists")
        continue
    country_export = countries_geodata[country]["hexs_amazonia"][selected_vars]
    country_export.to_parquet(output_pq_file)
    country_export.to_file(output_geojson_file, driver="GeoJSON")

In [None]:
# Stack the Amazonian hexagons of all countries (selected columns only) into one table.
# Row labels are kept as they are in each country's table.
amazonia_complete_hexs = pd.concat(
    [
        countries_geodata[country]["hexs_amazonia"][selected_vars]
        for country in countries
    ],
    axis=0,
)

In [None]:
# Summary statistics of the distance to the closest primary school
amazonia_complete_hexs[
    "closest_primary_school_dist"
].describe()  # kilometers (distance)

In [None]:
amazonia_complete_hexs.iloc[
    amazonia_complete_hexs["closest_primary_school_dist"].argmax()
]

In [None]:
# Inspect one Brazilian primary school by position within the country-filtered table.
# NOTE(review): 3706 is hard-coded; presumably it is the closest_primary_school_id
# of the farthest hexagon shown in the previous cell - confirm after re-running.
primary_schools[primary_schools["country"] == "bra"].iloc[3706]

In [None]:
# Population-weighted average distance to the closest primary school:
# sum(pob_primaria * distance) / sum(pob_primaria).
# Value recorded from a previous run: 5.862020812925963
(
    amazonia_complete_hexs["pob_primaria"]
    * amazonia_complete_hexs["closest_primary_school_dist"]
).sum() / amazonia_complete_hexs["pob_primaria"].sum()

In [None]:
# Population-weighted average distance to the closest primary school, per country
for country in countries:
    country_rows = amazonia_complete_hexs.loc[
        amazonia_complete_hexs["country"] == country
    ]
    weighted_dist = (
        country_rows["pob_primaria"] * country_rows["closest_primary_school_dist"]
    )
    country_mean = weighted_dist.sum() / country_rows["pob_primaria"].sum()
    print(country, country_mean)

In [None]:
# Population-weighted average distance to the closest secondary school:
# sum(pob_secundaria * distance) / sum(pob_secundaria)
(
    amazonia_complete_hexs["pob_secundaria"]
    * amazonia_complete_hexs["closest_secondary_school_dist"]
).sum() / amazonia_complete_hexs["pob_secundaria"].sum()

In [None]:
# Distance bands for the closest primary school; the last edge is the
# observed maximum so every positive distance falls in a band.
# With pd.cut's defaults the intervals are right-closed, so a distance of
# exactly 0 is not assigned to any band.
primary_custom_bins = [
    0,
    2.5,
    5,
    10,
    20,
    amazonia_complete_hexs["closest_primary_school_dist"].max(),
]
amazonia_complete_hexs["closest_primary_school_dist_cat"] = pd.cut(
    amazonia_complete_hexs["closest_primary_school_dist"], bins=primary_custom_bins
)
# Same bands for the closest secondary school, capped at its own maximum
secondary_custom_bins = [
    0,
    2.5,
    5,
    10,
    20,
    amazonia_complete_hexs["closest_secondary_school_dist"].max(),
]
amazonia_complete_hexs["closest_secondary_school_dist_cat"] = pd.cut(
    amazonia_complete_hexs["closest_secondary_school_dist"], bins=secondary_custom_bins
)

In [None]:
# Calculate the primary-school-age population for each distance category.
# (The variable name says "duration", but the categories are distance bands.)
pop_primaria_by_duration_total = (
    amazonia_complete_hexs.groupby("closest_primary_school_dist_cat")["pob_primaria"]
    .sum()
    .reset_index()
)
pop_primaria_by_duration_total["population_label"] = "Primary School Population"

# Disabled: percentage of the total population for each distance category
# (computed in a later cell instead).
# pop_primaria_by_duration_total["prop_pob_primaria"] = (
#     pop_primaria_by_duration_total["pob_primaria"]
#     * 100
#     / amazonia_complete_hexs["pob_primaria"].sum()
# ).round(2).astype(str) + " %"

In [None]:
pop_primaria_by_duration_total

In [None]:
pop_primaria_by_duration_total.to_csv("pop_primaria_by_duration_total.csv")

In [None]:
# Recompute the primary-school-age population for each distance category,
# this time with the percentage column included.
pop_primaria_by_duration_total = (
    amazonia_complete_hexs.groupby("closest_primary_school_dist_cat")["pob_primaria"]
    .sum()
    .reset_index()
)
pop_primaria_by_duration_total["population_label"] = "Primary School Population"

# Percentage of the total primary-school-age population in each distance
# category, formatted as text such as "12.34 %"
pop_primaria_by_duration_total["prop_pob_primaria"] = (
    pop_primaria_by_duration_total["pob_primaria"]
    / amazonia_complete_hexs["pob_primaria"].sum()
    * 100
).round(2).astype(str) + " %"

In [None]:
pop_primaria_by_duration_total["pob_primaria"].sum()

In [None]:
pop_primaria_by_duration_total.to_excel("pop_primaria_by_distancia_amazonia_total.xlsx")

In [None]:
# Secondary-school-age population for each distance category
pop_secundaria_by_duration_total = (
    amazonia_complete_hexs.groupby("closest_secondary_school_dist_cat")[
        "pob_secundaria"
    ]
    .sum()
    .reset_index()
)
pop_secundaria_by_duration_total["population_label"] = "Secondary School Population"

# Percentage of the total secondary-school-age population in each distance
# category, formatted as text such as "12.34 %"
pop_secundaria_by_duration_total["prop_pob_secundaria"] = (
    pop_secundaria_by_duration_total["pob_secundaria"]
    / amazonia_complete_hexs["pob_secundaria"].sum()
    * 100
).round(2).astype(str) + " %"

In [None]:
pop_secundaria_by_duration_total.to_excel("pop_secundaria_by_duration_total.xlsx")

In [None]:
pop_secundaria_by_duration_total

In [None]:
# Primary-school-age population for each (country, distance category) pair
pop_primaria_by_duration_country = (
    amazonia_complete_hexs.groupby(["country", "closest_primary_school_dist_cat"])[
        "pob_primaria"
    ]
    .sum()
    .reset_index()
)
pop_primaria_by_duration_country["population_label"] = "Primary School Population"

In [None]:
pop_primaria_by_duration_country.to_excel("pop_primaria_distancia_by_country.xlsx")

In [None]:
# Secondary-school-age population for each (country, distance category) pair
pop_secundaria_by_duration_country = (
    amazonia_complete_hexs.groupby(["country", "closest_secondary_school_dist_cat"])[
        "pob_secundaria"
    ]
    .sum()
    .reset_index()
)
pop_secundaria_by_duration_country["population_label"] = "Secondary School Population"

In [None]:
pop_secundaria_by_duration_country.to_excel("pop_secundaria_distance_by__country.xlsx")

In [None]:
# Map of the resolution-7 Amazonian hexagons colored by primary-school distance band.
# The commented-out arguments are leftovers from an earlier version of the figure.
# amazonas_limits.plot(facecolor="none", edgecolor="black", linewidth=0.7)
ax = amazonia_complete_hexs.plot(
    "closest_primary_school_dist_cat",
    # ax=ax,
    linewidth=0,
    cmap="viridis",
    # categories=["0-15", "15-30", "30-45", "45-60", "60-90", "90-120", ">120"],
    legend=True,
    # cax=cax,
    vmin=0,
    vmax=40,
)
# Light basemap requested in the CRS of the hexagon layer
ctx.add_basemap(
    ax,
    crs=amazonia_complete_hexs.crs.to_string(),
    source=ctx.providers.CartoDB.Positron,
)
ax.set_axis_off()

In [None]:
import seaborn as sns

sns.set_theme(style="ticks", palette="pastel")

In [None]:
amazonia_complete_hexs["closest_primary_school_dist_clipped"] = amazonia_complete_hexs[
    "closest_primary_school_dist"
].clip(0, 50)

In [None]:
amazonia_complete_hexs[
    amazonia_complete_hexs["closest_primary_school_dist"] < 18
].groupby("country")["pob_primaria"].sum().round(0)

In [None]:
amazonia_complete_hexs["closest_primary_school_dist"].hist()

In [None]:
# Box plot of the distance to the closest primary school per country,
# using the values clipped to the 0-50 range
sns.boxplot(
    x="country", y="closest_primary_school_dist_clipped", data=amazonia_complete_hexs
)
sns.despine(offset=10, trim=True)

In [None]:
sns.violinplot(
    x="country", y="closest_primary_school_dist", data=amazonia_complete_hexs
)

In [None]:
amazonia_complete_hexs.head()

In [None]:
amazonia_complete_hexs[amazonia_complete_hexs["pob_primaria"] < 1].shape

In [None]:
amazonia_complete_hexs[amazonia_complete_hexs["pob_primaria"] > 1].shape

In [None]:
amazonia_complete_hexs.head()

In [None]:
amazonia_complete_hexs["closest_primary_school_dist_cat"]

In [None]:
amazonia_complete_hexs["closest_secondary_school_dist_cat"]

In [None]:
type(amazonia_complete_hexs)

In [None]:
# Save the complete dataset as parquet, working on a copy so the in-memory
# table keeps its categorical columns.
amazonia_complete_hexs_temp = amazonia_complete_hexs.copy()

# The interval categories are stored as plain strings in the parquet file
for cat_col in (
    "closest_primary_school_dist_cat",
    "closest_secondary_school_dist_cat",
):
    amazonia_complete_hexs_temp[cat_col] = amazonia_complete_hexs_temp[
        cat_col
    ].astype(str)

amazonia_complete_hexs_temp.to_parquet("outputs/amazonia_hexs_7.parquet")

In [None]:
# Downsample to resolution 5, averaging both distances per parent hexagon.
# Only hexagons with more than 1 person of primary or secondary age are used.
amazonas_hexs5 = up.geom.resolution_downsampling(
    amazonia_complete_hexs[
        (amazonia_complete_hexs["pob_primaria"] > 1)
        | (amazonia_complete_hexs["pob_secundaria"] > 1)
    ],
    "hex",
    5,
    {
        "closest_primary_school_dist": "mean",
        "closest_secondary_school_dist": "mean",
    },
)

In [None]:
amazonas_hexs5["closest_primary_school_dist_cat"] = pd.cut(
    amazonas_hexs5["closest_primary_school_dist"], bins=secondary_custom_bins
)

In [None]:
amazonas_hexs5["closest_primary_school_dist_cat"] = amazonas_hexs5[
    "closest_primary_school_dist_cat"
].astype(str)
amazonas_hexs5["closest_primary_school_dist_label"] = amazonas_hexs5[
    "closest_primary_school_dist_cat"
].replace(
    {
        "(0.0, 2.5]": "<2.5",
        "(2.5, 5.0]": "2.5-5",
        "(5.0, 10.0]": "5-10",
        "(10.0, 20.0]": "10-20",
        "(20.0, 384.278]": ">20",
    }
)

In [None]:
amazonas_hexs5["closest_primary_school_dist_label"].unique()

In [None]:
amazonas_hexs5["closest_secondary_school_dist_cat"] = pd.cut(
    amazonas_hexs5["closest_secondary_school_dist"], bins=primary_custom_bins
)

In [None]:
amazonas_hexs5["closest_secondary_school_dist_cat"] = amazonas_hexs5[
    "closest_secondary_school_dist_cat"
].astype(str)
amazonas_hexs5["closest_secondary_school_dist_label"] = amazonas_hexs5[
    "closest_secondary_school_dist_cat"
].replace(
    {
        "(0.0, 2.5]": "<2.5",
        "(2.5, 5.0]": "2.5-5",
        "(5.0, 10.0]": "5-10",
        "(10.0, 20.0]": "10-20",
        "(20.0, 375.971]": ">20",
    }
)

In [None]:
amazonas_hexs5["closest_secondary_school_dist_cat"].unique()

In [None]:
# Map of the resolution-5 hexagons colored by primary-school distance band,
# with country borders and the Amazon boundary drawn on top.
# amazonas_limits.plot(facecolor="none", edgecolor="black", linewidth=0.7)
ax = amazonas_hexs5.plot(
    "closest_primary_school_dist_label",
    # ax=ax,
    linewidth=0,
    # alpha=0.5,
    cmap="viridis",
    # Explicit category order so the legend goes from nearest to farthest
    categories=["<2.5", "2.5-5", "5-10", "10-20", ">20"],
    legend=True,
    figsize=(10, 10),
    # cax=cax,
)
# primary_schools.plot(ax=ax, color="red", markersize=0.01)
amzn_countries.plot(facecolor="none", edgecolor="grey", ax=ax, linewidth=1)
amazon_limits.plot(facecolor="none", edgecolor="black", linewidth=0.7, ax=ax)

# Fixed map window (x from -82 to -40, y from -22 to 12)
ax.set_xlim(-82, -40)
ax.set_ylim(-22, 12)

# Base tiles first, then a labels-only layer on top
ctx.add_basemap(
    ax, crs=amazonas_hexs5.crs.to_string(), source=ctx.providers.CartoDB.Positron
)
ctx.add_basemap(
    ax,
    crs=amazonas_hexs5.crs.to_string(),
    source=ctx.providers.CartoDB.PositronOnlyLabels,
)
ax.set_axis_off()

# Written to the current working directory in both raster and vector form
plt.savefig("closest_primary_school_dist_label.png", dpi=300, bbox_inches="tight")
plt.savefig("closest_primary_school_dist_label.svg", dpi=300, bbox_inches="tight")

In [None]:
(amazonas_hexs5["closest_secondary_school_dist_label"] == "nan").sum()

In [None]:
# Map of the resolution-5 hexagons colored by secondary-school distance band.
# Hexagons whose label is the string "nan" (no band assigned) are left out.
# amazonas_limits.plot(facecolor="none", edgecolor="black", linewidth=0.7)
ax = amazonas_hexs5[
    amazonas_hexs5["closest_secondary_school_dist_label"] != "nan"
].plot(
    "closest_secondary_school_dist_label",
    # ax=ax,
    linewidth=0,
    # alpha=0.5,
    cmap="viridis",
    # Explicit category order so the legend goes from nearest to farthest
    categories=["<2.5", "2.5-5", "5-10", "10-20", ">20"],
    legend=True,
    figsize=(10, 10),
    # cax=cax,
)
# primary_schools.plot(ax=ax, color="red", markersize=0.01)
amzn_countries.plot(facecolor="none", edgecolor="grey", ax=ax, linewidth=1)
amazon_limits.plot(facecolor="none", edgecolor="black", linewidth=0.7, ax=ax)

# Fixed map window (x from -82 to -40, y from -22 to 12)
ax.set_xlim(-82, -40)
ax.set_ylim(-22, 12)

# Base tiles first, then a labels-only layer on top
ctx.add_basemap(
    ax, crs=amazonas_hexs5.crs.to_string(), source=ctx.providers.CartoDB.Positron
)
ctx.add_basemap(
    ax,
    crs=amazonas_hexs5.crs.to_string(),
    source=ctx.providers.CartoDB.PositronOnlyLabels,
)
ax.set_axis_off()

# Written to the current working directory in both raster and vector form
plt.savefig("closest_secondary_school_dist_label.png", dpi=300, bbox_inches="tight")
plt.savefig("closest_secondary_school_dist_label.svg", dpi=300, bbox_inches="tight")

In [None]:
# Side-by-side maps of the distance band to the closest primary (left) and
# secondary (right) school at resolution 5.
# NOTE(review): the previous version of this cell could not run (missing comma
# in the plot call, undefined `amazonas_limits`, `amazonas_hexs4` and
# `make_axes_locatable`, and a "closest_primary_school_dur_cat" column that is
# never created here). It was rebuilt from the layers this notebook does
# produce - confirm this is the intended figure. The output path is unchanged.
fig, (ax, ax1) = plt.subplots(1, 2, figsize=(20, 10))

dist_categories = ["<2.5", "2.5-5", "5-10", "10-20", ">20"]
panels = (
    (ax, "closest_primary_school_dist_label", "Closest primary school"),
    (ax1, "closest_secondary_school_dist_label", "Closest secondary school"),
)

for panel_ax, label_col, panel_title in panels:
    amazon_limits.plot(
        facecolor="none", edgecolor="black", linewidth=0.7, ax=panel_ax
    )
    # Hexagons without a distance band carry the string "nan" and are skipped
    amazonas_hexs5[amazonas_hexs5[label_col] != "nan"].plot(
        label_col,
        ax=panel_ax,
        linewidth=0.3,
        cmap="viridis",
        categories=dist_categories,
        legend=True,
    )
    ctx.add_basemap(panel_ax, crs=amazon_limits.crs.to_string())
    panel_ax.set_title(panel_title)
    panel_ax.set_axis_off()

plt.tight_layout()
plt.savefig(
    "outputs/amazonas_hexs_4_2maps_schools.svg",
    dpi=300,
    bbox_inches="tight",
    format="svg",
)

In [None]:
# Box plot of the distance to the closest primary school, one box per country.
# One column per country is built from the distance values; the previous
# version selected columns named after the country codes, which do not exist,
# so every box was empty. A separate name is used so the global `df` raster
# index (read by process_country) is not overwritten, and a fresh figure is
# created instead of drawing on a leftover axes.
dist_by_country = pd.DataFrame(
    {
        country_code: distances.reset_index(drop=True)
        for country_code, distances in amazonia_complete_hexs.groupby("country")[
            "closest_primary_school_dist"
        ]
    }
)
fig, ax = plt.subplots(figsize=(10, 6))
dist_by_country.plot.box(ax=ax)
ax.set_ylabel("accesibility")