In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import urbanpy as up
from tqdm.notebook import tqdm

In [None]:
# Register tqdm with pandas so that `progress_apply`-style methods become available.
# NOTE(review): no `progress_apply` call is visible in this notebook — confirm it is still needed.
tqdm.pandas()

In [None]:
# Load the grid cells that every accessibility computation below starts from.
gdf_celdas = gpd.read_parquet("outputs/celdas_countries_wo_br_02_06_2025.parquet")
print(f"{len(gdf_celdas)} Cells loaded")

In [None]:
# Load the school points used as routing destinations.
gdf_schools = gpd.read_parquet("outputs/amazon_schools.parquet")
# Report the row count as schools; the message used to say "Cells", which was
# misleading because this frame holds schools, not grid cells.
print(f"{gdf_schools.shape[0]} Schools loaded")

In [None]:
# Expose each school's coordinates as plain `lat` (geometry y) and `lon` (geometry x)
# columns. The merge cells further down pull `lat`/`lon` from the school frames;
# presumably urbanpy's travel_times reads them as well — TODO confirm.
# Assumes every geometry is a Point in a geographic CRS (`.x`/`.y` need points).
gdf_schools.loc[:, "lat"] = gdf_schools["geometry"].y
gdf_schools.loc[:, "lon"] = gdf_schools["geometry"].x

In [None]:
gdf_celdas.head()

In [None]:
gdf_celdas["code"].value_counts()

In [None]:
gdf_celdas["code"].isna().sum()

In [None]:
# Keep only Peru's cells. Cell codes are lowercase ("per"), unlike the uppercase
# `country_code` values used to filter the school frame.
gdf_celdas_peru = gdf_celdas[gdf_celdas["code"] == "per"]

In [None]:
gdf_schools["country_code"].value_counts()

In [None]:
# Keep only Peru's schools. School country codes are uppercase ("PER"), unlike the
# lowercase `code` values used to filter the cell frame.
gdf_schools_peru = gdf_schools[gdf_schools["country_code"] == "PER"]

In [None]:
# Guard: the school points and the cells must be in the same CRS before they are
# combined. Only the Peru school subset is compared against the full cell frame;
# no equivalent assert appears for the other countries in this notebook.
assert (
    gdf_schools_peru.crs.to_string() == gdf_celdas.crs.to_string()
), "CRS do not match"

In [None]:
gdf_schools_peru.head()

In [None]:
# Split Peru's schools by education level using the per-level flag columns (== 1).
# The three filters are independent, so a school flagged for several levels
# appears in more than one subset.
gdf_primary_schools_peru = gdf_schools_peru[gdf_schools_peru["nivel_primaria"] == 1]
gdf_middle_schools_peru = gdf_schools_peru[gdf_schools_peru["nivel_media"] == 1]
gdf_secondary_schools_peru = gdf_schools_peru[gdf_schools_peru["nivel_secundaria"] == 1]

In [None]:
# Report how many Peruvian schools exist overall and per education level.
# The last figure is the plain sum of the three subsets, not a count of distinct schools.
total_schools = len(gdf_schools_peru)
print("total # of schools in peru", total_schools)
print("primary:", len(gdf_primary_schools_peru))
print("middle:", len(gdf_middle_schools_peru))
print("secondary:", len(gdf_secondary_schools_peru))
print(
    "add primary, middle, and secondary:",
    sum(
        len(subset)
        for subset in (
            gdf_primary_schools_peru,
            gdf_middle_schools_peru,
            gdf_secondary_schools_peru,
        )
    ),
)

In [None]:
# Start the OSRM routing server for the Peru extract with the "foot" profile.
# It is stopped further down by the matching stop_osrm_server call.
up.routing.start_osrm_server("south-america/peru", "foot")

In [None]:
# Travel time from every Peruvian cell to its nearest primary school (urbanpy).
# The "primary_schools" label ends up in the result's column names: later cells read
# `nearest_primary_schools_ix` and `duration_to_nearest_primary_schools[_label]`.
# Presumably routed through the OSRM foot server started earlier — TODO confirm.
gdf_celdas_peru_access_primaria = up.accessibility.travel_times(
    gdf_celdas_peru, gdf_primary_schools_peru, "primary_schools"
)

In [None]:
gdf_celdas_peru_access_primaria

In [None]:
gdf_primary_schools_peru.reset_index()

In [None]:
# Attach the nearest primary school's building id and coordinates to every cell.
# `nearest_primary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(), i.e. it is used as a row position within gdf_primary_schools_peru.
# NOTE(review): the "_primary_school" suffix is only applied to right-hand columns
# whose names already exist on the left frame; a column that does not clash
# (possibly `id_edificio` here) keeps its bare name — confirm the resulting columns.
gdf_celdas_peru_access_primaria = gdf_celdas_peru_access_primaria.merge(
    gdf_primary_schools_peru.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_primary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_primary_school"),
)

In [None]:
# Travel time from every Peruvian cell to its nearest middle school.
# Runs on top of the primary-school result so the primary columns stay in the
# table next to the new "middle_schools" ones.
gdf_celdas_peru_access_middle = up.accessibility.travel_times(
    gdf_celdas_peru_access_primaria, gdf_middle_schools_peru, "middle_schools"
)

In [None]:
# Attach the nearest middle school's building id and coordinates to every cell.
# `nearest_middle_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_middle_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_peru_access_middle = gdf_celdas_peru_access_middle.merge(
    gdf_middle_schools_peru.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_middle_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_middle_school"),
)

In [None]:
gdf_celdas_peru_access_middle.head()

In [None]:
# Travel time from every Peruvian cell to its nearest secondary school.
# Runs on top of the primary + middle result, so the final table carries the
# columns for all three school levels.
gdf_celdas_peru_access_total = up.accessibility.travel_times(
    gdf_celdas_peru_access_middle, gdf_secondary_schools_peru, "secondary_schools"
)

In [None]:
# Attach the nearest secondary school's building id and coordinates to every cell.
# `nearest_secondary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_secondary_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_peru_access_total = gdf_celdas_peru_access_total.merge(
    gdf_secondary_schools_peru.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_secondary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_secondary_school"),
)

In [None]:
gdf_celdas_peru_access_total.head()

In [None]:
# Show the per-category cell counts together with the number of cells that have no category.
(
    gdf_celdas_peru_access_total["category"].value_counts(),
    gdf_celdas_peru_access_total["category"].isna().sum(),
)

In [None]:
import seaborn as sns

In [None]:
# Mean travel duration to the nearest primary and secondary school for each
# settlement type (the `category` column).
# NOTE(review): middle schools are not part of this comparison — confirm that is intended.
grouped = (
    gdf_celdas_peru_access_total.groupby("category")[
        ["duration_to_nearest_primary_schools", "duration_to_nearest_secondary_schools"]
    ]
    .mean()
    .reset_index()
)

# Long format (one row per category and school type) so seaborn can colour by school type.
grouped_melted = grouped.melt(
    id_vars="category", var_name="School Type", value_name="Avg Duration (min)"
)
sns.barplot(
    data=grouped_melted, x="category", y="Avg Duration (min)", hue="School Type"
)
# The title names both plotted series; it previously mentioned primary schools only.
plt.title(
    "Average Travel Duration to Nearest Primary and Secondary Schools by Settlement Type"
)
plt.ylabel("Minutes")
plt.xlabel("Settlement Type")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# One bar chart per school level showing how many cells fall in each duration label.
fig, axes = plt.subplots(1, 3, figsize=(15, 6), sharey=True)

# (label column, panel title, extra plot kwargs) for each school level.
# The primary panel keeps matplotlib's default bar colour.
panels = [
    (
        "duration_to_nearest_primary_schools_label",
        "Distribution of Access Durations to Primary Schools",
        {},
    ),
    (
        "duration_to_nearest_middle_schools_label",
        "Distribution of Access Durations to Middle Schools",
        {"color": "green"},
    ),
    (
        "duration_to_nearest_secondary_schools_label",
        "Distribution of Access Durations to Secondary Schools",
        {"color": "orange"},
    ),
]

for ax, (label_col, panel_title, plot_kwargs) in zip(axes, panels):
    # value_counts() alone orders bars by frequency, which gives each panel a
    # different x-axis order. Sorting by the label index keeps the duration
    # categories in one consistent order across the three panels
    # (category order if the labels are categorical, alphabetical otherwise).
    counts = gdf_celdas_peru_access_total[label_col].value_counts().sort_index()
    counts.plot(kind="bar", ax=ax, **plot_kwargs)
    ax.set_title(panel_title)
    ax.set_xlabel("Duration Category")
    ax.grid(True)

# The y axis is shared, so only the left-most panel is labelled.
axes[0].set_ylabel("Number of Cells")

plt.tight_layout()
plt.show()

In [None]:
gdf_celdas_peru_access_total.head()

In [None]:
# Persist Peru's cell-level accessibility table as the v3 parquet output.
gdf_celdas_peru_access_total.to_parquet("outputs/celdas_peru_access_v3.parquet")

In [None]:
# Shut down the Peru OSRM server; no further Peru travel_times calls follow.
up.routing.stop_osrm_server("south-america/peru", "foot")

In [None]:
gdf_celdas["code"].value_counts()

In [None]:
gdf_schools["country_code"].value_counts()

In [None]:
# Select Bolivia's rows: cells are filtered on the lowercase `code`,
# schools on the uppercase `country_code`.
gdf_celdas_bol = gdf_celdas[gdf_celdas["code"] == "bol"]
gdf_schools_bol = gdf_schools[gdf_schools["country_code"] == "BOL"]

In [None]:
gdf_schools_bol

In [None]:
# Split Bolivia's schools by education level using the per-level flag columns (== 1).
# The filters are independent, so one school can appear in several subsets.
gdf_primary_schools_bol = gdf_schools_bol[gdf_schools_bol["nivel_primaria"] == 1]
gdf_middle_schools_bol = gdf_schools_bol[gdf_schools_bol["nivel_media"] == 1]
gdf_secondary_schools_bol = gdf_schools_bol[gdf_schools_bol["nivel_secundaria"] == 1]

In [None]:
# Report how many Bolivian schools exist overall and per education level.
# The last figure is the plain sum of the three subsets, not a count of distinct schools.
print("total # of schools in bolivia", len(gdf_schools_bol))
print("primary:", len(gdf_primary_schools_bol))
print("middle:", len(gdf_middle_schools_bol))
print("secondary:", len(gdf_secondary_schools_bol))
print(
    "add primary, middle, and secondary:",
    sum(
        len(subset)
        for subset in (
            gdf_primary_schools_bol,
            gdf_middle_schools_bol,
            gdf_secondary_schools_bol,
        )
    ),
)

In [None]:
# Start the OSRM routing server for the Bolivia extract with the "foot" profile.
up.routing.start_osrm_server("south-america/bolivia", "foot")

In [None]:
# Travel time from every Bolivian cell to its nearest primary school (urbanpy).
# The "primary_schools" label is reflected in the result's column names; the next
# cell reads `nearest_primary_schools_ix` from it.
gdf_celdas_bol_access_primary = up.accessibility.travel_times(
    gdf_celdas_bol, gdf_primary_schools_bol, "primary_schools"
)

In [None]:
# Attach the nearest primary school's building id and coordinates to every cell.
# `nearest_primary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(), i.e. it is used as a row position within gdf_primary_schools_bol.
# NOTE(review): the "_primary_school" suffix is only applied to right-hand columns
# whose names already exist on the left frame — confirm the resulting column names.
gdf_celdas_bol_access_primary = gdf_celdas_bol_access_primary.merge(
    gdf_primary_schools_bol.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_primary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_primary_school"),
)

In [None]:
# Travel time from every Bolivian cell to its nearest middle school.
# Runs on top of the primary-school result rather than the raw cells.
gdf_celdas_bol_access_middle = up.accessibility.travel_times(
    gdf_celdas_bol_access_primary, gdf_middle_schools_bol, "middle_schools"
)

In [None]:
# Attach the nearest middle school's building id and coordinates to every cell.
# `nearest_middle_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_middle_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_bol_access_middle = gdf_celdas_bol_access_middle.merge(
    gdf_middle_schools_bol.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_middle_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_middle_school"),
)

In [None]:
gdf_celdas_bol_access_middle.head()

In [None]:
# Travel time from every Bolivian cell to its nearest secondary school.
# Runs on top of the primary + middle result rather than the raw cells.
gdf_celdas_bol_access_total = up.accessibility.travel_times(
    gdf_celdas_bol_access_middle, gdf_secondary_schools_bol, "secondary_schools"
)

In [None]:
# Attach the nearest secondary school's building id and coordinates to every cell.
# `nearest_secondary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_secondary_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_bol_access_total = gdf_celdas_bol_access_total.merge(
    gdf_secondary_schools_bol.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_secondary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_secondary_school"),
)

In [None]:
gdf_celdas_bol_access_total

In [None]:
# Shut down the Bolivia OSRM server; no further Bolivia travel_times calls follow.
up.routing.stop_osrm_server("south-america/bolivia", "foot")

In [None]:
# Persist Bolivia's cell-level accessibility table as the v3 parquet output.
gdf_celdas_bol_access_total.to_parquet("outputs/celdas_bol_access_v3.parquet")

In [None]:
# Select Ecuador's rows: cells are filtered on the lowercase `code`,
# schools on the uppercase `country_code`.
gdf_celdas_ecu = gdf_celdas[gdf_celdas["code"] == "ecu"]
gdf_schools_ecu = gdf_schools[gdf_schools["country_code"] == "ECU"]

In [None]:
# Split Ecuador's schools by education level using the per-level flag columns (== 1).
# The filters are independent, so one school can appear in several subsets.
gdf_primary_schools_ecu = gdf_schools_ecu[gdf_schools_ecu["nivel_primaria"] == 1]
gdf_middle_schools_ecu = gdf_schools_ecu[gdf_schools_ecu["nivel_media"] == 1]
gdf_secondary_schools_ecu = gdf_schools_ecu[gdf_schools_ecu["nivel_secundaria"] == 1]

In [None]:
# Report how many Ecuadorian schools exist overall and per education level.
# The last figure is the plain sum of the three subsets, not a count of distinct schools.
print("total # of schools in ecuador", len(gdf_schools_ecu))
print("primary:", len(gdf_primary_schools_ecu))
print("middle:", len(gdf_middle_schools_ecu))
print("secondary:", len(gdf_secondary_schools_ecu))
print(
    "add primary, middle, and secondary:",
    sum(
        len(subset)
        for subset in (
            gdf_primary_schools_ecu,
            gdf_middle_schools_ecu,
            gdf_secondary_schools_ecu,
        )
    ),
)

In [None]:
# Start the OSRM routing server for the Ecuador extract with the "foot" profile.
up.routing.start_osrm_server("south-america/ecuador", "foot")

In [None]:
# Travel time from every Ecuadorian cell to its nearest primary school (urbanpy).
# The "primary_schools" label is reflected in the result's column names; the next
# cell reads `nearest_primary_schools_ix` from it.
gdf_celdas_ecu_access_primary = up.accessibility.travel_times(
    gdf_celdas_ecu, gdf_primary_schools_ecu, "primary_schools"
)

In [None]:
# Attach the nearest primary school's building id and coordinates to every cell.
# `nearest_primary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(), i.e. it is used as a row position within gdf_primary_schools_ecu.
# NOTE(review): the "_primary_school" suffix is only applied to right-hand columns
# whose names already exist on the left frame — confirm the resulting column names.
gdf_celdas_ecu_access_primary = gdf_celdas_ecu_access_primary.merge(
    gdf_primary_schools_ecu.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_primary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_primary_school"),
)

In [None]:
# Travel time from every Ecuadorian cell to its nearest middle school.
# Start from the primary-school result, not the raw cells, so the primary-school
# columns are carried into the final Ecuador table, as in the Peru and Bolivia
# sections. Passing the raw `gdf_celdas_ecu` here silently dropped them from the
# saved and combined outputs.
gdf_celdas_ecu_access_middle = up.accessibility.travel_times(
    gdf_celdas_ecu_access_primary, gdf_middle_schools_ecu, "middle_schools"
)

In [None]:
# Attach the nearest middle school's building id and coordinates to every cell.
# `nearest_middle_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_middle_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_ecu_access_middle = gdf_celdas_ecu_access_middle.merge(
    gdf_middle_schools_ecu.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_middle_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_middle_school"),
)

In [None]:
# Travel time from every Ecuadorian cell to its nearest secondary school.
# Runs on top of the middle-school result rather than the raw cells.
gdf_celdas_ecu_access_total = up.accessibility.travel_times(
    gdf_celdas_ecu_access_middle, gdf_secondary_schools_ecu, "secondary_schools"
)

In [None]:
# Attach the nearest secondary school's building id and coordinates to every cell.
# `nearest_secondary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_secondary_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_ecu_access_total = gdf_celdas_ecu_access_total.merge(
    gdf_secondary_schools_ecu.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_secondary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_secondary_school"),
)

In [None]:
# Persist Ecuador's cell-level accessibility table as the v3 parquet output.
gdf_celdas_ecu_access_total.to_parquet("outputs/celdas_ecu_access_v3.parquet")

In [None]:
# Shut down the Ecuador OSRM server; no further Ecuador travel_times calls follow.
up.routing.stop_osrm_server("south-america/ecuador", "foot")

In [None]:
gdf_celdas_ecu_access_total.head()

In [None]:
# Select Colombia's rows: cells are filtered on the lowercase `code`,
# schools on the uppercase `country_code`.
gdf_celdas_col = gdf_celdas[gdf_celdas["code"] == "col"]
gdf_schools_col = gdf_schools[gdf_schools["country_code"] == "COL"]

In [None]:
# Split Colombia's schools by education level using the per-level flag columns (== 1).
# The filters are independent, so one school can appear in several subsets.
gdf_primary_schools_col = gdf_schools_col[gdf_schools_col["nivel_primaria"] == 1]
gdf_middle_schools_col = gdf_schools_col[gdf_schools_col["nivel_media"] == 1]
gdf_secondary_schools_col = gdf_schools_col[gdf_schools_col["nivel_secundaria"] == 1]

In [None]:
# Report how many Colombian schools exist overall and per education level.
# The last figure is the plain sum of the three subsets, not a count of distinct schools.
print("total # of schools in colombia", len(gdf_schools_col))
print("primary:", len(gdf_primary_schools_col))
print("middle:", len(gdf_middle_schools_col))
print("secondary:", len(gdf_secondary_schools_col))
print(
    "add primary, middle, and secondary:",
    sum(
        len(subset)
        for subset in (
            gdf_primary_schools_col,
            gdf_middle_schools_col,
            gdf_secondary_schools_col,
        )
    ),
)

In [None]:
# Start the OSRM routing server for the Colombia extract with the "foot" profile.
up.routing.start_osrm_server("south-america/colombia", "foot")

In [None]:
# Travel time from every Colombian cell to its nearest primary school (urbanpy).
# The "primary_schools" label is reflected in the result's column names; the next
# cell reads `nearest_primary_schools_ix` from it.
gdf_celdas_col_access_primary = up.accessibility.travel_times(
    gdf_celdas_col, gdf_primary_schools_col, "primary_schools"
)

In [None]:
# Attach the nearest primary school's building id and coordinates to every cell.
# `nearest_primary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(), i.e. it is used as a row position within gdf_primary_schools_col.
# NOTE(review): the "_primary_school" suffix is only applied to right-hand columns
# whose names already exist on the left frame — confirm the resulting column names.
gdf_celdas_col_access_primary = gdf_celdas_col_access_primary.merge(
    gdf_primary_schools_col.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_primary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_primary_school"),
)

In [None]:
# Travel time from every Colombian cell to its nearest middle school.
# Start from the primary-school result, not the raw cells, so the primary-school
# columns are carried into the final Colombia table, as in the Peru and Bolivia
# sections. Passing the raw `gdf_celdas_col` here silently dropped them from the
# saved and combined outputs.
gdf_celdas_col_access_middle = up.accessibility.travel_times(
    gdf_celdas_col_access_primary, gdf_middle_schools_col, "middle_schools"
)

In [None]:
# Attach the nearest middle school's building id and coordinates to every cell.
# `nearest_middle_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_middle_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_col_access_middle = gdf_celdas_col_access_middle.merge(
    gdf_middle_schools_col.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_middle_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_middle_school"),
)

In [None]:
# Travel time from every Colombian cell to its nearest secondary school.
# Runs on top of the middle-school result rather than the raw cells.
gdf_celdas_col_access_total = up.accessibility.travel_times(
    gdf_celdas_col_access_middle, gdf_secondary_schools_col, "secondary_schools"
)

In [None]:
# Attach the nearest secondary school's building id and coordinates to every cell.
# `nearest_secondary_schools_ix` is matched against the 0..n-1 index created by
# reset_index(). Right-hand columns that clash with existing names receive the
# "_secondary_school" suffix; left-hand columns keep their names (empty left suffix).
gdf_celdas_col_access_total = gdf_celdas_col_access_total.merge(
    gdf_secondary_schools_col.reset_index()[["id_edificio", "lat", "lon"]],
    left_on="nearest_secondary_schools_ix",
    right_index=True,
    how="left",
    suffixes=("", "_secondary_school"),
)

In [None]:
# Persist Colombia's cell-level accessibility table as the v3 parquet output.
gdf_celdas_col_access_total.to_parquet("outputs/celdas_col_access_v3.parquet")

In [None]:
# Shut down the Colombia OSRM server; no further Colombia travel_times calls follow.
up.routing.stop_osrm_server("south-america/colombia", "foot")

In [None]:
gdf_celdas_col_access_total.head()

In [None]:
# Stack the four in-memory per-country result tables (Bolivia, Colombia, Ecuador,
# Peru — in that order) into one GeoDataFrame with a fresh 0..n-1 row index.
gdf_combined_comp = gpd.GeoDataFrame(
    pd.concat(
        (
            gdf_celdas_bol_access_total,
            gdf_celdas_col_access_total,
            gdf_celdas_ecu_access_total,
            gdf_celdas_peru_access_total,
        ),
        ignore_index=True,
    )
)

# Preview the first rows of the combined table.
gdf_combined_comp.head()

In [None]:
# gdf_combined_comp["nivel_educativo"] = gdf_combined_comp.apply(
#     lambda x: (
#         "Primaria"
#         if x["EduNivelPrimaria"] == 1
#         else "Secundaria" if x["EduNivelSecundariaTotal"] == 1 else "Ninguno"
#     ),
#     axis=1,
# )

In [None]:
# gdf_combined_comp["nivel_educativo"].value_counts()

In [None]:
# Persist the combined four-country accessibility table as the v3 parquet output.
gdf_combined_comp.to_parquet("outputs/celdas_combined_access_v3.parquet")

In [None]:
gdf_schools