# Overview

Generate a sample of barangays with rooftop and road data to use in testing out the RHR.

In [3]:
import geopandas as gpd
from pathlib import Path
import pandas as pd
from datetime import datetime
import folium
import osmnx as ox
from tqdm import tqdm
tqdm.pandas()
from pin_drop_sampling2.utils import get_s2_cell_id, count_neighbors_in_radius, get_nearest_point_on_road, dist_in_meters

In [4]:
DB_DIR = Path.home() / 'IDinsight Dropbox' / 'Random Walk Testing' 
PSU_FILE = DB_DIR / '01_Raw data'/ '03_Census' / 'Philippines' / 'barangay_w_borders.parquet'
ROOFTOP_DIR = DB_DIR /'01_Raw data'/ '01_Rooftop'/'Philippines'
OUTPUT_DIR = DB_DIR / '03_Output' / '06_RHR Simulations'

timestamp = datetime.now().strftime("%Y%m%d_%H")

# number of barangays to sample
num_bars = 20


# Sample barangays

In [5]:
# load the psu file
psus = gpd.read_parquet(PSU_FILE)

# drop rows where geometry is null
psus = psus.dropna(subset=['geometry'])

# sample num_bars barangays using random state 42
sampled_barangays = psus.sample(num_bars, random_state=42)

# Get rooftop data for sampled barangay

In [6]:
# get the s2 cell id for each barangay
sampled_barangays['s2_cell_id'] = sampled_barangays.apply(lambda x: get_s2_cell_id(x.geometry.centroid, 4), axis=1)

# create empty gdf to store rooftops
all_rooftops = gpd.GeoDataFrame()

# loop over each unique value of s2_cell_id. I loop over s2_cell_id first because loading the rooftop data for each s2 cell 
# takes a lot of time so I want to do it only once for each s2 cell
pd.options.mode.chained_assignment = None  # turn off annoying copy of a df warning
for s2_cell_id in sampled_barangays['s2_cell_id'].unique():
    print(f"\nProcessing s2 cell {s2_cell_id}")
    # get the barangays in this s2 cell
    barangays_in_s2_cell= sampled_barangays[sampled_barangays['s2_cell_id'] == s2_cell_id]
    # load the rooftop data for this s2 cell
    rooftops_gdf = gpd.read_parquet(ROOFTOP_DIR /f'{s2_cell_id}.parquet')
    # replace geometry column with the centroid of the geometry
    rooftops_gdf['geometry'] = rooftops_gdf.geometry.centroid

    for item, row in barangays_in_s2_cell.iterrows():
        # print a single dot without the newline character
        print('.', end='')
        # filter rooftops to only include those within the barangay
        temp_rooftops = rooftops_gdf[rooftops_gdf.geometry.within(row.geometry)]
        # set the psid for the rooftops
        temp_rooftops['PSGC'] = row['PSGC']
        temp_rooftops.to_crs(epsg=4326, inplace=True)
        # try to append temp_rooftops to all_rooftops and catch a value error. if there is an error, print the crs of the two dataframes
        try:
            all_rooftops = gpd.GeoDataFrame(pd.concat([all_rooftops, temp_rooftops], ignore_index=True))
        except ValueError:
            print(f"Error with s2 cell {s2_cell_id}")
            print(f"temp_rooftops crs: {temp_rooftops.crs}")
            print(f"all_rooftops crs: {all_rooftops.crs}")
            
# save all rooftops to a parquet file in case I close this notebook or the kernel gets messed up
all_rooftops.to_parquet(OUTPUT_DIR / f'all_roofs_samp_bars_{timestamp}.parquet')


Processing s2 cell 3724476891835400192



  rooftops_gdf['geometry'] = rooftops_gdf.geometry.centroid


......
Processing s2 cell 3679440895561695232



  rooftops_gdf['geometry'] = rooftops_gdf.geometry.centroid


...
Processing s2 cell 3715469692580659200



  rooftops_gdf['geometry'] = rooftops_gdf.geometry.centroid


......
Processing s2 cell 3625397700033249280



  rooftops_gdf['geometry'] = rooftops_gdf.geometry.centroid


.....

# Get OSM street network for sampled barangays

In [7]:
# create an empty gdf to store the road network
all_roads = gpd.GeoDataFrame()

for item, row in sampled_barangays.iterrows():
    polygon = row['geometry']
    psgc = row['PSGC']

    # Get the road network within the polygon
    try:
        G = ox.graph_from_polygon(polygon, network_type='all')
    except Exception as e:
        print(f"An error occurred: {e} for PSGC {psgc}")
        continue

    # Convert the road network to a GeoDataFrame
    try:
        gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
    except Exception as e:
        print(f"An error occurred: {e} for PSGC {psgc}")
        continue
    gdf_edges['PSGC'] = psgc
    all_roads = gpd.GeoDataFrame(pd.concat([all_roads, gdf_edges], ignore_index=True))

An error occurred: Graph contains no edges. for PSGC 504105001
An error occurred: Found no graph nodes within the requested polygon. for PSGC 908307002
An error occurred: Found no graph nodes within the requested polygon. for PSGC 1906609017
An error occurred: Found no graph nodes within the requested polygon. for PSGC 803701034


# Randomly sample 10 rooftops per barangay

In [8]:
# Group by 'PSGC' and sample 10 rows from each group
sampled = all_rooftops.groupby('PSGC', group_keys=False).apply(lambda x: x.sample(10, random_state=42))

# Create a new binary column and set the value to 1 for the sampled rows
all_rooftops['Sampled'] = 0

# Set Sampled to 1 if the row is in the sampled DataFrame
all_rooftops.loc[sampled.index, 'Sampled'] = 1

  sampled = all_rooftops.groupby('PSGC', group_keys=False).apply(lambda x: x.sample(10, random_state=42))


# Generate maps showing barangay borders, rooftop centroids, and road

In [9]:
psgcs = all_roads['PSGC'].unique()

for psgc in psgcs:
    temp_rooftops = all_rooftops[all_rooftops['PSGC'] == psgc]
    temp_roads = all_roads[all_roads['PSGC'] == psgc]
    border = sampled_barangays[sampled_barangays['PSGC'] == psgc].geometry.iloc[0]

    # Create a folium map centered at the centroid of the barangay
    m = folium.Map(location=[border.centroid.y, border.centroid.x], zoom_start=15, tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', attr='Esri')

    # Add the barangay border to the map
    folium.GeoJson(border).add_to(m)

    # Add the rooftops to map. If the row is in the sampled DataFrame, set the color to red
    for item, row in temp_rooftops.iterrows():
        if row['Sampled'] == 1:
            folium.CircleMarker(location=[row.geometry.y, row.geometry.x], color = 'blue', radius = 2).add_to(m)
        else:
            folium.CircleMarker(location=[row.geometry.y, row.geometry.x], color = 'blue', radius = 2).add_to(m)

    # add the roads to the map
    for _, row in temp_roads.iterrows():
        folium.PolyLine(locations=[(point[1], point[0]) for point in row['geometry'].coords],
                        color='black', weight=2.5, opacity=1).add_to(m)
    # save the map
    m.save(OUTPUT_DIR / f'{psgc}_map.html')
        