# **Read and Inspect**

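This notebook loads the five layers (Admin, Flood, Health, Population, Roads) and clips them to the chosen province/region boundary. The four vector layers are read into GeoDataFrames; the Population layer is a raster opened with rasterio.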

----

In [None]:
# Root folder that contains one sub-folder per layer (Admin, Flood, Health, ...).
# Set the DATASET_FILE_DIR environment variable to point at your own copy of the
# data; the original local path is kept as the fallback so existing setups keep
# working unchanged.
import os  # imported here because this cell runs before the main imports cell

dataset_file_dir = os.environ.get(
    "DATASET_FILE_DIR", "F:/Data Visualization/Case Study/DATASETS"
)

In [2]:
# Imports for Geo data
import geopandas as gpd
import pandas as pd
import numpy as np
import rasterio
import os
import json
import matplotlib.pyplot as plt
from rasterio.mask import mask

In [3]:
# Show the layer folders available under the dataset root.
# os.listdir returns entries in arbitrary order, so sort them to keep this
# cell's output stable across machines and re-runs.
sorted(os.listdir(dataset_file_dir))

['Admin', 'Flood', 'Health', 'Roads']

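These directories hold the vector layers. Together with the Population raster, which is opened separately further down, they make up the five layers that are loaded and inspected in this notebook.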

### **Loading Layers**

----

In [None]:
# Sub-folders of dataset_file_dir that are read as shapefiles below. The
# population data is not listed here: it is a raster opened separately with
# rasterio later in the notebook.
layer_names = ["Admin", "Flood", "Health", "Roads"]

def load_layer_from_dir(directory):
  """
  Load the shapefile stored in a layer directory.

  Args:
    directory: Path of the folder to search for a shapefile.
  Returns:
    Whatever gpd.read_file returns for the selected .shp file.
  Raises:
    FileNotFoundError: If the folder contains no .shp file.
  """
  # os.listdir gives entries in arbitrary order; sorting makes the choice
  # deterministic when a folder holds more than one shapefile.
  for file in sorted(os.listdir(directory)):
    # Compare case-insensitively so a name such as "ROADS.SHP" is found too.
    if file.lower().endswith(".shp"):
      shp_path = os.path.join(directory, file)
      return gpd.read_file(shp_path)
  raise FileNotFoundError(f"No .shp file found in {directory}")

# Read every vector layer once and keep the results keyed by layer name
layers = {
  name: load_layer_from_dir(os.path.join(dataset_file_dir, name))
  for name in layer_names
}

### **Inspect Layers**

----

In [None]:
# Confirm which layers were loaded by listing the dictionary keys
print(layers.keys())

In [None]:
# Function to inspect each layer
def inspect_layer(layer, random_state=None):
  """
  Print a basic summary of a layer.

  Reports the row count, coordinate reference system, bounding box, first
  rows, column names and one sampled geometry.

  Args:
    layer: A GeoDataFrame (Admin, Flood, Health, Roads)
    random_state: Optional seed forwarded to the geometry sample so the
      sampled row can be reproduced. The default (None) leaves the draw
      unseeded, as before.
  Returns:
    None
  """
  # Length of layer
  print("Length:", len(layer))
  # Coordinate reference system
  print("\nCRS:", layer.crs)
  # Bounding box
  print("\nBounds:", layer.total_bounds)
  # Show first 5 rows
  print("\nFirst 5 rows:")
  print(layer.head())
  # Show the attributes
  print("\nAttributes:", layer.columns)
  # Sample geometries
  print("\nSample Geometries:")
  if len(layer) == 0:
    # Sampling one row from an empty layer raises, so report it instead
    print("(layer is empty)")
  else:
    print(layer.geometry.sample(random_state=random_state))

##### **Admin Layer**

----

In [None]:
# Inspect "Admin" layer
# Keep a named reference: later cells filter this layer and take their
# target CRS from admin_layer.
admin_layer = layers["Admin"]

print("Admin Layer:\n")
inspect_layer(admin_layer)

##### **Flood Layer**

----

In [None]:
# Inspect "Flood" layer
# Keep a named reference: flood_layer is reprojected and clipped further down.
flood_layer = layers["Flood"]

print("Flood Layer:\n")
inspect_layer(flood_layer)

##### **Health Layer**

----

In [None]:
# Inspect "Health" layer
# Keep a named reference: health_layer is reprojected and filtered by city
# further down.
health_layer = layers["Health"]

print("Health Layer:\n")
inspect_layer(health_layer)

##### **Population Layer**

----

In [None]:
# Open the population raster. The dataset is left open on purpose: the
# raster-clipping cell further down reads from pop_raster.
pop_raster_path = os.path.join(dataset_file_dir, "Population", "phl_ppp_2020.tif")
pop_raster = rasterio.open(pop_raster_path)

# Report the raster's CRS, bounds and pixel dimensions
for label, value in (
    ("CRS:", pop_raster.crs),
    ("Bounds:", pop_raster.bounds),
    ("Height:", pop_raster.height),
    ("Width:", pop_raster.width),
):
  print(label, value)

##### **Roads Layer**

----

In [None]:
# Inspect "Roads" layer
# Keep a named reference: roads_layer is reprojected and clipped further down.
roads_layer = layers["Roads"]

print("Roads Layer:\n")
inspect_layer(roads_layer)

### **Clip Layers**

----

In [None]:
# Check the available attribute names before filtering; the next cell
# selects rows using the "ADM1_EN" column.
print(admin_layer.columns)
admin_layer.head()

In [None]:
# Select every admin row whose ADM1_EN value mentions the National Capital
# Region (case-insensitive; rows with a missing value are excluded)
ncr_name_pattern = "National Capital Region|NCR"
is_ncr_row = admin_layer["ADM1_EN"].str.contains(ncr_name_pattern, case=False, na=False)
ncr_admin = admin_layer[is_ncr_row]

print("Number of NCR barangays:", len(ncr_admin))

# Merge the selected polygons into one geometry and wrap it in a
# single-row GeoDataFrame that carries the admin layer's CRS
ncr_boundary_geom = ncr_admin.unary_union
ncr_boundary_gdf = gpd.GeoDataFrame(geometry=[ncr_boundary_geom], crs=admin_layer.crs)

In [None]:
# Bring the flood, roads and health layers into the admin layer's CRS so
# that all vector layers share one coordinate system
target_crs = admin_layer.crs

flood_layer, roads_layer, health_layer = (
    vector_layer.to_crs(target_crs)
    for vector_layer in (flood_layer, roads_layer, health_layer)
)

In [None]:
# Names of the 16 cities and 1 municipality (Pateros) that make up NCR
ncr_cities = [
    "Manila", "Quezon City", "Pasig", "Makati", "Taguig", "Mandaluyong",
    "Pasay", "Marikina", "Las Piñas", "Parañaque", "Muntinlupa", "Pateros",
    "Valenzuela", "Caloocan", "Malabon", "Navotas",
    "San Juan",  # previously missing, so its facilities were dropped
]

# Filter health facilities by city names.
# isin() is an exact match on addr_city: rows with a missing or differently
# spelled city are dropped, and a same-named city outside NCR would be kept.
# The spatial clip against the NCR boundary in a later cell removes such
# out-of-region matches from health_clipped.
health_ncr = health_layer[health_layer['addr_city'].isin(ncr_cities)]
print("Number of health facilities in NCR (by addr_city):", len(health_ncr))


In [None]:
# Dissolve all NCR barangays into a single polygon for clipping
# NOTE(review): these two statements repeat the boundary construction done in
# the NCR filter cell above, and ncr_admin is not modified in between, so they
# look redundant — confirm and consider removing one copy.
ncr_boundary_geom = ncr_admin.unary_union
ncr_boundary_gdf = gpd.GeoDataFrame(geometry=[ncr_boundary_geom], crs=admin_layer.crs)


In [None]:
# Cut each vector layer down to the NCR outline

# Flood extents inside NCR
flood_clipped = gpd.clip(flood_layer, mask=ncr_boundary_gdf)

# Road segments inside NCR
roads_clipped = gpd.clip(roads_layer, mask=ncr_boundary_gdf)

# Health facilities: starts from the city-name subset, then clips spatially
health_clipped = gpd.clip(health_ncr, mask=ncr_boundary_gdf)

In [None]:
# Clip the population raster to the NCR outline.
# (`mask` is already imported from rasterio.mask in the imports cell.)

# rasterio's mask() does not reproject the shapes it is given, so they must
# already be in the raster's CRS. Reproject the boundary first, when both sides
# declare a CRS, instead of assuming the raster matches the admin layer.
if pop_raster.crs is not None and ncr_boundary_gdf.crs is not None:
  ncr_boundary_for_raster = ncr_boundary_gdf.to_crs(pop_raster.crs.to_wkt())
else:
  ncr_boundary_for_raster = ncr_boundary_gdf

# Convert NCR boundary to GeoJSON-like dicts
ncr_geom_for_raster = [
    geom.__geo_interface__ for geom in ncr_boundary_for_raster.geometry
]

# Clip population raster (crop=True trims the output to the boundary's extent)
pop_clipped, pop_transform = mask(pop_raster, ncr_geom_for_raster, crop=True)

print("Clipped population raster shape:", pop_clipped.shape)


In [None]:
# Overview map: NCR outline, barangay boundaries and health facilities
fig, ax = plt.subplots(figsize=(12,12))  # larger figure

# NCR boundary (thick black outline)
# NOTE(review): polygon layers drawn through GeoDataFrame.plot may not show up
# in the legend even with a label — confirm in the rendered figure.
ncr_boundary_gdf.plot(ax=ax, facecolor='none', edgecolor='black', linewidth=2, label='NCR Boundary')

# NCR barangay boundaries (thin, semi-transparent blue lines)
ncr_admin.plot(ax=ax, facecolor='none', edgecolor='blue', linewidth=0.5, alpha=0.5, label='Barangays')

# Health facilities: skip rows without a geometry before reading .x / .y
health_ncr_valid = health_ncr[health_ncr.geometry.notnull()]
ax.scatter(
    x=health_ncr_valid.geometry.x,
    y=health_ncr_valid.geometry.y,
    color='red',
    s=20,
    label='Health Facilities'
)

# Set aspect ratio
ax.set_aspect('equal')

# Zoom out 3x around NCR: the visible window is zoom_factor times the NCR
# bounding box, i.e. half of that on each side of the center. The previous
# multiplier of 1 gave only a 2x window, contradicting the title.
zoom_factor = 3
xmin, ymin, xmax, ymax = ncr_boundary_gdf.total_bounds
x_center = (xmin + xmax) / 2
y_center = (ymin + ymax) / 2
x_range = (xmax - xmin) * zoom_factor / 2  # half of the 3x-wide window
y_range = (ymax - ymin) * zoom_factor / 2
ax.set_xlim(x_center - x_range, x_center + x_range)
ax.set_ylim(y_center - y_range, y_center + y_range)

# Title & legend
ax.set_title("NCR: Barangays and Health Facilities (Zoomed Out x3)", fontsize=18)
ax.legend(fontsize=12)

# Save before plt.show() so the file is written from the fully drawn figure
# regardless of how the active backend treats the figure after display
fig.savefig("ncr_barangays_health_zoomedout.png", dpi=300)

plt.show()

