In [1]:
import geopandas as gpd
import os
from tqdm import tqdm

## What this notebook is about

This notebook consolidates the baranngay and municipality json files into one file each. Files are taken from [this repository](https://github.com/faeldon/philippines-json-maps)

# Importing Brgy Data

The barangay files were obtained from [this source](https://github.com/faeldon/philippines-json-maps/tree/master/topojson/barangays/hires)

In [2]:
brgys_folder = 'philippines-json-maps-master/topojson/barangays/hires'
brgys_json = os.listdir(brgys_folder)
brgys_json = [f"{brgys_folder}/{fs}" for fs in brgys_json]
len(brgys_json)

1647

In [3]:
brgy_gdf = gpd.GeoDataFrame({})
brgy_gdf.head()

In [4]:
# create a geodataframe from that contains all barangays
for brgy_json in tqdm(brgys_json):
    new_gdf = gpd.GeoDataFrame.from_file(brgy_json)
    brgy_gdf = brgy_gdf.append(new_gdf)
brgy_gdf.head()

100%|██████████| 1647/1647 [00:42<00:00, 38.39it/s]


Unnamed: 0,id,ADM1_PCODE,ADM1_EN,ADM2_PCODE,ADM2_EN,ADM3_PCODE,ADM3_EN,ADM4_PCODE,ADM4_EN,geometry
0,,PH040000000,REGION IV-A (CALABARZON),PH045600000,QUEZON,PH045602000,ALABAT,PH045602001,Angeles,"POLYGON ((121.99440 14.11326, 121.99405 14.112..."
1,,PH040000000,REGION IV-A (CALABARZON),PH045600000,QUEZON,PH045602000,ALABAT,PH045602002,Bacong,"POLYGON ((122.03987 14.14286, 122.03882 14.142..."
2,,PH040000000,REGION IV-A (CALABARZON),PH045600000,QUEZON,PH045602000,ALABAT,PH045602003,Balungay,"POLYGON ((122.05585 14.08601, 122.05575 14.085..."
3,,PH040000000,REGION IV-A (CALABARZON),PH045600000,QUEZON,PH045602000,ALABAT,PH045602004,Buenavista,"POLYGON ((122.07112 14.05975, 122.07174 14.059..."
4,,PH040000000,REGION IV-A (CALABARZON),PH045600000,QUEZON,PH045602000,ALABAT,PH045602005,Caglate,"POLYGON ((122.02153 14.15299, 122.02175 14.149..."


In [5]:
# drop null columns
if brgy_gdf["id"].isnull().all():
    brgy_gdf = brgy_gdf.drop(columns = "id")

In [6]:
# these columns will serve as the primary key
brgy_cols = [col for col in brgy_gdf.columns if col != "geometry"]
brgy_cols

['ADM1_PCODE',
 'ADM1_EN',
 'ADM2_PCODE',
 'ADM2_EN',
 'ADM3_PCODE',
 'ADM3_EN',
 'ADM4_PCODE',
 'ADM4_EN']

In [7]:
# check for duplicates
brgy_gdf.loc[brgy_gdf.loc[:,brgy_cols].duplicated(keep = False),:]

Unnamed: 0,ADM1_PCODE,ADM1_EN,ADM2_PCODE,ADM2_EN,ADM3_PCODE,ADM3_EN,ADM4_PCODE,ADM4_EN,geometry
97,PH090000000,REGION IX (ZAMBOANGA PENINSULA),PH097300000,ZAMBOANGA DEL SUR,PH097332000,ZAMBOANGA CITY,,Unclaimed Area under Jurisdiction of Zamboanga...,"POLYGON ((122.09483 7.16454, 122.09496 7.18130..."
98,PH090000000,REGION IX (ZAMBOANGA PENINSULA),PH097300000,ZAMBOANGA DEL SUR,PH097332000,ZAMBOANGA CITY,,Unclaimed Area under Jurisdiction of Zamboanga...,"POLYGON ((122.19224 7.26845, 122.19085 7.26891..."


In [8]:
# removing duplicates
brgy_gdf = brgy_gdf.loc[~brgy_gdf.loc[:,brgy_cols].duplicated(),:]
brgy_gdf.loc[brgy_gdf.loc[:,brgy_cols].duplicated(keep = False),:]

Unnamed: 0,ADM1_PCODE,ADM1_EN,ADM2_PCODE,ADM2_EN,ADM3_PCODE,ADM3_EN,ADM4_PCODE,ADM4_EN,geometry


In [9]:
# set coordinate system to be epsg:4326, the default latitude longitude
brgy_gdf = brgy_gdf.set_crs(crs = "epsg:4326")
brgy_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [10]:
# write brgy to geojson
brgy_gdf.to_file('output/ph_brgys.geojson', driver='GeoJSON')

# Importing City/Municipality Data

The barangay files were obtained from [this source](https://github.com/faeldon/philippines-json-maps/tree/master/topojson/municities/hires)

In [11]:
munis_folder = 'philippines-json-maps-master/topojson/municities/hires'
munis_json = os.listdir(munis_folder)
munis_json = [f"{munis_folder}/{fs}" for fs in munis_json]
len(munis_json)

87

In [12]:
muni_gdf = gpd.GeoDataFrame({})
muni_gdf.head()

In [13]:
# create a geodataframe from that contains all barangays
for muni_json in tqdm(munis_json):
    new_gdf = gpd.GeoDataFrame.from_file(muni_json)
    muni_gdf = muni_gdf.append(new_gdf)
muni_gdf.head()

100%|██████████| 87/87 [00:05<00:00, 15.06it/s]


Unnamed: 0,id,Shape_Leng,Shape_Area,ADM3_EN,ADM3_PCODE,ADM3_REF,ADM3ALT1EN,ADM3ALT2EN,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,date,validOn,geometry
0,,0.471963,0.005857,Alcantara,PH175901000,,,,Romblon,PH175900000,Region IV-B,PH170000000,Philippines (the),PH,2016-06-15,2018-01-30,"POLYGON ((122.09617 12.35427, 122.09609 12.354..."
1,,0.288515,0.002384,Banton,PH175902000,,,,Romblon,PH175900000,Region IV-B,PH170000000,Philippines (the),PH,2016-06-15,2018-01-30,"POLYGON ((122.06152 12.96302, 122.06178 12.962..."
2,,0.536131,0.010599,Cajidiocan,PH175903000,,,,Romblon,PH175900000,Region IV-B,PH170000000,Philippines (the),PH,2016-06-15,2018-01-30,"POLYGON ((122.67014 12.31087, 122.66968 12.311..."
3,,0.453908,0.003164,Calatrava,PH175904000,,,,Romblon,PH175900000,Region IV-B,PH170000000,Philippines (the),PH,2016-06-15,2018-01-30,"POLYGON ((122.12886 12.66333, 122.12288 12.654..."
4,,0.32112,0.001699,Concepcion,PH175905000,,,,Romblon,PH175900000,Region IV-B,PH170000000,Philippines (the),PH,2016-06-15,2018-01-30,"POLYGON ((121.70535 12.94911, 121.70551 12.948..."


In [14]:
# drop null columns
if muni_gdf["id"].isnull().all():
    muni_gdf = muni_gdf.drop(columns = "id")

In [15]:
# these columns will serve as the primary key
muni_cols = [col for col in muni_gdf.columns if col != "geometry"]
muni_cols

['Shape_Leng',
 'Shape_Area',
 'ADM3_EN',
 'ADM3_PCODE',
 'ADM3_REF',
 'ADM3ALT1EN',
 'ADM3ALT2EN',
 'ADM2_EN',
 'ADM2_PCODE',
 'ADM1_EN',
 'ADM1_PCODE',
 'ADM0_EN',
 'ADM0_PCODE',
 'date',
 'validOn']

In [16]:
# check for duplicates
muni_gdf.loc[muni_gdf.loc[:,muni_cols].duplicated(keep = False),:]

Unnamed: 0,Shape_Leng,Shape_Area,ADM3_EN,ADM3_PCODE,ADM3_REF,ADM3ALT1EN,ADM3ALT2EN,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,date,validOn,geometry


In [17]:
# removing duplicates
muni_gdf = muni_gdf.loc[~muni_gdf.loc[:,muni_cols].duplicated(),:]
muni_gdf.loc[muni_gdf.loc[:,muni_cols].duplicated(keep = False),:]

Unnamed: 0,Shape_Leng,Shape_Area,ADM3_EN,ADM3_PCODE,ADM3_REF,ADM3ALT1EN,ADM3ALT2EN,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,date,validOn,geometry


In [18]:
# set coordinate system to be epsg:4326, the default latitude longitude
muni_gdf = muni_gdf.set_crs(crs = "epsg:4326")
muni_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [19]:
# write muni to geojson
muni_gdf.to_file('output/ph_municities.geojson', driver='GeoJSON')