# **Import**

In [1]:
import pandas as pd
import numpy as np

# **Reading datasets**

In [2]:
# Reading population per barangay dataset
ppb_df = pd.read_csv('Datasets/population_per_barangay.csv')
ppb_df.head()

Unnamed: 0,New 10 digit PSGC,New 10 PCODE,Region,Reg_Code,Province,Prov_Code,Mun_City,Mun_Code,Barangay,Bgy_Code,Bgy_Old names,Urban / Rural\n(based on 2020 CPH),2020 Census Popn
0,102801001,PH0102801001,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Adams,PH012801000,Adams,PH012801001,,R,2189.0
1,102802001,PH0102802001,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bacarra,PH012802000,Bani,PH012802001,,R,1079.0
2,102802002,PH0102802002,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bacarra,PH012802000,Buyon,PH012802002,,R,1669.0
3,102802003,PH0102802003,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bacarra,PH012802000,Cabaruan,PH012802003,,R,1418.0
4,102802004,PH0102802004,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bacarra,PH012802000,Cabulalaan,PH012802004,,R,733.0


In [3]:
hf_df = pd.read_csv('Datasets/health_facilities.csv')
hf_df.sort_values(by=['region'], inplace=True)
hf_df.head()

Unnamed: 0,id,facilityco,healthfaci,typeofheal,barangay,municipali,province,region,status,address,style
1398,1452.0,DOH000000000031732,Bangco Barangay Health Station,Barangay Health Station,Bangco,Marawi City (capital),Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1515,1569.0,DOH000000000014693,Katutungan Barangay Health Station,Barangay Health Station,Katutungan (Pob.),Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1514,1568.0,DOH000000000014687,Pagalungan Barangay Health Station,Barangay Health Station,Pagalongan,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1513,1567.0,DOH000000000014690,Milaya Barangay Health Station,Barangay Health Station,Milaya,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1512,1566.0,DOH000000000014684,Kabatangan Barangay Health Station,Barangay Health Station,Kabatangan,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station


# **Cleaning/Preprocessing**

**Rename provinces**
> Maguindanao del Norte -> ***Maguindanao***

> Maguindanao del Sur -> ***Maguindanao***

> City of Isabela (Not a Province) -> ***City of Isabela***

> First District (Not a Province) -> ***First District***

> Second District (Not a Province) -> ***Second District***

> Third District (Not a Province) -> ***Third District***

> Fourth District (Not a Province) -> ***Fourth District***



In [4]:
# Dictionary for region name correction (have it inline with the rest of the datasets)
PPN_correction = {
    "Maguindanao del Norte": "Maguindanao",
    "Maguindanao del Sur": "Maguindanao",
    "City of Isabela (Not a Province)": "City of Isabela",
    "First District (Not a Province)": "First District",
    "Second District (Not a Province)": "Second District",
    "Third District (Not a Province)": "Third District",
    "Fourth District (Not a Province)": "Fourth District",
}


for old_province, new_province in PPN_correction.items():
    ppb_df.loc[ppb_df['Province'] == old_province, 'Province'] = new_province

> ## **Population per barangay**







---
Making the <i>population_per_municipality.csv</i> dataset from the <i>population_per_barangay.csv</i> dataset

In [5]:
# Array of unique municipality names
muni_names = np.array(ppb_df['Mun_City'].unique())

# Array of unique region names
region_names = np.array(ppb_df['Region'].unique())

print("Number of municipalities found: " + str(ppb_df['Mun_City'].nunique()))

Number of municipalities found: 1422


In [6]:
# Aggregating by municipality and adding each barangay's population
ppm_df = ppb_df.groupby(['Region', 'Reg_Code', 'Province', 'Prov_Code', 'Mun_City', 'Mun_Code'], as_index=False).sum()

print("Number of municipalities after groupby: " + str(ppm_df['Mun_City'].nunique()))

ppm_df.sort_values(by=['Mun_Code'], inplace=True)
ppm_df.drop(columns=['New 10 digit PSGC'], inplace=True)
ppm_df.head()

Number of municipalities after groupby: 1422


  ppm_df = ppb_df.groupby(['Region', 'Reg_Code', 'Province', 'Prov_Code', 'Mun_City', 'Mun_Code'], as_index=False).sum()


Unnamed: 0,Region,Reg_Code,Province,Prov_Code,Mun_City,Mun_Code,2020 Census Popn
286,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Adams,PH012801000,2189.0
287,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bacarra,PH012802000,33496.0
288,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Badoc,PH012803000,32530.0
293,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,City of Batac,PH012804000,55484.0
289,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bangui,PH012804000,15019.0


In [7]:
# Export to .csv
ppm_df.to_csv('Datasets/population_per_municipality.csv')

In [8]:
# If true, then the number of municipalities are consistent between the two Dataframes
ppb_df['Mun_City'].nunique() == ppm_df['Mun_City'].nunique()

True

> ## **Population per province**

---
Making <i>population_per_province.csv</i>

In [9]:
ppp_df = ppm_df.groupby(['Region', 'Reg_Code', 'Province', 'Prov_Code'], as_index=False).sum()

# Create a new row to add to the DataFrame
new_row = {'Region': "Region XII (SOCCSKSARGEN)", 'Reg_Code': "PH120000000", 'Province': "Cotabato City", 'Prov_Code': "PH129800000", '2020 Census Popn': 325079.0}

# Append the new row to the DataFrame
ppp_df = ppp_df.append(new_row, ignore_index=True)

ppp_df.sort_values(by=['Province'], inplace=True)
ppp_df.head(30)

  ppp_df = ppm_df.groupby(['Region', 'Reg_Code', 'Province', 'Prov_Code'], as_index=False).sum()
  ppp_df = ppp_df.append(new_row, ignore_index=True)


Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn
5,Cordillera Administrative Region (CAR),PH140000000,Abra,PH140100000,250985.0
81,Region XIII (Caraga),PH160000000,Agusan del Norte,PH160200000,760413.0
82,Region XIII (Caraga),PH160000000,Agusan del Sur,PH160300000,739367.0
51,Region VI (Western Visayas),PH060000000,Aklan,PH060400000,615475.0
45,Region V (Bicol Region),PH050000000,Albay,PH050500000,1374768.0
52,Region VI (Western Visayas),PH060000000,Antique,PH060600000,612974.0
6,Cordillera Administrative Region (CAR),PH140000000,Apayao,PH148100000,124366.0
29,Region III (Central Luzon),PH030000000,Aurora,PH037700000,235750.0
0,Bangsamoro Autonomous Region In Muslim Mindana...,PH150000000,Basilan,PH150700000,426207.0
30,Region III (Central Luzon),PH030000000,Bataan,PH030800000,853373.0


In [10]:
specific_row = ppp_df[ppp_df['Province'] == "Cotabato City"]

specific_row

Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn
86,Region XII (SOCCSKSARGEN),PH120000000,Cotabato City,PH129800000,325079.0


In [11]:
# Export to .csv
ppp_df.to_csv('Datasets/population_per_province.csv')

---
Finding unique province names

In [12]:
provinces = ppp_df['Province'].unique()
provinces.sort()
for province in provinces:
    print(province)
print('---')
print('Number of provinces: ' + str(ppp_df['Province'].nunique()))

Abra
Agusan del Norte
Agusan del Sur
Aklan
Albay
Antique
Apayao
Aurora
Basilan
Bataan
Batanes
Batangas
Benguet
Biliran
Bohol
Bukidnon
Bulacan
Cagayan
Camarines Norte
Camarines Sur
Camiguin
Capiz
Catanduanes
Cavite
Cebu
City of Isabela
Cotabato
Cotabato City
Davao Occidental
Davao Oriental
Davao de Oro
Davao del Norte
Davao del Sur
Dinagat Islands
Eastern Samar
First District
Fourth District
Guimaras
Ifugao
Ilocos Norte
Ilocos Sur
Iloilo
Isabela
Kalinga
La Union
Laguna
Lanao del Norte
Lanao del Sur
Leyte
Maguindanao
Marinduque
Masbate
Misamis Occidental
Misamis Oriental
Mountain Province
Negros Occidental
Negros Oriental
Northern Samar
Nueva Ecija
Nueva Vizcaya
Occidental Mindoro
Oriental Mindoro
Palawan
Pampanga
Pangasinan
Quezon
Quirino
Rizal
Romblon
Samar
Sarangani
Second District
Siquijor
Sorsogon
South Cotabato
Southern Leyte
Sultan Kudarat
Sulu
Surigao del Norte
Surigao del Sur
Tarlac
Tawi-Tawi
Third District
Zambales
Zamboanga Sibugay
Zamboanga del Norte
Zamboanga del Sur
---
Num

In [13]:
ppp_df.loc[ppp_df['Province'] == 'Maguindanao del Norte']

Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn


In [14]:
ppp_df.loc[ppp_df['Province'] == 'Maguindanao del Sur']

Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn


> ## **Population per region**

---
Making <i>population_per_region.csv</i>

In [15]:
ppr_df = ppp_df.groupby(['Region', 'Reg_Code'], as_index=False).sum()
ppr_df.sort_values(by=['Reg_Code'], inplace=True)
ppr_df.head(20)

  ppr_df = ppp_df.groupby(['Region', 'Reg_Code'], as_index=False).sum()


Unnamed: 0,Region,Reg_Code,2020 Census Popn
4,Region I (Ilocos Region),PH010000000,5301139.0
5,Region II (Cagayan Valley),PH020000000,3685744.0
6,Region III (Central Luzon),PH030000000,12422172.0
7,Region IV-A (CALABARZON),PH040000000,16195042.0
9,Region V (Bicol Region),PH050000000,6082165.0
10,Region VI (Western Visayas),PH060000000,7954723.0
11,Region VII (Central Visayas),PH070000000,8081988.0
12,Region VIII (Eastern Visayas),PH080000000,4547150.0
8,Region IX (Zamboanga Peninsula),PH090000000,3875576.0
13,Region X (Northern Mindanao),PH100000000,5022768.0


In [16]:
# Export to .csv
ppr_df.to_csv('Datasets/population_per_region.csv')

---
Finding unique region names

In [17]:
regions = ppr_df['Region'].unique()
regions.sort()
for region in regions:
    print(region)
print('---')
print('Number of regions: ' + str(ppr_df['Region'].nunique()))

Bangsamoro Autonomous Region In Muslim Mindanao (BARMM)
Cordillera Administrative Region (CAR)
MIMAROPA Region
National Capital Region (NCR)
Region I (Ilocos Region)
Region II (Cagayan Valley)
Region III (Central Luzon)
Region IV-A (CALABARZON)
Region IX (Zamboanga Peninsula)
Region V (Bicol Region)
Region VI (Western Visayas)
Region VII (Central Visayas)
Region VIII (Eastern Visayas)
Region X (Northern Mindanao)
Region XI (Davao Region)
Region XII (SOCCSKSARGEN)
Region XIII (Caraga)
---
Number of regions: 17


> ## **Health Facilities**

---
Exploring the <i>region</i> column of the <i>health_facilities</i> dataset

In [18]:
hf_df.head()

Unnamed: 0,id,facilityco,healthfaci,typeofheal,barangay,municipali,province,region,status,address,style
1398,1452.0,DOH000000000031732,Bangco Barangay Health Station,Barangay Health Station,Bangco,Marawi City (capital),Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1515,1569.0,DOH000000000014693,Katutungan Barangay Health Station,Barangay Health Station,Katutungan (Pob.),Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1514,1568.0,DOH000000000014687,Pagalungan Barangay Health Station,Barangay Health Station,Pagalongan,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1513,1567.0,DOH000000000014690,Milaya Barangay Health Station,Barangay Health Station,Milaya,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1512,1566.0,DOH000000000014684,Kabatangan Barangay Health Station,Barangay Health Station,Kabatangan,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station


Rename column 'region' and 'province'

In [19]:
hf_df.rename(columns={'region': 'Region'}, inplace=True)
hf_df.rename(columns={'province': 'Province'}, inplace=True)\

hf_df.head()

Unnamed: 0,id,facilityco,healthfaci,typeofheal,barangay,municipali,Province,Region,status,address,style
1398,1452.0,DOH000000000031732,Bangco Barangay Health Station,Barangay Health Station,Bangco,Marawi City (capital),Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1515,1569.0,DOH000000000014693,Katutungan Barangay Health Station,Barangay Health Station,Katutungan (Pob.),Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1514,1568.0,DOH000000000014687,Pagalungan Barangay Health Station,Barangay Health Station,Pagalongan,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1513,1567.0,DOH000000000014690,Milaya Barangay Health Station,Barangay Health Station,Milaya,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station
1512,1566.0,DOH000000000014684,Kabatangan Barangay Health Station,Barangay Health Station,Kabatangan,Wao,Lanao Del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station


Filtering 'style' to

* Barangay Health Stations
* Hospital
* Medical Clinic
* Rural Health Unit




In [20]:
hf_df = pd.DataFrame(hf_df)

# Specify the categories you want to filter
chosen_type = ['Barangay Health Station', 'Hospital', 'Medical Clinic', 'Rural Health Unit']

# Filter rows based on the specified categories
hf_df = hf_df[hf_df['style'].isin(chosen_type)]

Unique ***regions***

In [21]:
hf_df_RNs_original = hf_df['Region'].unique()
hf_df_RNs_original.sort()
for name in hf_df_RNs_original:
    print(name)

Bangsamoro Autonomous Region In Muslim Mindanao (BARMM)
CAR (CORDILLERA ADMINISTRATIVE REGION
NCR (NATIONAL CAPITAL REGION)
REGION I (ILOCOS REGION)
REGION II (CAGAYAN VALLEY)
REGION III (CENTRAL LUZON)
REGION IV-A (CALABARZON)
REGION IV-B (MIMAROPA)
REGION IX (ZAMBOANGA PENINSULA)
REGION V (BICOL REGION)
REGION VI (WESTERN VISAYAS)
REGION VII (CENTRAL VISAYAS)
REGION VIII (EASTERN VISAYAS)
REGION XI (DAVAO REGION)
REGION XII (SOCCSKSARGEN)
REGION XIII (CARAGA)
Region X (northern Mindanao)


Unique ***provinces***

In [22]:
hf_df_RNs_original = hf_df['Province'].unique()
hf_df_RNs_original.sort()
for name in hf_df_RNs_original:
    print(name)

ABRA
AGUSAN DEL NORTE
AGUSAN DEL SUR
AKLAN
ALBAY
ANTIQUE
APAYAO
AURORA
BATAAN
BATANES
BATANGAS
BENGUET
BILIRAN
BOHOL
BULACAN
Basilan
Bukidnon
CAGAYAN
CAMARINES NORTE
CAMARINES SUR
CAPIZ
CATANDUANES
CAVITE
CEBU
COMPOSTELA VALLEY
Camiguin
City Of Isabela (not A Province)
Cotabato (North Cotabato)
Cotabato City (Not a Province)
DAVAO DEL NORTE
DAVAO DEL SUR
DAVAO OCCIDENTAL
DAVAO ORIENTAL
DINAGAT ISLANDS
EASTERN SAMAR
GUIMARAS
IFUGAO
ILOCOS NORTE
ILOCOS SUR
ILOILO
ISABELA
KALINGA
LA UNION
LAGUNA
LEYTE
Lanao Del Norte
Lanao Del Sur
MARINDUQUE
MASBATE
MOUNTAIN PROVINCE
Maguindanao
Misamis Occidental
Misamis Oriental
NCR
NCR, CITY OF MANILA, FIRST DISTRICT
NCR, FOURTH DISTRICT
NCR, SECOND DISTRICT
NCR, THIRD DISTRICT
NEGROS OCCIDENTAL
NEGROS ORIENTAL
NORTHERN SAMAR
NUEVA ECIJA
NUEVA VIZCAYA
OCCIDENTAL MINDORO
ORIENTAL MINDORO
PALAWAN
PAMPANGA
PANGASINAN
QUEZON
QUIRINO
RIZAL
ROMBLON
SAMAR (WESTERN SAMAR)
SIQUIJOR
SORSOGON
SOUTHERN LEYTE
SURIGAO DEL NORTE
SURIGAO DEL SUR
Sarangani
South Cotaba

> #### **Rename region and province names**

**Region**

In [23]:
# Dictionary for region name correction (have it inline with the rest of the datasets)
RN_correction = {
    "Bangsamoro Autonomous Region In Muslim Mindanao (BARMM)": "Bangsamoro Autonomous Region In Muslim Mindanao (BARMM)",
    "CAR (CORDILLERA ADMINISTRATIVE REGION": "Cordillera Administrative Region (CAR)",
    "NCR (NATIONAL CAPITAL REGION)": "National Capital Region (NCR)",
    "REGION I (ILOCOS REGION)": "Region I (Ilocos Region)",
    "REGION II (CAGAYAN VALLEY)": "Region II (Cagayan Valley)",
    "REGION III (CENTRAL LUZON)": "Region III (Central Luzon)",
    "REGION IV-A (CALABARZON)": "Region IV-A (CALABARZON)",
    "REGION IV-B (MIMAROPA)": "MIMAROPA Region",
    "REGION IX (ZAMBOANGA PENINSULA)": "Region IX (Zamboanga Peninsula)",
    "REGION V (BICOL REGION)": "Region V (Bicol Region)",
    "REGION VI (WESTERN VISAYAS)": "Region VI (Western Visayas)",
    "REGION VII (CENTRAL VISAYAS)": "Region VII (Central Visayas)",
    "REGION VIII (EASTERN VISAYAS)": "Region VIII (Eastern Visayas)",
    "REGION XI (DAVAO REGION)": "Region XI (Davao Region)",
    "REGION XII (SOCCSKSARGEN)": "Region XII (SOCCSKSARGEN)",
    "REGION XIII (CARAGA)": "Region XIII (Caraga)",
    "Region X (northern Mindanao)": "Region X (Northern Mindanao)"
}


for old_region, new_region in RN_correction.items():
    hf_df.loc[hf_df['Region'] == old_region, 'Region'] = new_region

**Province**

In [24]:
PN_correction = {
    "ABRA": "Abra",
    "AGUSAN DEL NORTE": "Agusan del Norte",
    "AGUSAN DEL SUR": "Agusan del Sur",
    "AKLAN": "Aklan",
    "ALBAY": "Albay",
    "ANTIQUE": "Antique",
    "APAYAO": "Apayao",
    "AURORA": "Aurora",
    "BATAAN": "Bataan",
    "BATANES": "Batanes",
    "BATANGAS": "Batangas",
    "BENGUET": "Benguet",
    "BILIRAN": "Biliran",
    "BOHOL": "Bohol",
    "BULACAN": "Bulacan",
    "Basilan": "Basilan",
    "Bukidnon": "Bukidnon",
    "CAGAYAN": "Cagayan",
    "CAMARINES NORTE": "Camarines Norte",
    "CAMARINES SUR": "Camarines Sur",
    "Camiguin": "Camiguin",
    "CAPIZ": "Capiz",
    "CATANDUANES": "Catanduanes",
    "CAVITE": "Cavite",
    "CEBU": "Cebu",
    "City Of Isabela (not A Province)": "City of Isabela",
    "COMPOSTELA VALLEY": "Davao de Oro",
    "Cotabato (North Cotabato)": "Cotabato",
    "Cotabato City (Not a Province)": "Cotabato City",
    "DAVAO DEL NORTE": "Davao del Norte",
    "DAVAO DEL SUR": "Davao del Sur",
    "DAVAO OCCIDENTAL": "Davao Occidental",
    "DAVAO ORIENTAL": "Davao Oriental",
    "DINAGAT ISLANDS": "Dinagat Islands",
    "EASTERN SAMAR": "Eastern Samar",
    "GUIMARAS": "Guimaras",
    "IFUGAO": "Ifugao",
    "ILOCOS NORTE": "Ilocos Norte",
    "ILOCOS SUR": "Ilocos Sur",
    "ILOILO": "Iloilo",
    "ISABELA": "Isabela",
    "KALINGA": "Kalinga",
    "LA UNION": "La Union",
    "LAGUNA": "Laguna",
    "LEYTE": "Leyte",
    "Lanao Del Norte": "Lanao del Norte",
    "Lanao Del Sur": "Lanao del Sur",
    "MARINDUQUE": "Marinduque",
    "MASBATE": "Masbate",
    "MOUNTAIN PROVINCE": "Mountain Province",
    "Maguindanao": "Maguindanao",
    "Misamis Occidental": "Misamis Occidental",
    "Misamis Oriental": "Misamis Oriental",
    "NCR": "Second District",
    "NCR, CITY OF MANILA, FIRST DISTRICT": "First District",
    "NCR, FOURTH DISTRICT": "Fourth District",
    "NCR, SECOND DISTRICT": "Second District",
    "NCR, THIRD DISTRICT": "Third District",
    "NEGROS OCCIDENTAL": "Negros Occidental",
    "NEGROS ORIENTAL": "Negros Oriental",
    "NORTHERN SAMAR": "Northern Samar",
    "NUEVA ECIJA": "Nueva Ecija",
    "NUEVA VIZCAYA": "Nueva Vizcaya",
    "OCCIDENTAL MINDORO": "Occidental Mindoro",
    "ORIENTAL MINDORO": "Oriental Mindoro",
    "PALAWAN": "Palawan",
    "PAMPANGA": "Pampanga",
    "PANGASINAN": "Pangasinan",
    "QUEZON": "Quezon",
    "QUIRINO": "Quirino",
    "RIZAL": "Rizal",
    "ROMBLON": "Romblon",
    "SAMAR (WESTERN SAMAR)": "Samar",
    "SIQUIJOR": "Siquijor",
    "SORSOGON": "Sorsogon",
    "SOUTHERN LEYTE": "Southern Leyte",
    "SURIGAO DEL NORTE": "Surigao del Norte",
    "SURIGAO DEL SUR": "Surigao del Sur",
    "Sarangani": "Sarangani",
    "South Cotabato": "South Cotabato",
    "Sultan Kudarat": "Sultan Kudarat",
    "Sulu": "Sulu",
    "TARLAC": "Tarlac",
    "Tawi -tawi": "Tawi-Tawi",
    "ZAMBALES": "Zambales",
    "Zamboanga Del Norte": "Zamboanga del Norte",
    "Zamboanga Del Sur": "Zamboanga del Sur",
    "Zamboanga Sibugay": "Zamboanga Sibugay",
}

for old_province, new_province in PN_correction.items():
    hf_df.loc[hf_df['Province'] == old_province, 'Province'] = new_province

Check if renaming is successful

In [25]:
# region

hf_df_RNs_original = hf_df['Region'].unique()
hf_df_RNs_original.sort()
for name in hf_df_RNs_original:
    print(name)

Bangsamoro Autonomous Region In Muslim Mindanao (BARMM)
Cordillera Administrative Region (CAR)
MIMAROPA Region
National Capital Region (NCR)
Region I (Ilocos Region)
Region II (Cagayan Valley)
Region III (Central Luzon)
Region IV-A (CALABARZON)
Region IX (Zamboanga Peninsula)
Region V (Bicol Region)
Region VI (Western Visayas)
Region VII (Central Visayas)
Region VIII (Eastern Visayas)
Region X (Northern Mindanao)
Region XI (Davao Region)
Region XII (SOCCSKSARGEN)
Region XIII (Caraga)


In [26]:
# province

hf_df_PNs_original = hf_df['Province'].unique()
hf_df_PNs_original.sort()
for name in hf_df_PNs_original:
    print(name)

Abra
Agusan del Norte
Agusan del Sur
Aklan
Albay
Antique
Apayao
Aurora
Basilan
Bataan
Batanes
Batangas
Benguet
Biliran
Bohol
Bukidnon
Bulacan
Cagayan
Camarines Norte
Camarines Sur
Camiguin
Capiz
Catanduanes
Cavite
Cebu
City of Isabela
Cotabato
Cotabato City
Davao Occidental
Davao Oriental
Davao de Oro
Davao del Norte
Davao del Sur
Dinagat Islands
Eastern Samar
First District
Fourth District
Guimaras
Ifugao
Ilocos Norte
Ilocos Sur
Iloilo
Isabela
Kalinga
La Union
Laguna
Lanao del Norte
Lanao del Sur
Leyte
Maguindanao
Marinduque
Masbate
Misamis Occidental
Misamis Oriental
Mountain Province
Negros Occidental
Negros Oriental
Northern Samar
Nueva Ecija
Nueva Vizcaya
Occidental Mindoro
Oriental Mindoro
Palawan
Pampanga
Pangasinan
Quezon
Quirino
Rizal
Romblon
Samar
Sarangani
Second District
Siquijor
Sorsogon
South Cotabato
Southern Leyte
Sultan Kudarat
Sulu
Surigao del Norte
Surigao del Sur
Tarlac
Tawi-Tawi
Third District
Zambales
Zamboanga Sibugay
Zamboanga del Norte
Zamboanga del Sur


> ### **Per province**

Get style count per province

In [27]:
hf_df_p = hf_df[['Province', 'style']]
hf_df_p['style_count_per_province'] = hf_df_p.groupby(['Province', 'style'])['style'].transform('count')
hf_df_p = pd.DataFrame(hf_df_p)

hf_df_p.head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hf_df_p['style_count_per_province'] = hf_df_p.groupby(['Province', 'style'])['style'].transform('count')


Unnamed: 0,Province,style,style_count_per_province
1398,Lanao del Sur,Barangay Health Station,112
1515,Lanao del Sur,Barangay Health Station,112
1514,Lanao del Sur,Barangay Health Station,112
1513,Lanao del Sur,Barangay Health Station,112
1512,Lanao del Sur,Barangay Health Station,112
1510,Lanao del Sur,Barangay Health Station,112
1509,Lanao del Sur,Barangay Health Station,112
1508,Lanao del Sur,Hospital,16
1516,Lanao del Sur,Barangay Health Station,112
1502,Lanao del Sur,Rural Health Unit,43


In [28]:
hf_df_p = hf_df_p.groupby(['Province', 'style', 'style_count_per_province'], as_index=False).sum()
hf_df_p.sort_values(by=['Province'], inplace=True)
hf_df_p.head(20)

Unnamed: 0,Province,style,style_count_per_province
0,Abra,Barangay Health Station,110
1,Abra,Hospital,6
2,Abra,Medical Clinic,1
3,Abra,Rural Health Unit,23
4,Agusan del Norte,Barangay Health Station,213
5,Agusan del Norte,Hospital,12
6,Agusan del Norte,Rural Health Unit,16
7,Agusan del Sur,Barangay Health Station,134
8,Agusan del Sur,Hospital,15
9,Agusan del Sur,Rural Health Unit,12


Reformat table, having styles as columns

In [29]:
# Pivot the DataFrame
hf_df_p = hf_df_p.pivot_table(index='Province', columns='style', values='style_count_per_province', aggfunc='sum', fill_value=0)

# Reset the index to make 'province' a regular column
hf_df_p.reset_index(inplace=True)

# Display the resulting pivoted DataFrame
hf_df_p.head(100)

style,Province,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit
0,Abra,110,6,1,23
1,Agusan del Norte,213,12,0,16
2,Agusan del Sur,134,15,0,12
3,Aklan,126,13,0,25
4,Albay,179,35,2,18
...,...,...,...,...,...
82,Third District,49,28,1,78
83,Zambales,220,16,0,16
84,Zamboanga Sibugay,125,6,0,16
85,Zamboanga del Norte,370,5,0,28


Get total Health Care Facilities (***total_HCF***)

In [30]:
hf_df_p['total_HCF'] = hf_df_p[['Barangay Health Station', 'Hospital', 'Medical Clinic', 'Rural Health Unit']].sum(axis=1)
hf_df_p.head(100)

style,Province,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,Abra,110,6,1,23,140
1,Agusan del Norte,213,12,0,16,241
2,Agusan del Sur,134,15,0,12,161
3,Aklan,126,13,0,25,164
4,Albay,179,35,2,18,234
...,...,...,...,...,...,...
82,Third District,49,28,1,78,156
83,Zambales,220,16,0,16,252
84,Zamboanga Sibugay,125,6,0,16,147
85,Zamboanga del Norte,370,5,0,28,403


> ### **Per region**

Get style count per region

In [31]:
hf_df_r = hf_df[['Region', 'style']]
hf_df_r['style_count_per_region'] = hf_df_r.groupby(['Region', 'style'])['style'].transform('count')
hf_df_r = pd.DataFrame(hf_df_r)

hf_df_r.head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hf_df_r['style_count_per_region'] = hf_df_r.groupby(['Region', 'style'])['style'].transform('count')


Unnamed: 0,Region,style,style_count_per_region
1398,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1515,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1514,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1513,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1512,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1510,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1509,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1508,Bangsamoro Autonomous Region In Muslim Mindana...,Hospital,44
1516,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1502,Bangsamoro Autonomous Region In Muslim Mindana...,Rural Health Unit,123


In [32]:
hf_df_r = hf_df_r.groupby(['Region', 'style', 'style_count_per_region'], as_index=False).sum()
hf_df_r.sort_values(by=['Region'], inplace=True)
hf_df_r.head(20)

Unnamed: 0,Region,style,style_count_per_region
0,Bangsamoro Autonomous Region In Muslim Mindana...,Barangay Health Station,595
1,Bangsamoro Autonomous Region In Muslim Mindana...,Hospital,44
2,Bangsamoro Autonomous Region In Muslim Mindana...,Rural Health Unit,123
3,Cordillera Administrative Region (CAR),Barangay Health Station,785
4,Cordillera Administrative Region (CAR),Hospital,53
5,Cordillera Administrative Region (CAR),Medical Clinic,2
6,Cordillera Administrative Region (CAR),Rural Health Unit,88
10,MIMAROPA Region,Rural Health Unit,79
9,MIMAROPA Region,Medical Clinic,4
8,MIMAROPA Region,Hospital,65


Reformat table, having styles as columns

In [33]:
# Pivot the DataFrame
hf_df_r = hf_df_r.pivot_table(index='Region', columns='style', values='style_count_per_region', aggfunc='sum', fill_value=0)

# Reset the index to make 'region' a regular column
hf_df_r.reset_index(inplace=True)

# Display the resulting pivoted DataFrame
hf_df_r.head(100)

style,Region,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit
0,Bangsamoro Autonomous Region In Muslim Mindana...,595,44,0,123
1,Cordillera Administrative Region (CAR),785,53,2,88
2,MIMAROPA Region,1074,65,4,79
3,National Capital Region (NCR),133,186,5,373
4,Region I (Ilocos Region),1210,122,8,157
5,Region II (Cagayan Valley),1189,96,5,94
6,Region III (Central Luzon),1880,205,9,267
7,Region IV-A (CALABARZON),2409,268,7,198
8,Region IX (Zamboanga Peninsula),899,37,0,92
9,Region V (Bicol Region),1150,111,9,135


Get total Health Care Facilities (***total_HCF***)

In [34]:
hf_df_r['total_HCF'] = hf_df_r[['Barangay Health Station', 'Hospital', 'Medical Clinic', 'Rural Health Unit']].sum(axis=1)
hf_df_r.head(100)

style,Region,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,Bangsamoro Autonomous Region In Muslim Mindana...,595,44,0,123,762
1,Cordillera Administrative Region (CAR),785,53,2,88,928
2,MIMAROPA Region,1074,65,4,79,1222
3,National Capital Region (NCR),133,186,5,373,697
4,Region I (Ilocos Region),1210,122,8,157,1497
5,Region II (Cagayan Valley),1189,96,5,94,1384
6,Region III (Central Luzon),1880,205,9,267,2361
7,Region IV-A (CALABARZON),2409,268,7,198,2882
8,Region IX (Zamboanga Peninsula),899,37,0,92,1028
9,Region V (Bicol Region),1150,111,9,135,1405


#**Unedited population files**

In [35]:
region_df = pd.read_csv('Datasets/population_per_region.csv')
region_df.head()

Unnamed: 0.1,Unnamed: 0,Region,Reg_Code,2020 Census Popn
0,4,Region I (Ilocos Region),PH010000000,5301139.0
1,5,Region II (Cagayan Valley),PH020000000,3685744.0
2,6,Region III (Central Luzon),PH030000000,12422172.0
3,7,Region IV-A (CALABARZON),PH040000000,16195042.0
4,9,Region V (Bicol Region),PH050000000,6082165.0


In [36]:
region_df["2020 Census Popn"].sum()

109142891.0

In [37]:
province_df = pd.read_csv('Datasets/population_per_province.csv')
province_df.head()

Unnamed: 0.1,Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn
0,5,Cordillera Administrative Region (CAR),PH140000000,Abra,PH140100000,250985.0
1,81,Region XIII (Caraga),PH160000000,Agusan del Norte,PH160200000,760413.0
2,82,Region XIII (Caraga),PH160000000,Agusan del Sur,PH160300000,739367.0
3,51,Region VI (Western Visayas),PH060000000,Aklan,PH060400000,615475.0
4,45,Region V (Bicol Region),PH050000000,Albay,PH050500000,1374768.0


In [38]:
province_df["2020 Census Popn"].sum()

109142891.0

# **Joined layers (as CSV)**
This is to check if the populations align across maps. Per region, per province, and per municipality populations must have the same total).

In [39]:
new_province_df = pd.read_csv('Datasets/new_population_per_province.csv')
new_province_df.head()

Unnamed: 0,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,2020 Censu
0,Ilocos Norte,PH012800000,Region I,PH010000000,Philippines (the),PH,609588.0
1,Ilocos Sur,PH012900000,Region I,PH010000000,Philippines (the),PH,706009.0
2,La Union,PH013300000,Region I,PH010000000,Philippines (the),PH,822352.0
3,Pangasinan,PH015500000,Region I,PH010000000,Philippines (the),PH,3163190.0
4,Batanes,PH020900000,Region II,PH020000000,Philippines (the),PH,18831.0


In [40]:
new_provinces = new_province_df['ADM2_EN'].unique()
new_provinces.sort()
for new_province in new_provinces:
    print(new_province)
print('---')
print('Number of provinces: ' + str(new_province_df['ADM2_EN'].nunique()))

Abra
Agusan del Norte
Agusan del Sur
Aklan
Albay
Antique
Apayao
Aurora
Basilan
Bataan
Batanes
Batangas
Benguet
Biliran
Bohol
Bukidnon
Bulacan
Cagayan
Camarines Norte
Camarines Sur
Camiguin
Capiz
Catanduanes
Cavite
Cebu
City of Isabela
Compostela Valley
Cotabato
Cotabato City
Davao Occidental
Davao Oriental
Davao del Norte
Davao del Sur
Dinagat Islands
Eastern Samar
Guimaras
Ifugao
Ilocos Norte
Ilocos Sur
Iloilo
Isabela
Kalinga
La Union
Laguna
Lanao del Norte
Lanao del Sur
Leyte
Maguindanao
Marinduque
Masbate
Misamis Occidental
Misamis Oriental
Mountain Province
NCR, City of Manila, First District
NCR, Fourth District
NCR, Second District
NCR, Third District
Negros Occidental
Negros Oriental
Northern Samar
Nueva Ecija
Nueva Vizcaya
Occidental Mindoro
Oriental Mindoro
Palawan
Pampanga
Pangasinan
Quezon
Quirino
Rizal
Romblon
Samar
Sarangani
Siquijor
Sorsogon
South Cotabato
Southern Leyte
Sultan Kudarat
Sulu
Surigao del Norte
Surigao del Sur
Tarlac
Tawi-Tawi
Zambales
Zamboanga Sibugay
Zamb

In [41]:
specific_row = new_province_df[new_province_df['ADM2_EN'] == "Cotabato City"]

specific_row

Unnamed: 0,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,2020 Censu
61,Cotabato City,PH129800000,Region XII,PH120000000,Philippines (the),PH,325079.0


In [42]:
new_region_df = pd.read_csv('Datasets/new_population_per_region.csv')
new_region_df.head()

Unnamed: 0,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,field_1,Region,Reg_Code,2020 Censu
0,Region I,PH010000000,Philippines (the),PH,4,Region I (Ilocos Region),PH010000000,5301139.0
1,Region II,PH020000000,Philippines (the),PH,5,Region II (Cagayan Valley),PH020000000,3685744.0
2,Region III,PH030000000,Philippines (the),PH,6,Region III (Central Luzon),PH030000000,12422172.0
3,Region IV-A,PH040000000,Philippines (the),PH,7,Region IV-A (CALABARZON),PH040000000,16195042.0
4,Region V,PH050000000,Philippines (the),PH,9,Region V (Bicol Region),PH050000000,6082165.0


In [43]:
new_province_df.columns

new_province_df.head()

Unnamed: 0,ADM2_EN,ADM2_PCODE,ADM1_EN,ADM1_PCODE,ADM0_EN,ADM0_PCODE,2020 Censu
0,Ilocos Norte,PH012800000,Region I,PH010000000,Philippines (the),PH,609588.0
1,Ilocos Sur,PH012900000,Region I,PH010000000,Philippines (the),PH,706009.0
2,La Union,PH013300000,Region I,PH010000000,Philippines (the),PH,822352.0
3,Pangasinan,PH015500000,Region I,PH010000000,Philippines (the),PH,3163190.0
4,Batanes,PH020900000,Region II,PH020000000,Philippines (the),PH,18831.0


In [44]:
province_popn = new_province_df["2020 Censu"].sum()
region_popn = new_region_df["2020 Censu"].sum()

print(province_popn - region_popn)

0.0


# **Merge (HCF and Population) in preparation for analysis**

## **Per province**

In [45]:
merged_p = pd.merge(ppp_df, hf_df_p, on='Province')

In [46]:
merged_p.head(30)

Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,Cordillera Administrative Region (CAR),PH140000000,Abra,PH140100000,250985.0,110,6,1,23,140
1,Region XIII (Caraga),PH160000000,Agusan del Norte,PH160200000,760413.0,213,12,0,16,241
2,Region XIII (Caraga),PH160000000,Agusan del Sur,PH160300000,739367.0,134,15,0,12,161
3,Region VI (Western Visayas),PH060000000,Aklan,PH060400000,615475.0,126,13,0,25,164
4,Region V (Bicol Region),PH050000000,Albay,PH050500000,1374768.0,179,35,2,18,234
5,Region VI (Western Visayas),PH060000000,Antique,PH060600000,612974.0,137,12,0,18,167
6,Cordillera Administrative Region (CAR),PH140000000,Apayao,PH148100000,124366.0,114,7,1,8,130
7,Region III (Central Luzon),PH030000000,Aurora,PH037700000,235750.0,108,6,0,8,122
8,Bangsamoro Autonomous Region In Muslim Mindana...,PH150000000,Basilan,PH150700000,426207.0,130,4,0,15,149
9,Region III (Central Luzon),PH030000000,Bataan,PH030800000,853373.0,189,14,0,20,223


In [47]:
merged_pcode = merged_p[['Prov_Code', '2020 Census Popn', 'Barangay Health Station', 'Hospital', 'Medical Clinic', 'Rural Health Unit', 'total_HCF']]

merged_pcode

Unnamed: 0,Prov_Code,2020 Census Popn,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,PH140100000,250985.0,110,6,1,23,140
1,PH160200000,760413.0,213,12,0,16,241
2,PH160300000,739367.0,134,15,0,12,161
3,PH060400000,615475.0,126,13,0,25,164
4,PH050500000,1374768.0,179,35,2,18,234
...,...,...,...,...,...,...,...
82,PH137500000,3004627.0,49,28,1,78,156
83,PH037100000,909932.0,220,16,0,16,252
84,PH098300000,669840.0,125,6,0,16,147
85,PH097200000,1047455.0,370,5,0,28,403


In [48]:
specific_row = merged_p[merged_p['Province'] == "Cotabato City"]

specific_row

Unnamed: 0,Region,Reg_Code,Province,Prov_Code,2020 Census Popn,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
27,Region XII (SOCCSKSARGEN),PH120000000,Cotabato City,PH129800000,325079.0,40,6,0,1,47


Export csv

In [49]:
merged_p.to_csv('Datasets/merged_province.csv', index=False)
merged_pcode.to_csv('Datasets/merged_pcode.csv', index=False)

## **Per region**

In [50]:
merged_r = pd.merge(ppr_df, hf_df_r, on='Region')

In [51]:
merged_r.head(30)

Unnamed: 0,Region,Reg_Code,2020 Census Popn,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,Region I (Ilocos Region),PH010000000,5301139.0,1210,122,8,157,1497
1,Region II (Cagayan Valley),PH020000000,3685744.0,1189,96,5,94,1384
2,Region III (Central Luzon),PH030000000,12422172.0,1880,205,9,267,2361
3,Region IV-A (CALABARZON),PH040000000,16195042.0,2409,268,7,198,2882
4,Region V (Bicol Region),PH050000000,6082165.0,1150,111,9,135,1405
5,Region VI (Western Visayas),PH060000000,7954723.0,1775,93,2,146,2016
6,Region VII (Central Visayas),PH070000000,8081988.0,1818,95,2,152,2067
7,Region VIII (Eastern Visayas),PH080000000,4547150.0,764,82,3,159,1008
8,Region IX (Zamboanga Peninsula),PH090000000,3875576.0,899,37,0,92,1028
9,Region X (Northern Mindanao),PH100000000,5022768.0,1257,75,0,129,1461


In [52]:
merged_rcode = merged_r[['Reg_Code', '2020 Census Popn', 'Barangay Health Station', 'Hospital', 'Medical Clinic', 'Rural Health Unit', 'total_HCF']]

merged_rcode

Unnamed: 0,Reg_Code,2020 Census Popn,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,PH010000000,5301139.0,1210,122,8,157,1497
1,PH020000000,3685744.0,1189,96,5,94,1384
2,PH030000000,12422172.0,1880,205,9,267,2361
3,PH040000000,16195042.0,2409,268,7,198,2882
4,PH050000000,6082165.0,1150,111,9,135,1405
5,PH060000000,7954723.0,1775,93,2,146,2016
6,PH070000000,8081988.0,1818,95,2,152,2067
7,PH080000000,4547150.0,764,82,3,159,1008
8,PH090000000,3875576.0,899,37,0,92,1028
9,PH100000000,5022768.0,1257,75,0,129,1461


Export csv

In [53]:
merged_r.to_csv('Datasets/merged_region.csv', index=False)
merged_rcode.to_csv('Datasets/merged_rcode.csv', index=False)

 ## **HCF per province (with PCODE)**

In [54]:
#merged_hcf_p = pd.merge(hf_df, merged_p, on='Province')

#merged_hcf_p = pd.merge(hf_df, merged_p, on='Province', how='inner')
merged_hcf_p = pd.merge(hf_df, merged_p, left_on='Province', right_on='Province', how='inner')


#hf_df['Prov_Code'] = merged_hcf_p['Prov_Code']


In [55]:
hf_df_pcode = merged_hcf_p[['id', 'facilityco', 'healthfaci', 'typeofheal', 'barangay', 'municipali', 'Province', 'Region_x', 'status', 'address', 'style', 'Prov_Code']]
hf_df_pcode.rename(columns={'Region_x': 'Region'}, inplace=True)
hf_df_pcode

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hf_df_pcode.rename(columns={'Region_x': 'Region'}, inplace=True)


Unnamed: 0,id,facilityco,healthfaci,typeofheal,barangay,municipali,Province,Region,status,address,style,Prov_Code
0,1452.0,DOH000000000031732,Bangco Barangay Health Station,Barangay Health Station,Bangco,Marawi City (capital),Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000
1,1569.0,DOH000000000014693,Katutungan Barangay Health Station,Barangay Health Station,Katutungan (Pob.),Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000
2,1568.0,DOH000000000014687,Pagalungan Barangay Health Station,Barangay Health Station,Pagalongan,Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000
3,1567.0,DOH000000000014690,Milaya Barangay Health Station,Barangay Health Station,Milaya,Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000
4,1566.0,DOH000000000014684,Kabatangan Barangay Health Station,Barangay Health Station,Kabatangan,Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000
...,...,...,...,...,...,...,...,...,...,...,...,...
24177,3121.0,DOH000000000021846,Sinonoc Barangay Health Station,Barangay Health Station,Sinonoc,Sinacaban,Misamis Occidental,Region X (Northern Mindanao),,,Barangay Health Station,PH104200000
24178,3122.0,DOH000000000002197,Ozamiz City Health Office,Rural Health Unit,Aguada (pob.),Ozamis City,Misamis Occidental,Region X (Northern Mindanao),,,Rural Health Unit,PH104200000
24179,3123.0,DOH000000000021704,Aguada Barangay Health Station,Barangay Health Station,Aguada (pob.),Ozamis City,Misamis Occidental,Region X (Northern Mindanao),,,Barangay Health Station,PH104200000
24180,3124.0,DOH000000000021705,Banadero Barangay Health Station,Barangay Health Station,Banadero,Ozamis City,Misamis Occidental,Region X (Northern Mindanao),,,Barangay Health Station,PH104200000


 ## **HCF (with PCODE and RCODE)**

In [56]:
merged_hcf_r = pd.merge(hf_df_pcode, merged_r, left_on='Region', right_on='Region', how='inner')
merged_hcf_r

Unnamed: 0,id,facilityco,healthfaci,typeofheal,barangay,municipali,Province,Region,status,address,style,Prov_Code,Reg_Code,2020 Census Popn,Barangay Health Station,Hospital,Medical Clinic,Rural Health Unit,total_HCF
0,1452.0,DOH000000000031732,Bangco Barangay Health Station,Barangay Health Station,Bangco,Marawi City (capital),Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000,PH150000000,4729367.0,595,44,0,123,762
1,1569.0,DOH000000000014693,Katutungan Barangay Health Station,Barangay Health Station,Katutungan (Pob.),Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000,PH150000000,4729367.0,595,44,0,123,762
2,1568.0,DOH000000000014687,Pagalungan Barangay Health Station,Barangay Health Station,Pagalongan,Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000,PH150000000,4729367.0,595,44,0,123,762
3,1567.0,DOH000000000014690,Milaya Barangay Health Station,Barangay Health Station,Milaya,Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000,PH150000000,4729367.0,595,44,0,123,762
4,1566.0,DOH000000000014684,Kabatangan Barangay Health Station,Barangay Health Station,Kabatangan,Wao,Lanao del Sur,Bangsamoro Autonomous Region In Muslim Mindana...,,,Barangay Health Station,PH153600000,PH150000000,4729367.0,595,44,0,123,762
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24177,3121.0,DOH000000000021846,Sinonoc Barangay Health Station,Barangay Health Station,Sinonoc,Sinacaban,Misamis Occidental,Region X (Northern Mindanao),,,Barangay Health Station,PH104200000,PH100000000,5022768.0,1257,75,0,129,1461
24178,3122.0,DOH000000000002197,Ozamiz City Health Office,Rural Health Unit,Aguada (pob.),Ozamis City,Misamis Occidental,Region X (Northern Mindanao),,,Rural Health Unit,PH104200000,PH100000000,5022768.0,1257,75,0,129,1461
24179,3123.0,DOH000000000021704,Aguada Barangay Health Station,Barangay Health Station,Aguada (pob.),Ozamis City,Misamis Occidental,Region X (Northern Mindanao),,,Barangay Health Station,PH104200000,PH100000000,5022768.0,1257,75,0,129,1461
24180,3124.0,DOH000000000021705,Banadero Barangay Health Station,Barangay Health Station,Banadero,Ozamis City,Misamis Occidental,Region X (Northern Mindanao),,,Barangay Health Station,PH104200000,PH100000000,5022768.0,1257,75,0,129,1461


In [62]:
hf_df_prcode = merged_hcf_r[['id', 'Prov_Code', 'Reg_Code']]
hf_df_prcode

Unnamed: 0,id,Prov_Code,Reg_Code
0,1452.0,PH153600000,PH150000000
1,1569.0,PH153600000,PH150000000
2,1568.0,PH153600000,PH150000000
3,1567.0,PH153600000,PH150000000
4,1566.0,PH153600000,PH150000000
...,...,...,...
24177,3121.0,PH104200000,PH100000000
24178,3122.0,PH104200000,PH100000000
24179,3123.0,PH104200000,PH100000000
24180,3124.0,PH104200000,PH100000000


In [63]:
hf_df_prcode.to_csv('Datasets/hcf_prcode.csv', index=False)
