In [587]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import geopandas as gp
import re

from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

In [588]:
# Project layout: every path is derived from the notebook's working directory.

WORKINGDIR = Path.cwd()
PROJECTROOT = WORKINGDIR.parents[1]  # two levels above the working directory
HOUSING_DATASETS = PROJECTROOT / "data" / "gathered-datasets" / "housing-census"

# Reference shapefile
COMPLETE_DATASET = PROJECTROOT / "data" / "cleaned-datasets" / "ph-shp-file" / "ph-shp-file.shp"

# Flattened 2015 housing-census inputs
LIGHTING_SOURCE = HOUSING_DATASETS / "housing-census-fuel-lighting-2015-flattened.csv"
HOUSETYPE = HOUSING_DATASETS / "housing-census-housetype-2015-flattened.csv"
HOUSING_MATERIAL = HOUSING_DATASETS / "housing-census-housing-material-2015-flattened.csv"
HOUSING_TENURE = HOUSING_DATASETS / "housing-census-housing-tenure-2015-flattened.csv"
WATER_SUPPLY_COOKING = HOUSING_DATASETS / "housing-census-water-supply-cooking-2015-flattened.csv"
WATER_SUPPLY_DRINKING = HOUSING_DATASETS / "housing-census-water-supply-drinking-2015-flattened.csv"

In [589]:
def data_cleaner(text: str) -> str:
    """Normalise a city or province label into a compact matching key.

    Steps, in order: lower-case; delete every occurrence of "city"; delete
    " of "; keep only the part before the first " (" (which drops any
    parenthesised qualifier, e.g. "(capital)" or "(excluding ...)"); strip
    punctuation; strip spaces; then apply a few spelling harmonisations.

    Parameters
    ----------
    text : str
        Raw label. Must be a string; callers filter out nulls beforehand.

    Returns
    -------
    str
        The normalised key, e.g. "CITY OF MANILA" -> "manila".
    """
    text = text.lower()

    # "city" is removed wherever it appears, not only as a standalone word.
    text = text.replace("city", "")
    text = text.replace(" of ", "")

    # Everything from the first " (" onwards is discarded. This already
    # covers "(capital)", so no separate replacement is needed for it.
    text = text.split(" (")[0]

    # Drop punctuation (anything that is neither a word character nor
    # whitespace), then collapse the label by removing spaces.
    text = re.sub(r"[^\w\s]", "", text)
    text = text.replace(" ", "")

    # Compostela Valley is keyed under its newer name, Davao de Oro.
    text = text.replace("compostelavalley", "davaodeoro")

    # Any label containing "maguindanao" collapses to plain "maguindanao".
    if "maguindanao" in text:
        text = "maguindanao"

    # Abbreviate saint prefixes: santo -> sto, santa -> sta.
    text = text.replace("santo", "sto")
    text = text.replace("santa", "sta")

    return text

In [590]:
def compare_lists(list1, list2):
    """Return the elements of ``list1`` that do not occur in ``list2``.

    Order and duplicates of ``list1`` are preserved. Membership is tested
    against a set for linear-time behaviour; if any element is unhashable the
    function falls back to plain list membership.
    """
    try:
        lookup = set(list2)
        return [element for element in list1 if element not in lookup]
    except TypeError:
        # Unhashable elements (e.g. lists): use the slower sequence lookup.
        return [element for element in list1 if element not in list2]

## Import reference df

In [591]:
# Load the reference shapefile; its `clean_idx` column is the key the census tables are matched on below.
ref_df = gp.read_file(COMPLETE_DATASET)
ref_df.head()

Unnamed: 0,name,city_munic,province,clean_idx,coords,geometry
0,Aborlan,Aborlan,Palawan,"aborlan, palawan","9.4371009, 118.5484168","MULTIPOLYGON (((118.57998 9.37215, 118.57982 9..."
1,Abra De Ilog,Abra De Ilog,Occidental Mindoro,"abradeilog, occidentalmindoro","13.4437209, 120.7268262","POLYGON ((120.60896 13.35233, 120.60797 13.373..."
2,Abucay,Abucay,Bataan,"abucay, bataan","14.7213146, 120.5348704","POLYGON ((120.45676 14.69671, 120.45620 14.696..."
3,Abulug,Abulug,Cagayan,"abulug, cagayan","18.4434854, 121.4572732","MULTIPOLYGON (((121.40276 18.40896, 121.40276 ..."
4,Abuyog,Abuyog,Leyte,"abuyog, leyte","10.747102, 125.0114853","POLYGON ((125.04650 10.56751, 125.04588 10.576..."


In [592]:
# Distinct reference keys, in order of first appearance.
ref_df_idx = ref_df["clean_idx"].unique().tolist()

## Lighting source

In [593]:
# Read the flattened lighting-fuel table and discard the saved index column.
lighting_source_df = pd.read_csv(LIGHTING_SOURCE).drop(columns=["Unnamed: 0"])

In [594]:
# Preview the raw lighting-fuel rows.
lighting_source_df.head()

Unnamed: 0,city,region_name,province,Electricity_count,Kerosene (Gaas)_count,Liquified Petroleum Gas (LPG)_count,Oil (vegetable animal and others)_count,Solar panel_count,Solar lamp_count,Others_count,None_count,Not Reported_count,year
0,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,428934.0,2365.0,1369.0,34.0,66.0,98.0,1018.0,1012.0,258.0,2015
1,CITY OF MANDALUYONG,NCR - National Capital Region,METROPOLITAN MANILA,99089.0,217.0,161.0,6.0,112.0,491.0,101.0,166.0,13.0,2015
2,CITY OF MARIKINA,NCR - National Capital Region,METROPOLITAN MANILA,96774.0,488.0,592.0,7.0,24.0,35.0,264.0,44.0,10.0,2015
3,CITY OF PASIG,NCR - National Capital Region,METROPOLITAN MANILA,178773.0,605.0,586.0,9.0,64.0,111.0,258.0,163.0,43.0,2015
4,QUEZON CITY,NCR - National Capital Region,METROPOLITAN MANILA,671386.0,3265.0,3687.0,99.0,249.0,247.0,1605.0,604.0,1902.0,2015


In [595]:
# Normalise province and city labels, then join them into a "<city>, <province>" key.
lighting_source_df["clean_province"] = lighting_source_df["province"].apply(data_cleaner)
lighting_source_df["clean_city"] = lighting_source_df["city"].apply(data_cleaner)
lighting_source_df["clean_idx"] = (
    lighting_source_df["clean_city"]
    .astype(str)
    .str.cat(lighting_source_df["clean_province"].astype(str), sep=", ")
)

In [596]:
# Display the table ordered by the generated key (the result is not stored).
lighting_source_df.sort_values("clean_idx")

Unnamed: 0,city,region_name,province,Electricity_count,Kerosene (Gaas)_count,Liquified Petroleum Gas (LPG)_count,Oil (vegetable animal and others)_count,Solar panel_count,Solar lamp_count,Others_count,None_count,Not Reported_count,year,clean_province,clean_city,clean_idx
613,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),5606.0,1436.0,,,669.0,472.0,111.0,,,2015,palawan,aborlan,"aborlan, palawan"
587,ABRA DE ILOG,REGION IV-B (MIMAROPA),OCCIDENTAL MINDORO,4773.0,1333.0,30.0,18.0,20.0,550.0,578.0,27.0,,2015,occidentalmindoro,abradeilog,"abradeilog, occidentalmindoro"
319,ABUCAY,REGION III (Central Luzon),BATAAN,9256.0,165.0,4.0,,1.0,1.0,19.0,7.0,,2015,bataan,abucay,"abucay, bataan"
225,ABULUG,REGION II (Cagayan Valley),CAGAYAN,6729.0,643.0,3.0,1.0,8.0,18.0,3.0,,,2015,cagayan,abulug,"abulug, cagayan"
948,ABUYOG,REGION VIII (Eastern Visayas),LEYTE (excluding Tacloban City),11369.0,2062.0,7.0,,12.0,43.0,10.0,5.0,,2015,leyte,abuyog,"abuyog, leyte"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1114,ZAMBOANGA CITY,REGION IX (Zamboanga Peninsula),ZAMBOANGA DEL SUR (excluding Zamboanga City),168368.0,17544.0,867.0,33.0,1115.0,1765.0,382.0,489.0,5.0,2015,zamboangadelsur,zamboanga,"zamboanga, zamboangadelsur"
917,ZAMBOANGUITA,REGION VII (Central Visayas),NEGROS ORIENTAL,4304.0,1951.0,,2.0,111.0,212.0,27.0,,,2015,negrosoriental,zamboanguita,"zamboanguita, negrosoriental"
385,ZARAGOZA,REGION III (Central Luzon),NUEVA ECIJA,11438.0,257.0,5.0,1.0,3.0,4.0,12.0,3.0,,2015,nuevaecija,zaragoza,"zaragoza, nuevaecija"
748,ZARRAGA,REGION VI (Western Visayas),ILOILO (excluding Iloilo City),5544.0,296.0,1.0,,2.0,3.0,7.0,,,2015,iloilo,zarraga,"zarraga, iloilo"


In [597]:
# Distinct lighting-table keys before any manual corrections.
lighting_source_df_idx = lighting_source_df["clean_idx"].unique().tolist()

In [598]:
def quirino_to_isabela(idx):
    """Re-label the listed "<city>, quirino" keys as "<city>, isabela".

    Keys not in the list are returned unchanged.
    """
    misfiled = (
        'ramon, quirino',
        'reinamercedes, quirino',
        'roxas, quirino',
        'sanagustin, quirino',
        'sanguillermo, quirino',
        'sanisidro, quirino',
        'sanmanuel, quirino',
        'sanmariano, quirino',
        'sanmateo, quirino',
        'sanpablo, quirino',
        'stamaria, quirino',
        'santiago, quirino',
        'stotomas, quirino',
        'tumauini, quirino',
    )

    if idx not in misfiled:
        return idx
    return idx.replace("quirino", "isabela")


In [599]:
def rizal_to_laguna(idx):
    """Re-label the listed "<city>, rizal" keys as "<city>, laguna".

    Keys not in the list are returned unchanged.
    """
    misfiled = (
        'sanpablo, rizal',
        'sanpedro, rizal',
        'stacruz, rizal',
        'stamaria, rizal',
        'starosa, rizal',
        'siniloan, rizal',
        'victoria, rizal',
    )

    if idx not in misfiled:
        return idx
    return idx.replace("rizal", "laguna")

In [600]:
def bohol_to_cebu(idx):
    """Re-label the listed "<city>, bohol" keys as "<city>, cebu".

    Keys not in the list are returned unchanged.
    """
    misfiled = (
        'cebu, bohol',
        'alcantara, bohol',
        'alcoy, bohol',
        'alegria, bohol',
        'aloguinsan, bohol',
        'argao, bohol',
        'asturias, bohol',
        'badian, bohol',
        'balamban, bohol',
        'bantayan, bohol',
        'barili, bohol',
        'bogo, bohol',
        'boljoon, bohol',
        'borbon, bohol',
        'carcar, bohol',
        'catmon, bohol',
        'compostela, bohol',
        'consolacion, bohol',
        'cordova, bohol',
        'daanbantayan, bohol',
        'dalaguete, bohol',
        'dumanjug, bohol',
        'ginatilan, bohol',
        'liloan, bohol',
        'madridejos, bohol',
        'malabuyoc, bohol',
        'medellin, bohol',
        'minglanilla, bohol',
        'moalboal, bohol',
        'naga, bohol',
        'oslob, bohol',
        'pinamungahan, bohol',
        'poro, bohol',
        'ronda, bohol',
        'samboan, bohol',
        'sanfernando, bohol',
        'sanfrancisco, bohol',
        'sanremigio, bohol',
        'stafe, bohol',
        'stander, bohol',
        'sibonga, bohol',
        'sogod, bohol',
        'tabogon, bohol',
        'tabuelan, bohol',
        'talisay, bohol',
        'toledo, bohol',
        'tuburan, bohol',
        'tudela, bohol',
        'lapulapu, bohol',
        'mandaue, bohol',
    )

    if idx not in misfiled:
        return idx
    return idx.replace("bohol", "cebu")

In [601]:
def replace_idx(idx):
    """Map a known-bad "<city>, <province>" key to its corrected form.

    The lookup is an exact match on the whole key, so a key that merely
    contains one of the patterns as a substring (for example one ending in
    "pian, zamboangadelnorte") is left alone. Keys with no entry are returned
    unchanged.
    """
    corrections = {
        'prescarlospgarcia, bohol': "presidentcarlospgarcia, bohol",
        'stonio, samar': "stoniño, samar",
        'pian, zamboangadelnorte': "piñan, zamboangadelnorte",
        'isabela, zamboangasibugay': "isabela, basilan",
        'ozamis, misamisoccidental': "ozamiz, misamisoccidental",
        'cotabato, sarangani': 'cotabato, maguindanao',
        'bumbaran, lanaodelsur': "amaimanabilang, lanaodelsur",
        "pinamungahan, cebu": 'pinamungajan, cebu',
    }

    return corrections.get(idx, idx)

In [602]:
# Apply the key-correction helpers in sequence. bohol_to_cebu runs once only:
# none of the later helpers can produce a key from its list, so a second pass is a no-op.
lighting_source_df["clean_idx"] = (
    lighting_source_df["clean_idx"]
    .apply(bohol_to_cebu)
    .apply(replace_idx)
    .apply(quirino_to_isabela)
    .apply(rizal_to_laguna)
)

In [603]:
# Distinct lighting-table keys after the manual corrections.
lighting_source_df_idx = lighting_source_df["clean_idx"].unique().tolist()

In [604]:
# Census keys that have no counterpart in the reference table.
not_in_ref = compare_lists(lighting_source_df_idx, ref_df_idx)
not_in_ref

[]

In [605]:
# Reference keys that have no counterpart in the census table.
not_in_new = compare_lists(ref_df_idx, lighting_source_df_idx)
not_in_new

['aroroy, masbate',
 'baao, camarinessur',
 'bacacay, albay',
 'bagamanoc, catanduanes',
 'balatan, camarinessur',
 'baleno, masbate',
 'balud, masbate',
 'baras, catanduanes',
 'barcelona, sorsogon',
 'basud, camarinesnorte',
 'bato, camarinessur',
 'bato, catanduanes',
 'batuan, masbate',
 'biliran, biliran',
 'bombon, camarinessur',
 'buenavista, agusandelnorte',
 'buhi, camarinessur',
 'bula, camarinessur',
 'bulacan, bulacan',
 'bulan, sorsogon',
 'bulusan, sorsogon',
 'butuan, agusandelnorte',
 'cabadbaran, agusandelnorte',
 'cabusao, camarinessur',
 'calabanga, camarinessur',
 'camalig, albay',
 'camaligan, camarinessur',
 'canaman, camarinessur',
 'capalonga, camarinesnorte',
 'caramoan, camarinessur',
 'caramoran, catanduanes',
 'carmen, agusandelnorte',
 'carmen, cebu',
 'casiguran, sorsogon',
 'castilla, sorsogon',
 'cataingan, masbate',
 'cawayan, masbate',
 'claveria, masbate',
 'daet, camarinesnorte',
 'danao, cebu',
 'daraga, albay',
 'delgallego, camarinessur',
 'dimasa

In [606]:
# Display the reference table (pandas truncates the rendering).
ref_df

Unnamed: 0,name,city_munic,province,clean_idx,coords,geometry
0,Aborlan,Aborlan,Palawan,"aborlan, palawan","9.4371009, 118.5484168","MULTIPOLYGON (((118.57998 9.37215, 118.57982 9..."
1,Abra De Ilog,Abra De Ilog,Occidental Mindoro,"abradeilog, occidentalmindoro","13.4437209, 120.7268262","POLYGON ((120.60896 13.35233, 120.60797 13.373..."
2,Abucay,Abucay,Bataan,"abucay, bataan","14.7213146, 120.5348704","POLYGON ((120.45676 14.69671, 120.45620 14.696..."
3,Abulug,Abulug,Cagayan,"abulug, cagayan","18.4434854, 121.4572732","MULTIPOLYGON (((121.40276 18.40896, 121.40276 ..."
4,Abuyog,Abuyog,Leyte,"abuyog, leyte","10.747102, 125.0114853","POLYGON ((125.04650 10.56751, 125.04588 10.576..."
...,...,...,...,...,...,...
1629,Zamboanga,City of Zamboanga,Zamboanga del Sur,"zamboanga, zamboangadelsur","6.9214, 122.079","MULTIPOLYGON (((122.06639 6.86972, 122.06639 6..."
1630,Zamboanguita,Zamboanguita,Negros Oriental,"zamboanguita, negrosoriental","9.1004649, 123.1994244","POLYGON ((123.20750 9.10485, 123.20722 9.10443..."
1631,Zaragoza,Zaragoza,Nueva Ecija,"zaragoza, nuevaecija","15.4475833, 120.7935538","POLYGON ((120.81170 15.47132, 120.81309 15.470..."
1632,Zarraga,Zarraga,Iloilo,"zarraga, iloilo","10.8223786, 122.6095819","POLYGON ((122.65892 10.79784, 122.65959 10.796..."


In [607]:
# Left-join onto the reference keys: every reference row is kept, census rows with unknown keys are dropped.
lighting_source_df = pd.merge(
    ref_df[["clean_idx", "name"]],
    lighting_source_df,
    how='left',
    on='clean_idx',
)

In [608]:
# Preview the merged lighting table.
lighting_source_df.head(10)

Unnamed: 0,clean_idx,name,city,region_name,province,Electricity_count,Kerosene (Gaas)_count,Liquified Petroleum Gas (LPG)_count,Oil (vegetable animal and others)_count,Solar panel_count,Solar lamp_count,Others_count,None_count,Not Reported_count,year,clean_province,clean_city
0,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),5606.0,1436.0,,,669.0,472.0,111.0,,,2015.0,palawan,aborlan
1,"abradeilog, occidentalmindoro",Abra De Ilog,ABRA DE ILOG,REGION IV-B (MIMAROPA),OCCIDENTAL MINDORO,4773.0,1333.0,30.0,18.0,20.0,550.0,578.0,27.0,,2015.0,occidentalmindoro,abradeilog
2,"abucay, bataan",Abucay,ABUCAY,REGION III (Central Luzon),BATAAN,9256.0,165.0,4.0,,1.0,1.0,19.0,7.0,,2015.0,bataan,abucay
3,"abulug, cagayan",Abulug,ABULUG,REGION II (Cagayan Valley),CAGAYAN,6729.0,643.0,3.0,1.0,8.0,18.0,3.0,,,2015.0,cagayan,abulug
4,"abuyog, leyte",Abuyog,ABUYOG,REGION VIII (Eastern Visayas),LEYTE (excluding Tacloban City),11369.0,2062.0,7.0,,12.0,43.0,10.0,5.0,,2015.0,leyte,abuyog
5,"adams, ilocosnorte",Adams,ADAMS,REGION I (Ilocos Region),ILOCOS NORTE,366.0,44.0,,,1.0,,,,,2015.0,ilocosnorte,adams
6,"agdangan, quezon",Agdangan,AGDANGAN,REGION IV-A (CALABARZON),QUEZON (excluding Lucena City),2733.0,256.0,,,33.0,135.0,10.0,4.0,,2015.0,quezon,agdangan
7,"aglipay, quirino",Aglipay,AGLIPAY,REGION II (Cagayan Valley),QUIRINO,5690.0,754.0,1.0,,44.0,54.0,29.0,5.0,,2015.0,quirino,aglipay
8,"agno, pangasinan",Agno,AGNO,REGION I (Ilocos Region),PANGASINAN,5730.0,838.0,2.0,1.0,3.0,2.0,43.0,5.0,,2015.0,pangasinan,agno
9,"agoncillo, batangas",Agoncillo,AGONCILLO,REGION IV-A (CALABARZON),BATANGAS,7687.0,82.0,2.0,,1.0,1.0,15.0,38.0,,2015.0,batangas,agoncillo


In [609]:
# Helper columns removed before export; this list is reused by the later sections.
drop_cols = ["clean_idx", "region_name", "province", "clean_province", "clean_city"]
lighting_source_df = lighting_source_df.drop(columns=drop_cols)

# NOTE(review): the default row index is written as an unnamed first column.
lighting_source_df.to_csv(
    PROJECTROOT / "data" / "cleaned-datasets" / "housing-census-fuel-lighting-2015.csv"
)

## Housing material

In [610]:
# Read the flattened housing-material table and discard the saved index column.
housing_matl_df = pd.read_csv(HOUSING_MATERIAL).drop(columns=["Unnamed: 0"])

In [611]:
# Preview the raw housing-material rows.
housing_matl_df.head(10)

Unnamed: 0,city,region_name,province,outer_wall_materials,roof_materials,count,year
0,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Galvanized iron/aluminum,21164.0,2015
1,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Tile/concrete/clay tile,3284.0,2015
2,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Half galvanized iron and half concrete,4.0,2015
3,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Bamboo/cogon/ nipa/anahaw,55.0,2015
4,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Asbestos,19.0,2015
5,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Makeshift/ salvaged/ improvised materials,42.0,2015
6,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Trapal,75.0,2015
7,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Others,,2015
8,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Concrete/brick/stone,Not Reported,,2015
9,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Wood,Galvanized iron/aluminum,1168.0,2015


In [612]:
# Keep only rows that have both a province and a city label.
housing_matl_df = housing_matl_df.dropna(subset=["province", "city"])

In [613]:
# Normalise province and city labels, then join them into a "<city>, <province>" key.
housing_matl_df["clean_province"] = housing_matl_df["province"].apply(data_cleaner)
housing_matl_df["clean_city"] = housing_matl_df["city"].apply(data_cleaner)
housing_matl_df["clean_idx"] = (
    housing_matl_df["clean_city"]
    .astype(str)
    .str.cat(housing_matl_df["clean_province"].astype(str), sep=", ")
)

In [614]:
# Apply the key-correction helpers in sequence. bohol_to_cebu runs once only:
# none of the later helpers can produce a key from its list, so a second pass is a no-op.
housing_matl_df["clean_idx"] = (
    housing_matl_df["clean_idx"]
    .apply(bohol_to_cebu)
    .apply(replace_idx)
    .apply(quirino_to_isabela)
    .apply(rizal_to_laguna)
)

In [615]:
# Distinct housing-material keys after the manual corrections.
housing_matl_idx = housing_matl_df["clean_idx"].unique().tolist()

In [616]:
# Census keys that have no counterpart in the reference table.
not_in_ref = compare_lists(housing_matl_idx, ref_df_idx)
not_in_ref

[]

In [617]:
# Reference keys that have no counterpart in the census table.
not_in_new = compare_lists(ref_df_idx, housing_matl_idx)
not_in_new

['aroroy, masbate',
 'baao, camarinessur',
 'bacacay, albay',
 'bagamanoc, catanduanes',
 'balatan, camarinessur',
 'baleno, masbate',
 'balud, masbate',
 'baras, catanduanes',
 'barcelona, sorsogon',
 'basud, camarinesnorte',
 'bato, camarinessur',
 'bato, catanduanes',
 'batuan, masbate',
 'biliran, biliran',
 'bombon, camarinessur',
 'buenavista, agusandelnorte',
 'buhi, camarinessur',
 'bula, camarinessur',
 'bulacan, bulacan',
 'bulan, sorsogon',
 'bulusan, sorsogon',
 'butuan, agusandelnorte',
 'cabadbaran, agusandelnorte',
 'cabusao, camarinessur',
 'calabanga, camarinessur',
 'camalig, albay',
 'camaligan, camarinessur',
 'canaman, camarinessur',
 'capalonga, camarinesnorte',
 'caramoan, camarinessur',
 'caramoran, catanduanes',
 'carmen, agusandelnorte',
 'carmen, cebu',
 'casiguran, sorsogon',
 'castilla, sorsogon',
 'cataingan, masbate',
 'cawayan, masbate',
 'claveria, masbate',
 'daet, camarinesnorte',
 'danao, cebu',
 'daraga, albay',
 'delgallego, camarinessur',
 'dimasa

In [618]:
# Left-join onto the reference keys: every reference row is kept, census rows with unknown keys are dropped.
housing_matl_df = pd.merge(
    ref_df[["clean_idx", "name"]],
    housing_matl_df,
    how='left',
    on='clean_idx',
)

In [619]:
# Preview the merged housing-material table.
housing_matl_df.head(10)

Unnamed: 0,clean_idx,name,city,region_name,province,outer_wall_materials,roof_materials,count,year,clean_province,clean_city
0,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Galvanized iron/aluminum,11.0,2015.0,palawan,aborlan
1,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Tile/concrete/clay tile,7.0,2015.0,palawan,aborlan
2,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Half galvanized iron and half concrete,11.0,2015.0,palawan,aborlan
3,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Bamboo/cogon/ nipa/anahaw,,2015.0,palawan,aborlan
4,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Asbestos,,2015.0,palawan,aborlan
5,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Makeshift/ salvaged/ improvised materials,,2015.0,palawan,aborlan
6,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Trapal,,2015.0,palawan,aborlan
7,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Others,,2015.0,palawan,aborlan
8,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Concrete/brick/stone,Not Reported,,2015.0,palawan,aborlan
9,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Wood,Galvanized iron/aluminum,,2015.0,palawan,aborlan


In [620]:
# Remove the helper columns (drop_cols is defined in the lighting section) and export.
housing_matl_df = housing_matl_df.drop(columns=drop_cols)

# NOTE(review): the default row index is written as an unnamed first column.
housing_matl_df.to_csv(
    PROJECTROOT / "data" / "cleaned-datasets" / "housing-census-housing-material-2015.csv"
)

## Housing tenure

In [621]:
# Read the flattened housing-tenure table and discard the saved index column.
housing_tenure_df = pd.read_csv(HOUSING_TENURE).drop(columns=["Unnamed: 0"])

In [622]:
# Preview the raw housing-tenure rows.
housing_tenure_df.head(10)

Unnamed: 0,city,region_name,province,tenure_type,bldg_type,count,year
0,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Single house,57658.0,2015
1,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Duplex,27215.0,2015
2,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Multi-unit residential,78569.0,2015
3,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Commercial/ industrial/ agricultural,426.0,2015
4,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Institutional living quarter,24.0,2015
5,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Others,158.0,2015
6,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Own or owner like possession of house and lot,Not Reported,354.0,2015
7,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Rent house/room including lot,Single house,34840.0,2015
8,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Rent house/room including lot,Duplex,21217.0,2015
9,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,Rent house/room including lot,Multi-unit residential,130042.0,2015


In [623]:
# Keep only rows that have both a province and a city label.
housing_tenure_df = housing_tenure_df.dropna(subset=["province", "city"])

In [624]:
# Normalise province and city labels, then join them into a "<city>, <province>" key.
housing_tenure_df["clean_province"] = housing_tenure_df["province"].apply(data_cleaner)
housing_tenure_df["clean_city"] = housing_tenure_df["city"].apply(data_cleaner)
housing_tenure_df["clean_idx"] = (
    housing_tenure_df["clean_city"]
    .astype(str)
    .str.cat(housing_tenure_df["clean_province"].astype(str), sep=", ")
)

In [625]:
# Apply the key-correction helpers in sequence. bohol_to_cebu runs once only:
# none of the later helpers can produce a key from its list, so a second pass is a no-op.
housing_tenure_df["clean_idx"] = (
    housing_tenure_df["clean_idx"]
    .apply(bohol_to_cebu)
    .apply(replace_idx)
    .apply(quirino_to_isabela)
    .apply(rizal_to_laguna)
)

In [626]:
# Distinct housing-tenure keys after the manual corrections.
housing_tenure_idx = housing_tenure_df["clean_idx"].unique().tolist()

In [627]:
# Census keys that have no counterpart in the reference table; such rows do not survive a left merge from ref_df.
not_in_ref = compare_lists(housing_tenure_idx, ref_df_idx)
not_in_ref

['997, zamboangasibugay']

In [628]:
# Reference keys that have no counterpart in the census table.
not_in_new = compare_lists(ref_df_idx, housing_tenure_idx)
not_in_new

['angeles, pampanga',
 'aroroy, masbate',
 'baao, camarinessur',
 'bacacay, albay',
 'bagamanoc, catanduanes',
 'balatan, camarinessur',
 'baleno, masbate',
 'balud, masbate',
 'baras, catanduanes',
 'barcelona, sorsogon',
 'basud, camarinesnorte',
 'bato, camarinessur',
 'bato, catanduanes',
 'batuan, masbate',
 'biliran, biliran',
 'bombon, camarinessur',
 'buenavista, agusandelnorte',
 'buhi, camarinessur',
 'bula, camarinessur',
 'bulacan, bulacan',
 'bulan, sorsogon',
 'bulusan, sorsogon',
 'butuan, agusandelnorte',
 'cabadbaran, agusandelnorte',
 'cabusao, camarinessur',
 'calabanga, camarinessur',
 'camalig, albay',
 'camaligan, camarinessur',
 'canaman, camarinessur',
 'capalonga, camarinesnorte',
 'caramoan, camarinessur',
 'caramoran, catanduanes',
 'carmen, agusandelnorte',
 'carmen, cebu',
 'casiguran, sorsogon',
 'castilla, sorsogon',
 'cataingan, masbate',
 'cawayan, masbate',
 'claveria, masbate',
 'daet, camarinesnorte',
 'danao, cebu',
 'daraga, albay',
 'delgallego, c

In [629]:
# Left-join onto the reference keys: every reference row is kept, census rows with unknown keys are dropped.
housing_tenure_df = pd.merge(
    ref_df[["clean_idx", "name"]],
    housing_tenure_df,
    how='left',
    on='clean_idx',
)

In [630]:
# Preview the merged housing-tenure table.
housing_tenure_df.head(10)

Unnamed: 0,clean_idx,name,city,region_name,province,tenure_type,bldg_type,count,year,clean_province,clean_city
0,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Single house,4785.0,2015.0,palawan,aborlan
1,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Duplex,118.0,2015.0,palawan,aborlan
2,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Multi-unit residential,61.0,2015.0,palawan,aborlan
3,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Commercial/ industrial/ agricultural,1.0,2015.0,palawan,aborlan
4,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Institutional living quarter,,2015.0,palawan,aborlan
5,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Others,,2015.0,palawan,aborlan
6,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Own or owner like possession of house and lot,Not Reported,3.0,2015.0,palawan,aborlan
7,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Rent house/room including lot,Single house,130.0,2015.0,palawan,aborlan
8,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Rent house/room including lot,Duplex,56.0,2015.0,palawan,aborlan
9,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),Rent house/room including lot,Multi-unit residential,233.0,2015.0,palawan,aborlan


In [631]:
# Remove the helper columns (drop_cols is defined in the lighting section) and export.
housing_tenure_df = housing_tenure_df.drop(columns=drop_cols)

# NOTE(review): the default row index is written as an unnamed first column.
housing_tenure_df.to_csv(
    PROJECTROOT / "data" / "cleaned-datasets" / "housing-census-housing-tenure-2015.csv"
)

## Water supply for cooking

In [632]:
# Read the flattened cooking-water table and discard the saved index column.
water_cooking_df = pd.read_csv(WATER_SUPPLY_COOKING).drop(columns=["Unnamed: 0"])

In [633]:
# Preview the raw cooking-water rows.
water_cooking_df.head(10)

Unnamed: 0,city,region_name,province,Own use faucet community water system_count,Shared faucet community water system_count,Own use tubed/piped deep well_count,Shared tubed/piped deep well_count,Tubed/piped shallow well_count,Dug well_count,Protected spring_count,Unprotected spring_count,Lake river rain and others_count,Peddler_count,Bottled water_count,Others_count,Not Reported_count,year
0,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,347935.0,69597.0,1181.0,1488.0,100.0,48.0,233.0,9.0,13.0,9313.0,5068.0,169.0,,2015
1,CITY OF MANDALUYONG,NCR - National Capital Region,METROPOLITAN MANILA,89749.0,7551.0,107.0,254.0,55.0,1.0,36.0,2.0,,875.0,1637.0,89.0,,2015
2,CITY OF MARIKINA,NCR - National Capital Region,METROPOLITAN MANILA,89537.0,6991.0,290.0,389.0,14.0,16.0,91.0,,,224.0,657.0,29.0,,2015
3,CITY OF PASIG,NCR - National Capital Region,METROPOLITAN MANILA,162226.0,13063.0,197.0,435.0,94.0,34.0,43.0,,,1321.0,2954.0,245.0,,2015
4,QUEZON CITY,NCR - National Capital Region,METROPOLITAN MANILA,561729.0,102919.0,2375.0,3006.0,334.0,343.0,293.0,4.0,17.0,4548.0,6572.0,904.0,,2015
5,CITY OF SAN JUAN,NCR - National Capital Region,METROPOLITAN MANILA,24496.0,3008.0,,,,,,,,783.0,336.0,,,2015
6,CALOOCAN CITY,NCR - National Capital Region,METROPOLITAN MANILA,283142.0,66524.0,5172.0,5595.0,191.0,290.0,15.0,,,2740.0,3974.0,235.0,,2015
7,CITY OF MALABON,NCR - National Capital Region,METROPOLITAN MANILA,62428.0,19561.0,412.0,416.0,18.0,11.0,,,,1728.0,1575.0,42.0,,2015
8,CITY OF NAVOTAS,NCR - National Capital Region,METROPOLITAN MANILA,42529.0,13190.0,73.0,367.0,28.0,3.0,60.0,,1.0,3864.0,785.0,4.0,,2015
9,CITY OF VALENZUELA,NCR - National Capital Region,METROPOLITAN MANILA,111294.0,31136.0,2056.0,3234.0,33.0,309.0,,,,2352.0,2582.0,45.0,,2015


In [634]:
# Keep only rows that have both a province and a city label.
water_cooking_df = water_cooking_df.dropna(subset=["province", "city"])

In [635]:
# Normalise province and city labels, then join them into a "<city>, <province>" key.
water_cooking_df["clean_province"] = water_cooking_df["province"].apply(data_cleaner)
water_cooking_df["clean_city"] = water_cooking_df["city"].apply(data_cleaner)
water_cooking_df["clean_idx"] = (
    water_cooking_df["clean_city"]
    .astype(str)
    .str.cat(water_cooking_df["clean_province"].astype(str), sep=", ")
)

In [636]:
# Apply the key-correction helpers in sequence. bohol_to_cebu runs once only:
# none of the later helpers can produce a key from its list, so a second pass is a no-op.
water_cooking_df["clean_idx"] = (
    water_cooking_df["clean_idx"]
    .apply(bohol_to_cebu)
    .apply(replace_idx)
    .apply(quirino_to_isabela)
    .apply(rizal_to_laguna)
)

In [637]:
# Distinct cooking-water keys after the manual corrections.
water_cooking_idx = water_cooking_df["clean_idx"].unique().tolist()

In [638]:
# Census keys that have no counterpart in the reference table.
not_in_ref = compare_lists(water_cooking_idx, ref_df_idx)
not_in_ref

[]

In [639]:
# Reference keys that have no counterpart in the census table.
not_in_new = compare_lists(ref_df_idx, water_cooking_idx)
not_in_new

['aroroy, masbate',
 'baao, camarinessur',
 'bacacay, albay',
 'bagamanoc, catanduanes',
 'balatan, camarinessur',
 'baleno, masbate',
 'balud, masbate',
 'baras, catanduanes',
 'barcelona, sorsogon',
 'basud, camarinesnorte',
 'bato, camarinessur',
 'bato, catanduanes',
 'batuan, masbate',
 'biliran, biliran',
 'bombon, camarinessur',
 'buenavista, agusandelnorte',
 'buhi, camarinessur',
 'bula, camarinessur',
 'bulacan, bulacan',
 'bulan, sorsogon',
 'bulusan, sorsogon',
 'butuan, agusandelnorte',
 'cabadbaran, agusandelnorte',
 'cabusao, camarinessur',
 'calabanga, camarinessur',
 'camalig, albay',
 'camaligan, camarinessur',
 'canaman, camarinessur',
 'capalonga, camarinesnorte',
 'caramoan, camarinessur',
 'caramoran, catanduanes',
 'carmen, agusandelnorte',
 'carmen, cebu',
 'casiguran, sorsogon',
 'castilla, sorsogon',
 'cataingan, masbate',
 'cawayan, masbate',
 'claveria, masbate',
 'daet, camarinesnorte',
 'danao, cebu',
 'daraga, albay',
 'delgallego, camarinessur',
 'dimasa

In [640]:
# Left-join onto the reference keys: every reference row is kept, census rows with unknown keys are dropped.
water_cooking_df = pd.merge(
    ref_df[["clean_idx", "name"]],
    water_cooking_df,
    how='left',
    on='clean_idx',
)

In [641]:
# Preview the merged cooking-water table.
water_cooking_df.head(10)

Unnamed: 0,clean_idx,name,city,region_name,province,Own use faucet community water system_count,Shared faucet community water system_count,Own use tubed/piped deep well_count,Shared tubed/piped deep well_count,Tubed/piped shallow well_count,...,Protected spring_count,Unprotected spring_count,Lake river rain and others_count,Peddler_count,Bottled water_count,Others_count,Not Reported_count,year,clean_province,clean_city
0,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),2447.0,1564.0,1007.0,2060.0,413.0,...,150.0,13.0,101.0,4.0,11.0,,,2015.0,palawan,aborlan
1,"abradeilog, occidentalmindoro",Abra De Ilog,ABRA DE ILOG,REGION IV-B (MIMAROPA),OCCIDENTAL MINDORO,1282.0,461.0,917.0,2612.0,848.0,...,538.0,197.0,174.0,,,1.0,,2015.0,occidentalmindoro,abradeilog
2,"abucay, bataan",Abucay,ABUCAY,REGION III (Central Luzon),BATAAN,1146.0,1518.0,628.0,5777.0,33.0,...,129.0,,12.0,1.0,203.0,2.0,,2015.0,bataan,abucay
3,"abulug, cagayan",Abulug,ABULUG,REGION II (Cagayan Valley),CAGAYAN,632.0,273.0,2274.0,1987.0,236.0,...,,1.0,,,73.0,,,2015.0,cagayan,abulug
4,"abuyog, leyte",Abuyog,ABUYOG,REGION VIII (Eastern Visayas),LEYTE (excluding Tacloban City),806.0,3322.0,947.0,3260.0,153.0,...,1779.0,499.0,9.0,50.0,275.0,,,2015.0,leyte,abuyog
5,"adams, ilocosnorte",Adams,ADAMS,REGION I (Ilocos Region),ILOCOS NORTE,380.0,31.0,,,,...,,,,,,,,2015.0,ilocosnorte,adams
6,"agdangan, quezon",Agdangan,AGDANGAN,REGION IV-A (CALABARZON),QUEZON (excluding Lucena City),374.0,716.0,339.0,1340.0,80.0,...,1.0,6.0,34.0,3.0,54.0,,,2015.0,quezon,agdangan
7,"aglipay, quirino",Aglipay,AGLIPAY,REGION II (Cagayan Valley),QUIRINO,450.0,256.0,1473.0,2512.0,659.0,...,461.0,594.0,1.0,2.0,28.0,1.0,,2015.0,quirino,aglipay
8,"agno, pangasinan",Agno,AGNO,REGION I (Ilocos Region),PANGASINAN,411.0,105.0,1432.0,1231.0,60.0,...,257.0,54.0,14.0,,82.0,1.0,,2015.0,pangasinan,agno
9,"agoncillo, batangas",Agoncillo,AGONCILLO,REGION IV-A (CALABARZON),BATANGAS,6071.0,606.0,253.0,615.0,200.0,...,,,43.0,,20.0,3.0,,2015.0,batangas,agoncillo


In [642]:
# Remove the helper columns (drop_cols is defined in the lighting section) and export.
water_cooking_df = water_cooking_df.drop(columns=drop_cols)

# NOTE(review): the default row index is written as an unnamed first column.
water_cooking_df.to_csv(
    PROJECTROOT / "data" / "cleaned-datasets" / "housing-census-water-supply-cooking-2015.csv"
)

## Water supply for drinking

In [643]:
# Read the flattened drinking-water table and discard the saved index column.
water_drinking_df = pd.read_csv(WATER_SUPPLY_DRINKING).drop(columns=["Unnamed: 0"])

In [644]:
# Preview the raw drinking-water rows.
water_drinking_df.head(10)

Unnamed: 0,city,region_name,province,Own use faucet community water system_count,Shared faucet community water system_count,Own use tubed/piped deep well_count,Shared tubed/piped deep well_count,Tubed/piped shallow well_count,Dug well_count,Protected spring_count,Unprotected spring_count,"Lake, river, rain and others_count",Peddler_count,Bottled water_count,Others_count,Not Reported_count,year
0,CITY OF MANILA,NCR - National Capital Region,METROPOLITAN MANILA,188513.0,52760.0,883.0,834.0,91.0,50.0,622.0,6.0,29.0,11449.0,179153.0,764.0,,2015
1,CITY OF MANDALUYONG,NCR - National Capital Region,METROPOLITAN MANILA,56381.0,6412.0,48.0,216.0,48.0,2.0,233.0,1.0,4.0,2720.0,33300.0,991.0,,2015
2,CITY OF MARIKINA,NCR - National Capital Region,METROPOLITAN MANILA,68434.0,6373.0,229.0,397.0,24.0,15.0,318.0,,2.0,1380.0,20907.0,159.0,,2015
3,CITY OF PASIG,NCR - National Capital Region,METROPOLITAN MANILA,99263.0,11296.0,234.0,435.0,55.0,31.0,846.0,5.0,10.0,2403.0,64530.0,1504.0,,2015
4,QUEZON CITY,NCR - National Capital Region,METROPOLITAN MANILA,401143.0,90313.0,1591.0,2165.0,325.0,337.0,1576.0,23.0,42.0,9656.0,170761.0,5112.0,,2015
5,CITY OF SAN JUAN,NCR - National Capital Region,METROPOLITAN MANILA,15950.0,2444.0,,,,,,,,773.0,9456.0,,,2015
6,CALOOCAN CITY,NCR - National Capital Region,METROPOLITAN MANILA,212602.0,57637.0,3628.0,4241.0,247.0,483.0,14.0,,,3794.0,84570.0,662.0,,2015
7,CITY OF MALABON,NCR - National Capital Region,METROPOLITAN MANILA,30322.0,11656.0,363.0,276.0,9.0,21.0,1.0,,,3377.0,40000.0,166.0,,2015
8,CITY OF NAVOTAS,NCR - National Capital Region,METROPOLITAN MANILA,19461.0,7915.0,50.0,321.0,18.0,4.0,268.0,,2.0,2788.0,29983.0,94.0,,2015
9,CITY OF VALENZUELA,NCR - National Capital Region,METROPOLITAN MANILA,72828.0,24330.0,868.0,2173.0,36.0,40.0,,,,3305.0,49366.0,95.0,,2015


In [645]:
# Keep only rows that have both a province and a city value.
water_drinking_df = water_drinking_df.dropna(subset=["province", "city"])

In [646]:
# Normalise the raw province and city labels with data_cleaner.
water_drinking_df["clean_province"] = water_drinking_df["province"].apply(data_cleaner)
water_drinking_df["clean_city"] = water_drinking_df["city"].apply(data_cleaner)

# Build the "<clean_city>, <clean_province>" key that is later used to match
# rows against the reference table.
water_drinking_df["clean_idx"] = (
    water_drinking_df["clean_city"]
    .astype(str)
    .str.cat(water_drinking_df["clean_province"].astype(str), sep=", ")
)

In [647]:
# Run the index-correction helpers over clean_idx, one pass per helper, in the
# same order as before.
# NOTE(review): bohol_to_cebu is applied both first and last; kept as-is —
# confirm whether the second pass is actually required.
for idx_fixer in (
    bohol_to_cebu,
    replace_idx,
    quirino_to_isabela,
    rizal_to_laguna,
    bohol_to_cebu,
):
    water_drinking_df["clean_idx"] = water_drinking_df["clean_idx"].apply(idx_fixer)

In [648]:
# Distinct clean_idx keys present in the drinking-water table, in order of
# first appearance.
water_drinking_idx = water_drinking_df["clean_idx"].unique().tolist()

In [649]:
# Keys that appear in the drinking-water data but not in the reference index.
# This must check water_drinking_idx (the keys of the table being cleaned in
# this section); the previous version compared the cooking keys by mistake,
# so unmatched drinking-water rows would have gone unnoticed.
not_in_ref = compare_lists(water_drinking_idx, ref_df_idx)
not_in_ref

[]

In [650]:
# Reference keys that have no counterpart in the drinking-water data.
# This must check water_drinking_idx (the keys of the table being cleaned in
# this section); the previous version compared against the cooking keys by
# mistake, so the reported gaps did not describe the drinking-water table.
not_in_new = compare_lists(ref_df_idx, water_drinking_idx)
not_in_new

['aroroy, masbate',
 'baao, camarinessur',
 'bacacay, albay',
 'bagamanoc, catanduanes',
 'balatan, camarinessur',
 'baleno, masbate',
 'balud, masbate',
 'baras, catanduanes',
 'barcelona, sorsogon',
 'basud, camarinesnorte',
 'bato, camarinessur',
 'bato, catanduanes',
 'batuan, masbate',
 'biliran, biliran',
 'bombon, camarinessur',
 'buenavista, agusandelnorte',
 'buhi, camarinessur',
 'bula, camarinessur',
 'bulacan, bulacan',
 'bulan, sorsogon',
 'bulusan, sorsogon',
 'butuan, agusandelnorte',
 'cabadbaran, agusandelnorte',
 'cabusao, camarinessur',
 'calabanga, camarinessur',
 'camalig, albay',
 'camaligan, camarinessur',
 'canaman, camarinessur',
 'capalonga, camarinesnorte',
 'caramoan, camarinessur',
 'caramoran, catanduanes',
 'carmen, agusandelnorte',
 'carmen, cebu',
 'casiguran, sorsogon',
 'castilla, sorsogon',
 'cataingan, masbate',
 'cawayan, masbate',
 'claveria, masbate',
 'daet, camarinesnorte',
 'danao, cebu',
 'daraga, albay',
 'delgallego, camarinessur',
 'dimasa

In [651]:
# Left-join the drinking-water rows onto the reference keys so that every
# reference row is kept, whether or not it has matching census data.
water_drinking_df = pd.merge(
    ref_df[["clean_idx", "name"]],
    water_drinking_df,
    on="clean_idx",
    how="left",
)

In [652]:
# Preview the first ten rows after the left merge onto the reference table.
water_drinking_df.head(10)

Unnamed: 0,clean_idx,name,city,region_name,province,Own use faucet community water system_count,Shared faucet community water system_count,Own use tubed/piped deep well_count,Shared tubed/piped deep well_count,Tubed/piped shallow well_count,...,Protected spring_count,Unprotected spring_count,"Lake, river, rain and others_count",Peddler_count,Bottled water_count,Others_count,Not Reported_count,year,clean_province,clean_city
0,"aborlan, palawan",Aborlan,ABORLAN,REGION IV-B (MIMAROPA),PALAWAN (excluding Puerto Princesa City),2130.0,1621.0,932.0,2286.0,403.0,...,164.0,15.0,88.0,14.0,183.0,,,2015.0,palawan,aborlan
1,"abradeilog, occidentalmindoro",Abra De Ilog,ABRA DE ILOG,REGION IV-B (MIMAROPA),OCCIDENTAL MINDORO,1076.0,459.0,846.0,2749.0,773.0,...,613.0,188.0,250.0,1.0,67.0,6.0,,2015.0,occidentalmindoro,abradeilog
2,"abucay, bataan",Abucay,ABUCAY,REGION III (Central Luzon),BATAAN,966.0,1488.0,448.0,5914.0,30.0,...,119.0,1.0,4.0,,477.0,2.0,,2015.0,bataan,abucay
3,"abulug, cagayan",Abulug,ABULUG,REGION II (Cagayan Valley),CAGAYAN,345.0,130.0,1609.0,1608.0,156.0,...,13.0,2.0,,,2217.0,,,2015.0,cagayan,abulug
4,"abuyog, leyte",Abuyog,ABUYOG,REGION VIII (Eastern Visayas),LEYTE (excluding Tacloban City),665.0,3264.0,518.0,2725.0,96.0,...,1955.0,521.0,10.0,134.0,1809.0,8.0,,2015.0,leyte,abuyog
5,"adams, ilocosnorte",Adams,ADAMS,REGION I (Ilocos Region),ILOCOS NORTE,380.0,31.0,,,,...,,,,,,,,2015.0,ilocosnorte,adams
6,"agdangan, quezon",Agdangan,AGDANGAN,REGION IV-A (CALABARZON),QUEZON (excluding Lucena City),144.0,525.0,188.0,1009.0,71.0,...,1.0,6.0,36.0,39.0,1022.0,1.0,,2015.0,quezon,agdangan
7,"aglipay, quirino",Aglipay,AGLIPAY,REGION II (Cagayan Valley),QUIRINO,197.0,189.0,1221.0,2346.0,583.0,...,474.0,592.0,1.0,16.0,834.0,,,2015.0,quirino,aglipay
8,"agno, pangasinan",Agno,AGNO,REGION I (Ilocos Region),PANGASINAN,160.0,102.0,1246.0,1171.0,56.0,...,263.0,52.0,13.0,2.0,856.0,2.0,,2015.0,pangasinan,agno
9,"agoncillo, batangas",Agoncillo,AGONCILLO,REGION IV-A (CALABARZON),BATANGAS,5641.0,621.0,242.0,650.0,197.0,...,2.0,,4.0,126.0,325.0,4.0,,2015.0,batangas,agoncillo


In [653]:
# Strip the helper columns listed in drop_cols (in place), then write the
# cleaned water-supply-for-drinking table to the cleaned-datasets folder.
water_drinking_df.drop(drop_cols, axis="columns", inplace=True)

water_drinking_df.to_csv(
    PROJECTROOT.joinpath(
        "data", "cleaned-datasets", "housing-census-water-supply-drinking-2015.csv"
    )
)