In [2]:
######################################
#### UNCOMMENT THE COMMANDS BELOW ####
######################################

!pip install -r /content/drive/MyDrive/Projects/philippines-chapter-urban-vunerability/noah/requirements.txt
!npm install -g mapshaper

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.12.2-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fiona
  Downloading Fiona-1.8.22-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.6/16.6 MB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyproj>=2.6.1.post1
  Downloading pyproj-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m88.0 MB/s[0m eta [36m0:00:00[0m
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


1. Before running the script make sure you have added a shortcut to the NOAH Datasets and other necessary files into your Google Drive...
2. Making a copy of NOAH dataset on Google Drive. First, cd into the NOAH dataset shortcut by running the command below. The path to it might differ for you but in my case I added it into my GDrive’s root folder. The format for the command is: `%cd /path/to/NOAH/shortcut`
3. Upon running, it should show the exact path to the symbolic link. Copy it! Mine looks like `this:/content/drive/.shortcut-targets-by-id/1ALE4-E9c-4AGjm1fqiPprWHrLUskeY9o/noah`
4. Run the command below to copy from the shortcut into your desired destination folder. This has to be done because files inside shortcuts are read-only: `cp -r '/sym/link/path/.' 'path/to/desired/directory'`
5. Once copied, make sure to work off of the notebook in the new directory you just copied.
6. Make sure to change the value for the PATH_TO_NB  variable under the Set config variables section. This variable contains the path to the notebook within the newly copied directory.
7. Run the cells until the Import ph-cities-shpfile part.
8. Choose between Flood or Landslide-Landslide Hazards.
    * Under Landslide-Landslide Hazards, we only have one sub-directory. One person could work on this. --> c/o @Nathalie
    * Under Flood, we have 3 sub-directories. It would be nice if we could get at least one person to run each sub-directory.
        * flood-100yr
        *  flood-25yr
        * flood-5yr
9. In case your notebook errors out, don’t worry! I have set it up to track the progress of each run. Just re-run the necessary cells and the mapping will start at the point where the notebook errored out.

Note: I have ran the following already so no need to run:
* Storm Surge Advisory 1
* Storm Surge Advisory 2
* Storm Surge Advisory 3
* Storm Surge Advisory 4
* Alluvial Fan
* Debris Flow

In [3]:
cd /content/drive/MyDrive/Projects/philippines-chapter-urban-vunerability/noah

/content/drive/.shortcut-targets-by-id/1D3_MvpM0yinyvBR0VR9XVI2Es3GejevR/noah


In [None]:
cp -r '/content/drive/.shortcut-targets-by-id/1D3_MvpM0yinyvBR0VR9XVI2Es3GejevR/noah' '/content/drive/MyDrive/Projects/philippines-chapter-urban-vunerability/noah_working'

^C


In [None]:
# Mount data from drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
from pathlib import Path
import numpy as np
import shutil
import geopandas as gp
import os
from zipfile import ZipFile
from fiona.io import ZipMemoryFile
import subprocess

### Set config variables

In [4]:
#####################################################
#      Set the value of PATH_TO_NB to the path      #
# to the notebook within the newly-copied directory #
#####################################################

PATH_TO_NB = "/content/drive/MyDrive/Projects/philippines-chapter-urban-vunerability/noah_working/noah/3-merging-noah-datasets.ipynb" 

WORKINGDIR = Path(PATH_TO_NB)
PROJECTROOT = WORKINGDIR.parents[0]

CITY_SHPFILE = Path(PROJECTROOT, "shp-assets", "ph-cities-shp-files", "ph_cities_v2.shp")
CALAMITY_SHPFILE_DIR = Path(PROJECTROOT, "shp-assets", "noah-shp-files")

TMP_FOLDER = Path(PROJECTROOT, "tmp-folder")
VAR_HOLDER = Path(PROJECTROOT, "var-holder")
DATASETS_DIR = Path(PROJECTROOT, "output-datasets")

### Helper functions

In [5]:
def unzip_path(zip):
  ZipFile(zip).extractall(TMP_FOLDER)

In [6]:
def delete_zip_folder():
  for filename in os.listdir(TMP_FOLDER):
    file_path = os.path.join(TMP_FOLDER, filename)
    try:
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.unlink(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
    except Exception as e:
        print('Failed to delete %s. Reason: %s' % (file_path, e))

In [7]:
def retrieve_shp_file(path):
  files = os.listdir(path)
  shp = [file for file in files if ".shp" in file][0]
  return shp

In [8]:
def shrink_shp_file():
  shp_file = retrieve_shp_file(TMP_FOLDER)

  source_file = Path(TMP_FOLDER, shp_file)
  destination_file = Path(TMP_FOLDER, f"{shp_file.replace('.shp', '')}-shrunk.shp")
  
  shrink = subprocess.run(["mapshaper", 
                           str(source_file), 
                           "-simplify", 
                           "5%", "-o", 
                           str(destination_file)])
  return destination_file

In [9]:
def dissolve_and_reset(gdf):
  gdf = gdf.copy()

  dissolve_col = [col for col in projected_gdf.columns if col not in ['index', 'name', 'geometry']][0]

  gdf = gdf.dissolve(["name", dissolve_col])
  gdf = gdf.reset_index()

  return gdf

In [10]:
def read_progress(path):
  try: 
    with open(path) as f:
      content = f.readline()
      return int(content)
  except FileNotFoundError:
    write_progress(path, '0')
    return 0

In [11]:
def write_progress(path, progress):
  with open(path, 'w') as f:
    f.write(progress)

In [12]:
def map_to_city(path_to_dir, dir):
  delete_zip_folder()

  print(f"Merging {dir}...")
  path = Path(path_to_dir, dir)

  zip_files = [file for file in os.listdir(path) if ".zip" in file]
  
  progress_var_path = Path(VAR_HOLDER, f"{dir}-progress.txt")

  progress = read_progress(progress_var_path)

  if len(zip_files)!=1:
    if progress == len(zip_files)-1:
      print("This directory has been merged!")
      return None

  for i in np.arange(progress, len(zip_files)): 

    zip_path = Path(path, zip_files[i])

    print(f"Progress: {i+1}/{len(zip_files)}")

    print(f"Unzipping {zip_files[i]}...")

    unzip_path(zip_path)

    print(f"Compressing {zip_files[i]}...")

    destination_path = shrink_shp_file()

    print(f"Reading compressed version of {zip_files[i]}...")

    gdf = gp.read_file(destination_path)

    if i == 0:
      print(f"Mapping to cities: {zip_files[i]}...")
      projected_df = gp.overlay(df, gdf, how="intersection").reset_index()
    else:
      print(f"Mapping to cities: {zip_files[i]}...")
      tmp_projected_df = gp.overlay(df, gdf, how="intersection").reset_index()
      
      print(f"Reading existing projected geodataframe...")
      projected_df = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))
      projected_df = pd.concat([projected_df, tmp_projected_df])

    print(f"Dropping duplicates from output dataframe...")
    projected_df.drop_duplicates(inplace=True)

    print(f"Persisting output dataframe...")
    projected_df.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

    print(f"Deleting {zip_files[i]} artifacts...")
    delete_zip_folder()
    print("========")

    write_progress(Path(VAR_HOLDER, f"{dir}-progress.txt"), str(i))

### Import ph-cities-shpfile

In [13]:
df = gp.read_file(CITY_SHPFILE)
df.head()

Unnamed: 0,psgc,name,city_munic,province,clean_idx,longitude,latitude,coords,geometry
0,1705301000,Aborlan,Aborlan,Palawan,"aborlan, palawan",118.548417,9.437101,"9.4371009, 118.5484168","MULTIPOLYGON (((118.57998 9.37215, 118.57982 9..."
1,1705101000,Abra De Ilog,Abra De Ilog,Occidental Mindoro,"abradeilog, occidentalmindoro",120.726826,13.443721,"13.4437209, 120.7268262","POLYGON ((120.60896 13.35233, 120.60797 13.373..."
2,300801000,Abucay,Abucay,Bataan,"abucay, bataan",120.53487,14.721315,"14.7213146, 120.5348704","POLYGON ((120.45676 14.69671, 120.45620 14.696..."
3,201501000,Abulug,Abulug,Cagayan,"abulug, cagayan",121.457273,18.443485,"18.4434854, 121.4572732","MULTIPOLYGON (((121.40276 18.40896, 121.40276 ..."
4,803701000,Abuyog,Abuyog,Leyte,"abuyog, leyte",125.011485,10.747102,"10.747102, 125.0114853","POLYGON ((125.04650 10.56751, 125.04588 10.576..."


In [14]:
keep_cols = ["name", "geometry"]
df = df[keep_cols]

### Storm Surge

In [None]:
# PATH_TO_STORMSURGE_DIR = Path(CALAMITY_SHPFILE_DIR, "Storm Surge")

------

In [None]:
# dir = "StormSurgeAdvisory1"

In [None]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [None]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

In [None]:
# dir = "StormSurgeAdvisory2"

In [None]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [None]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

In [None]:
# dir = "StormSurgeAdvisory3"

In [None]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [None]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

In [None]:
# dir = "StormSurgeAdvisory4"

In [None]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [None]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

### Flood

In [15]:
PATH_TO_FLOOD_DIR = Path(CALAMITY_SHPFILE_DIR, "Flood")

In [None]:
dir = "flood-100yr"

In [None]:
map_to_city(PATH_TO_FLOOD_DIR, dir)

Merging flood-100yr...
Progress: 1/79
Unzipping IlocosSur.zip...
Compressing IlocosSur.zip...
Reading compressed version of IlocosSur.zip...
Mapping to cities: IlocosSur.zip...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting IlocosSur.zip artifacts...
Progress: 2/79
Unzipping IlocosNorte.zip...
Compressing IlocosNorte.zip...
Reading compressed version of IlocosNorte.zip...
Mapping to cities: IlocosNorte.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting IlocosNorte.zip artifacts...
Progress: 3/79
Unzipping Isabela.zip...
Compressing Isabela.zip...
Reading compressed version of Isabela.zip...
Mapping to cities: Isabela.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Isabela.zip artifacts...
Progress: 4/79
Unzipping NuevaVizcaya.zip...
Compressing NuevaVizcaya.zip...
Reading compress

  tmp_projected_df = gp.overlay(df, gdf, how="intersection").reset_index()


Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting CamarinesSur.zip artifacts...
Progress: 25/79
Unzipping Catanduanes.zip...
Compressing Catanduanes.zip...
Reading compressed version of Catanduanes.zip...
Mapping to cities: Catanduanes.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Catanduanes.zip artifacts...
Progress: 26/79
Unzipping Sorsogon.zip...
Compressing Sorsogon.zip...
Reading compressed version of Sorsogon.zip...
Mapping to cities: Sorsogon.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Sorsogon.zip artifacts...
Progress: 27/79
Unzipping Aklan.zip...
Compressing Aklan.zip...
Reading compressed version of Aklan.zip...
Mapping to cities: Aklan.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dat

  tmp_projected_df = gp.overlay(df, gdf, how="intersection").reset_index()


Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Iloilo.zip artifacts...
Progress: 32/79
Unzipping Bohol.zip...
Compressing Bohol.zip...
Reading compressed version of Bohol.zip...
Mapping to cities: Bohol.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Bohol.zip artifacts...
Progress: 33/79
Unzipping Cebu.zip...
Compressing Cebu.zip...
Reading compressed version of Cebu.zip...
Mapping to cities: Cebu.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Cebu.zip artifacts...
Progress: 34/79
Unzipping EasternSamar.zip...
Compressing EasternSamar.zip...
Reading compressed version of EasternSamar.zip...
Mapping to cities: EasternSamar.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output 

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:32651

  tmp_projected_df = gp.overlay(df, gdf, how="intersection").reset_index()


Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting MetroManila.zip artifacts...
Progress: 57/79
Unzipping SultanKudarat.zip...
Compressing SultanKudarat.zip...
Reading compressed version of SultanKudarat.zip...
Mapping to cities: SultanKudarat.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting SultanKudarat.zip artifacts...
Progress: 58/79
Unzipping MountainProvince.zip...
Compressing MountainProvince.zip...
Reading compressed version of MountainProvince.zip...
Mapping to cities: MountainProvince.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting MountainProvince.zip artifacts...
Progress: 59/79
Unzipping Ifugao.zip...
Compressing Ifugao.zip...
Reading compressed version of Ifugao.zip...
Mapping to cities: Ifugao.zip...
Reading existing projected geodataframe...
Dropping duplicates from 

IndexError: ignored

In [None]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

--------

In [36]:
dir = "flood-25yr"

In [37]:
map_to_city(PATH_TO_FLOOD_DIR, dir)

Merging flood-25yr...
Progress: 1/71
Unzipping LaUnion.zip...
Compressing LaUnion.zip...
Reading compressed version of LaUnion.zip...
Mapping to cities: LaUnion.zip...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting LaUnion.zip artifacts...
Progress: 2/71
Unzipping IlocosNorte.zip...
Compressing IlocosNorte.zip...
Reading compressed version of IlocosNorte.zip...
Mapping to cities: IlocosNorte.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting IlocosNorte.zip artifacts...
Progress: 3/71
Unzipping IlocosSur.zip...
Compressing IlocosSur.zip...
Reading compressed version of IlocosSur.zip...
Mapping to cities: IlocosSur.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting IlocosSur.zip artifacts...
Progress: 4/71
Unzipping Pangasinan.zip...
Compressing Pangasinan.zip...
Reading compressed ve

In [38]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [39]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [40]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------------

In [16]:
dir = "flood-5yr"

In [17]:
map_to_city(PATH_TO_FLOOD_DIR, dir)

Merging flood-5yr...
Progress: 21/65
Unzipping Rizal.zip...
Compressing Rizal.zip...
Reading compressed version of Rizal.zip...
Mapping to cities: Rizal.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Rizal.zip artifacts...
Progress: 22/65
Unzipping Masbate.zip...
Compressing Masbate.zip...
Reading compressed version of Masbate.zip...
Mapping to cities: Masbate.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Masbate.zip artifacts...
Progress: 23/65
Unzipping Sorsogon.zip...
Compressing Sorsogon.zip...
Reading compressed version of Sorsogon.zip...
Mapping to cities: Sorsogon.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Sorsogon.zip artifacts...
Progress: 24/65
Unzipping CamarinesNorte.zip...
Compressing CamarinesNorte.zip...
Re

In [18]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [19]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [20]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------------

### Landslide - Landslide Hazards

In [16]:
PATH_TO_DEBRIS_FLOW = Path(CALAMITY_SHPFILE_DIR, "Landslide")

In [17]:
dir = "LandslideHazards"

In [18]:
map_to_city(PATH_TO_DEBRIS_FLOW, dir)

Merging LandslideHazards...
Progress: 5/82
Unzipping Batanes.zip...
Compressing Batanes.zip...
Reading compressed version of Batanes.zip...
Mapping to cities: Batanes.zip...
Reading existing projected geodataframe...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Batanes.zip artifacts...
Progress: 6/82
Unzipping Isabela.zip...
Compressing Isabela.zip...


ERROR:fiona._env:/content/drive/MyDrive/Projects/philippines-chapter-urban-vunerability/noah_working/noah/tmp-folder/Isabela_LandslideHazards_IMWTG_Undissolved-shrunk.shp: No such file or directory


Reading compressed version of Isabela.zip...


DriverError: ignored

In [None]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

### Alluvial Fan, Debris Flow

In [None]:
# PATH_TO_AFDF = Path(CALAMITY_SHPFILE_DIR, "AlluvialFanDebrisFlow")

In [None]:
# dir = "AlluvialFan"

In [None]:
# map_to_city(PATH_TO_AFDF, dir)

Merging AlluvialFan...
Progress: 1/1
Unzipping Philippines_AlluvialFan.zip...
Reading Philippines_AlluvialFan.zip...
Mapping to cities: Philippines_AlluvialFan.zip...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Philippines_AlluvialFan.zip artifacts...


In [None]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

-------

In [None]:
# PATH_TO_DEBRIS_FLOW = Path(CALAMITY_SHPFILE_DIR, "AlluvialFanDebrisFlow")

In [None]:
# dir = "DebrisFlow"

In [None]:
# map_to_city(PATH_TO_AFDF, dir)

Merging DebrisFlow...
Progress: 1/1
Unzipping Philippines_DebrisFlow.zip...
Reading Philippines_DebrisFlow.zip...
Mapping to cities: Philippines_DebrisFlow.zip...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Philippines_DebrisFlow.zip artifacts...


In [None]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------