Before running this notebook, make sure you have added a shortcut to the [NOAH Datasets and other necessary files](https://drive.google.com/drive/folders/1D3_MvpM0yinyvBR0VR9XVI2Es3GejevR?usp=sharing) folder to your Google Drive.

*Making a copy of the NOAH dataset on Google Drive.* First, `cd` into the NOAH dataset shortcut by running the cell below. The path may differ for you; in my case, I added the shortcut to my Google Drive's root folder.

The format for the command is: 


```
%cd /path/to/NOAH/shortcut
```

Upon running, it should show the exact path to the symbolic link. Copy it! Mine looks like this: 

`/content/drive/.shortcut-targets-by-id/1ALE4-E9c-4AGjm1fqiPprWHrLUskeY9o/NOAH Downloads`

In [46]:
#%cd /content/drive/MyDrive/NOAH Downloads

Run the cell below to copy the contents of the shortcut into your desired destination folder. This is necessary because files inside shortcuts are read-only.

`cp -r '/sym/link/path/.' 'path/to/desired/directory'`

```
# This is formatted as code
```



-------

In [1]:
#!pip install -r /content/drive/MyDrive/noah/requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
#!npm install -g mapshaper

[K[?25h/tools/node/bin/mapshaper -> /tools/node/lib/node_modules/mapshaper/bin/mapshaper
/tools/node/bin/mapshaper-gui -> /tools/node/lib/node_modules/mapshaper/bin/mapshaper-gui
/tools/node/bin/mapshaper-xl -> /tools/node/lib/node_modules/mapshaper/bin/mapshaper-xl
[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m @tmcw/togeojson@5.6.0 requires a peer of @types/geojson@* but none is installed. You must install peer dependencies yourself.
[0m
+ mapshaper@0.6.18
updated 1 package in 4.497s

[33m[39m
[33m   ╭───────────────────────────────────────────────────────────────╮[39m
   [33m│[39m                                                               [33m│[39m
   [33m│[39m      New [31mmajor[39m version of npm available! [31m6.14.8[39m → [32m9.2.0[39m       [33m│[39m
   [33m│[39m   [33mChangelog:[39m [36mhttps://github.com/npm/cli/releases/tag/v9.2.0[39m   [33m│[39m
   [33m│[39m               Run [32mnpm install -g npm[39m to update!               [33m│[39m


In [3]:
# Mount data from drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import pandas as pd
from pathlib import Path
import numpy as np
import shutil
import geopandas as gp
import os
from zipfile import ZipFile
from fiona.io import ZipMemoryFile
import subprocess

### Set config variables

In [2]:
# Location of this notebook on the mounted Drive; every other path below is
# derived from the folder that contains it.
PATH_TO_NB = "/content/drive/MyDrive/noah/3-merging-noah-datasets.ipynb" # Change this

WORKINGDIR = Path(PATH_TO_NB)
PROJECTROOT = WORKINGDIR.parent

# Input shapefiles: city boundaries and the per-hazard NOAH archives.
CITY_SHPFILE = PROJECTROOT / "shp-assets" / "ph-cities-shp-files" / "ph_cities_v2.shp"
CALAMITY_SHPFILE_DIR = PROJECTROOT / "shp-assets" / "noah-shp-files"

# Scratch space for unzipped archives, progress markers, and final outputs.
TMP_FOLDER = PROJECTROOT / "tmp-folder"
VAR_HOLDER = PROJECTROOT / "var-holder"
DATASETS_DIR = PROJECTROOT / "output-datasets"

### Helper functions

In [3]:
def unzip_path(zip):
  """Extract every member of a zip archive into ``TMP_FOLDER``.

  Args:
    zip: Path of the archive to extract. The parameter name shadows the
      ``zip`` builtin inside this function; it is kept so existing callers
      that pass it by keyword keep working.
  """
  # The context manager closes the archive handle even if extraction fails.
  with ZipFile(zip) as archive:
    archive.extractall(TMP_FOLDER)

In [4]:
def delete_zip_folder():
  """Remove everything inside ``TMP_FOLDER`` while keeping the folder itself.

  Deletion is best-effort: a failure on one entry is reported with ``print``
  and the remaining entries are still processed. If ``TMP_FOLDER`` does not
  exist there is nothing to clean and the function returns immediately.
  """
  # On a fresh project the scratch folder may not have been created yet.
  if not os.path.isdir(TMP_FOLDER):
    return

  for filename in os.listdir(TMP_FOLDER):
    file_path = os.path.join(TMP_FOLDER, filename)
    try:
      if os.path.isfile(file_path) or os.path.islink(file_path):
        # unlink removes regular files and symlinks (without following them).
        os.unlink(file_path)
      elif os.path.isdir(file_path):
        shutil.rmtree(file_path)
    except Exception as e:
      print('Failed to delete %s. Reason: %s' % (file_path, e))

In [5]:
def retrieve_shp_file(path):
  """Return the file name of the shapefile stored in a directory.

  Only names that end in ``.shp`` qualify, so sidecar files such as
  ``*.shp.xml`` are ignored. When several shapefiles are present the
  alphabetically first one is returned, which keeps the choice stable
  across runs (``os.listdir`` order is arbitrary).

  Args:
    path: Directory to search (not recursive).

  Returns:
    The bare file name (not the full path) of the shapefile.

  Raises:
    IndexError: If the directory contains no ``.shp`` file.
  """
  candidates = sorted(name for name in os.listdir(path) if name.endswith(".shp"))
  if not candidates:
    raise IndexError(f"No .shp file found in {path}")
  return candidates[0]

In [6]:
def shrink_shp_file():
  """Simplify the shapefile in ``TMP_FOLDER`` with the ``mapshaper`` CLI.

  The shapefile is located with ``retrieve_shp_file`` and simplified with
  ``mapshaper <src> -simplify 5% -o <dst>``, where the destination is a
  sibling file named ``<stem>-shrunk.shp``.

  Returns:
    Path of the simplified shapefile inside ``TMP_FOLDER``.

  Raises:
    subprocess.CalledProcessError: If ``mapshaper`` exits with a non-zero
      status, so a failed simplification is reported here rather than as a
      missing-file error later on.
  """
  shp_file = retrieve_shp_file(TMP_FOLDER)

  source_file = Path(TMP_FOLDER, shp_file)
  destination_file = source_file.with_name(f"{source_file.stem}-shrunk.shp")

  command = [
      "mapshaper",
      str(source_file),
      "-simplify",
      "5%",
      "-o",
      str(destination_file),
  ]
  # check=True surfaces a mapshaper failure instead of silently continuing.
  subprocess.run(command, check=True)
  return destination_file

In [7]:
def dissolve_and_reset(gdf):
  """Dissolve a frame by ``name`` plus its first remaining attribute column.

  The second grouping column is the first column of ``gdf`` that is not
  ``index``, ``name`` or ``geometry`` (presumably the hazard-level attribute
  of the NOAH layer; confirm against the dataset schema).

  Args:
    gdf: GeoDataFrame holding a ``name`` column, a geometry column and at
      least one further attribute column.

  Returns:
    The frame dissolved on ``["name", <attribute column>]`` with the group
    keys turned back into ordinary columns via ``reset_index``.

  Raises:
    IndexError: If ``gdf`` has no column besides ``index``, ``name`` and
      ``geometry``.
  """
  # Inspect the frame that was passed in, not a notebook-level global, so the
  # function works on a fresh kernel and for any input frame.
  candidates = [col for col in gdf.columns if col not in ('index', 'name', 'geometry')]
  if not candidates:
    raise IndexError("No attribute column to dissolve on besides 'index', 'name' and 'geometry'")
  dissolve_col = candidates[0]

  # dissolve returns a new frame, so no defensive copy of the input is needed.
  return gdf.dissolve(["name", dissolve_col]).reset_index()

In [8]:
def read_progress(path):
  """Read the integer stored on the first line of a progress file.

  If the file does not exist yet, it is created via ``write_progress`` with
  the content ``'0'`` and ``0`` is returned.

  Args:
    path: Location of the progress file.

  Returns:
    The stored progress value as an ``int``.
  """
  try:
    handle = open(path)
  except FileNotFoundError:
    # First run for this dataset: seed the marker so later reads succeed.
    write_progress(path, '0')
    return 0

  with handle:
    first_line = handle.readline()
  return int(first_line)

In [9]:
def write_progress(path, progress):
  """Overwrite the progress file at ``path`` with ``progress``.

  Args:
    path: Location of the progress file. Missing parent directories are
      created so the first write on a fresh project does not fail.
    progress: Value to store. Strings are written unchanged; any other value
      (for example an ``int``) is converted with ``str`` first.
  """
  parent = os.path.dirname(path)
  if parent:
    os.makedirs(parent, exist_ok=True)

  with open(path, 'w') as f:
    f.write(str(progress))

In [10]:
def map_to_city(path_to_dir, dir):
  """Intersect every zipped hazard shapefile of one dataset with the cities.

  For each ``.zip`` archive in ``path_to_dir/dir`` the function unzips it
  into ``TMP_FOLDER``, simplifies it with ``shrink_shp_file``, overlays it
  (``how="intersection"``) on the module-level ``df`` GeoDataFrame defined
  elsewhere in this notebook, and accumulates the result in
  ``DATASETS_DIR/<dir>-projected.parquet``. After each archive the index of
  that archive is written to ``VAR_HOLDER/<dir>-progress.txt`` so that an
  interrupted run can be resumed.

  Resuming restarts at the last recorded index, i.e. that archive is
  processed again; repeated rows are removed by ``drop_duplicates``.

  Args:
    path_to_dir: Directory that contains the dataset folder.
    dir: Name of the dataset folder; also used as the prefix of the progress
      and output file names. (The name shadows the ``dir`` builtin inside
      this function and is kept for compatibility with existing callers.)

  Returns:
    None. Results are persisted to the parquet file described above.
  """
  # Make sure scratch, progress and output folders exist on a fresh project.
  for folder in (TMP_FOLDER, VAR_HOLDER, DATASETS_DIR):
    Path(folder).mkdir(parents=True, exist_ok=True)

  delete_zip_folder()

  print(f"Merging {dir}...")
  path = Path(path_to_dir, dir)

  # Sorted so the index stored in the progress file refers to the same archive
  # on every run; os.listdir returns entries in arbitrary order.
  zip_files = sorted(file for file in os.listdir(path) if file.endswith(".zip"))
  total = len(zip_files)

  progress_var_path = Path(VAR_HOLDER, f"{dir}-progress.txt")
  output_path = Path(DATASETS_DIR, f"{dir}-projected.parquet")

  progress = read_progress(progress_var_path)

  # A stored 0 cannot distinguish "nothing done" from "first archive done",
  # so single-archive datasets are always processed again.
  if total != 1 and progress == total - 1:
    print("This directory has been merged!")
    return None

  for i in range(progress, total):
    zip_name = zip_files[i]
    zip_path = Path(path, zip_name)

    print(f"Progress: {i+1}/{total}")

    print(f"Unzipping {zip_name}...")

    unzip_path(zip_path)

    print(f"Compressing {zip_name}...")

    destination_path = shrink_shp_file()

    print(f"Reading compressed version of {zip_name}...")

    gdf = gp.read_file(destination_path)

    print(f"Mapping to cities: {zip_name}...")
    overlay = gp.overlay(df, gdf, how="intersection").reset_index()

    if i == 0:
      # First archive: start a fresh output instead of appending.
      projected_df = overlay
    else:
      print("Reading existing projected geodataframe...")
      projected_df = gp.read_parquet(output_path)
      projected_df = pd.concat([projected_df, overlay])

    print("Dropping duplicates from output dataframe...")
    projected_df = projected_df.drop_duplicates()

    print("Persisting output dataframe...")
    projected_df.to_parquet(output_path)

    print(f"Deleting {zip_name} artifacts...")
    delete_zip_folder()
    print("========")

    # Record progress only after the output has been persisted.
    write_progress(progress_var_path, str(i))

### Import ph-cities-shpfile

In [11]:
# Load the city shapefile; map_to_city reads this module-level `df` when it
# overlays each hazard layer.
df = gp.read_file(CITY_SHPFILE)
df.head()

Unnamed: 0,psgc,name,city_munic,province,clean_idx,longitude,latitude,coords,geometry
0,1705301000,Aborlan,Aborlan,Palawan,"aborlan, palawan",118.548417,9.437101,"9.4371009, 118.5484168","MULTIPOLYGON (((118.57998 9.37215, 118.57982 9..."
1,1705101000,Abra De Ilog,Abra De Ilog,Occidental Mindoro,"abradeilog, occidentalmindoro",120.726826,13.443721,"13.4437209, 120.7268262","POLYGON ((120.60896 13.35233, 120.60797 13.373..."
2,300801000,Abucay,Abucay,Bataan,"abucay, bataan",120.53487,14.721315,"14.7213146, 120.5348704","POLYGON ((120.45676 14.69671, 120.45620 14.696..."
3,201501000,Abulug,Abulug,Cagayan,"abulug, cagayan",121.457273,18.443485,"18.4434854, 121.4572732","MULTIPOLYGON (((121.40276 18.40896, 121.40276 ..."
4,803701000,Abuyog,Abuyog,Leyte,"abuyog, leyte",125.011485,10.747102,"10.747102, 125.0114853","POLYGON ((125.04650 10.56751, 125.04588 10.576..."


In [12]:
# Keep only the city name and its geometry: map_to_city overlays this frame
# and dissolve_and_reset groups on `name`.
keep_cols = ["name", "geometry"]
df = df[keep_cols]

### Storm Surge

In [13]:
# PATH_TO_STORMSURGE_DIR = Path(CALAMITY_SHPFILE_DIR, "Storm Surge")

------

In [14]:
# dir = "StormSurgeAdvisory1"

In [15]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [16]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [17]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [18]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

In [19]:
# dir = "StormSurgeAdvisory2"

In [20]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [21]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [22]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [23]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

In [24]:
# dir = "StormSurgeAdvisory3"

In [25]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [26]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [27]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [28]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

In [29]:
# dir = "StormSurgeAdvisory4"

In [30]:
# map_to_city(PATH_TO_STORMSURGE_DIR, dir)

In [31]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [32]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [33]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

### Flood

In [34]:
PATH_TO_FLOOD_DIR = Path(CALAMITY_SHPFILE_DIR, "Flood")

In [35]:
dir = "flood-100yr"

In [None]:
map_to_city(PATH_TO_FLOOD_DIR, dir)

In [None]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

--------

In [None]:
dir = "flood-25yr"

In [None]:
map_to_city(PATH_TO_FLOOD_DIR, dir)

In [None]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------------

In [None]:
dir = "flood-5yr"

In [None]:
map_to_city(PATH_TO_FLOOD_DIR, dir)

In [None]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------------

### Landslide - Landslide Hazards

In [None]:
PATH_TO_DEBRIS_FLOW = Path(CALAMITY_SHPFILE_DIR, "Landslide")

In [None]:
dir = "LandslideHazards"

In [None]:
map_to_city(PATH_TO_DEBRIS_FLOW, dir)

In [None]:
projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [None]:
projected_gdf = dissolve_and_reset(projected_gdf)

In [None]:
projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------

### Alluvial Fan, Debris Flow

In [32]:
# PATH_TO_AFDF = Path(CALAMITY_SHPFILE_DIR, "AlluvialFanDebrisFlow")

In [33]:
# dir = "AlluvialFan"

In [34]:
# map_to_city(PATH_TO_AFDF, dir)

Merging AlluvialFan...
Progress: 1/1
Unzipping Philippines_AlluvialFan.zip...
Reading Philippines_AlluvialFan.zip...
Mapping to cities: Philippines_AlluvialFan.zip...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Philippines_AlluvialFan.zip artifacts...


In [35]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [36]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [37]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

-------

In [38]:
# PATH_TO_DEBRIS_FLOW = Path(CALAMITY_SHPFILE_DIR, "AlluvialFanDebrisFlow")

In [39]:
# dir = "DebrisFlow"

In [40]:
# map_to_city(PATH_TO_AFDF, dir)

Merging DebrisFlow...
Progress: 1/1
Unzipping Philippines_DebrisFlow.zip...
Reading Philippines_DebrisFlow.zip...
Mapping to cities: Philippines_DebrisFlow.zip...
Dropping duplicates from output dataframe...
Persisting output dataframe...
Deleting Philippines_DebrisFlow.zip artifacts...


In [41]:
# projected_gdf = gp.read_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

In [42]:
# projected_gdf = dissolve_and_reset(projected_gdf)

In [43]:
# projected_gdf.to_parquet(Path(DATASETS_DIR, f"{dir}-projected.parquet"))

------