# River Raster

The goal of this note book is to take river network from [Yan 2019](https://doi.org/10.1038/s41597-019-0243-y) vector files, join it with coastline files from the ocean_mask.tif and make a 1-km binary raster. Also using [Hydrosheds] 30s product. (https://www.hydrosheds.org/)

Note that the isloated 'coastline' from the GHS-SMOD also includes inland lakes. 

**Steps**

*To do once complete: Add file names and data descriptions to project*

### Dependencies

In [23]:
import rasterio 
import numpy as np
import pandas as pd
import geopandas as gpd
from rasterio import features
import os
from glob import glob

### Files Paths

In [39]:
# DATA_IN = '/Users/cascade/Desktop/Yan2019_Rivers/river_and_wrz/Global_River_L1_to_L4/' #YAN2019
DATA_IN = '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/'
DATA_OUT = '/Users/cascade/Desktop/Hydro_river_files/'

### Loop through rivers and merge them

In [40]:
dir_list = glob(DATA_IN)

In [41]:
dir_list= glob(DATA_IN+'*/')
dir_list

['/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/na_riv_30s/',
 '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/eu_riv_30s/',
 '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/af_riv_30s/',
 '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/as_riv_30s/',
 '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/ca_riv_30s/',
 '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/sa_riv_30s/',
 '/Users/cascade/Github/wastewater_ohi/data/raw/Hydrosheds/RIV_30s/au_riv_30s/']

In [48]:
# build loop

# gpd_out = gpd.GeoDataFrame()

# # for hydro, for Yan2019 drop dir loop
# for dir_nm in dir_list:
#     for fn in os.listdir(dir_nm):
#         if fn.endswith('.shp'): 
#             river = gpd.read_file(dir_nm+fn) # open shape file
#             gpd_out = gpd_out.append(river) # merge to gpd_out
#             print(gpd_out.shape)

# # write out to file            
# gpd_out.to_file(DATA_OUT+'Hydro30s_rivers_stacked.shp')

(432538, 3)
(896714, 3)
(1613576, 3)
(2483432, 3)
(2560117, 3)
(2999273, 3)
(3199865, 3)


KeyboardInterrupt: 

### Write rivers to bianary raster
- add coastlines later

In [10]:
# Load raster
RAW_IN = '/Users/cascade/Github/wastewater_ohi/data/raw/'
pop2015_fn = 'GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif'
DATA_OUT = '/Users/cascade/Desktop/river_files/'
rst = rasterio.open(RAW_IN+pop2015_fn)

In [11]:
def poly_to_raster (rst, polys, value, touched, out_fn, fill_value):
    """Function makes a raster from a list of polygons
    
    Args:   rst = input raster already read in as a rasterio object to act as a template
            polys = input polygons already read in as a gpd dataframe
            value = col with value to burn into raster
            touched = bool, if True all pixels touched (not centers) are burned into raster
            out_fn = out file name 
            fill_value = value to revalue input raster before burning in polygons 
    
    """

    meta = rst.meta.copy() # copy meta data from rst
    out_arr = rst.read(1) # get an array to burn shapes
    out_arr.fill(fill_value) # revalue rst to an Nan Value before burning in polygons
    
    # extract geom and values to burn
    shapes = ((geom,value) for geom, value in zip(polys['geometry'], polys[value])) 
    
    # burn shapes intp an array
    burned = features.rasterize(shapes=shapes, fill=0, out=out_arr, transform=rst.transform, all_touched=touched)
    
    # write our raster to disk
    with rasterio.open(out_fn, 'w', **meta) as out:
        out.write_band(1, burned)

In [12]:
# Load rivers
#rivers = gpd.read_file(DATA_OUT+'Yan2019_rivers_stacked.shp') #yan2019
rivers = gpd.read_file(DATA_OUT+'Hydro30s_rivers_stacked.shp')

In [13]:
rivers.head()

Unnamed: 0,R_ID,R_level,Length,geometry
0,60101010000.0,4,90328.717472,LINESTRING (118.2444971291418 -34.228773852898...
1,60101010000.0,4,20712.337649,LINESTRING (118.1846008990577 -34.099170876814...
2,60101010000.0,4,11548.006486,LINESTRING (118.0960340697835 -34.064426320294...
3,60101010000.0,4,75171.66553,LINESTRING (117.877462213175 -34.5236290553232...
4,60101010000.0,4,18002.743494,LINESTRING (117.538478763331 -34.8584626724751...


In [15]:
# try transforming the crs
print(rivers.crs)
new_crs = rst.crs
print(new_crs)
rivers = rivers.to_crs(new_crs)
rivers.head()

{'init': 'epsg:4326'}
PROJCS["World_Mollweide",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.017453292519943295]],PROJECTION["Mollweide"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],UNIT["Meter",1.0]]


Unnamed: 0,R_ID,R_level,Length,geometry
0,60101010000.0,4,90328.717472,LINESTRING (10532425.24887448 -4134666.1291585...
1,60101010000.0,4,20712.337649,LINESTRING (10537166.65721238 -4119754.1537796...
2,60101010000.0,4,11548.006486,LINESTRING (10531962.68409732 -4115755.0188741...
3,60101010000.0,4,75171.66553,LINESTRING (10476717.44246639 -4168559.5852571...
4,60101010000.0,4,18002.743494,LINESTRING (10420277.5602347 -4206993.75550851...


In [17]:
# make a bianary column to burn into the raster
rivers['true'] = 1
rivers.head()

Unnamed: 0,R_ID,R_level,Length,geometry,true
0,60101010000.0,4,90328.717472,LINESTRING (10532425.24887448 -4134666.1291585...,1
1,60101010000.0,4,20712.337649,LINESTRING (10537166.65721238 -4119754.1537796...,1
2,60101010000.0,4,11548.006486,LINESTRING (10531962.68409732 -4115755.0188741...,1
3,60101010000.0,4,75171.66553,LINESTRING (10476717.44246639 -4168559.5852571...,1
4,60101010000.0,4,18002.743494,LINESTRING (10420277.5602347 -4206993.75550851...,1


In [18]:
# Try to make a raster

meta = rst.meta.copy() # copy meta data from rst
out_arr = rst.read(1) # get an array to burn shapes
out_arr.fill(0) # revalue rst to an Nan Value before burning in polygons

# extract geom and values to burn
shapes = ((geom,value) for geom, value in zip(rivers['geometry'], rivers['true'])) 

In [19]:
# burn shapes intp an array
burned = features.rasterize(shapes=shapes, fill=0, out=out_arr, transform=rst.transform, all_touched=True)

In [20]:
# Check Uniques

unique, counts = np.unique(burned, return_counts=True)
dict(zip(unique, counts))

{0.0: 645092767, 1.0: 4383233}

In [21]:
# Write raster
fn_out = 'Yan2019_rivers_raster.tif'

# write our raster to disk
with rasterio.open(DATA_OUT+fn_out, 'w', **meta) as out:
    out.write_band(1, burned)

# Add coastlines to river raster

Because the ocean_mask is in EPSG:4326, we cannot use it to match with the rivers projected into ESPG:54009 (used by the GHS-Pop). So intead we are going to isolate coastlines using our rural_urban.tif.

This is actually better because the GHS-Pop isolates major rivers and lakes

## First need to isolate coastlines from GHS-SMOD Urban/Rural raster

In [33]:
rural_urban_path = '/Users/cascade/Github/wastewater_ohi/data/interim/rural_urban.tif'

In [34]:
rural_urban_rst = rasterio.open(rural_urban_path)

In [35]:
# check unique values
np.unique(rural_urban_rst.read(1))

array([-200.,   10.,  111.,  222.], dtype=float32)

In [36]:
# Mask ocean as 0 and land as 1
# check unique values
mask = rural_urban_rst.read(1)
mask[mask < 11] = 0
mask[mask > 0] = 1

In [39]:
# write to out land/ocean mask <------------------ LOOKS GOOD NO NEED TO CHECK CPT 2020.01.21
# meta.update({'dtype' : 'float32'}) # update d-type

# with rasterio.open('/Users/cascade/Desktop/ocean_land.tif', 'w', **meta) as out:
#     out.write_band(1, mask)

In [40]:
from skimage.morphology import erosion

In [41]:
a = mask # get land values set to 1

In [42]:
b = erosion(mask) # erode land in-ward by 1-km

In [51]:
# write it out to check it <------------------ LOOKS GOOD NO NEED TO CHECK CPT 2020.01.21

meta.update({'dtype' : 'float32'}) # update d-type

file_out = '/Users/cascade/Desktop/ocean_land1_erosion1.tif'
with rasterio.open(file_out, 'w', **meta) as dst:
    dst.write_band(1, b)

In [45]:
# Isolate coastline values

a[a == 1] = 2 # up the values and then subtract
c = a - b

In [46]:
np.unique(c)

array([0., 1., 2.], dtype=float32)

In [47]:
# We want to keep 2 as the coastline
c[c == 1] = 0

In [48]:
np.unique(c)

array([0., 2.], dtype=float32)

In [50]:
# write it out to check it

meta.update({'dtype' : 'float32'}) # update d-type

file_out = '/Users/cascade/Desktop/ocean_land_coastline.tif'
with rasterio.open(file_out, 'w', **meta) as dst:
    dst.write_band(1, c)

## Combine Coastline and rivers into one raster

In [54]:
river_rst = rasterio.open(DATA_OUT+'riv_30s.tif')
coast_rst = rasterio.open('/Users/cascade/Desktop/Yan2019_river_files/ocean_land_coastline.tif')

In [56]:
# check values
print(np.unique(river_rst.read(1)))
print(np.unique(coast_rst.read(1)))

[0. 1.]
[0. 2.]


In [57]:
river_coast = river_rst.read(1) + coast_rst.read(1)

In [58]:
np.unique(river_coast) # ocean = 0, 1 = river, 2 = coastline and lakes, 3 = river-coast agreement 

array([0., 1., 2., 3.])

In [59]:
# set to bianry <<<-- CPT can mask out inland lakes if needed by setting remaining 2 values to 2

out = river_coast
out[out > 0] = 1
np.unique(out)

array([0., 1.])

In [62]:
meta = coast_rst.meta.copy() # copy meta data from rst

meta.update({'dtype' : 'float64'}) # update d-type

file_out = '/Users/cascade/Desktop/riv_30s_coastlines.tif'
with rasterio.open(file_out, 'w', **meta) as dst:
    dst.write_band(1, out)