# Intersect catchment with MOD10A1 
# Finds the mean NDSI of each HRU in the model setup with rasterstats.

### Note
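The QGIS function `QgsZonalStatistics` (see the commented-out imports below) automatically adds the calculated value to the shapefile used as input to the function, which is why this workflow operates on a copy of the catchment shapefile. The rasterstats function `zonal_stats` used in this notebook returns the statistics instead of writing them into its input, but the copy step is kept. The workflow is thus: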
1. Find the source catchment shapefile;
2. Copy the source catchment shapefile to the destination location;
3. Run the zonal statistics algorithm on the copy.

In [1]:
# modules
import os
import sys
from pathlib import Path
from shutil import copyfile
from datetime import datetime
import geopandas as gpd
import rasterstats
import pandas as pd
import rasterio
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

#from qgis.core import QgsVectorLayer
#from qgis.core import QgsRasterLayer
#from qgis.analysis import QgsZonalStatistics

#### Control file handling

In [2]:
# Easy access to control file folder.
# The path is relative, so it is resolved against the current working directory.
controlFolder = Path('../../0_control_files')

In [3]:
# Store the name of the 'active' file in a variable.
# Every read_from_control() call in this notebook reads controlFolder/controlFile.
controlFile = 'control_active.txt'

In [4]:
# Function to extract a given setting from the control file
def read_from_control(file, setting):
    """Return the value of `setting` from a control file.

    The file is scanned top to bottom. The first line that contains
    `setting` and does not start with '#' is used. On that line, everything
    up to the first '|' is dropped, anything after a '#' is dropped as a
    comment, and surrounding whitespace is stripped.

    Parameters
    ----------
    file : path-like
        Control file to read.
    setting : str
        Text identifying the setting; matched as a substring of the line.

    Returns
    -------
    str
        The setting's value.

    Raises
    ------
    ValueError
        If no uncommented line contains `setting`, or the matching line has
        no '|' separator. Previously a missing setting silently returned the
        value found on the last line of the file.
    """
    with open(file) as contents:
        for line in contents:

            # Skip commented-out lines and lines about other settings
            if line.startswith('#') or setting not in line:
                continue

            # Split into name and value on the first '|'
            _, separator, value = line.partition('|')
            if not separator:
                raise ValueError(
                    "Setting '{}' found in '{}' but the line has no '|' separator: {!r}".format(
                        setting, file, line))

            # Remove trailing comment (no-op without '#') and surrounding whitespace
            return value.split('#', 1)[0].strip()

    raise ValueError("Setting '{}' not found in control file '{}'".format(setting, file))

In [5]:
# Function to specify a default path
def make_default_path(suffix):
    """Return `<root_path>/domain_<domain_name>/<suffix>` as a Path.

    `root_path` and `domain_name` are read from the active control file
    (controlFolder/controlFile) on every call.
    """
    control = controlFolder/controlFile

    # Root of the data directory and the name of the modelling domain
    root = Path(read_from_control(control, 'root_path'))
    domain = read_from_control(control, 'domain_name')

    return root / ('domain_' + domain) / suffix

#### Find location of shapefile and MOD10A1 raster

In [6]:
# Catchment shapefile path & name, exactly as written in the control file.
# The path may still be the literal string 'default' at this point.
catchment_path = read_from_control(controlFolder/controlFile,'catchment_shp_path')
catchment_name = read_from_control(controlFolder/controlFile,'catchment_shp_name')

In [7]:
# Resolve the catchment folder: 'default' maps to the standard domain sub-folder,
# anything else is taken as a user-specified location. Either way the result is a Path().
catchment_path = (
    make_default_path('shapefiles/catchment')
    if catchment_path == 'default'
    else Path(catchment_path)
)

In [21]:
# MOD10A1 raster path & name, exactly as written in the control file.
# The path may still be the literal string 'default' at this point.
mod10a1_path = read_from_control(controlFolder/controlFile,'observation_snow_mod10a1_path')
mod10a1_name = read_from_control(controlFolder/controlFile,'observation_snow_mod10a1_name')

In [22]:
# Resolve the MOD10A1 folder: 'default' maps to the standard domain sub-folder,
# anything else is taken as a user-specified location. Either way the result is a Path().
mod10a1_path = (
    make_default_path('observations/RS_Snow/MOD10A1/6_tif_multiband')
    if mod10a1_path == 'default'
    else Path(mod10a1_path)
)

# Display the resolved folder
mod10a1_path

PosixPath('/Users/darrieythorsson/compHydro/data/CWARHM_data/domain_Yukon/observations/RS_Snow/MOD10A1/6_tif_multiband')

#### Find where the intersection needs to go

In [23]:
# Where the copied (intersection) shapefile goes and what it is called,
# exactly as written in the control file
control_file_path = controlFolder/controlFile
intersect_path = read_from_control(control_file_path,'intersect_mod10a1_path')
intersect_name = read_from_control(control_file_path,'intersect_mod10a1_name')

# Show both values, name first
print(intersect_name, intersect_path, sep='\n')

catchment_with_mod10a1.shp
default


In [24]:
# Resolve the intersection folder: 'default' maps to the standard domain sub-folder,
# anything else is taken as a user-specified location. Either way the result is a Path().
intersect_path = (
    make_default_path('shapefiles/catchment_intersection/with_mod10a1')
    if intersect_path == 'default'
    else Path(intersect_path)
)

In [25]:
# Make the folder if it doesn't exist.
# Missing parent folders are created too; an existing folder is not an error.
intersect_path.mkdir(parents=True, exist_ok=True)

#### Copy the source catchment shapefile into the destination location

In [26]:
# Find the name without extension.
# Note: str.replace removes every occurrence of '.shp' in the name, not only a trailing one.
catchment_base = catchment_name.replace('.shp','')

In [27]:
# Copy every component file of the source shapefile (.shp, .shx, .dbf, .prj, ...)
# into the intersection folder under the intersection file's base name.
basefile,_ = os.path.splitext(intersect_name)     # name of the intersection file w/o extension
prefix = catchment_base + '.'                     # "<base name>." must open the file name

for file in os.listdir(catchment_path):

    # Match on the full base name rather than on a substring. Otherwise files of
    # another shapefile whose name merely contains the base name (e.g. '<base>_old.shp')
    # are copied as well and overwrite each other in the destination.
    if not file.startswith(prefix):
        continue

    # Everything after the base name, so compound extensions such as '.shp.xml' survive
    ext = file[len(catchment_base):]
    newfile = basefile + ext                      # new name + old extension

    # copy
    copyfile(catchment_path/file, intersect_path/newfile)

## Get the SUMMA output file

In [28]:
# SUMMA output location and experiment ID, exactly as written in the control file.
# The path may still be the literal string 'default' at this point.
simulation_path = read_from_control(controlFolder/controlFile,'experiment_output_summa')
simulation_name = read_from_control(controlFolder/controlFile,'experiment_id')

In [29]:
# Specify default path if needed
if simulation_path == 'default':
    # Default: the daily SUMMA output file of this experiment inside the domain folder
    simulation_path = make_default_path('simulations/' + simulation_name + '/SUMMA/' + simulation_name + '_day.nc')
else:
    # make sure a user-specified path is a Path()
    # NOTE(review): the default branch points at the '_day.nc' file itself, so a
    # user-specified value presumably also has to be the full file path - confirm.
    simulation_path = Path(simulation_path)

# Display the resolved path
simulation_path

PosixPath('/Users/darrieythorsson/compHydro/data/CWARHM_data/domain_Yukon/simulations/run_Yukon_Merit_1/SUMMA/run_Yukon_Merit_1_day.nc')

## Rasterstats analysis

In [30]:
# Open the SUMMA output and keep only the time steps from 2002-01-01 through 2002-12-31
analysis_period = slice('2002-01-01','2002-12-31')
ds = xr.open_dataset(simulation_path).sel(time = analysis_period)

# Display the subset
ds

#### Spatial analysis

In [31]:
# Convert Path() objects to plain strings.
# The polygon file is the copied shapefile in the intersection folder, not the source catchment file.
catchment_file = str(intersect_path/intersect_name) # the copied file, so the source shapefile is left untouched
mod10a1_file = str(mod10a1_path/mod10a1_name)

In [39]:
# Polygon layer: the copied catchment shapefile
layer_polygon = catchment_file

# Raster layer: the multi-band MOD10A1 GeoTIFF of one year, located under the
# MOD10A1 folder taken from the control file instead of a machine-specific absolute path.
# The layout <folder>/<year>/domain_MOD10A1_<year>.tif reproduces the path used before.
# NOTE(review): the simulation is sliced to 2002 while this raster is for 2005 - confirm the intended year.
raster_year = '2005'
layer_raster = Path(mod10a1_path) / raster_year / ('domain_MOD10A1_' + raster_year + '.tif')

print(layer_raster)
print(layer_polygon)

/Users/darrieythorsson/compHydro/data/CWARHM_data/domain_Yukon/observations/RS_Snow/MOD10A1/6_tif_multiband/2004/domain_MOD10A1_2004.tif
/Users/darrieythorsson/compHydro/data/CWARHM_data/domain_Yukon/shapefiles/catchment_intersection/with_mod10a1/catchment_with_mod10a1.shp


In [40]:
# Open the MOD10A1 raster; the handle stays open for later inspection
raster_file = rasterio.open(layer_raster)

# First band as float, with every cell above 101 blanked out as NaN
first_band = raster_file.read(1).astype(float)
raster = np.where(first_band > 101.0, np.nan, first_band)

#plt.imshow(raster, cmap="BrBG")
#plt.title("Raster")
#plt.show()


In [41]:
raster_file.count

348

In [21]:
raster

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., 83., 83., nan],
       [nan, nan, nan, ..., 81., 81., nan],
       [nan, nan, nan, ..., 81., 81., nan]])

In [22]:
# Zonal mean of every raster band over the catchment polygons.
# Result: `ndsi`, a list with one pandas Series (one value per polygon) per time step of `ds`.

# Read the polygons once; they are the same for every band
shp = gpd.read_file(layer_polygon)

ndsi = []
with rasterio.open(layer_raster) as raster_file:   # closes the raster handle when done
    affine = raster_file.transform

    # Band i+1 is paired with time step i, so the raster must provide at least one
    # band per time step. Check up front instead of failing part-way through the loop.
    n_steps = len(ds.time)
    if raster_file.count < n_steps:
        raise ValueError(
            'Raster {} has {} bands but the simulation has {} time steps'.format(
                layer_raster, raster_file.count, n_steps))

    for band in range(1, n_steps + 1):

        # Band as float so cells above 101 can be blanked out as NaN
        array = raster_file.read(band).astype(float)
        array[array > 101.0] = np.nan

        # Only the mean is used. nodata=-999 is presumably the value rasterstats falls
        # back to (with a warning) when none is given for an array input - TODO confirm.
        zstats = rasterstats.zonal_stats(shp, array, affine=affine, nodata=-999, stats=['mean'])
        zstats = pd.DataFrame(zstats)

        # Polygons without any valid cell come back as None; force a float column
        # so they become NaN and every time step has the same dtype
        ndsi.append(zstats['mean'].astype(float))

print(ndsi)

In [None]:
# Attach the zonal means to the simulation dataset as a (time, hru) variable
ndsi_dims = ['time','hru']
ds['ndsi'] = (ndsi_dims, ndsi)

# Correlation between NDSI and simulated SWE along time, one value per HRU;
# the absolute value is stored on the polygons
correlation_map = xr.corr(ds['ndsi'], ds['scalarSWE'], dim ='time')
shp['ndsi_corr'] = abs(correlation_map)

In [None]:
# Where to store the shapefile with the correlation attribute, as written in the control file
save_corr_file_path = read_from_control(controlFolder/controlFile,'observation_snow_correlation_path')
save_corr_file_name = read_from_control(controlFolder/controlFile,'observation_snow_correlation_name')

# Specify default path if needed
if save_corr_file_path == 'default':
    save_corr_file_path = make_default_path('evaluation/MOD10A1') # outputs a Path()
else:
    save_corr_file_path = Path(save_corr_file_path) # make sure a user-specified path is a Path()

# Make the folder if it doesn't exist, as is done for the intersection folder
save_corr_file_path.mkdir(parents=True, exist_ok=True)

# Write the polygons, including the new attribute, to the output location
shp.to_file(save_corr_file_path / save_corr_file_name)







  shp.to_file(save_corr_file_path/Path(save_corr_file_name))


[[255 239 239 ... 239 239 239]
 [255 239 239 ... 239 239 239]
 [255 239 239 ... 239 239 239]
 ...
 [255 239 239 ... 250 250 250]
 [255 239 239 ... 250 250 250]
 [255 239 239 ... 250 250 250]]
[{'min': 54.0, 'max': 250.0, 'mean': 109.35341337907376, 'count': 14575}, {'min': 40.0, 'max': 87.0, 'mean': 67.4421052631579, 'count': 855}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 6718}, {'min': 21.0, 'max': 250.0, 'mean': 137.65284702177905, 'count': 11433}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 20225}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 8}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 5736}, {'min': 18.0, 'max': 250.0, 'mean': 180.18817308312376, 'count': 31014}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 3602}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 6049}, {'min': 18.0, 'max': 250.0, 'mean': 242.7997585998793, 'count': 16570}, {'min': 250.0, 'max': 250.0, 'mean': 250.0, 'count': 4878}, {'min': 33.0, 'max': 250.0, 'm

#### Code provenance
Generates a basic log file in the domain folder and copies the control file and itself there.

In [None]:
# Set the log path and file name.
# The log lives next to the intersection output; the suffix names this workflow step
# (catchment x MOD10A1) rather than the DEM intersection it was copied from.
logPath = intersect_path
log_suffix = '_catchment_mod10a1_intersect_log.txt'

In [None]:
# Make sure the log sub-folder exists inside the log path
logFolder = '_workflow_log'
(logPath / logFolder).mkdir(parents=True, exist_ok=True)

In [None]:
# Copy this script into the log folder.
# The source name is relative, so it is looked up in the current working directory.
# NOTE(review): the name below refers to the HRU-elevation notebook, while this notebook
# intersects the catchment with MOD10A1 - confirm it matches this notebook's actual file name.
thisFile = '1_find_HRU_elevation.ipynb'
copyfile(thisFile, logPath / logFolder / thisFile);

In [None]:
# Get current date and time (local time, no timezone attached)
now = datetime.now()

In [None]:
# Create a log file named <YYYYMMDD><log_suffix> in the log folder.
# An existing log from the same day is overwritten.
logFile = now.strftime('%Y%m%d') + log_suffix
with open( logPath / logFolder / logFile, 'w') as file:

    # The second line describes what this notebook does: zonal mean MOD10A1 NDSI
    # per HRU and its correlation with simulated SWE (not the DEM elevation workflow).
    lines = ['Log generated by ' + thisFile + ' on ' + now.strftime('%Y/%m/%d %H:%M:%S') + '\n',
             'Found mean MOD10A1 NDSI per HRU and correlated it with simulated SWE.']
    file.writelines(lines)