In [1]:
%matplotlib inline

import glob
import importlib.util
import os

import gdal
import geopandas as gpd
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import rasterio
from IPython.display import display

plt.style.use("ggplot")

In [2]:
# Import CL functions
spec = importlib.util.spec_from_file_location(
    "critical_loads", "../../notebooks/critical_loads.py"
)
cl = importlib.util.module_from_spec(spec)
spec.loader.exec_module(cl)

In [3]:
# Connect to PostGIS
eng = nivapy.da.connect_postgis()

Connection successful.


In [4]:
# Connect to CL db
cl_eng = nivapy.da.connect_postgis(database="critical_loads")

Connection successful.


# CL Vestland: Calculate exceedances

**Note:** This notebook is rather messy and should be tidied up. In particular, cell 6 currently subsets the data to avoid issues with overlapping catchment polygons: the three non-overlapping polygons are processed simultaneously, and the fourth is then processed separately. This **requires running the notebook twice** (once for each catchment "subset"), each time changing the name of the summary CSV generated in section 5 to avoid overwriting. This is "hacky" and can be cleaned up, but it seems pragmatic for the moment.

## 1. Get catchments of interest

### 1.1. Outflow points

In [5]:
# Get outflows
sql = (
    "SELECT * FROM niva.stations "
    "WHERE station_id IN ( "
    "  SELECT station_id FROM niva.projects_stations "
    "  WHERE project_id IN ( "
    "    SELECT project_id FROM niva.projects "
    "    WHERE project_name = 'CL Vestland' "
    "    ) "
    "  ) "
)
stn_gdf = gpd.read_postgis(sql, eng)

# Reproject to ETRS89 UTM Z33N
stn_gdf = stn_gdf.to_crs("epsg:25833")

stn_gdf.head()

Unnamed: 0,station_id,station_code,station_name,aquamonitor_id,longitude,latitude,geom
0,1260,Eks_Main,Eksingedal_Main,,5.793674,60.728518,POINT (-1042.153 6767775.232)
1,1261,Eks_Opp,Eksingedal_Oppstr,,5.90576,60.792344,POINT (6017.082 6773983.678)
2,1262,Eks_Side,Eksingedal_Side,,5.806698,60.734305,POINT (-246.174 6768315.730)
3,1263,Gud_Main,Guddal_Main,,5.345209,61.31132,POINT (-15742.579 6835715.827)
4,1264,Gud_Opp,Guddal_Oppstr,,5.590573,61.229553,POINT (-4014.772 6824753.423)


### 1.2. Catchment boundaries

In [6]:
# Get catchments
stn_list = list(stn_gdf["station_id"].astype(str))
bind_pars = ",".join(stn_list)
sql = f"SELECT * FROM niva.catchments " f"WHERE station_id IN ({bind_pars})"
cat_gdf = gpd.read_postgis(sql, eng)

# Reproject to ETRS89 UTM Z33N
cat_gdf = cat_gdf.to_crs("epsg:25833")

# Join codes
cat_gdf = cat_gdf.merge(
    stn_gdf[["station_id", "station_code"]], how="left", on="station_id"
)
cat_gdf = cat_gdf[["station_id", "station_code", "geom"]]

# Add area
cat_gdf["cat_area_km2"] = cat_gdf["geom"].area / 1e6

cat_gdf.head()

Unnamed: 0,station_id,station_code,geom,cat_area_km2
0,1260,Eks_Main,"MULTIPOLYGON (((3082.020 6765260.340, 3036.760...",255.688755
1,1261,Eks_Opp,"MULTIPOLYGON (((7265.790 6767632.250, 7240.550...",193.956874
2,1262,Eks_Side,"MULTIPOLYGON (((6017.098 6773983.675, 5981.501...",39.101413
3,1263,Gud_Main,"MULTIPOLYGON (((-15898.619 6835765.865, -15817...",263.187712
4,1264,Gud_Opp,"MULTIPOLYGON (((-1925.930 6823525.440, -1851.4...",94.099467


## 2. Calculate critical loads

### 2.1. Process input template

Kari has supplied two input templates - one for the "main" catchments and one for the subcatchments. This avoid issues of overlapping polygons during the rasterisation steps. **This notebook should therefore be run twice, modifying `catch_set` below, as necessary**.


We wish to use the "F-factor" method for these calculations.

In [7]:
# Choose catchments to process (either 'main' or 'sub')
catch_set = "main"

assert catch_set in ["sub", "main"]

In [8]:
# Path to completed template
xl_path = f"../data/input_template_critical_loads_water_Vestland_{catch_set}.xlsx"

# Read template
req_df = pd.read_excel(xl_path, sheet_name="required_parameters")
mag_df = pd.read_excel(xl_path, sheet_name="magic_parameters")
opt_df = pd.read_excel(xl_path, sheet_name="optional_parameters")

# Read quantites calculated in notebook 2 and patch input template
# Required pars
nupt_df = pd.read_csv(r"../output/cl_vestland_nupt_summary.csv")
nupt_df = pd.merge(nupt_df, stn_gdf, how="left", on="station_id")
nupt_df = nupt_df[["station_name", "nupt_meqpm2pyr"]]
nupt_df.columns = ["Region_id", "Nupt"]
del req_df["Nupt"]
req_df = pd.merge(req_df, nupt_df, how="left", on="Region_id")

# Optional pars
lake_df = pd.read_csv(r"../output/cl_vestland_nve_lakes_db_summary.csv")
lake_df = pd.merge(lake_df, stn_gdf, how="left", on="station_id")
lake_df = pd.merge(
    lake_df, cat_gdf[["station_id", "cat_area_km2"]], how="left", on="station_id"
)
lake_df = lake_df[
    [
        "station_name",
        "cat_area_km2",
        "area_km2",
    ]
]
lake_df.columns = ["Region_id", "Catch_area", "Lake_area"]

# Kari would like to assume the forest area is (1 - Lake_Area). See e-mail received
# 11.12.2020 at 08.30 for details
lake_df["Forest_area"] = lake_df["Catch_area"] - lake_df["Lake_area"]

opt_df.drop(["Catch_area", "Lake_area", "Forest_area"], axis="columns", inplace=True)
opt_df = pd.merge(opt_df, lake_df, how="left", on="Region_id")

In [9]:
# Set BC0 method
bc0_method = "_Ffac"

# Calculate CLs
cl_df = cl.calculate_critical_loads_for_water(
    req_df=req_df, mag_df=mag_df, opt_df=opt_df
)

# Get cols of interest
cols = [
    "Region_id",
    f"CLAOAA{bc0_method}_meq/m2/yr",
    "ENO3_flux_meq/m2/yr",
    "CLminN_meq/m2/yr",
    f"CLmaxNoaa{bc0_method}_meq/m2/yr",
    f"CLmaxSoaa{bc0_method}_meq/m2/yr",
]
cl_df = cl_df[cols]

cl_df

Unnamed: 0,Region_id,CLAOAA_Ffac_meq/m2/yr,ENO3_flux_meq/m2/yr,CLminN_meq/m2/yr,CLmaxNoaa_Ffac_meq/m2/yr,CLmaxSoaa_Ffac_meq/m2/yr
0,Eksingedal_Main,70.857152,20.696279,3.398348,85.180176,71.150682
1,Modal_Main,34.417273,12.131256,3.051812,45.812502,34.854685
2,Uskedal_Main,108.333074,74.946459,3.81215,128.923918,108.794215
3,Yndesdal_Main,14.06762,12.264412,3.111899,20.06623,14.197445
4,Guddal_Main,36.082843,7.562008,3.930213,48.747023,36.542096
5,Samnanger_Main,49.928898,28.130218,3.162959,62.464704,50.306267


## 2.2. Rasterise critical loads

In [10]:
# Cell size for rasterisation
cell_size = 50

# Snap tiff
snap_tif = f"../raster/cl_vestland_snap_ras_{cell_size}m.tif"

# Simplify col names (as units are consistent)
cl_df.columns = [i.split("_")[0].lower() for i in cl_df.columns]
cl_df.rename({"region": "station_name"}, inplace=True, axis=1)
cl_df.dropna(how="any", inplace=True)

# Add CLminS as 0
cl_df["clmins"] = 0

# Join to catchments
cl_df = pd.merge(
    cl_df, stn_gdf[["station_name", "station_code"]], on="station_name", how="left"
)
del cl_df["station_name"]
cat_gdf = cat_gdf.merge(cl_df, how="left", on="station_code")
cat_gdf.dropna(inplace=True)
cat_gdf.reset_index(inplace=True, drop=True)

# Save temporary file
temp = "../raster/temp.geojson"
cat_gdf.to_file(temp, driver="GeoJSON")

# Rasterize each column
cols = ["claoaa", "eno3", "clminn", "clmaxnoaa", "clmaxsoaa", "clmins"]
for col in cols:
    print(f"Rasterising {col}...")
    # Tiff to create
    out_tif = f"../raster/critical_loads/{col}_meqpm2pyr_{cell_size}m.tif"
    cl.vec_to_ras(temp, out_tif, snap_tif, col, -9999, "Float32")

# Delete temp file
os.remove(temp)

cat_gdf

Rasterising claoaa...
Rasterising eno3...
Rasterising clminn...
Rasterising clmaxnoaa...
Rasterising clmaxsoaa...
Rasterising clmins...


Unnamed: 0,station_id,station_code,geom,cat_area_km2,claoaa,eno3,clminn,clmaxnoaa,clmaxsoaa,clmins
0,1260,Eks_Main,"MULTIPOLYGON (((3082.020 6765260.340, 3036.760...",255.688755,70.857152,20.696279,3.398348,85.180176,71.150682,0.0
1,1263,Gud_Main,"MULTIPOLYGON (((-15898.619 6835765.865, -15817...",263.187712,36.082843,7.562008,3.930213,48.747023,36.542096,0.0
2,1265,Mod_Main,"MULTIPOLYGON (((14094.657 6778408.305, 14086.6...",380.979125,34.417273,12.131256,3.051812,45.812502,34.854685,0.0
3,1275,Sam_Main,"MULTIPOLYGON (((547.072 6746299.978, 595.735 6...",236.241723,49.928898,28.130218,3.162959,62.464704,50.306267,0.0
4,1277,Usk_Main,"MULTIPOLYGON (((-8426.110 6679150.110, -8345.3...",45.818372,108.333074,74.946459,3.81215,128.923918,108.794215,0.0
5,1278,Ynd_Main,"MULTIPOLYGON (((-24317.570 6787914.540, -24284...",119.32353,14.06762,12.264412,3.111899,20.06623,14.197445,0.0


## 3. Process deposition data

### 3.1. Select deposition series

In [11]:
# List available series
with pd.option_context("display.max_colwidth", -1):
    ser_grid = cl.view_dep_series(cl_eng)
    display(ser_grid)

Unnamed: 0,series_id,name,short_name,grid,description
0,1,Middel 1978-1982,7882,blr,Fordelt til BLR av NILU 2002
1,2,Middel 1992-1996,9296,blr,Fordelt til BLR av NILU 2002
2,3,Middel 1997-2001,9701,blr,Fordelt til BLR av NILU 2002
3,4,Middel 2002-2006,0206,blr,Fordelt til BLR av NILU 2008 (Wenche Aas)
4,5,Beregnet 2010,,,"Gøteborg protokollen 1999. ""Gamle"""
...,...,...,...,...,...
60,61,EMEP 1995 b,emep1995b,emep,"1995 data based on the EMEP 0.50x0.25 degree grid. 3 vegetation classes. Extracted by Max Posch, received Oct 2020"
61,62,EMEP 2000 b,emep2000b,emep,"2000 data based on the EMEP 0.50x0.25 degree grid. 3 vegetation classes. Extracted by Max Posch, received Oct 2020"
62,63,EMEP 2005 b,emep2005b,emep,"2005 data based on the EMEP 0.50x0.25 degree grid. 3 vegetation classes. Extracted by Max Posch, received Oct 2020"
63,64,EMEP 2010 b,emep2010b,emep,"2010 data based on the EMEP 0.50x0.25 degree grid. 3 vegetation classes. Extracted by Max Posch, received Oct 2020"


We are interested in series IDs 28 and 59. Series ID 65 will also be required for bias correction.

### 3.2. Rasterise deposition data

In [12]:
ser_dict = {"1216": 28, "2015": 65, "2030": 59}

for par in ["nitrogen", "sulphur"]:
    for period in ser_dict.keys():
        print(f"Rasterising {par}, {period}...")
        ser_id = ser_dict[period]

        # Get dep data
        dep_gdf = cl.extract_deposition_as_gdf(
            ser_id, par, cl_eng, veg_class="grid average"
        ).to_crs("epsg:25833")

        # Save temporary file
        temp = "../raster/temp.geojson"
        dep_gdf.to_file(temp, driver="GeoJSON")

        # Convert to raster
        col_name = f"{par[0]}dep_meqpm2pyr"
        out_tif = (
            f"../raster/deposition/{par[0]}dep_{period}_meqpm2pyr_{cell_size}m.tif"
        )
        cl.vec_to_ras(temp, out_tif, snap_tif, col_name, -9999, "Float32")

        # Delete temp file
        os.remove(temp)

Rasterising nitrogen, 1216...
Rasterising nitrogen, 2015...
Rasterising nitrogen, 2030...
Rasterising sulphur, 1216...
Rasterising sulphur, 2015...
Rasterising sulphur, 2030...


### 3.3. Bias-correct deposition data

In [13]:
# Output location
data_fold = r"../raster/deposition"

# Loop over pars
for par in ["nitrogen", "sulphur"]:
    # Paths
    base_path = os.path.join(data_fold, f"{par[0]}dep_1216_meqpm2pyr_{cell_size}m.tif")
    emep2015_path = os.path.join(
        data_fold, f"{par[0]}dep_2015_meqpm2pyr_{cell_size}m.tif"
    )
    emep2030_path = os.path.join(
        data_fold, f"{par[0]}dep_2030_meqpm2pyr_{cell_size}m.tif"
    )

    # Datasets
    base_src = rasterio.open(base_path)
    emep2015_src = rasterio.open(emep2015_path)
    emep2030_src = rasterio.open(emep2030_path)

    # Create output dataset
    prof = base_src.profile
    out_tif = os.path.join(data_fold, f"{par[0]}dep_2030bc_meqpm2pyr_{cell_size}m.tif")
    out_dst = rasterio.open(out_tif, "w", **prof)

    # Process in blocks to conserve memory
    for block_index, window in base_src.block_windows(1):
        # Read block
        base_block = base_src.read(window=window).astype(float)
        emep2015_block = emep2015_src.read(window=window).astype(float)
        emep2030_block = emep2030_src.read(window=window).astype(float)

        # Set NoData to NaN
        base_block[base_block == -9999] = np.nan
        emep2015_block[emep2015_block == -9999] = np.nan
        emep2030_block[emep2030_block == -9999] = np.nan

        # Apply delta-change method
        pred2030_block = base_block * emep2030_block / emep2015_block

        # Set NaN to -9999
        pred2030_block = np.nan_to_num(pred2030_block, nan=-9999)

        out_dst.write(pred2030_block.astype(rasterio.float32), window=window)

    # Tidy up
    base_src.close()
    emep2015_src.close()
    emep2030_src.close()
    out_dst.close()

## 4. Calculate exceedances

### 4.1. SSWC

**Note:** Values <0 are no longer set back to zero in the code below. See e-mail from Kari received 11.06.2020 at 15.08. Uncomment the line in the cell below to use the "standard" approach.

In [14]:
for period in ["1216", "2030bc"]:
    # Read grids
    s_tif = f"../raster/deposition/sdep_{period}_meqpm2pyr_{cell_size}m.tif"
    s_dep, s_ndv, epsg, extent = nivapy.spatial.read_raster(s_tif)

    eno3_tif = f"../raster/critical_loads/eno3_meqpm2pyr_{cell_size}m.tif"
    eno3fl, eno3_ndv, epsg, extent = nivapy.spatial.read_raster(eno3_tif)

    claoaa_tif = f"../raster/critical_loads/claoaa_meqpm2pyr_{cell_size}m.tif"
    claoaa, cla_ndv, epsg, extent = nivapy.spatial.read_raster(claoaa_tif)

    # Set ndv
    s_dep[s_dep == s_ndv] = np.nan
    eno3fl[eno3fl == eno3_ndv] = np.nan
    claoaa[claoaa == cla_ndv] = np.nan

    # Exceedance
    sswc_ex = s_dep + eno3fl - claoaa
    del s_dep, eno3fl, claoaa

    # Set <0 to 0
    # sswc_ex[sswc_ex < 0] = 0

    # Write geotif
    sswc_tif = f"../raster/exceedance/sswc_ex_{period}_meqpm2pyr_{cell_size}m.tif"
    cl.write_geotiff(sswc_ex, sswc_tif, snap_tif, -1, gdal.GDT_Float32)
    del sswc_ex

### 4.2. FAB

In [15]:
# Read CL arrays
for period in ["1216", "2030bc"]:
    array_dict = {}

    for name in ["clminn", "clmaxnoaa", "clmins", "clmaxsoaa"]:
        # Read tif
        tif_path = f"../raster/critical_loads/{name}_meqpm2pyr_{cell_size}m.tif"
        data, ndv, epsg, extent = nivapy.spatial.read_raster(tif_path)
        data[data == ndv] = np.nan
        array_dict[name] = data

    # Read dep arrays
    for name in ["ndep", "sdep"]:
        # Read tif
        tif_path = f"../raster/deposition/{name}_{period}_meqpm2pyr_{cell_size}m.tif"
        data, ndv, epsg, extent = nivapy.spatial.read_raster(tif_path)
        data[data == ndv] = np.nan
        array_dict[name] = data

    # Extract arrays from dict
    cln_min = array_dict["clminn"]
    cln_max = array_dict["clmaxnoaa"]
    cls_min = array_dict["clmins"]
    cls_max = array_dict["clmaxsoaa"]
    dep_n = array_dict[f"ndep"]
    dep_s = array_dict[f"sdep"]

    # Estimate exceedances
    ex_n, ex_s, reg_id = cl.vectorised_exceed_ns_icpm(
        cln_min, cln_max, cls_min, cls_max, dep_n, dep_s
    )

    # Save GeoTiffs
    # N
    n_tif = f"../raster/exceedance/fab_ex_n_{period}_meqpm2pyr_{cell_size}m.tif"
    cl.write_geotiff(ex_n, n_tif, snap_tif, -1, gdal.GDT_Float32)

    # S
    s_tif = f"../raster/exceedance/fab_ex_s_{period}_meqpm2pyr_{cell_size}m.tif"
    cl.write_geotiff(ex_s, s_tif, snap_tif, -1, gdal.GDT_Float32)

    # N+S
    ns_tif = f"../raster/exceedance/fab_ex_ns_{period}_meqpm2pyr_{cell_size}m.tif"
    cl.write_geotiff(ex_n + ex_s, ns_tif, snap_tif, -1, gdal.GDT_Float32)

    # Exceedance 'region'
    reg_tif = f"../raster/exceedance/fab_ex_reg_id_{period}_{cell_size}m.tif"
    cl.write_geotiff(reg_id, reg_tif, snap_tif, -1, gdal.GDT_Float32)

## 5. Summary statistics

In [16]:
# Save temporary file
temp = "../raster/temp.geojson"
cat_gdf.to_file(temp, driver="GeoJSON")

# Get paths to all dep and ex grids
search_path1 = "../raster/exceedance/*.tif"
flist1 = glob.glob(search_path1)
search_path2 = "../raster/deposition/*.tif"
flist2 = glob.glob(search_path2)
search_path3 = "../raster/critical_loads/*.tif"
flist3 = glob.glob(search_path3)
flist = flist1 + flist2 + flist3

df_list = []
for fname in flist:
    ds_name = os.path.split(fname)[1][:-4]
    sum_df = nivapy.spatial.zonal_stats(temp, fname, -9999, global_src_extent=True)
    sum_df["dataset"] = ds_name
    sum_df = pd.concat([sum_df, cat_gdf[["station_code"]]], axis=1, sort=True)
    df_list.append(sum_df)

sum_df = pd.concat(df_list, sort=True)
sum_df = sum_df[
    ["dataset", "station_code", "min", "mean", "max", "std", "count"]
].reset_index(drop=True)

os.remove(temp)

# Save
csv_path = f"../output/cl_vestland_results_summary_meqpm2pyr_{catch_set}_catches.csv"
sum_df.to_csv(csv_path, index=False)

sum_df.head(20)

Unnamed: 0,dataset,station_code,min,mean,max,std,count
0,fab_ex_n_2030bc_meqpm2pyr_50m,Eks_Main,0.0,0.0,0.0,0.0,102245
1,fab_ex_n_2030bc_meqpm2pyr_50m,Gud_Main,0.0,4.51825,7.224773,2.342739,105275
2,fab_ex_n_2030bc_meqpm2pyr_50m,Mod_Main,0.0,0.18767,1.608936,0.496311,152332
3,fab_ex_n_2030bc_meqpm2pyr_50m,Sam_Main,0.0,0.0,0.0,0.0,94510
4,fab_ex_n_2030bc_meqpm2pyr_50m,Usk_Main,0.0,0.0,0.0,0.0,18329
5,fab_ex_n_2030bc_meqpm2pyr_50m,Ynd_Main,26.920616,31.153116,34.78672,1.990038,47744
6,sswc_ex_2030bc_meqpm2pyr_50m,Eks_Main,-46.262722,-45.424749,-42.952797,0.816003,102245
7,sswc_ex_2030bc_meqpm2pyr_50m,Gud_Main,-23.59082,-20.446053,-18.614641,1.31118,105275
8,sswc_ex_2030bc_meqpm2pyr_50m,Mod_Main,-18.387865,-17.763239,-16.380424,0.631806,152332
9,sswc_ex_2030bc_meqpm2pyr_50m,Sam_Main,-16.405926,-15.0767,-13.967899,0.585489,94510
