In [1]:
from osgeo import ogr
from osgeo import gdal
import numpy as np
import pandas as pd
import geopandas as gpd
import critical_loads as cl
from IPython.display import Image
import imp

# Critical loads for vegetation

## 1. Mosaic 30 m land cover data

Raw land cover data with 30 m resolution are here:

K:\Avdeling\317 Klima- og miljømodellering\KAU\Focal Centre\Vegetation\Veg map\satveg_30\0

I've copied this locally and used the `Mosaic_To_New_Raster` tool in ArcToolbox to combine the tiles into a single 8-bit integer GeoTiff (`sat_veg_30m_all.tif`), which has an uncompressed size of 2.2 GB.

## 2. Reclassify vegetation according to critical loads

### 2.1. Read lookup table linking vegetation classes to critical loads

The land use classes for the vegetation map are given here:

C:\Data\James_Work\Staff\Kari_A\Critical_Loads\sat_veg_land_use_classes.xlsx

The first step is to reclassify the land use grid according to the critical loads. Land classes 0, 23, 24 and 25 are not used, so I'll set the values to 255 (for no data).

**Update 17/10/2017:** Following discussion with Kari, we have agreed to initially process the data in units of mgN/m2/yr. I have therefore created a new column in the spreadsheet above called `CL_100smgN/m2/yr`, which divides the original values by 100 (and sets the values for the NoData classes to 255). Dividing by 100 makes it possible to store the dataset as an 8-bit integer grid without loss of precision (because all the raw values are integer multiples of 100), **but need to remember to convert units back during processing!**


In [2]:
# Spreadsheet linking each vegetation class code to its critical load
in_xlsx = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\sat_veg_land_use_classes.xlsx'

# The first spreadsheet column becomes the index of the lookup table
df = pd.read_excel(in_xlsx, sheetname='EUNIS_tilGIS', index_col=0)

# Keep only the critical loads column (values already divided by 100 in the
# spreadsheet) and store it as whole numbers
df = df.loc[:, ['CL_100smgN/m2/yr']]
df = df.round(0)
df = df.astype(int)

df

Unnamed: 0_level_0,CL_100smgN/m2/yr
NORUTcode,Unnamed: 1_level_1
1,5
2,5
3,5
4,10
5,10
6,5
7,5
8,5
9,5
10,10


### 2.2. Reclassify

In [3]:
# National 30 m vegetation map (input)
in_tif = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Raster\sat_veg_30m_all.tif'

# GeoTiff that will receive the critical loads values (output)
out_tif = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Raster\sat_veg_30m_cr_lds_div100.tif'

# Replace vegetation class codes using the lookup table read above.
# NOTE(review): the last two arguments are presumably the lookup column
# and the no-data value (255) - confirm against critical_loads.py
cl.reclassify_raster(in_tif, out_tif, df, 'CL_100smgN/m2/yr', 255)

## 3. Process deposition data

### 3.1. Get deposition data from database

The deposition data are found in `RESA2.DEP_BLR_VALUES`. Parameter IDs are defined in `RESA2.AIR_PARAMETER_DEFINITIONS` and the various data series are defined in `RESA2.DEP_SERIES_DEFINITIONS`. For vegetation, we calculate exceedance as

$$E_{veg} = Dep_N - CL$$

where $Dep_N$ is total nitrogen deposition and $CL$ is the critical load (see e-mail from Espen received  03/10/2017 at 14.25). Parameter ID 6 in `RESA2.AIR_PARAMETER_DEFINITIONS` corresponds to `ENTot` in `mEkv/m2/year`, which I assume is what we use for $Dep_N$ in the equation above. The different time series definitions are less obvious to me, and I'm not sure which ones we need to recalculate this year. I have exported this table to:

C:\Data\James_Work\Staff\Kari_A\Critical_Loads\dep_series_names.xlsx

**Ask Kari/Espen which series we are interested in**. For now, I'll just pick a series at random for use in testing.

**Update 05/10/2017:** See e-mail from Kari received 05/10/2017 at 13.11. We are interested in series IDs 1 to 4 and 25. However, these do not have values for `ENTot`, so I instead need to calculate N deposition as the sum of `N (oks)` and `N (red)`, converted to equivalents.

**Update 17/10/2017:** We have agreed to work in mgN initially, so no need to convert to equivalents.

In [4]:
# Load the RESA2 helper module directly from its source file
resa2_basic_path = r'C:\Data\James_Work\Staff\Heleen_d_W\ICP_Waters\Upload_Template\useful_resa2_code.py'
resa2_basic = imp.load_source('useful_resa2_code', resa2_basic_path)

# Open the database connection; `engine` is what pandas uses for queries
engine, conn = resa2_basic.connect_to_resa2()

In [5]:
# Deposition series of interest: database series ID -> output column name.
# Keeping the pairing in one place means the SQL filter and the column
# names cannot drift apart.
# NOTE(review): this pairing reproduces the previous positional naming
# (IDs in ascending order) - confirm against RESA2.DEP_SERIES_DEFINITIONS
series_names = {1: 'Ndep78_82',
                2: 'Ndep92_96',
                3: 'Ndep97_01',
                4: 'Ndep02_06',
                25: 'Ndep07_11'}
series_ids = sorted(series_names)

# Get all dep values for parameters 1 and 2 in the chosen series
sql = ("SELECT blr, dep_series_id as series, value as dep "
       "FROM resa2.dep_blr_values "
       "WHERE parameter_id IN (1, 2) "
       "AND dep_series_id IN (%s)" % ', '.join(str(i) for i in series_ids))

df = pd.read_sql(sql, engine)

# Sum N_oks and N_red for each grid cell and series.
# NOTE(review): if only one of the two parameters exists for a cell/series,
# the "sum" is just that single value - check whether this can occur
df = df.groupby(['blr', 'series'])['dep'].sum()

# Reshape to one row per BLR cell and one column per series ID
df = df.unstack('series')

# Fail with a clear message if the database returned nothing for a series,
# rather than mislabelling or failing on a column-count mismatch
missing = [i for i in series_ids if i not in df.columns]
if missing:
    raise ValueError('No deposition values returned for series: %s' % missing)

# Name the columns explicitly by series ID (not by position) and tidy
df = df[series_ids].rename(columns=series_names)
df.columns.name = None

# Range of the data; a single formatted argument prints the same on
# Python 2 and Python 3
print('%s %s' % (np.nanmin(df.values), np.nanmax(df.values)))

df.head()

69.8 2252.72


Unnamed: 0_level_0,Ndep78_82,Ndep92_96,Ndep97_01,Ndep02_06,Ndep07_11
blr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
58006001,1700.0,1771.17,1576.91,,1363.48
58006002,1728.85,1247.05,1149.28,1192.63,1383.95
58006003,1217.42,1171.2,1059.42,1157.39,1203.15
58006004,1392.29,1310.32,1425.35,1302.15,1112.38
58006005,1293.28,1302.73,1255.17,1037.57,1184.53


These values do not lie between 0 and 255, so we'll need to store them as 16-bit integers.

For now, just process one column as an example.

In [6]:
# Use a single series as an example: discard cells without a value,
# round to whole numbers and move 'blr' from the index into a column
dep_df = df[['Ndep07_11']].dropna(how='any')
dep_df = dep_df.round(0).astype(int)
dep_df = dep_df.reset_index()

# The shapefile names the grid cell ID column 'BLR'
dep_df.columns = ['BLR', 'Ndep07_11']

dep_df.head()

Unnamed: 0,BLR,Ndep07_11
0,58006001,1363
1,58006002,1384
2,58006003,1203
3,58006004,1112
4,58006005,1185


### 3.2. Join to BLR grid

The deposition values from the database need to be joined to the BLR grid shapefile.

In [7]:
# Shapefile holding the BLR grid polygons
in_shp = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Shapefiles\blrgrid_uten_grums_utm_z33n.shp'

blr_df = gpd.read_file(in_shp)

# Attach the deposition values to the polygons via the shared 'BLR' column
# (inner join, so only cells present in both tables are kept)
dep_df = blr_df.merge(dep_df, how='inner', on='BLR')

# The polygon area attribute is not needed in the output
del dep_df['area_m2']

dep_df.head()

Unnamed: 0,BLR,geometry,Ndep07_11
0,58006001,POLYGON ((-10886.67732153146 6503779.108531611...,1363
1,58006002,"POLYGON ((3660.479678658361 6501900.217185441,...",1384
2,58006003,"POLYGON ((18211.98529702786 6500076.037824233,...",1203
3,58006004,"POLYGON ((32767.70791637653 6498306.548390091,...",1112
4,58006005,POLYGON ((-11562.15574961301 6498623.842200487...,1185


In [8]:
# Save the joined grid (polygons + deposition values) as a new shapefile
out_shp = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Shapefiles\dep_07-11.shp'

dep_df.to_file(out_shp)

### 3.3. Convert BLR grid to 30 m raster

We can now convert the shapefile to a 30 m raster with exactly the same extent etc. as the reclassified vegetation map. All values here are stored as 16-bit integers.

In [9]:
# Deposition shapefile written in the previous step (input)
in_shp = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Shapefiles\dep_07-11.shp'

# 30 m deposition raster (output)
out_tif = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Raster\dep_07-11_30m.tif'

# Vegetation mosaic used as the snap raster
snap_tif = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Raster\sat_veg_30m_all.tif'

# Burn the 'Ndep07_11' attribute into a 16-bit integer grid.
# NOTE(review): -1 is presumably the no-data value - confirm against
# critical_loads.py
cl.vec_to_ras(in_shp, out_tif, snap_tif, 'Ndep07_11', -1, gdal.GDT_Int16)

## 4. Calculate exceedance

Exceedance can now be calculated easily by subtracting these grids. Some care is needed to properly handle no data values and to avoid arithmetic over-/under-flow. In the code below, I temporarily upcast both grids to 32-bit floats. This is not very memory efficient, but it's easy and with 32 GB of RAM on my laptop it should be fine. We also need to convert the critical loads units back by multiplying by 100.

In [10]:
# Paths to CL and DEP grids
cl_tif = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads'
          r'\GIS\Raster\sat_veg_30m_cr_lds_div100.tif')

dep_tif = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads'
           r'\GIS\Raster\dep_07-11_30m.tif')

# Read grids together with their no-data values
cl_grid, cl_ndv = cl.read_geotiff(cl_tif)
dep_grid, dep_ndv = cl.read_geotiff(dep_tif)

# Upcast to float32 so that no-data can be held as NaN and the subtraction
# can go negative. astype() returns new arrays, so the in-place operations
# below never modify what read_geotiff returned
cl_grid = cl_grid.astype(np.float32)
dep_grid = dep_grid.astype(np.float32)

# Set ndv
cl_grid[cl_grid==cl_ndv] = np.nan
dep_grid[dep_grid==dep_ndv] = np.nan

# Apply scaling factor to CLs (stored divided by 100). Done in place to
# avoid allocating another full-size grid
cl_grid *= 100.

# Exceedance = deposition - critical load. The deposition array is reused
# for the result, again to avoid an extra full-size allocation
dep_grid -= cl_grid
ex_grid = dep_grid
del dep_grid, cl_grid

# Set <0 to 0. Comparisons against NaN are False (so no-data cells are left
# alone); errstate only silences the 'invalid value' warning they trigger
with np.errstate(invalid='ignore'):
    ex_grid[ex_grid<0] = 0

# Reset ndv
ex_grid[np.isnan(ex_grid)] = -1

# Downcast to int16 to save space
ex_grid = ex_grid.round(0).astype(np.int16)



In [11]:
# Exceedance GeoTiff to create (output)
out_tif = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Raster\exceed_07-11_30m.tif'

# Vegetation mosaic used as the snap raster
snap_tif = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS\Raster\sat_veg_30m_all.tif'

# Save the exceedance grid as 16-bit integers.
# NOTE(review): -1 is presumably the no-data value - confirm against
# critical_loads.py
cl.write_geotiff(ex_grid, out_tif, snap_tif, -1, gdal.GDT_Int16)

This whole workflow takes approximately 5 minutes to run, which I think is much faster than the previous vector-based approach. Once I know which deposition series we are interested in, I can restructure the code to loop over all the datasets.

An ArcMap file showing the results from this experiment can be found on the network here:

K:\Prosjekter\JES\Critical_Loads

## 5. Loop over all data

The code below combines the steps above and loops over all 5 datasets. It also calculates the proportion of Norway where critical loads have been exceeded.

In [12]:
%%time

# Path to raw BLR shapefile
in_shp = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS'
          r'\Shapefiles\blrgrid_uten_grums_utm_z33n.shp')

# Snap raster
snap_tif = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads'
            r'\GIS\Raster\sat_veg_30m_all.tif')

# Critical loads raster
cl_tif = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads'
          r'\GIS\Raster\sat_veg_30m_cr_lds_div100.tif')

# Deposition series to process (columns of the deposition table `df`)
series_list = ['Ndep78_82', 'Ndep92_96', 'Ndep97_01', 'Ndep02_06', 'Ndep07_11']

# Edge length of one raster cell in metres, used to turn cell counts
# into areas
cell_size_m = 30.

# Container for output
data_dict = {'series':[],
             'nor_area_km2':[],
             'ex_area_km2':[]}

# The BLR grid is identical for every series, so read it once. merge()
# below returns a new frame, so blr_df itself is never modified
blr_df = gpd.read_file(in_shp)

# Loop over series
for ser in series_list:

    print('Processing: %s' % ser)
    print('    Building deposition shapefile...')

    # Get deposition: drop cells with no value and round to integers
    dep_df = df[[ser]].dropna(how='any').round(0).astype(int).reset_index()

    # Rename cols to match shapefile
    dep_df.columns = ['BLR', ser]

    # Join and tidy
    dep_df = blr_df.merge(dep_df, on='BLR')
    del dep_df['area_m2']

    # Write output shapefile
    dep_shp = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\GIS'
               r'\Shapefiles\dep_%s.shp' % ser)
    dep_df.to_file(dep_shp)

    # Convert shp to ras
    print('    Rasterising shapefile...')

    # Output BLR raster
    dep_tif = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads'
               r'\GIS\Raster\dep_%s_30m.tif' % ser)

    cl.vec_to_ras(dep_shp, dep_tif, snap_tif, ser, -1, gdal.GDT_Int16)

    # Exceedance
    print('    Calculating exceedance...')

    # Read grids. The CL grid is re-read on every pass (and deleted below)
    # so that it is not held in memory between iterations
    cl_grid, cl_ndv = cl.read_geotiff(cl_tif)
    dep_grid, dep_ndv = cl.read_geotiff(dep_tif)

    # Upcast to float32 so that no-data can be held as NaN and the
    # subtraction can go negative. astype() returns new arrays, so the
    # in-place operations below never modify what read_geotiff returned
    cl_grid = cl_grid.astype(np.float32)
    dep_grid = dep_grid.astype(np.float32)

    # Set ndv
    cl_grid[cl_grid==cl_ndv] = np.nan
    dep_grid[dep_grid==dep_ndv] = np.nan

    # Get total area of non-NaN from dep grid (cells -> m2 -> km2)
    n_dep_cells = np.count_nonzero(~np.isnan(dep_grid))
    nor_area = n_dep_cells*cell_size_m*cell_size_m/1.E6

    # Apply scaling factor to CLs (stored divided by 100). Done in place
    # to avoid allocating another full-size grid
    cl_grid *= 100.

    # Exceedance = deposition - critical load. The deposition array is
    # reused for the result to avoid an extra full-size allocation
    dep_grid -= cl_grid
    ex_grid = dep_grid
    del dep_grid, cl_grid

    # Comparisons against NaN are False, so no-data cells are neither
    # counted nor zeroed; errstate only silences the 'invalid value'
    # warnings those comparisons trigger
    with np.errstate(invalid='ignore'):
        # Get total area exceeded (cells -> m2 -> km2)
        n_ex_cells = np.count_nonzero(ex_grid > 0)
        ex_area = n_ex_cells*cell_size_m*cell_size_m/1.E6

        # Set <0 to 0
        ex_grid[ex_grid<0] = 0

    # Reset ndv
    ex_grid[np.isnan(ex_grid)] = -1

    # Downcast to int16 to save space
    ex_grid = ex_grid.round(0).astype(np.int16)

    # Append results
    data_dict['series'].append(ser)
    data_dict['nor_area_km2'].append(nor_area)
    data_dict['ex_area_km2'].append(ex_area)

    # Write output
    print('    Saving exceedance grid...')
    ex_tif = (r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads'
              r'\GIS\Raster\exceed_%s_30m.tif' % ser)

    cl.write_geotiff(ex_grid, ex_tif, snap_tif, -1, gdal.GDT_Int16)

    print('    Done.')

# Build output df. Column order is given explicitly so that it does not
# depend on dictionary ordering
ex_df = pd.DataFrame(data_dict,
                     columns=['series', 'ex_area_km2', 'nor_area_km2'])
ex_df = ex_df.set_index('series')
ex_df['ex_pct'] = 100 * ex_df['ex_area_km2'] / ex_df['nor_area_km2']
ex_df = ex_df.round(0).astype(int)

# Save
out_csv = r'C:\Data\James_Work\Staff\Kari_A\Critical_Loads\nor_prop_exceed.csv'
ex_df.to_csv(out_csv)

print('Finished.')

Processing: Ndep78_82
    Building deposition shapefile...
    Rasterising shapefile...
    Calculating exceedance...




    Saving exceedance grid...
    Done.
Processing: Ndep92_96
    Building deposition shapefile...
    Rasterising shapefile...
    Calculating exceedance...
    Saving exceedance grid...
    Done.
Processing: Ndep97_01
    Building deposition shapefile...
    Rasterising shapefile...
    Calculating exceedance...
    Saving exceedance grid...
    Done.
Processing: Ndep02_06
    Building deposition shapefile...
    Rasterising shapefile...
    Calculating exceedance...
    Saving exceedance grid...
    Done.
Processing: Ndep07_11
    Building deposition shapefile...
    Rasterising shapefile...
    Calculating exceedance...
    Saving exceedance grid...
    Done.
Finished.
Wall time: 12min 3s


In [13]:
# Display the summary table: exceeded area, total area and percentage
# exceeded for each deposition series
ex_df

Unnamed: 0_level_0,ex_area_km2,nor_area_km2,ex_pct
series,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ndep78_82,95122,320584,30
Ndep92_96,71470,320584,22
Ndep97_01,69485,320584,22
Ndep02_06,71065,320390,22
Ndep07_11,68475,320584,21
