In [1]:
%matplotlib ipympl
import nivapy3 as nivapy
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import critical_loads as cl
import os
import statsmodels.formula.api as sm

# Use the 'ggplot' style sheet for every matplotlib figure in this notebook
plt.style.use("ggplot")

# Critical Loads: DSToolkit migration (November 2019)

This notebook transfers the Critical Loads data from NIVA's various internal data sources onto the JupyterHub/DSToolkit. It is modified from the workflow originally developed in [this notebook](https://nbviewer.jupyter.org/github/JamesSample/critical_loads_2/blob/master/notebooks/migrate_to_docker_test.ipynb). **The code here primarily focuses on the post-2017 workflow ("*new method; new grid*")**, although some older datasets are transferred as well.

**Note:** This notebook must be run locally from within the NIVA firewall in order to establish a connection to the NIVABASE; it cannot be run directly from the JupyterHub.

## 1. Setup 

### 1.1. Establish database connections

Datasets are transferred from (i) the NIVABASE Oracle instance, (ii) a local PostGIS database and (iii) various Excel, CSV and spatial datasets on NIVA's internal network. All data is stored on the JupyterHub either in a dedicated PostGIS database named `critical_loads`, or on the `shared` drive under:

    shared/critical_loads
    
The code below installs PostGIS if it's not already present in the database, and it also grants the default `Jovyan` JupyterHub user read-only access to all the datasets.

In [2]:
# Open the NIVABASE connection (prompts for username and password).
# 'ora_eng' is the source engine for every RESA2 query further down.
ora_eng = nivapy.da.connect()

Username:  ···
Password:  ········


Connection successful.


In [3]:
# Admin connection to the local PostGIS database ('niva_work'),
# which holds the 0.1 degree deposition grid copied later on
loc_pg_settings = dict(
    admin=True,
    host='host.docker.internal',
    database='niva_work',
    port=5432,
)
loc_pg_eng = nivapy.da.connect_postgis(**loc_pg_settings)

Username:  ········
Password:  ········


Connection successful.


In [5]:
# Admin connection to the 'critical_loads' PostGIS database used by the JupyterHub.
# This is the destination engine for all tables written by this notebook.
# NOTE(review): the host is a hard-coded IP address - consider moving it to
# configuration so the notebook does not need editing if the server moves.
jup_pg_settings = dict(
    admin=True,
    host='104.199.55.41',
    database='critical_loads',
    port=5432,
)
jup_pg_eng = nivapy.da.connect_postgis(**jup_pg_settings)

Username:  ········
Password:  ·······


Connection successful.


### 1.2. Define schemas

In [6]:
# Activate PostGIS extension
sql = "CREATE EXTENSION IF NOT EXISTS postgis"
jup_pg_eng.execute(sql)

# Create one schema per data theme (deposition, vegetation, water, soil) plus
# one for the summary tables. Every schema is created through the JupyterHub
# engine, and 'IF NOT EXISTS' keeps the cell safe to re-run.
schema_names = ['deposition', 'vegetation', 'water', 'soil', 'summaries']

for schema_name in schema_names:
    sql = "CREATE SCHEMA IF NOT EXISTS " + schema_name
    jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9cb1df28>

In [None]:
# Give the default 'jovyan' user read-only access: permission to connect,
# USAGE on each schema, SELECT on the tables that exist now, and SELECT on
# any tables created in those schemas later (default privileges).
schemas = ['deposition', 'vegetation', 'water', 'soil', 'summaries']

per_schema_templates = [
    'GRANT USAGE ON SCHEMA {} TO jovyan',
    'GRANT SELECT ON ALL TABLES IN SCHEMA {} TO jovyan',
    'ALTER DEFAULT PRIVILEGES IN SCHEMA {} GRANT SELECT ON TABLES TO jovyan',
]

# Statements are grouped by type (all USAGE grants, then all SELECT grants,
# then all default-privilege changes), after the database-level grant
sql_list = ['GRANT CONNECT ON DATABASE critical_loads TO jovyan']
sql_list += [template.format(schema)
             for template in per_schema_templates
             for schema in schemas]

for sql in sql_list:
    jup_pg_eng.execute(sql)

## 2. Deposition data

### 2.1. Transfer BLR and 0.1 degree deposition grids

From 2018 onwards, deposition data from NILU is supplied on a 0.1 degree grid. The code below transfers it to the JupyterHub.

In [8]:
# Read the 0.1 degree deposition grid from the local PostGIS database
sql = "SELECT * FROM public.dep_grid_0_1deg"
dep_gdf = gpd.read_postgis(sql, loc_pg_eng)

# Copy it into the 'deposition' schema on the JupyterHub,
# replacing the table if it is already there
nivapy.da.gdf_to_postgis(
    dep_gdf,
    'dep_grid_0_1deg',
    'deposition',
    jup_pg_eng,
    'dep_dep_grid_0_1deg_spidx',
    if_exists='replace',
    index=False,
    method='multi',
    chunksize=1000,
)

# NivaPy adds its own 'id' key column on upload; remove it ...
sql = "ALTER TABLE deposition.dep_grid_0_1deg DROP COLUMN id"
jup_pg_eng.execute(sql)

# ... and make the grid's own 'cell_id' column the primary key instead
sql = ("ALTER TABLE deposition.dep_grid_0_1deg "
       "ADD CONSTRAINT dep_grid_0_1deg_pk PRIMARY KEY (cell_id)")
jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9ca36d68>

In previous years, data have been supplied using the BLR grid, which is available as a shapefile. The code below loads this into the database as well.

In [9]:
# Load the BLR grid shapefile and lower-case the 'BLR' id column
blr_path = r'../../../data/vector/blrgrid_uten_grums.shp'
dep_gdf = gpd.read_file(blr_path)
dep_gdf.rename(columns={'BLR':'blr'}, inplace=True)

# Copy it into the 'deposition' schema on the JupyterHub,
# replacing the table if it is already there
nivapy.da.gdf_to_postgis(
    dep_gdf,
    'dep_grid_blr',
    'deposition',
    jup_pg_eng,
    'dep_dep_grid_blr_spidx',
    if_exists='replace',
    index=False,
    method='multi',
    chunksize=1000,
)

# NivaPy adds its own 'id' key column on upload; remove it ...
sql = "ALTER TABLE deposition.dep_grid_blr DROP COLUMN id"
jup_pg_eng.execute(sql)

# ... and make the 'blr' column the primary key instead
sql = ("ALTER TABLE deposition.dep_grid_blr "
       "ADD CONSTRAINT dep_grid_blr_pk PRIMARY KEY (blr)")
jup_pg_eng.execute(sql)

  {'MultiPolygon', 'Polygon'}
These will be cast to "Multi" type. If this is not what you want, consider using gdf.explode() first


<sqlalchemy.engine.result.ResultProxy at 0x7fbd9c793c18>

### 2.2. Transfer "Deposition Series Definitions" from the NIVABASE

In RESA2, the table `DEP_SERIES_DEFINITIONS` identifies the datasets previously supplied by NILU. Each dataset has a separate row (and ID) in this table. For consistency, I will transfer this table directly and use the same dataset IDs in the new workflow.

In [10]:
# Fetch the complete series definitions table from RESA2, shorten the
# id column name to 'series_id' and order the rows by that id
sql = "SELECT * FROM resa2.dep_series_definitions"
df = (pd.read_sql(sql, ora_eng)
        .rename(columns={'dep_series_id':'series_id'})
        .sort_values('series_id'))

# Show the most recent series
df.tail()

Unnamed: 0,series_id,name,description
28,26,Middel 2012-2016,Fordelt til BLR av NILU 2017 (Wenche Aas; old ...
16,27,Middel 2012-2016 (new),Fordelt til BLR av NILU 2017 (Wenche Aas; new ...
17,28,Middel 2012-2016 (new; hi-res),Fordelt til BLR av NILU 2017 (Wenche Aas; new ...
29,29,Middel 1983-1987,Fordelt til BLR av NILU 2019 (Wenche Aas; old ...
27,30,Middel 1988-1992,Fordelt til BLR av NILU 2019 (Wenche Aas; old ...


In [11]:
# Store the series definitions as deposition.dep_series_defs,
# replacing the table if it is already there
df.to_sql(
    name='dep_series_defs',
    con=jup_pg_eng,
    schema='deposition',
    if_exists='replace',
    index=False,
    chunksize=1000,
    method='multi',
)

# Use the 'series_id' col as primary key
sql = ("ALTER TABLE deposition.dep_series_defs "
       "ADD CONSTRAINT dep_series_defs_pk PRIMARY KEY (series_id)")
jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9c7a4668>

### 2.3. Transfer "Deposition Parameter Definitions" from the NIVABASE

In RESA2, the table `AIR_PARAMETER_DEFINITIONS` identifies various deposition parameters. As far as I can see, only the first three or four entries in this table are currently relevant. Many of the columns also seem unnecessary at present.

In [12]:
# Fetch only the deposition parameters with ids 1 to 4 from RESA2
sql = ("SELECT * FROM resa2.air_parameter_definitions "
       "WHERE parameter_id < 5")

# Order by id, discard the columns that are not needed in the new
# database and shorten the id column name to 'param_id'
unwanted_cols = ['formula', 'category', 'function', 'entered_by', 'entered_date']
df = (pd.read_sql(sql, ora_eng)
        .sort_values('parameter_id')
        .drop(columns=unwanted_cols)
        .rename(columns={'parameter_id':'param_id'}))
df

Unnamed: 0,param_id,name,unit,description
0,1,N(oks),mg N/m2/year,Oxidized nitrogen
1,2,N(red),mg N/m2/year,Reduced nitogen
2,3,S,mg S/m2/year,Total sulphur
3,4,S*,mg S/m2/year,Non marine sulphur


In [13]:
# Store the parameter definitions as deposition.dep_param_defs,
# replacing the table if it is already there
df.to_sql(
    name='dep_param_defs',
    con=jup_pg_eng,
    schema='deposition',
    if_exists='replace',
    index=False,
    chunksize=1000,
    method='multi',
)

# Use the 'param_id' col as primary key
sql = ("ALTER TABLE deposition.dep_param_defs "
       "ADD CONSTRAINT dep_param_defs_pk PRIMARY KEY (param_id)")
jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9ca0d7f0>

### 2.4. Data for "Deposition Values"

#### 2.4.1. Create tables

The old workflow used the coarse BLR grid for deposition values. Deposition `series_ids` from 1 to 27 are all based on this grid, so these values will be transferred to a new table named `'dep_values_blr_grid'`. Deposition `series_id=28` uses the new 0.1 degree grid, so this dataset (and subsequent ones) will need adding to a separate table (`'dep_values_0_1deg_grid'`). As future data will also be delivered using this grid, it will be useful to have a function for processing and adding raw data to this table.

Additionally, both these tables need constraints to ensure that relevant series and parameter IDs are defined before uploading data. Note that I have not included a constraint on the BLR/cell ID, because the data supplied by NILU often includes cells that are outside of terrestrial Norway.

In [14]:
# Start from a clean slate: remove both value tables if they are present
sql = ("DROP TABLE IF EXISTS deposition.dep_values_blr_grid, "
       "  deposition.dep_values_0_1deg_grid")
jup_pg_eng.execute(sql)

# The two tables differ only in their name and in the column identifying the
# grid cell. Both require the series and parameter ids to be defined already
# (foreign keys); the grid cell column is deliberately left unconstrained.
create_template = (
    "CREATE TABLE deposition.{table} "
    "( "
    "  series_id integer NOT NULL, "
    "  {grid_col} integer NOT NULL, "
    "  param_id integer NOT NULL, "
    "  value numeric, "
    "  PRIMARY KEY (series_id, {grid_col}, param_id), "
    "  CONSTRAINT series_id_fkey FOREIGN KEY (series_id) "
    "      REFERENCES deposition.dep_series_defs (series_id) "
    "      ON UPDATE NO ACTION ON DELETE NO ACTION, "
    "  CONSTRAINT param_id_fkey FOREIGN KEY (param_id) "
    "      REFERENCES deposition.dep_param_defs (param_id) "
    "      ON UPDATE NO ACTION ON DELETE NO ACTION "
    ")"
)

value_tables = [
    ('dep_values_blr_grid', 'blr'),         # BLR grid
    ('dep_values_0_1deg_grid', 'cell_id'),  # 0.1 degree grid
]

for table, grid_col in value_tables:
    sql = create_template.format(table=table, grid_col=grid_col)
    jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9ca0d438>

#### 2.4.2. Transfer "old" data

The old data using the BLR grid are stored in RESA2 in the table `DEP_BLR_VALUES`. We'll just transfer the data for `param_ids` 1 to 4.

In [15]:
# Fetch the BLR-grid deposition values for parameters 1 to 4 from RESA2 and
# rename the id columns to match the new table definition
sql = ("SELECT * FROM resa2.dep_blr_values "
       "WHERE parameter_id < 5")
column_map = {'dep_series_id':'series_id',
              'parameter_id':'param_id'}
df = pd.read_sql(sql, ora_eng).rename(columns=column_map)

# Append to the (already created) deposition.dep_values_blr_grid table
df.to_sql(
    name='dep_values_blr_grid',
    con=jup_pg_eng,
    schema='deposition',
    if_exists='append',
    index=False,
    chunksize=1000,
    method='multi',
)

#### 2.4.3. Import "new" data

In 2017, NILU supplied raw data for the 0.1 degree grid in `.dat` format. I previously wrote some code ([here](http://nbviewer.jupyter.org/github/JamesSample/critical_loads/blob/master/notebooks/critical_loads_workflow_new_grid.ipynb#1.1.-Upload-new-data-to-database)) to process this data. I have now generalised this and moved it into `critical_loads.py`, along with other useful functions.

In [16]:
# Folder holding the raw NILU files for 2012-2016, and the id of the
# matching entry in the series definitions table
nilu_fold = r'../../../data/raw/nilu_dep/2012-2016'
nilu_series_id = 28

# Process the raw files and add them to the database
df = cl.upload_nilu_0_1deg_dep_data(nilu_fold, jup_pg_eng, nilu_series_id)
df.head()

207000 new rows added successfully.


Unnamed: 0,cell_id,param_id,value,series_id
0,50050305,2,270.87,28
1,50050315,2,240.5,28
2,50050325,2,259.19,28
3,50050335,2,252.06,28
4,50050345,2,332.82,28


## 3. Vegetation data

### 3.1. Transfer critical loads table for vegetation

The Excel file 

    new_workflow_nov_2018\data\raw\veg_cl_classes\sat_veg_land_use_classes.xlsx
    
contains critical load values for various vegetation classes. The original is in the sheet named `'EUNIS_tilGIS'`, but I have also created a tidied version (with e.g. lower case column names) in the sheet named `'eunis_lower_case'`.

As above, if this table is likely to change, it should be normalised and loaded into the database more carefully. For now, the "flat" Excel format is convenient, but I nevertheless want to store it in the database with the rest of the data.

In [18]:
# Load the tidied (lower case) sheet of the vegetation critical loads workbook
xl_path = r'../../../data/raw/veg_cl_classes/sat_veg_land_use_classes.xlsx'
df = pd.read_excel(xl_path, sheet_name='eunis_lower_case')

# Store it as vegetation.land_class_crit_lds,
# replacing the table if it is already there
df.to_sql(
    name='land_class_crit_lds',
    con=jup_pg_eng,
    schema='vegetation',
    if_exists='replace',
    index=False,
    chunksize=1000,
    method='multi',
)

# Use the 'norut_code' col as primary key
sql = ("ALTER TABLE vegetation.land_class_crit_lds "
       "ADD CONSTRAINT veg_land_class_crit_lds_pk PRIMARY KEY (norut_code)")
jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9bf38860>

### 3.2. Transfer vector vegetation data

Analyses undertaken for the periods 2002-2006 and 2007-2011 used a vector vegetation dataset. From 2012-2016 onwards, this has been replaced by a more detailed raster dataset (Section 3.3, below). The code in this section adds the old vector dataset to PostGIS for future reference.

In [20]:
# Load the pre-2012 vector vegetation dataset
shp_path = r'../../../data/vector/nye_2010_talegrenser_veg.shp'
veg_gdf = gpd.read_file(shp_path)

# Discard the columns that are not needed and lower-case the remaining names
for unwanted_col in ['ID', 'ID_1', 'GRIDCODE', 'EUNIS_1']:
    del veg_gdf[unwanted_col]
veg_gdf.columns = [col_name.lower() for col_name in veg_gdf.columns]

# Reproject to WGS84
wgs84 = {'init':'epsg:4326'}
veg_gdf = veg_gdf.to_crs(wgs84)

# Store as vegetation.vector_veg_pre_2012, replacing any existing table.
# Here the key column added by NivaPy is kept as the primary key.
nivapy.da.gdf_to_postgis(
    veg_gdf,
    'vector_veg_pre_2012',
    'vegetation',
    jup_pg_eng,
    'vector_veg_pre_2012_spidx',
    if_exists='replace',
    index=False,
    method='multi',
    chunksize=1000,
)

### 3.3. Transfer vegetation grids

The code in Section 1.3 of [this notebook](http://nbviewer.jupyter.org/github/JamesSample/critical_loads/blob/master/notebooks/critical_loads_workflow_new_grid.ipynb#1.3.-Reclassify) converted 30 m resolution satellite vegetation data into a grid of critical loads (based on the Excel table above - Section 3.1). **Unless we update the vegetation data, this calculation does not need to be performed again**.

I have experimented with storing these grids in the PostGIS database, but performance seems poor. Instead, I've added them to the JupyterHub `shared` drive here:

    shared/critical_loads/raster
   
For the vegetation exceedance calculations, the following grids are relevant:

 * `sat_veg_30m_all.tif`
 * `sat_veg_60m_all.tif`
 * `sat_veg_120m_all.tif`
 * `sat_veg_30m_cr_lds_div100.tif`
 * `sat_veg_60m_cr_lds_div100.tif`
 * `blr_land_mask.tif`
 * `blr_land_mask_60m.tif`

### 3.4. Create tables to store exceedances

Summary exceedance statistics for each grid cell are stored in the database in the following tables.

In [21]:
# The three exceedance tables share the same layout. They differ only in
# their name, in the column that identifies the spatial/thematic unit, and
# in the table that column must refer to.
#   (table name, key column, table referenced by the key column)
exceedance_tables = [
    ('exceedance_stats_blr_grid', 'blr', 'deposition.dep_grid_blr'),              # BLR grid
    ('exceedance_stats_0_1deg_grid', 'cell_id', 'deposition.dep_grid_0_1deg'),    # 0.1 degree grid
    ('exceedance_stats_land_class', 'norut_code', 'vegetation.land_class_crit_lds'),  # Land class
]

# Delete if already exist. All three tables are dropped (including the land
# class table) so that the CREATE statements below succeed on a re-run.
sql = ("DROP TABLE IF EXISTS "
       + ", ".join('vegetation.' + spec[0] for spec in exceedance_tables))
jup_pg_eng.execute(sql)

# Each table is keyed on (series_id, <key column>), with foreign keys ensuring
# that the series and the grid cell / land class are defined before any
# statistics are stored for them
create_template = (
    "CREATE TABLE vegetation.{table} "
    "( "
    "  series_id integer NOT NULL, "
    "  {key_col} integer NOT NULL, "
    "  exceeded_area_km2 numeric, "
    "  total_area_km2 numeric, "
    "  pct_exceeded numeric, "
    "  PRIMARY KEY (series_id, {key_col}), "
    "  CONSTRAINT series_id_fkey FOREIGN KEY (series_id) "
    "      REFERENCES deposition.dep_series_defs (series_id) "
    "      ON UPDATE NO ACTION ON DELETE NO ACTION, "
    "  CONSTRAINT {key_col}_fkey FOREIGN KEY ({key_col}) "
    "      REFERENCES {ref_table} ({key_col}) "
    "      ON UPDATE NO ACTION ON DELETE NO ACTION "
    ")"
)

for table, key_col, ref_table in exceedance_tables:
    sql = create_template.format(table=table,
                                 key_col=key_col,
                                 ref_table=ref_table)
    jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9b8be588>

## 4. Water data

### 4.1. MAGIC model output for regression

Kari has a spreadsheet here

    K:\Avdeling\317 Klima- og miljømodellering\KAU\Focal Centre\Data\bc0regresjonNOK_TL2005-rapport_KAU.xls
    
containing output from the MAGIC model. This is used to generate parameters for the calculation of critical loads to water. It is unclear whether this data will be updated in the future. If so, the dataset should be properly normalised before adding to the database. However, for now I'm assuming that this is a static dataset, so I'm just adding the essential data to the database as a "flat" table for the purposes of data storage.

In [24]:
# Load the tidied MAGIC output from the 'tidied' sheet of the workbook
xl_path = r'../../../../water/update_nov_2018/regression_data_tidied.xls'
df = pd.read_excel(xl_path, sheet_name='tidied')

# Store it as a flat table, water.magic_regression_data,
# replacing the table if it is already there
df.to_sql(
    name='magic_regression_data',
    con=jup_pg_eng,
    schema='water',
    if_exists='replace',
    index=False,
    chunksize=1000,
    method='multi',
)

# One row per station and simulation year:
# use ('resa_stn_id', 'sim_yr') as primary key
sql = ("ALTER TABLE water.magic_regression_data "
       "ADD CONSTRAINT water_magic_regression_data_pk "
       "PRIMARY KEY (resa_stn_id, sim_yr)")
jup_pg_eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7fbd9b8c94e0>

### 4.2. Perform regression

In the MAGIC data table, values for `bc_x_k` in 1860 are used to define $BC^*_0$, and values from 1986 are used to define $BC^*$. The regression uses $BC^*$ as the x-variable and $BC^*_0$ as the y-variable.

In [25]:
# Read the MAGIC values back from the database, indexed by station id
sql = ("SELECT resa_stn_id, sim_yr, bc_x_k "
       "FROM water.magic_regression_data")
df = pd.read_sql(sql, jup_pg_eng).set_index('resa_stn_id')

# 1860 values define BC0 ...
bc0_df = (df.query('sim_yr == 1860')[['bc_x_k']]
            .rename(columns={'bc_x_k':'BC0'}))

# ... and 1986 values define BC
bc_df = (df.query('sim_yr == 1986')[['bc_x_k']]
           .rename(columns={'bc_x_k':'BC'}))

# Put both columns side by side, matched on station id
df = bc0_df.join(bc_df)

# Ordinary least squares with BC as the x-variable and BC0 as the y-variable
# (an intercept is included by default)
model = sm.ols(formula='BC0 ~ BC', data=df)
res = model.fit()

print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                    BC0   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.985
Method:                 Least Squares   F-statistic:                     5320.
Date:                Mon, 25 Nov 2019   Prob (F-statistic):           1.20e-75
Time:                        12:54:41   Log-Likelihood:                -211.76
No. Observations:                  83   AIC:                             427.5
Df Residuals:                      81   BIC:                             432.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.2744      0.536      0.512      0.6

This is the same result as in Kari's Excel spreadsheet. Note, however, that the intercept is not significantly different from zero. This implies an alternative, "slope-only" model *might* be better.

In [26]:
# Refit without an intercept: '- 1' in the formula removes the constant term,
# leaving a slope-only model
slope_only_model = sm.ols(formula='BC0 ~ BC - 1', data=df)
res = slope_only_model.fit()
print(res.summary())

                                 OLS Regression Results                                
Dep. Variable:                    BC0   R-squared (uncentered):                   0.994
Model:                            OLS   Adj. R-squared (uncentered):              0.994
Method:                 Least Squares   F-statistic:                          1.310e+04
Date:                Mon, 25 Nov 2019   Prob (F-statistic):                    3.12e-92
Time:                        12:54:58   Log-Likelihood:                         -211.89
No. Observations:                  83   AIC:                                      425.8
Df Residuals:                      82   BIC:                                      428.2
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

This model is better (lower AIC and BIC) than the original, and it's also simpler. I therefore propose using

$$BC^*_0 = 0.9481BC^*_t$$

instead of 

$$BC^*_0 = 0.9431BC^*_t + 0.2744$$

## 5. Soils data

### 5.1. Critical Load for S

Critical loads for soil may be updated in the future, but for now I'll transfer the values for S stored in `RESA2.TALEGREN_VALUES` (with a unit conversion to $mgS/m^2/yr$).

In [None]:
# Delete if already exists
sql = "DROP TABLE IF EXISTS soil.s_critical_load"
jup_pg_eng.execute(sql)

# Create table for the soil critical loads for S: one row per BLR grid cell,
# holding the critical load in mg-S/m2/yr.
#   * 'blr' is the only key column, so it is declared as the primary key
#     directly; the table has no 'series_id'.
#   * "cl_mgSpm2" is quoted so that PostgreSQL keeps the mixed-case name.
#     An unquoted name would be folded to 'cl_mgspm2' and would then not
#     match a DataFrame column called 'cl_mgSpm2' when appending with pandas.
#   * The foreign key ensures every cell exists in the BLR grid table.
sql = ("CREATE TABLE soil.s_critical_load "
       "( "
       "  blr integer PRIMARY KEY, "
       "  \"cl_mgSpm2\" numeric, "
       "  CONSTRAINT blr_fkey FOREIGN KEY (blr) "
       "      REFERENCES deposition.dep_grid_blr (blr) "
       "      ON UPDATE NO ACTION ON DELETE NO ACTION "
       ")")
jup_pg_eng.execute(sql)

In [None]:
# Get all CLs for S
# in meq/m2/year
sql = ("SELECT blr, xvalue as crit_ld "
       "FROM resa2.talegren_values "
       "WHERE talegren_paramid = 86")
cl_df = pd.read_sql(sql, ora_eng)

# Convert to mg-S/m2/yr
# (presumably 32.06 is the molar mass of S in g/mol and 2 its charge, so that
# 1 meq corresponds to 32.06/2 mg of S - TODO confirm)
cl_df['cl_mgSpm2'] = cl_df['crit_ld']*32.06 / 2.

# Keep just the columns stored in the database and remove negative CLs
cl_df = (cl_df[['blr', 'cl_mgSpm2']]
           .query('cl_mgSpm2 >= 0')
           .reset_index(drop=True))

# Write to db. It is 'cl_df' (the critical loads) that must be written here,
# not 'df', which still holds the regression data from Section 4.
cl_df.to_sql('s_critical_load',
             jup_pg_eng,
             'soil',
             if_exists='append',
             index=False,
             method='multi',
             chunksize=1000,
            )

## 6. Create summary tables

In [None]:
# Create table for overall summary: one row per deposition series and medium,
# giving the total area, the exceeded area and the exceeded percentage.
# The foreign key requires the series to be defined in
# deposition.dep_series_defs first.
sql = ("CREATE TABLE summaries.national_summary "
       "( "
       "  series_id integer NOT NULL, "
       "  medium varchar NOT NULL, "
       "  total_area_km2 numeric, "
       "  exceeded_area_km2 numeric, "
       "  exceeded_area_pct numeric, "
       "  PRIMARY KEY (series_id, medium), "
       "  CONSTRAINT series_id_fkey FOREIGN KEY (series_id) "
       "      REFERENCES deposition.dep_series_defs (series_id) "
       "      ON UPDATE NO ACTION ON DELETE NO ACTION "
       ")")

# Run against the JupyterHub database, like every other table in this notebook
jup_pg_eng.execute(sql)