In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
import nivapy3 as nivapy
import numpy as np
import critical_loads as cl
import matplotlib.pyplot as plt
import os
import glob
from shapely.geometry import Polygon

plt.style.use("ggplot")

# Process and upload EMEP deposition data

This notebook processes EMEP deposition data on a 0.5 x 0.25 degree (longitude x latitude) grid and adds them to the database. See [this issue](https://github.com/JamesSample/critical_loads_2/issues/4) for details.

In [2]:
# Connect to PostGIS
eng = nivapy.da.connect_postgis(database="critical_loads", admin=True)

Username:  ········
Password:  ·······


Connection successful.


In [3]:
# Get a list of files to process
data_fold = r"/home/jovyan/shared/critical_loads/emep_data"
search_path = os.path.join(data_fold, "*.NO")
flist = glob.glob(search_path)
flist

['/home/jovyan/shared/critical_loads/emep_data/N_LoLa_a.NO',
 '/home/jovyan/shared/critical_loads/emep_data/S_LoLa_a.NO',
 '/home/jovyan/shared/critical_loads/emep_data/N_LoLa_f.NO',
 '/home/jovyan/shared/critical_loads/emep_data/A_LoLa_f.NO',
 '/home/jovyan/shared/critical_loads/emep_data/A_LoLa_a.NO',
 '/home/jovyan/shared/critical_loads/emep_data/A_LoLa_v.NO',
 '/home/jovyan/shared/critical_loads/emep_data/S_LoLa_v.NO',
 '/home/jovyan/shared/critical_loads/emep_data/S_LoLa_f.NO',
 '/home/jovyan/shared/critical_loads/emep_data/N_LoLa_v.NO']

## 1. Build vector grid for EMEP data

In [4]:
def build_rectangle(row):
    """ Build a rectangular grid-cell polygon around a cell centre.

        The cell is 0.5 degrees wide (longitude) and 0.25 degrees high (latitude).

    Args:
        row: Mapping-like object (e.g. a dataframe row) with 'lon' and 'lat'
             entries giving the co-ordinates of the cell centre

    Returns:
        Shapely Polygon. The ring starts at the lower-left corner, runs via the
        lower-right, upper-right and upper-left corners, and closes back on the
        lower-left.
    """
    centre_x, centre_y = row["lon"], row["lat"]

    # Cell edges: half a cell width/height either side of the centre
    west, east = centre_x - 0.25, centre_x + 0.25
    south, north = centre_y - 0.125, centre_y + 0.125

    ring = (
        (west, south),
        (east, south),
        (east, north),
        (west, north),
        (west, south),
    )

    return Polygon(ring)

In [5]:
# Build grid from single file
# NOTE(review): assumes every file in 'flist' covers the same set of cells - confirm
df = pd.read_csv(flist[0], skiprows=3)
df = df.rename(columns={"!Lon": "lon_ll", "Lat": "lat_ll"})
df.columns = [i.strip().lower() for i in df.columns]

# The file gives lower-left corners; shift by half a cell to get cell centres
df["lon"] = df["lon_ll"] + 0.25
df["lat"] = df["lat_ll"] + 0.125

# Build unique cell_id from centre co-ords: zero-padded (1000 * lat) followed by
# zero-padded (1000 * lon). Round before casting so that floating point error
# cannot truncate a value, and use int64 explicitly because the IDs are too
# large for a 32-bit integer
lat_code = (1000 * df["lat"]).round().astype("int64").astype(str).str.zfill(5)
lon_code = (1000 * df["lon"]).round().astype("int64").astype(str).str.zfill(5)
df["cell_id"] = (lat_code + lon_code).astype("int64")

# Convert cell centres to polys
df["geom"] = df.apply(build_rectangle, axis=1)

# Pass the CRS as an authority string. The older {"init": "epsg:4326"} form is
# deprecated and triggers a warning from pyproj
gdf = gpd.GeoDataFrame(df, geometry="geom", crs="EPSG:4326")

# Tidy
gdf = gdf[["cell_id", "lat", "lon", "lat_ll", "lon_ll", "geom"]]

# 'cell_id' is used as the primary key for the grid table, so it must be unique
assert gdf["cell_id"].is_unique, "Duplicate cell_id values in EMEP grid"

gdf.head()

  return _prepare_from_string(" ".join(pjargs))


Unnamed: 0,cell_id,lat,lon,lat_ll,lon_ll,geom
0,5712504250,57.125,4.25,57.0,4.0,"POLYGON ((4.00000 57.00000, 4.50000 57.00000, ..."
1,5712504750,57.125,4.75,57.0,4.5,"POLYGON ((4.50000 57.00000, 5.00000 57.00000, ..."
2,5712505250,57.125,5.25,57.0,5.0,"POLYGON ((5.00000 57.00000, 5.50000 57.00000, ..."
3,5712505750,57.125,5.75,57.0,5.5,"POLYGON ((5.50000 57.00000, 6.00000 57.00000, ..."
4,5712506250,57.125,6.25,57.0,6.0,"POLYGON ((6.00000 57.00000, 6.50000 57.00000, ..."


In [None]:
## Write to new db
#nivapy.da.gdf_to_postgis(
#    gdf,
#    "dep_grid_emep",
#    "deposition",
#    eng,
#    "dep_dep_grid_emep_spidx",
#    if_exists="replace",
#    index=False,
#    method="multi",
#    chunksize=1000,
#)
#
##  Drop primary key col added automatically by NivaPy
#sql = "ALTER TABLE deposition.dep_grid_emep DROP COLUMN id"
#eng.execute(sql)
#
## Use 'cell_id' col as primary key
#sql = (
#    "ALTER TABLE deposition.dep_grid_emep "
#    "ADD CONSTRAINT dep_grid_emep_pk "
#    "PRIMARY KEY (cell_id)"
#)
#eng.execute(sql)
#
## Add column for area
#sql = ("ALTER TABLE deposition.dep_grid_emep " 
#       "ADD COLUMN area_m2 numeric")
#eng.execute(sql)
#
#sql = ("UPDATE deposition.dep_grid_emep " 
#       "SET area_m2 = ST_Area(geom::geography)")
#eng.execute(sql)

## 2. Create tables for EMEP deposition values

### 2.1. Vegetation classes

The EMEP data include three values for each grid cell: one for "forest", one for "semi-natural" and one for "grid-average". I'll add a separate table to store these options.

In [6]:
# Lookup table of EMEP vegetation classes. IDs are assigned sequentially
# from 1, in the order the names are listed
veg_class_names = ("Grid average", "Forest", "Semi-natural")
veg_dict = {
    "veg_class_id": list(range(1, len(veg_class_names) + 1)),
    "veg_class_name": list(veg_class_names),
}
df = pd.DataFrame(veg_dict)
df

Unnamed: 0,veg_class_id,veg_class_name
0,1,Grid average
1,2,Forest
2,3,Semi-natural


In [None]:
## Write to new db
#df.to_sql('dep_emep_veg_defs', 
#          eng,
#          'deposition',
#          if_exists='replace',
#          index=False,
#         )
#
## Use 'veg_class_id' col as primary key
#sql = ("ALTER TABLE deposition.dep_emep_veg_defs "
#       "ADD CONSTRAINT dep_emep_veg_defs_pk "
#       "PRIMARY KEY (veg_class_id)")
#eng.execute(sql)

### 2.2. Deposition values

In [7]:
## Delete if already exist
#sql = ("DROP TABLE IF EXISTS deposition.dep_values_emep_grid")
#eng.execute(sql)
#
## Create table for EMEP data
#sql = ("CREATE TABLE deposition.dep_values_emep_grid "
#       "( "
#       "  series_id integer NOT NULL, "
#       "  cell_id bigint NOT NULL, "
#       "  param_id integer NOT NULL, "
#       "  veg_class_id integer NOT NULL, "
#       "  value numeric, "
#       "  PRIMARY KEY (series_id, cell_id, param_id, veg_class_id), "
#       "  CONSTRAINT series_id_fkey FOREIGN KEY (series_id) "
#       "      REFERENCES deposition.dep_series_defs (series_id) "
#       "      ON UPDATE NO ACTION ON DELETE NO ACTION, "
#       "  CONSTRAINT cell_id_fkey FOREIGN KEY (cell_id) "
#       "      REFERENCES deposition.dep_grid_emep (cell_id) "
#       "      ON UPDATE NO ACTION ON DELETE NO ACTION, "
#       "  CONSTRAINT param_id_fkey FOREIGN KEY (param_id) "
#       "      REFERENCES deposition.dep_param_defs (param_id) "
#       "      ON UPDATE NO ACTION ON DELETE NO ACTION, "
#       "  CONSTRAINT veg_class_id_fkey FOREIGN KEY (veg_class_id) "
#       "      REFERENCES deposition.dep_emep_veg_defs (veg_class_id) "
#       "      ON UPDATE NO ACTION ON DELETE NO ACTION "
#       ")")
#eng.execute(sql)

<sqlalchemy.engine.result.ResultProxy at 0x7f3ac4583bd0>

## 3. Add deposition values

### 3.1. Combine all data

In [8]:
# Dict mapping EMEP pars to deposition.dep_param_defs
# (keyed by the first character of each file name)
par_dict = {"N": 1, "A": 2, "S": 4}

# Dict mapping EMEP veg classes to deposition.dep_emep_veg_defs
# (keyed by the last character of each file name, before the extension)
veg_dict = {"a": 1, "f": 2, "v": 3}

# Columns that identify a record; all other columns hold deposition values
id_cols = ["cell_id", "param_id", "veg_class_id"]

# Read and tag each file
df_list = []
for fpath in flist:
    # Read data
    file_df = pd.read_csv(fpath, skiprows=3)
    file_df = file_df.rename(columns={"!Lon": "lon"})
    file_df.columns = [i.strip().lower() for i in file_df.columns]

    # Calculate cell centres (co-ords in the file are shifted by half a cell)
    lon_centre = file_df["lon"] + 0.25
    lat_centre = file_df["lat"] + 0.125

    # Build cell_id from centre co-ords: zero-padded (1000 * lat) followed by
    # zero-padded (1000 * lon). Round before casting so that floating point
    # error cannot truncate a value, and use int64 explicitly because the IDs
    # are too large for a 32-bit integer
    lat_code = (1000 * lat_centre).round().astype("int64").astype(str).str.zfill(5)
    lon_code = (1000 * lon_centre).round().astype("int64").astype(str).str.zfill(5)
    file_df["cell_id"] = (lat_code + lon_code).astype("int64")
    file_df = file_df.drop(columns=["lon", "lat"])

    # Add par and veg IDs. File names look like 'N_LoLa_a.NO': the first
    # character gives the parameter and the last character before the
    # 3-character extension gives the veg class
    name = os.path.split(fpath)[1][:-3]
    file_df["param_id"] = par_dict[name[0]]
    file_df["veg_class_id"] = veg_dict[name[-1]]

    df_list.append(file_df)

# Concatenate data. Each file has its own 0..n index, so build a fresh index
# rather than keeping duplicated labels
df = pd.concat(df_list, ignore_index=True)

# Set -1 to NaN, but only in the value columns, so the ID columns are never touched
value_cols = [col for col in df.columns if col not in id_cols]
df[value_cols] = df[value_cols].replace(-1, np.nan)

df.head()

Unnamed: 0,1880,1885,1890,1895,1900,1905,1910,1915,1920,1925,...,1990,1995,2000,2005,2010,2020,2030,cell_id,param_id,veg_class_id
0,,,,,,,,,,,...,,,,,,174.3,137.6,5712504250,1,1
1,,,,,,,,,,,...,,,,,,174.9,138.4,5712504750,1,1
2,,,,,,,,,,,...,,,,,,180.0,143.2,5712505250,1,1
3,,,,,,,,,,,...,,,,,,190.1,152.1,5712505750,1,1
4,,,,,,,,,,,...,,,,,,196.1,157.4,5712506250,1,1


### 3.2. Add to database

Each time period corresponds to a data series in `deposition.dep_series_defs`.

In [None]:
## Add series to db
#series_list = list(range(1880, 2011, 5)) + [2020, 2030]
#for idx, series in enumerate(series_list):
#    # Start numbering from 31
#    series_id = 31 + idx
#    long_name = f'EMEP {series}'
#    short_name = f'emep{series}'
#    grid = 'emep'
#    desc = f'{series} data based on the EMEP 0.50x0.25 degree grid. 3 vegetation classes. Extracted by Max Posch, received Jan 2016'
#
#    # Add to table
#    cl.add_dep_series(series_id, 
#                      long_name,
#                      short_name,
#                      grid,
#                      desc,   
#                      eng,
#                     )   

In [11]:
## Add values
#series_list = list(range(1880, 2011, 5)) + [2020, 2030]
#for idx, series in enumerate(series_list):
#    # Start numbering from 31
#    series_id = 31 + idx
#    series = str(series)
#
#    # Get data
#    val_df = df[['cell_id', 'param_id', 'veg_class_id', series]].copy()
#    val_df.dropna(how='any', inplace=True)
#    val_df['series_id'] = series_id
#    val_df.rename({series:'value'}, axis=1, inplace=True)
#    val_df = val_df[['series_id', 'cell_id', 'param_id', 'veg_class_id', 'value']]
#    
#    # Add to db
#    val_df.to_sql('dep_values_emep_grid',
#                  eng,
#                  schema='deposition',
#                  if_exists='append',
#                  index=False,
#                  method="multi",
#                  chunksize=1000,
#                 )