In [1]:
import numpy as np
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import column_index_from_string

# RID OSPAR Reporting Template

The final step in the annual RID reporting is to complete and submit an overall summary template to OSPAR. A blank template and guideline documentation were provided by Eva (see e-mail received 04/09/2017 at 13.07) and the 2015 template created by Tore is here:

K:\Avdeling\Vass\316_Miljøinformatikk\Prosjekter\RID\2016\OSPAR\Norway_2015.xlsx

**Note:** Tore previously calculated "upper" load estimates by setting LOD values equal to the LOD itself, and "lower" estimates by setting LOD values equal to zero. This year we have handled LOD values more explicitly, by calculating a single estimate as described [here](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/rid_data_exploration.ipynb#2.3.-Calculate-loads). We therefore only need to fill-in entries for the "mean" rows.

Most of the information required for the template has already been reported in other tables, especially the following:

 * *Table_3_2016.docx* <br><br>
 
 * *loads_and_flows_all_sites_2016.csv* <br><br>
 
 * *concs_and_flows_rid_11-36_2016.csv*

However, some information is not available from these tables, and in previous years Tore has simply left these cells blank. The code here does the same, although in principle we could fill-in the whole thing with a bit more work.

## 1. Get summary data

The notebook [here](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/summary_table_2016.ipynb) calculates summary values for Table 3. Rather than reading these values from Word, it is easier to repeat the code to summarise the raw data again. This is the data that needs writing to the OSPAR template.

### 1.1. Monitored areas

In [2]:
# Read station-level loads and flows
in_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
          r'\Results\Loads_CSVs\loads_and_flows_all_sites_2016.csv')
raw_df = pd.read_csv(in_csv)

# Group by OSPAR region. numeric_only=True makes it explicit that text
# columns are excluded from the sums, rather than relying on pandas
# silently dropping them (newer versions no longer do)
reg_df = raw_df.groupby(['ospar_region', 'rid_group']).sum(numeric_only=True)

# Totals for Norway, indexed the same way as the regional totals
nor_df = raw_df.groupby('rid_group').sum(numeric_only=True).reset_index()
nor_df['ospar_region'] = 'NORWAY'
nor_df = nor_df.set_index(['ospar_region', 'rid_group'])

# Combine
mon_df = pd.concat([reg_df, nor_df], axis=0)

# Cols of interest: drop the '*_Est' columns, plus the summed station IDs
# and flows, which are not loads
cols = [i for i in mon_df.columns if i.split('_')[1] != 'Est']
mon_df = mon_df[cols].drop(['station_id', 'mean_q_1000m3/day'], axis=1)

# Convert units: Hg from kg to tonnes; everything else listed here from
# tonnes to ktonnes
for col in ['Hg_kg', 'NH4-N_tonnes', 'NO3-N_tonnes', 'TOTN_tonnes',
            'TOTP_tonnes', 'PO4-P_tonnes', 'SPM_tonnes']:
    mon_df[col] = mon_df[col]/1000.

# Units are correct, so remove the unit suffix from the column names
mon_df.columns = [i.split('_')[0] for i in mon_df.columns]

mon_df.round(0)

  result = _values_from_object(self).round(decimals)


Unnamed: 0_level_0,Unnamed: 1_level_0,Ag,As,Cd,Cr,Cu,Hg,NH4-N,NO3-N,Ni,PO4-P,Pb,SPM,SiO2,TOC,TOTN,TOTP,Zn
ospar_region,rid_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
LOFOTEN-BARENTS SEA,rid_108,,1.0,0.0,1.0,6.0,0.0,0.0,0.0,5.0,0.0,0.0,9.0,21610.0,23194.0,1.0,0.0,6.0
LOFOTEN-BARENTS SEA,rid_11,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,7.0,16126.0,15159.0,1.0,0.0,1.0
LOFOTEN-BARENTS SEA,rid_36,0.0,2.0,0.0,4.0,33.0,0.0,0.0,1.0,155.0,0.0,1.0,74.0,80641.0,67688.0,6.0,0.0,16.0
NORTH SEA,rid_108,,1.0,0.0,1.0,10.0,0.0,0.0,3.0,7.0,0.0,3.0,25.0,27778.0,27976.0,5.0,0.0,29.0
NORTH SEA,rid_11,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,4.0,3056.0,4317.0,1.0,0.0,6.0
NORTH SEA,rid_36,0.0,2.0,0.0,3.0,11.0,0.0,0.0,4.0,5.0,0.0,5.0,68.0,32823.0,44560.0,7.0,0.0,86.0
NORWEGIAN SEA2,rid_108,,1.0,0.0,1.0,5.0,0.0,0.0,1.0,4.0,0.0,1.0,22.0,18374.0,20205.0,2.0,0.0,8.0
NORWEGIAN SEA2,rid_11,0.0,1.0,0.0,2.0,9.0,0.0,0.0,0.0,4.0,0.0,1.0,36.0,10365.0,13856.0,1.0,0.0,18.0
NORWEGIAN SEA2,rid_36,0.0,2.0,0.0,7.0,18.0,0.0,0.0,2.0,16.0,0.0,2.0,88.0,46938.0,63117.0,5.0,0.0,35.0
SKAGERAK,rid_108,,1.0,0.0,1.0,2.0,0.0,0.0,1.0,2.0,0.0,1.0,11.0,8392.0,13975.0,2.0,0.0,17.0


### 1.2. Unmonitored areas

In [3]:
# Load the loads estimated for unmonitored areas (one row per region)
in_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
          r'\Results\Unmon_loads\unmon_loads_2016.csv')
umon_df = pd.read_csv(in_csv, index_col=0)

# Shorten the source names and strip the unit suffix. The substitutions are
# applied to each column name in the order listed
substitutions = [('RENSEANLEGG', 'sew'),
                 ('INDUSTRI', 'ind'),
                 ('_tonn', ''),
                 ('AQUAKULTUR', 'fish')]
renamed = []
for name in umon_df.columns:
    for old, new in substitutions:
        name = name.replace(old, new)
    renamed.append(name)
umon_df.columns = renamed

# Express Hg in kgs
for hg_col in ('sew_Hg', 'ind_Hg'):
    umon_df[hg_col] = umon_df[hg_col]*1000

umon_df.round(0)

Unnamed: 0_level_0,flow,sew_n,sew_p,ind_n,ind_p,fish_n,fish_p,diff_n,diff_p,sew_po4,...,sew_S.P.M.,sew_As,sew_Pb,sew_Cd,sew_Cu,sew_Zn,sew_Ni,sew_Cr,sew_Hg,fish_Cu
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NORWAY,408822.0,13615.0,1128.0,2354.0,246.0,56691.0,9781.0,34781.0,711.0,677.0,...,1927.0,0.0,0.0,0.0,4.0,12.0,2.0,1.0,5.0,1088.0
LOFOTEN-BARENTS SEA,104554.0,1226.0,177.0,83.0,6.0,14141.0,2435.0,4980.0,102.0,106.0,...,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,271.0
NORTH SEA,151022.0,3714.0,440.0,412.0,90.0,19561.0,3370.0,14505.0,239.0,264.0,...,912.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,1.0,375.0
NORWEGIAN SEA2,143758.0,2828.0,392.0,984.0,109.0,22964.0,3971.0,12382.0,281.0,235.0,...,891.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,442.0
SKAGERAK,9488.0,5847.0,120.0,875.0,41.0,25.0,4.0,2915.0,88.0,72.0,...,4.0,0.0,0.0,0.0,2.0,8.0,1.0,0.0,4.0,0.0


### 1.3. Loads for 11 main rivers

In [4]:
# Read station-level loads and flows, indexed by station ID
in_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
          r'\Results\Loads_CSVs\loads_and_flows_all_sites_2016.csv')
rid11_df = pd.read_csv(in_csv, index_col=0)

# Get data for RID11. Take an explicit copy so the edits below act on an
# independent frame rather than on a slice of the full dataset
rid11_df = rid11_df.query('rid_group == "rid_11"').copy()

# Tidy: drop descriptive columns and the flow
rid11_df = rid11_df.drop(['station_code', 'station_name', 'rid_group',
                          'ospar_region', 'mean_q_1000m3/day'], axis=1)

# Drop the '*_Est' columns
cols = [i for i in rid11_df.columns if i.split('_')[1] != 'Est']
rid11_df = rid11_df[cols].copy()

# Convert units: Hg from kg to tonnes; everything else listed here from
# tonnes to ktonnes
for col in ['Hg_kg', 'NH4-N_tonnes', 'NO3-N_tonnes', 'TOTN_tonnes',
            'TOTP_tonnes', 'PO4-P_tonnes', 'SPM_tonnes']:
    rid11_df[col] = rid11_df[col]/1000.

# Tidy cols: remove the unit suffix
rid11_df.columns = [i.split('_')[0] for i in rid11_df.columns]

rid11_df.head()

Unnamed: 0_level_0,Ag,As,Cd,Cr,Cu,Hg,NH4-N,NO3-N,Ni,PO4-P,Pb,SPM,SiO2,TOC,TOTN,TOTP,Zn
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
29615,0.01264,0.542399,0.040416,0.504768,2.085558,0.005182,0.101822,0.593809,1.208246,0.011542,0.659271,9.488567,12561.595742,14398.932184,1.352479,0.024767,10.081694
29821,0.001524,0.188088,0.017968,0.171686,1.212047,0.001501,0.019402,0.267407,0.796592,0.00188,0.191949,1.599127,2757.94622,3367.321686,0.571634,0.00889,5.824033
29783,0.000303,0.049713,0.002911,0.029957,0.26799,0.000152,0.006301,0.083181,0.189997,0.004126,0.075422,1.930079,297.674203,949.895914,0.224112,0.011946,0.578711
29613,0.014897,0.909499,0.083091,0.550997,3.726589,0.002946,0.068659,1.050333,1.755557,0.004187,0.878968,5.616309,19041.63404,22343.958824,2.254362,0.036577,30.338304
29614,0.010146,0.5381,0.083449,0.367309,2.216532,0.012647,0.043319,0.374562,2.078401,0.003149,1.098062,4.119449,6992.435227,12605.409378,1.03046,0.014647,14.547223


## 2. Fill-in template

A copy of the 2016 template from Eva is here:

C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet\Results\OSPAR_Template\01_OSPAR_Norway_2016.xlsx

In [5]:
# Copy of template to update. The update functions and cells below open this
# workbook and save their changes back to the same path, so the file is
# modified in place
temp_path = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
             r'\Results\OSPAR_Template\01_OSPAR_Norway_2016.xlsx')

In [6]:
def update_spreadsheet_point_sources(xlsx, sheet, pars, src, df):
    """ Update the OSPAR template with one value per region and parameter.
    
    The workbook is opened, the chosen sheet is updated and the file is
    saved back to the same path. Region labels are read from column B
    (row 12 downwards) and parameter headings from row 9 (columns E to AK).
    Each value is written two rows below its region label.
    
    Args:
        xlsx:  Str. Path to Excel template
        sheet: Str. Sheet name to update
        pars:  List. Parameter names in template to fill-in
        src:   Str. Prefix of the df columns to use (e.g. 'sew', 'ind', 
               'fish'). Columns must be named '<src>_<parameter>'
        df:    Dataframe. Values to fill-in, indexed by OSPAR region
        
    Returns:
        None. The template is updated and saved.
        
    Raises:
        KeyError if a region or parameter is missing from the template,
        or from df.
    """
    from openpyxl import load_workbook
    from openpyxl.utils import column_index_from_string
    
    # Map Excel headings to df cols
    par_dict = {'SPM':'S.P.M.',
                'TOC':'TOC',
                'PO4-P':'po4',
                'P-Total':'p',
                'NO3-N':'no3', 
                'NH4-N':'nh4',
                'N-Total':'n',
                'As':'As',
                'Pb':'Pb',
                'Cd':'Cd', 
                'Cu':'Cu',
                'Zn':'Zn',
                'Ni':'Ni',
                'Total Cr':'Cr',
                'Hg':'Hg'}

    # Map template names to df names
    names_dict = {'Norwegian Sea (NO)':'NORWEGIAN SEA2',
                  'Barents Sea (NO)':'LOFOTEN-BARENTS SEA',
                  'Skagerrak (NO)':'SKAGERAK',
                  'North Sea (NO)':'NORTH SEA',
                  'Norway Total':'NORWAY'}

    # Open file and get sheet
    wb = load_workbook(filename=xlsx)
    ws = wb[sheet]

    # Get row numbers for each label in column B
    row_dict = {}
    for item in ws['B12':'B%s' % ws.max_row]:
        cell = item[0]
        row_dict[cell.value] = cell.row

    # Get col numbers for each heading in row 9. Older openpyxl versions
    # return the column letter from cell.column, newer ones the integer
    # index, so normalise to an integer
    col_dict = {}
    for cell in ws['E9':'AK9'][0]:
        col = cell.column
        if not isinstance(col, int):
            col = column_index_from_string(col)
        col_dict[cell.value] = col

    # Update spreadsheet
    for reg, df_idx in names_dict.items():
        # Values go two rows below the region label
        # (presumably the "mean" row - confirm against the template)
        row = row_dict[reg] + 2
        
        for par in pars:
            # Label-based lookup (.ix has been removed from pandas)
            val = df.loc[df_idx, '%s_%s' % (src, par_dict[par])]

            # Write value
            ws.cell(row=row, column=col_dict[par]).value = val

    # Save
    wb.save(xlsx)

### 2.1. Sheet 5a: Sewage effluents

Note these are the values from Table 3 for **unmonitored areas** and not the total sewage inputs for each of the OSPAR areas. This is the same as what Tore reported previously.

In [7]:
# Pick out the sewage columns (prefix 'sew') as an independent frame
cols = [name for name in umon_df.columns if name.partition('_')[0] == 'sew']
sew_df = umon_df[cols].copy()

# Convert units: Hg from kg to tonnes; nutrients and S.P.M. from tonnes 
# to ktonnes
for suffix in ['Hg', 'nh4', 'no3', 'n', 'po4', 'p', 'S.P.M.']:
    col = 'sew_' + suffix
    sew_df[col] = sew_df[col].div(1000.)

sew_df

Unnamed: 0_level_0,sew_n,sew_p,sew_po4,sew_no3,sew_nh4,sew_S.P.M.,sew_As,sew_Pb,sew_Cd,sew_Cu,sew_Zn,sew_Ni,sew_Cr,sew_Hg
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
NORWAY,13.61495,1.128469,0.677081,0.680747,10.211212,1.926923,0.236261,0.245359,0.014051,3.542174,11.783144,1.655259,0.5493,0.00521
LOFOTEN-BARENTS SEA,1.226449,0.176538,0.105923,0.061322,0.919837,0.1198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NORTH SEA,3.713773,0.439909,0.263946,0.185689,2.78533,0.91215,0.069973,0.074835,0.00366,0.603395,2.545939,0.43992,0.013323,0.001028
NORWEGIAN SEA2,2.828139,0.391982,0.235189,0.141407,2.121104,0.89063,0.03341,0.04545,0.00144,0.86721,1.59821,0.16472,0.32894,0.00036
SKAGERAK,5.846588,0.12004,0.072024,0.292329,4.384941,0.004343,0.132878,0.125074,0.008951,2.071569,7.638995,1.050619,0.207037,0.003822


In [8]:
# Parameters to write to sheet 5a (sewage effluents)
pars = ['Ni', 'Pb', 'NH4-N', 'Total Cr', 'NO3-N', 'Zn', 'As',
        'Cd', 'P-Total', 'SPM', 'PO4-P', 'N-Total', 'Hg', 'Cu']

update_spreadsheet_point_sources(xlsx=temp_path, sheet='5a', pars=pars,
                                 src='sew', df=sew_df)

### 2.2. Sheet 5b: Industrial effluents

In [9]:
# Pick out the industrial columns (prefix 'ind') as an independent frame
cols = [name for name in umon_df.columns if name.partition('_')[0] == 'ind']
ind_df = umon_df[cols].copy()

# Convert units: Hg from kg to tonnes; nutrients and S.P.M. from tonnes 
# to ktonnes
for suffix in ['Hg', 'nh4', 'no3', 'n', 'po4', 'p', 'S.P.M.']:
    col = 'ind_' + suffix
    ind_df[col] = ind_df[col].div(1000.)

ind_df

Unnamed: 0_level_0,ind_n,ind_p,ind_po4,ind_no3,ind_nh4,ind_S.P.M.,ind_TOC,ind_As,ind_Pb,ind_Cd,ind_Cu,ind_Zn,ind_Ni,ind_Cr,ind_Hg
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
NORWAY,2.354008,0.246063,0.147638,0.1177,1.765506,15.609737,954.6953,2.143993,0.959181,0.085454,5.923683,12.906247,5.874748,1.370978,0.008167
LOFOTEN-BARENTS SEA,0.08327,0.006071,0.003643,0.004163,0.062453,4.969425,268.3387,9.7e-05,0.000202,6e-06,0.000716,0.000837,0.000304,0.000306,2e-06
NORTH SEA,0.412077,0.089922,0.053953,0.020604,0.309058,7.328272,472.8335,1.770449,0.566488,0.054544,0.505626,5.356038,3.510515,0.495282,0.003251
NORWEGIAN SEA2,0.983546,0.109187,0.065512,0.049177,0.73766,2.205135,142.7514,0.009915,0.086982,0.005615,0.191182,0.12512,0.117004,0.035974,0.000577
SKAGERAK,0.875114,0.040884,0.02453,0.043756,0.656336,1.106904,70.7717,0.363532,0.305509,0.025289,5.226159,7.424251,2.246925,0.839415,0.004337


In [10]:
# Parameters to write to sheet 5b (industrial effluents)
pars = ['Ni', 'Pb', 'NH4-N', 'Total Cr', 'NO3-N', 'Zn', 'As', 'Cd',
        'P-Total', 'SPM', 'PO4-P', 'N-Total', 'Hg', 'Cu', 'TOC']

update_spreadsheet_point_sources(xlsx=temp_path, sheet='5b', pars=pars,
                                 src='ind', df=ind_df)

### 2.3. Sheet 5c: Aquaculture discharges

In [11]:
# Pick out the aquaculture columns (prefix 'fish') as an independent frame
cols = [name for name in umon_df.columns if name.partition('_')[0] == 'fish']
fish_df = umon_df[cols].copy()

# Convert nutrients from tonnes to ktonnes
for suffix in ['nh4', 'no3', 'n', 'po4', 'p']:
    col = 'fish_' + suffix
    fish_df[col] = fish_df[col].div(1000.)

fish_df

Unnamed: 0_level_0,fish_n,fish_p,fish_po4,fish_no3,fish_nh4,fish_Cu
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
NORWAY,56.690666,9.780734,6.748706,6.235973,45.352533,1088.0
LOFOTEN-BARENTS SEA,14.140779,2.435102,1.680221,1.555486,11.312623,270.832318
NORTH SEA,19.560984,3.3701,2.325369,2.151708,15.648787,374.822795
NORWEGIAN SEA2,22.963896,3.971308,2.740203,2.526029,18.371117,441.875212
SKAGERAK,0.025007,0.004223,0.002914,0.002751,0.020005,0.469675


In [12]:
# Parameters to write to sheet 5c (aquaculture discharges)
pars = ['NH4-N', 'NO3-N', 'P-Total', 'PO4-P', 'N-Total', 'Cu']

update_spreadsheet_point_sources(xlsx=temp_path, sheet='5c', pars=pars,
                                 src='fish', df=fish_df)

### 2.4. Sheet 5d: Other discharges

This sheet is left blank

### 2.5. Sheet 5e: Total direct discharges

The sum of sewage, industrial and fish-farm discharges.

In [13]:
# Combine sew, ind and fish, then aggregate. Work on new frames rather than
# modifying sew_df, ind_df and fish_df in place, so this cell gives the 
# same result if it is run more than once
frames = []
for src_df in [sew_df, ind_df, fish_df]:
    flat_df = src_df.reset_index()
    
    # Keep the part after the first underscore: 'sew_n' -> 'n' and 
    # 'ospar_region' -> 'region'
    flat_df.columns = [i.split('_')[1] for i in flat_df.columns]
    frames.append(flat_df)

# Sum the three sources for each region. Parameters missing for a source 
# are NaN after the concat and are skipped by the sum
td_df = pd.concat(frames, axis=0)
td_df = td_df.groupby('region').sum()

td_df.columns = ['td_'+i for i in td_df.columns]

td_df

Unnamed: 0_level_0,td_As,td_Cd,td_Cr,td_Cu,td_Hg,td_Ni,td_Pb,td_S.P.M.,td_TOC,td_Zn,td_n,td_nh4,td_no3,td_p,td_po4
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
LOFOTEN-BARENTS SEA,9.7e-05,6e-06,0.000306,270.833034,2e-06,0.000304,0.000202,5.089225,268.3387,0.000837,15.450499,12.294913,1.620972,2.617711,1.789786
NORTH SEA,1.840422,0.058204,0.508605,375.931816,0.004279,3.950435,0.641323,8.240422,472.8335,7.901977,23.686834,18.743175,2.358001,3.89993,2.643267
NORWAY,2.380254,0.099505,1.920278,1097.465857,0.013377,7.530007,1.20454,17.536659,954.6953,24.689391,72.659623,57.329251,7.034421,11.155265,7.573425
NORWEGIAN SEA2,0.043325,0.007055,0.364914,442.933604,0.000937,0.281724,0.132432,3.095765,142.7514,1.72333,26.775581,21.229881,2.716613,4.472477,3.040904
SKAGERAK,0.49641,0.03424,1.046452,7.767403,0.008159,3.297544,0.430583,1.111247,70.7717,15.063246,6.746709,5.061282,0.338836,0.165147,0.099468


In [14]:
# Parameters to write to sheet 5e (total direct discharges)
pars = ['Ni', 'Pb', 'NH4-N', 'Total Cr', 'NO3-N', 'Zn', 'As', 'Cd',
        'P-Total', 'SPM', 'PO4-P', 'N-Total', 'Hg', 'Cu', 'TOC']

update_spreadsheet_point_sources(xlsx=temp_path, sheet='5e', pars=pars,
                                 src='td', df=td_df)

### 2.6. Sheet 6a: Monitored rivers

**Does "Inner Oslofjord" in the template correspond to "Alna"?** If so, I can fill-in one additional row in this table.

In [15]:
# Flatten the (ospar_region, rid_group) index into columns. A new frame is
# used so that mon_df is left unchanged and the cell can safely be re-run
mon_flat_df = mon_df.reset_index()

# Totals per region for all monitored rivers. numeric_only=True keeps the 
# text column 'rid_group' out of the sums
tot_df = mon_flat_df.groupby('ospar_region').sum(numeric_only=True)

# Totals per region for everything except the 11 main rivers
not_main = mon_flat_df['rid_group'] != 'rid_11'
trib_df = mon_flat_df[not_main].groupby('ospar_region').sum(numeric_only=True)

In [16]:
def update_spreadsheet_monitored_rivers(xlsx, sheet, pars, df_dict):
    """ Update the OSPAR template for monitored rivers.
    
    The workbook is opened, the chosen sheet is updated and the file is
    saved back to the same path. Row labels are read from column B 
    (row 12 downwards) and parameter headings from row 9 (columns E to AK).
    Each value is written two rows below its row label.
    
    Args:
        xlsx:    Str. Path to Excel template
        sheet:   Str. Sheet name to update
        pars:    List. Parameter names in template to fill-in
        df_dict: Dict. {'tot':tot_df, 'trib':trib_df, 'main':rid11_df}
                 Values to fill-in. 'tot' and 'trib' are indexed by OSPAR
                 region; 'main' is indexed by station ID
        
    Returns:
        None. The template is updated and saved.
        
    Raises:
        KeyError if a row label or parameter is missing from the template,
        or from the relevant dataframe.
    """
    from openpyxl import load_workbook
    from openpyxl.utils import column_index_from_string

    # Map Excel headings to df cols
    par_dict = {'SPM':'SPM',
                'TOC':'TOC',
                'PO4-P':'PO4-P',
                'P-Total':'TOTP',
                'NO3-N':'NO3-N', 
                'NH4-N':'NH4-N',
                'N-Total':'TOTN',
                'As':'As',
                'Pb':'Pb',
                'Cd':'Cd', 
                'Cu':'Cu',
                'Zn':'Zn',
                'Ni':'Ni',
                'Total Cr':'Cr',
                'Hg':'Hg'}
  
    # Map template names to (df index value, key in df_dict)
    names_dict = {'Norwegian Sea (NO)':('NORWEGIAN SEA2', 'tot'),
                  'Barents Sea (NO)':('LOFOTEN-BARENTS SEA', 'tot'),
                  'Skagerrak (NO)':('SKAGERAK', 'tot'),
                  'North Sea (NO)':('NORTH SEA', 'tot'),
                  'Norway Total':('NORWAY', 'tot'),
                  'Tributary Rivers - Norwegian Sea':('NORWEGIAN SEA2', 'trib'),
                  'Tributary Rivers - Barents Sea':('LOFOTEN-BARENTS SEA', 'trib'),
                  'Tributary Rivers - Skagerak':('SKAGERAK', 'trib'),
                  'Tributary Rivers - North Sea':('NORTH SEA', 'trib'),
                  'Orkla':(29778, 'main'),
                  'Vefsna':(29782, 'main'),
                  'Alta':(29779, 'main'),
                  'Glomma':(29617, 'main'),
                  'Drammenselva':(29612, 'main'),
                  u'Numedalslågen':(29615, 'main'),
                  'Skienselva':(29613, 'main'),
                  'Otra':(29614, 'main'),
                  'Orreelva':(29783, 'main'),
                  'Vosso':(29821, 'main')}

    # Open file and get sheet
    wb = load_workbook(filename=xlsx)
    ws = wb[sheet]

    # Get row numbers for each label in column B
    row_dict = {}
    for item in ws['B12':'B%s' % ws.max_row]:
        cell = item[0]
        row_dict[cell.value] = cell.row

    # Get col numbers for each heading in row 9. Older openpyxl versions
    # return the column letter from cell.column, newer ones the integer
    # index, so normalise to an integer
    col_dict = {}
    for cell in ws['E9':'AK9'][0]:
        col = cell.column
        if not isinstance(col, int):
            col = column_index_from_string(col)
        col_dict[cell.value] = col

    # Update spreadsheet
    for reg, (df_idx, df_name) in names_dict.items():
        # Get df 
        df = df_dict[df_name]
        
        # Values go two rows below the row label
        # (presumably the "mean" row - confirm against the template)
        row = row_dict[reg] + 2
        
        for par in pars:
            # Label-based lookup (.ix has been removed from pandas)
            val = df.loc[df_idx, par_dict[par]]

            # Write value
            ws.cell(row=row, column=col_dict[par]).value = val

    # Save
    wb.save(xlsx)

In [17]:
# Parameters to write to sheet 6a (monitored rivers)
pars = ['Ni', 'Pb', 'NH4-N', 'Total Cr', 'NO3-N', 'Zn', 'As', 'Cd',
        'P-Total', 'SPM', 'PO4-P', 'N-Total', 'Hg', 'Cu', 'TOC']

# Regional totals, main rivers and tributary totals, keyed as the update 
# function expects
df_dict = dict(tot=tot_df, main=rid11_df, trib=trib_df)

update_spreadsheet_monitored_rivers(xlsx=temp_path, sheet='6a', pars=pars,
                                    df_dict=df_dict)

### 2.7. Sheet 6b: Unmonitored areas

In [18]:
# Pick out the 'diff' columns as an independent frame
cols = [name for name in umon_df.columns if name.partition('_')[0] == 'diff']
diff_df = umon_df[cols].copy()

# Convert nutrients from tonnes to ktonnes
for suffix in ['nh4', 'no3', 'n', 'po4', 'p']:
    col = 'diff_' + suffix
    diff_df[col] = diff_df[col].div(1000.)

diff_df

Unnamed: 0_level_0,diff_n,diff_p,diff_po4,diff_no3,diff_nh4
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NORWAY,34.780822,0.710667,0.174824,21.738014,1.912945
LOFOTEN-BARENTS SEA,4.979527,0.102167,0.025133,3.112204,0.273874
NORTH SEA,14.504828,0.238642,0.058706,9.065517,0.797766
NORWEGIAN SEA2,12.381687,0.281439,0.069234,7.738554,0.680993
SKAGERAK,2.91478,0.088419,0.021751,1.821737,0.160313


In [19]:
# Parameters to write to sheet 6b (unmonitored areas)
pars = ['NH4-N', 'NO3-N', 'P-Total', 'PO4-P', 'N-Total']

update_spreadsheet_point_sources(xlsx=temp_path, sheet='6b', pars=pars,
                                 src='diff', df=diff_df)

### 2.8. Sheet 6c: Total inputs

In [20]:
# This cell replaces tot_df (monitored river totals) with the overall totals.
# Stop with a clear message if it has already been run, rather than 
# combining the wrong data
if 'TOTN' not in tot_df.columns:
    raise ValueError('tot_df does not hold the monitored river totals. '
                     'Re-run the cell that creates tot_df before this one.')

# Standardise col names. Copies are used so that diff_df, td_df and the 
# monitored totals are not modified
diff_std_df = diff_df.copy()
diff_std_df.columns = [i.split('_')[1] for i in diff_std_df.columns]

td_std_df = td_df.copy()
td_std_df.columns = [i.split('_')[1] for i in td_std_df.columns]
td_std_df.index.name = 'ospar_region'

# Rename the monitored cols to match the names used by the other two frames
col_dict = {'SPM':'S.P.M.',
            'TOTN':'n',
            'NH4-N':'nh4',
            'NO3-N':'no3',
            'TOTP':'p',
            'PO4-P':'po4'}
riv_std_df = tot_df.rename(columns=col_dict)

# Concat and aggregate: sum the three sets of inputs for each region
frames = [diff_std_df.reset_index(), 
          td_std_df.reset_index(), 
          riv_std_df.reset_index()]
tot_df = pd.concat(frames, axis=0).groupby('ospar_region').sum()

# Rename cols
tot_df.columns = ['tot_'+i for i in tot_df.columns]

tot_df

Unnamed: 0_level_0,tot_Ag,tot_As,tot_Cd,tot_Cr,tot_Cu,tot_Hg,tot_Ni,tot_Pb,tot_S.P.M.,tot_SiO2,tot_TOC,tot_Zn,tot_n,tot_nh4,tot_no3,tot_p,tot_po4
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
LOFOTEN-BARENTS SEA,0.003449,3.093138,0.135754,6.115584,311.496427,0.007996,161.059945,1.275754,95.102309,118376.267748,106308.308425,22.855423,28.168691,12.809966,5.587727,2.915461,1.883485
NORTH SEA,0.020794,5.431193,0.555744,4.479812,398.451334,0.037449,16.684833,9.194332,104.876403,63655.903925,77325.770227,129.2641,50.901127,19.890654,18.577119,4.417158,2.814719
NORWAY,0.152637,23.613841,1.850324,34.706172,1251.267822,0.128003,237.40988,39.501151,759.010794,436698.429921,505589.774822,575.818473,162.646918,60.804104,54.270379,13.271227,8.399216
NORWEGIAN SEA2,0.019359,4.292293,0.223056,10.558801,475.713484,0.026872,23.809678,3.442105,149.040972,75677.082854,97320.474939,62.65073,47.127448,22.116058,13.353774,5.005158,3.23238
SKAGERAK,0.109036,10.797217,0.935771,13.551974,65.606576,0.055685,35.855423,25.58896,409.99111,178989.175394,224635.221231,361.04822,36.449653,5.987426,16.751759,0.93345,0.468632


In [21]:
# Parameters to write to sheet 6c (total inputs)
pars = ['Ni', 'Pb', 'NH4-N', 'Total Cr', 'NO3-N', 'Zn', 'As', 'Cd',
        'P-Total', 'SPM', 'PO4-P', 'N-Total', 'Hg', 'Cu', 'TOC']

update_spreadsheet_point_sources(xlsx=temp_path, sheet='6c', pars=pars,
                                 src='tot', df=tot_df)

### 2.9. Sheet 7: Concentrations

In [22]:
# Read data (one row per sample, indexed by station ID)
in_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
          r'\Results\Loads_CSVs\concs_and_flows_rid_11-36_2016.csv')
conc_df = pd.read_csv(in_csv, index_col = 0)

# Convert units. Float factors are used so the result never depends on
# integer division
conc_df['Hg_ng/l'] = conc_df['Hg_ng/l'] / 1000. # ng to ug
conc_df['NH4-N_µg/l N'] = conc_df['NH4-N_µg/l N'] / 1000. # ug to mg
conc_df['NO3-N_µg/l N'] = conc_df['NO3-N_µg/l N'] / 1000. # ug to mg
conc_df['TOTN_µg/l N'] = conc_df['TOTN_µg/l N'] / 1000. # ug to mg
conc_df['TOTP_µg/l P'] = conc_df['TOTP_µg/l P'] / 1000. # ug to mg
conc_df['PO4-P_µg/l P'] = conc_df['PO4-P_µg/l P'] / 1000. # ug to mg
conc_df['TOC_mg C/l'] = conc_df['TOC_mg C/l'] * 1000. # mg to ug

# Get flags. Copy so that later edits act on an independent frame
cols = [i for i in conc_df.columns if i.split('_')[1]=='flag']
lod_df = conc_df[cols].copy()
lod_df.columns = [i.split('_')[0] for i in lod_df.columns]

# Get vals for the parameters that have a flag column
cols = [i for i in conc_df.columns if ((i.split('_')[0] in lod_df.columns) 
                                       and (i.split('_')[1] != 'flag'))]
conc_df = conc_df[cols].copy()
conc_df.columns = [i.split('_')[0] for i in conc_df.columns]

# Rename to the short parameter names used in par_dict
rename_dict = {'SPM':'S.P.M.',
               'TOTN':'n',
               'NH4-N':'nh4',
               'NO3-N':'no3',
               'TOTP':'p',
               'PO4-P':'po4'}
lod_df = lod_df.rename(columns=rename_dict)
conc_df = conc_df.rename(columns=rename_dict)

# Map Excel headings to df cols
par_dict = {'SPM':'S.P.M.',
            'TOC':'TOC',
            'PO4-P':'po4',
            'P-Total':'p',
            'NO3-N':'no3', 
            'NH4-N':'nh4',
            'N-Total':'n',
            'As':'As',
            'Pb':'Pb',
            'Cd':'Cd', 
            'Cu':'Cu',
            'Zn':'Zn',
            'Ni':'Ni',
            'Total Cr':'Cr',
            'Hg':'Hg'}
    
# Map names to stns
names_dict = {'Orkla':29778,
              'Vefsna':29782,
              'Alta':29779,
              'Glomma':29617,
              'Drammenselva':29612,
              u'Numedalslågen':29615,
              'Skienselva':29613,
              'Otra':29614,
              'Orreelva':29783,
              'Vosso':29821}

# Open file and get sheet
wb = load_workbook(filename=temp_path)
ws = wb['7']

# Get row numbers for each label in column B
row_dict = {}
for item in ws['B12':'B%s' % ws.max_row]:
    cell = item[0]
    row_dict[cell.value] = cell.row

# Get col numbers for each heading in row 9. Older openpyxl versions return
# the column letter from cell.column, newer ones the integer index, so 
# normalise to an integer
col_dict = {}
for cell in ws['E9':'AK9'][0]:
    col = cell.column
    if not isinstance(col, int):
        col = column_index_from_string(col)
    col_dict[cell.value] = col

pars = ['Ni', 'Pb', 'NH4-N', 'Total Cr', 'NO3-N', 
        'Zn', 'As', 'Cd', 'P-Total', 'SPM', 'PO4-P', 
        'N-Total', 'Hg', 'Cu', 'TOC']

# Update spreadsheet
for reg, stn_id in names_dict.items():
    # First row of the block for this river
    first_row = row_dict[reg]
    
    for par in pars:
        col = col_dict[par]
        
        # All samples and flags for this station and parameter.
        # Label-based lookup (.ix has been removed from pandas)
        vals = conc_df.loc[stn_id, par_dict[par]]
        flags = lod_df.loc[stn_id, par_dict[par]]

        # Lower average: values flagged '<' are set to zero. Missing values
        # are skipped, as for the other statistics, so a single missing 
        # sample no longer makes the whole average NaN
        is_lod = flags.isin(['<']).values
        lower_avg = vals.mask(is_lod, 0).mean()

        # Number of samples.
        # NOTE(review): len() also counts rows with a missing value. If 
        # missing values can occur, vals.count() may be more appropriate
        n_samp = len(vals)

        # 'Yes' if fewer than 30% of the samples carry a flag
        n_lod = flags.notnull().sum()
        pct_lod = 100 * float(n_lod)/float(n_samp)
        lod_ok = 'Yes' if pct_lod < 30 else 'No'

        # (row offset from the river's label, value to write)
        results = [(0, lower_avg),     # Lower average
                   (1, vals.mean()),   # Upper average
                   (2, vals.min()),    # Min
                   (3, vals.max()),    # Max
                   (4, lod_ok),        # LOD check
                   (5, n_samp),        # N
                   (7, vals.std())]    # Std
        
        for offset, val in results:
            ws.cell(row=first_row + offset, column=col).value = val
        
# Save
wb.save(temp_path)

### 2.10. Sheet 9: Discharge

The notebook [here](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/recalculate_ospar_flows.ipynb) has already recalculated the OSPAR flow data. This can simply be copied manually into the final template from here:

C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet\Recalculate_OSPAR_Flows\Updated