In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import seaborn as sn
import statsmodels.formula.api as smf
from IPython.display import Image

# Apply matplotlib's built-in 'ggplot' style sheet to all figures in this notebook
plt.style.use('ggplot')

# MARTINI - Process Swedish river data

This notebook generates a dataset of riverine inputs to Skagerrak from 5 major Swedish rivers. It builds upon the data exploration originally described in [this notebook](http://nbviewer.jupyter.org/github/JamesSample/martini/blob/master/notebooks/water_chem.ipynb). The aim is to generate a complete dataset of daily nutrient concentrations covering the period from 2015 to 2017 inclusive. The following parameters are of interest: NH4, NO3, TON, Tot-N, SRP, TOP, Tot-P, DOC and TOC. These basic quantities can then be further subdivided according to the requirements of the marine model.

The workflow closely follows that used for the Norwegian data - see [this notebook](http://nbviewer.jupyter.org/github/JamesSample/martini/blob/master/notebooks/process_norway_chem.ipynb) for details.

## 1. List Swedish rivers

In [2]:
# Read Andre's list of rivers
riv_xlsx = r'../../../andre_river_data/andre_martini_rivers_jes.xlsx'
riv_df = pd.read_excel(riv_xlsx, sheet_name='rivers')

# Filter to Sweden: keep rows whose MARTINI code begins with 's'.
# na=False makes rows with a missing code count as non-matching, so the
# boolean mask passed to .loc never contains NaN
riv_df = riv_df.loc[riv_df['martini_code'].str.startswith('s', na=False)]

riv_df

Unnamed: 0,martini_id,station_name,resa_id,vannmiljo_id,lat,lon,martini_code
36,1,Göta Älv,,,57.6896,11.8974,smca108
37,2,Göta Älv Kungälv,,,57.7947,11.8386,smca108
38,3,Bäveån,,,58.3445,11.9107,smca109
39,4,Örekilsälven,,,58.4377,11.6832,smca110
40,5,Strömsån,,,58.9395,11.17,smca111
41,6,Enningdalsälven,,,58.9818,11.4742,smca112


## 2. Tidy/aggregate chemistry data

The code below repeats some processing from the previous notebook to create a tidy dataset for the subsequent analysis.

In [3]:
# Read tidied MVM data back from csv for speed
mvm_csv = r'../../../mvm_export/slu_mvm_data.csv'
wc_df = pd.read_csv(mvm_csv)

# Convert dates ignoring times
wc_df['sample_date'] = pd.to_datetime(wc_df['sample_date']).dt.date

# Remove pars with v. limited data. Copy the result so that the column
# assignments below act on an independent frame (avoids chained-assignment
# warnings)
wc_df = wc_df.query('parameter not in ("Org-N", "NO2-N")').copy()

# Aggregate the different Tot-N variants as N-TOT
totn_pars = ['Tot-N_ps', 'Tot-N_summa', 'Tot-N_TNb']
wc_df['parameter'] = np.where(wc_df['parameter'].isin(totn_pars), 'N-TOT', wc_df['parameter'])

# Rename pars. The patterns are literal substrings, so regex=False is passed
# explicitly: the '+' in 'NO2+NO3-N' then needs no escaping and the result
# does not depend on the pandas version's default for `regex`
par_renames = [
    ('PO4-P', 'SRP'),
    ('NO2+NO3-N', 'N-NO3'),
    ('NH4-N', 'N-NH4'),
    ('Tot-P', 'P-TOT'),
]
for old_name, new_name in par_renames:
    wc_df['parameter'] = wc_df['parameter'].str.replace(old_name, new_name, regex=False)

# Merge cols
wc_df['par_unit'] = wc_df['parameter'] + '_' + wc_df['unit']

# Average duplicates. Only 'value' is averaged; selecting it explicitly avoids
# relying on pandas silently dropping the non-numeric 'parameter' and 'unit'
# columns, which newer pandas versions no longer do
idx_cols = ['mvm_id', 'station_name', 'lon', 'lat', 'sample_date', 'par_unit']
wc_df = wc_df.groupby(idx_cols)['value'].mean().reset_index()
wc_df.head()

Unnamed: 0,mvm_id,station_name,lon,lat,sample_date,par_unit,value
0,31,Bäveån Uddevalla,11.939269,58.347117,2000-01-14,N-NH4_µg/l,36.0
1,31,Bäveån Uddevalla,11.939269,58.347117,2000-01-14,N-NO3_µg/l,430.0
2,31,Bäveån Uddevalla,11.939269,58.347117,2000-01-14,N-TOT_µg/l,917.0
3,31,Bäveån Uddevalla,11.939269,58.347117,2000-01-14,P-TOT_µg/l,68.0
4,31,Bäveån Uddevalla,11.939269,58.347117,2000-01-14,SRP_µg/l,21.0


## 3. Concentration-discharge relationships

### 3.1. Regression relationships

The regression model and bias correction factors used in the code below are described in [Section 3 of the Norwegian notebook](http://nbviewer.jupyter.org/github/JamesSample/martini/blob/master/notebooks/process_norway_chem.ipynb#3.-Concentration-discharge-relationships).

### 3.2. Calculate daily Tot-N, Tot-P and TOC

The code below uses the equations from Section 3.1 to estimate daily Tot-N, Tot-P and TOC from discharge.

In [None]:
# Loop over sites
# NOTE(review): `q_df` and `reg_dict` are not defined in the cells shown above
# and must already exist when this cell runs. The code assumes `q_df` has a
# 'flow_m3/s' column and that `reg_dict` is keyed by (mvm_id, par_unit) and
# holds fitted regression results - confirm before running
data_dict = {}
for mvm_id in wc_df['mvm_id'].unique():
    # Get stn data
    df = wc_df.query('mvm_id == @mvm_id')

    # Restructure chem to wide format: indexed by sample date, one column per par_unit
    df = df.sort_values(['sample_date', 'par_unit'])
    df = df.set_index(idx_cols)
    df = df.unstack('par_unit').reset_index().sort_values('sample_date')
    df.index = df['sample_date']
    del df['mvm_id'], df['station_name'], df['lon'], df['lat'], df['sample_date']
    df.columns = df.columns.get_level_values(1)

    # Get list of chem cols
    # NOTE(review): 42954 is assumed to be an mvm_id - confirm
    if mvm_id == 42954:
        # No TOC
        chem_cols = ['N-TOT_µg/l', 'P-TOT_µg/l']
    else:
        chem_cols = ['N-TOT_µg/l', 'P-TOT_µg/l', 'TOC_mg/l']

    # Get just cols of interest
    df = df[chem_cols]

    # Join to Q
    df = q_df.join(df, how='left')

    # Loop over chem data
    for col in chem_cols:
        # Get regression result
        res = reg_dict[(mvm_id, col)]

        # Calc C = 10^intercept * Q^slope. np.asarray gives positional access
        # to the first two fitted parameters whether `params` is a
        # label-indexed Series or a plain array
        intercept, slope = np.asarray(res.params)[:2]
        concs = (10**intercept)*(df['flow_m3/s']**slope)

        # Back-transform with the bias correction factor from Section 3.1
        # (2.651 is approximately ln(10)^2 / 2)
        alpha = np.exp(2.651*(np.asarray(res.resid)**2).mean())
        concs = alpha*concs

        # Update series. Modelled values replace the whole column, including
        # any observed values; use df[col].fillna(concs) instead to keep
        # observations and only gap-fill
        df[col] = concs

    # Add to results (once per station, after all parameters are estimated)
    data_dict[mvm_id] = df