In [1]:
%matplotlib inline
import pandas as pd
import nivapy3 as nivapy
import matplotlib.pyplot as plt

# Apply matplotlib's 'ggplot' style sheet to all figures created in this notebook
plt.style.use('ggplot')

# Tests for annual trends (2018 data)

In 2019 (2018 data year), NIVA took over the trend analysis component of the RID report from NIBIO. We wish to calculate **Mann-Kendall trends** in **concentrations** and **fluxes** based on **annual** data (**median annual** concentrations and annual fluxes) for the following rivers:

 * Glomma
 * Drammenselva
 * Numedalslågen
 * Skienselva
 * Otra
 * Orreelva
 * Orkla
 * Vefsna
 * Altaelva
 
See the e-mail from Cathrine received 11.10.2019 at 12.52 for full details of the time periods and parameters of interest.

In [2]:
# Last data year included in the analysis
year = 2018

# Station IDs for the rivers to analyse, one per line with the river name
stn_ids = [
    29617,  # Glomma
    29612,  # Drammenselva
    29615,  # Numedalslågen
    29613,  # Skienselva
    29614,  # Otra
    29783,  # Orreelva
    29778,  # Orkla
    29782,  # Vefsna
    29779,  # Altaelva
]

## 1. Long-term trends (1990 to 2018)

From Cathrine:

> Long-term trends (1990 - present) for the following parameters (concentrations + loads):  
>  * Water discharge
>  * Ammonium
>  * Nitrate
>  * Tot-N
>  * Phosphate (PO4)
>  * Tot-P
>  * SPM
>  * TOC
>  * SiO2
>  * Cd
>  * Cu
>  * Ni
>  * Pb
>  * Zn

### 1.1. Trends in concentrations

In [3]:
# Concentration columns to test for long-term trends.
# Each name has the form '<parameter>_<unit>'
pars = [
    'NH4-N_µg/l N',
    'NO3-N_µg/l N',
    'TOTN_µg/l N',
    'PO4-P_µg/l P',
    'TOTP_µg/l P',
    'SPM_mg/l',
    'TOC_mg C/l',
    'SiO2_mg SiO2/l',
    'Cd_µg/l',
    'Cu_µg/l',
    'Ni_µg/l',
    'Pb_µg/l',
    'Zn_µg/l',
]

In [4]:
# Read concentration data
conc_csv = r'../../../Results/Loads_CSVs/concs_and_flows_rid_20_1990-%s.csv' % year
conc_df = pd.read_csv(conc_csv)

# Get stations of interest. Take an explicit copy so that adding the 'year'
# column below modifies an independent frame rather than a selection from the
# full dataset (avoids pandas' SettingWithCopyWarning)
conc_df = conc_df.query('station_id in @stn_ids').copy()

# Year from sample date (the first four characters of the date string)
conc_df['year'] = conc_df['sample_date'].str[:4].astype(int)

# Get cols of interest
idx_cols = ['station_id', 'station_code', 'station_name', 'year']
conc_df = conc_df[idx_cols + pars]

# Calculate annual medians for each station
conc_df = conc_df.groupby(idx_cols).median().reset_index()

conc_df.head()

Unnamed: 0,station_id,station_code,station_name,year,NH4-N_µg/l N,NO3-N_µg/l N,TOTN_µg/l N,PO4-P_µg/l P,TOTP_µg/l P,SPM_mg/l,TOC_mg C/l,SiO2_mg SiO2/l,Cd_µg/l,Cu_µg/l,Ni_µg/l,Pb_µg/l,Zn_µg/l
0,29612,BUSEDRA,Drammenselva,1990,,211.5,332.5,1.25,6.0,1.15,,,0.1,1.3,,0.5,4.15
1,29612,BUSEDRA,Drammenselva,1991,,245.0,372.5,2.0,5.5,1.385,2.29,,0.015,1.0,,0.2,4.35
2,29612,BUSEDRA,Drammenselva,1992,21.0,257.5,353.0,1.5,6.5,1.91,2.9,,0.02,0.845,0.565,0.16,2.71
3,29612,BUSEDRA,Drammenselva,1993,13.0,262.5,402.5,1.0,5.0,1.345,2.8,,0.015,0.77,0.635,0.11,2.785
4,29612,BUSEDRA,Drammenselva,1994,16.0,267.5,407.5,2.0,4.5,2.23,3.1,,0.015,0.735,,0.115,3.03


In [5]:
%%capture

# Containers for results
mk_list = []
sen_list = []

# Loop over stations
for stn_id in stn_ids:
    # Get data
    df = conc_df.query('station_id == @stn_id')
    code = df['station_code'].values[0]
    name = df['station_name'].values[0]
    
    # Setup plot
    fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(15,15))
    axes = axes.flatten()
    fig.delaxes(axes[-2])
    fig.delaxes(axes[-1])
    
    # Loop over pars
    for idx, par in enumerate(pars):
        # M-K test
        mk_res = nivapy.stats.mk_test(df, par).set_index('description').T
        mk_res['station_id'] = stn_id
        mk_res['station_code'] = code
        mk_res['station_name'] = name
        mk_res['parameter'] = par
        mk_list.append(mk_res)
        
        # Sen's slope
        sen_res, slp_df = nivapy.stats.sens_slope(df, par, 'year')
        sslp = sen_res.loc['sslp']['value']
        icpt = sen_res.loc['icpt']['value']
        col = slp_df.columns[0]
    
        sen_res = sen_res.set_index('description').T
        sen_res['station_id'] = stn_id
        sen_res['station_code'] = code
        sen_res['station_name'] = name
        sen_res['parameter'] = par
        sen_list.append(sen_res)
        
        # Plot
        slp_df.plot(ax=axes[idx], style='o-', legend=False)
        axes[idx].plot(slp_df.index.values, slp_df.index.values*sslp + icpt, 'k-')  
        par_name, unit = par.split('_')
        if unit == 'None':
            unit = '-'
        axes[idx].set_title('%s [%s]' % (par_name, unit))
        axes[idx].set_xlabel('')
    
    plt.tight_layout()
    
    # Save
    plt_png = r'../../../Results/trends/plots/to_%s/concs_%s_1990-%s.png' % (year, stn_id, year)
    plt.savefig(plt_png, dpi=200)
    plt.close()    

# Build results df for M-K
mk_df = pd.concat(mk_list).reset_index(drop=True)
mk_df.columns.name = ''
mk_df = mk_df[['station_id', 
               'station_code',
               'station_name',
               'parameter', 
               'Variance of test statistic', 
               'M-K test statistic', 
               'Normalised test statistic', 
               'p-value of the significance test', 
               'Type of trend (if present)']]

# Save
mk_csv = r'../../../Results/trends/mk_concs_1990-%s.csv' % year
mk_df.to_csv(mk_csv, index=False, encoding='utf-8')

# Build results df for Sen's slope
sen_df = pd.concat(sen_list).reset_index(drop=True)
sen_df.columns.name = ''
sen_df = sen_df[['station_id', 
                 'station_code',
                 'station_name',
                 'parameter', 
                 'Median slope estimate', 
                 'Estimated intercept', 
                 'Upper bound on slope estimate at specified alpha', 
                 'Type of trend (if present)']]

# Save
sen_csv = r'../../../Results/trends/sens_slp_concs_1990-%s.csv' % year
sen_df.to_csv(sen_csv, index=False, encoding='utf-8')

### 1.2. Trends in fluxes

In [6]:
# Flux columns to test for long-term trends: water discharge
# followed by the annual loads. Each name has the form '<parameter>_<unit>'
pars = [
    'Flow_1000m3/day',
    'NH4-N_tonnes',
    'NO3-N_tonnes',
    'TOTN_tonnes',
    'PO4-P_tonnes',
    'TOTP_tonnes',
    'SPM_tonnes',
    'TOC_tonnes',
    'SiO2_tonnes',
    'Cd_tonnes',
    'Cu_tonnes',
    'Ni_tonnes',
    'Pb_tonnes',
    'Zn_tonnes',
]

In [7]:
# Read flux data
flux_csv = r'../../../Results/Loads_CSVs/loads_and_flows_rid_20_1990-%s.csv' % year

# Cols of interest: station identifiers and year, plus the flux parameters
idx_cols = ['station_id', 'station_code', 'station_name', 'year']

flux_df = (
    pd.read_csv(flux_csv)
    # Tidy: rename the discharge column to the name used in 'pars'
    .rename(columns={'mean_q_1000m3/day':'Flow_1000m3/day'})
    # Get stations of interest
    .query('station_id in @stn_ids')
    # Get cols of interest
    [idx_cols + pars]
    # Ensure chronological order within each station. The M-K test in this
    # notebook is called without a time column, so it presumably relies on
    # row order - TODO confirm against nivapy.stats.mk_test
    .sort_values(['station_id', 'year'])
)

flux_df.head()

Unnamed: 0,station_id,station_code,station_name,year,Flow_1000m3/day,NH4-N_tonnes,NO3-N_tonnes,TOTN_tonnes,PO4-P_tonnes,TOTP_tonnes,SPM_tonnes,TOC_tonnes,SiO2_tonnes,Cd_tonnes,Cu_tonnes,Ni_tonnes,Pb_tonnes,Zn_tonnes
0,29612,BUSEDRA,Drammenselva,1990,25891.13466,181.076922,1975.364898,3227.809787,18.656113,56.204185,11924.057753,32278.558491,23775.827824,1.166017,12.342989,5.774269,4.616598,41.274753
1,29612,BUSEDRA,Drammenselva,1991,19274.318392,134.800359,1727.816303,2672.233338,12.374199,38.000236,11583.424962,16110.439028,17699.605735,0.144235,8.286101,4.29858,2.084903,34.784293
2,29612,BUSEDRA,Drammenselva,1992,22209.901227,163.311239,2262.795669,3365.366651,12.339225,46.277875,14676.547822,23573.589162,20451.228998,0.32769,6.644074,4.685295,1.264921,26.052045
3,29612,BUSEDRA,Drammenselva,1993,28155.888465,140.69709,2508.858689,4076.006047,13.463221,52.699311,17447.305224,28212.455887,25855.551144,0.301564,7.881519,6.506741,1.588111,25.783248
4,29612,BUSEDRA,Drammenselva,1994,27384.945933,149.020724,3577.240539,5356.639363,16.705258,58.529301,26826.724674,30568.086091,25147.594651,0.25026,8.590654,5.973979,2.637257,33.55135


In [8]:
%%capture

# Containers for results
mk_list = []
sen_list = []

# Loop over stations
for stn_id in stn_ids:
    # Get data
    df = flux_df.query('station_id == @stn_id')
    code = df['station_code'].values[0]
    name = df['station_name'].values[0]
    
    # Setup plot
    fig, axes = plt.subplots(nrows=5, ncols=3, figsize=(15,15))
    axes = axes.flatten()
    fig.delaxes(axes[-1])
    
    # Loop over pars
    for idx, par in enumerate(pars):
        # M-K test
        mk_res = nivapy.stats.mk_test(df, par).set_index('description').T
        mk_res['station_id'] = stn_id
        mk_res['station_code'] = code
        mk_res['station_name'] = name
        mk_res['parameter'] = par
        mk_list.append(mk_res)
        
        # Sen's slope
        sen_res, slp_df = nivapy.stats.sens_slope(df, par, 'year')
        sslp = sen_res.loc['sslp']['value']
        icpt = sen_res.loc['icpt']['value']
        col = slp_df.columns[0]
    
        sen_res = sen_res.set_index('description').T
        sen_res['station_id'] = stn_id
        sen_res['station_code'] = code
        sen_res['station_name'] = name
        sen_res['parameter'] = par
        sen_list.append(sen_res)
        
        # Plot
        slp_df.plot(ax=axes[idx], style='o-', legend=False)
        axes[idx].plot(slp_df.index.values, slp_df.index.values*sslp + icpt, 'k-')  
        par_name, unit = par.split('_')
        if unit == 'None':
            unit = '-'
        axes[idx].set_title('%s [%s]' % (par_name, unit))
        axes[idx].set_xlabel('')
    
    plt.tight_layout()
    
    # Save
    plt_png = r'../../../Results/trends/plots/to_%s/fluxes_%s_1990-%s.png' % (year, stn_id, year)
    plt.savefig(plt_png, dpi=200)
    plt.close()    

# Build results df for M-K
mk_df = pd.concat(mk_list).reset_index(drop=True)
mk_df.columns.name = ''
mk_df = mk_df[['station_id', 
               'station_code',
               'station_name',
               'parameter', 
               'Variance of test statistic', 
               'M-K test statistic', 
               'Normalised test statistic', 
               'p-value of the significance test', 
               'Type of trend (if present)']]

# Save
mk_csv = r'../../../Results/trends/mk_fluxes_1990-%s.csv' % year
mk_df.to_csv(mk_csv, index=False, encoding='utf-8')

# Build results df for Sen's slope
sen_df = pd.concat(sen_list).reset_index(drop=True)
sen_df.columns.name = ''
sen_df = sen_df[['station_id', 
                 'station_code',
                 'station_name',
                 'parameter', 
                 'Median slope estimate', 
                 'Estimated intercept', 
                 'Upper bound on slope estimate at specified alpha', 
                 'Type of trend (if present)']]

# Save
sen_csv = r'../../../Results/trends/sens_slp_fluxes_1990-%s.csv' % year
sen_df.to_csv(sen_csv, index=False, encoding='utf-8')

## 2. Short-term trends (2004 to 2018)

From Cathrine:

> Short-term trends (2004 - present) for the following parameters (concentrations + loads):
>  * Cd
>  * Cu
>  * Ni
>  * Pb
>  * Zn

### 2.1. Trends in concentrations

In [9]:
# Metal concentration columns to test for short-term trends
pars = [
    'Cd_µg/l',
    'Cu_µg/l',
    'Ni_µg/l',
    'Pb_µg/l',
    'Zn_µg/l',
]

# Keep only rows from 2004 onwards.
# NOTE: this replaces conc_df, so the pre-2004 rows are no longer
# available under that name after this cell has run
conc_df = conc_df[conc_df['year'] >= 2004]

In [10]:
%%capture

# Containers for results
mk_list = []
sen_list = []

# Loop over stations
for stn_id in stn_ids:
    # Get data
    df = conc_df.query('station_id == @stn_id')
    code = df['station_code'].values[0]
    name = df['station_name'].values[0]
    
    # Setup plot
    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15,6))
    axes = axes.flatten()
    fig.delaxes(axes[-1])
    
    # Loop over pars
    for idx, par in enumerate(pars):
        # M-K test
        mk_res = nivapy.stats.mk_test(df, par).set_index('description').T
        mk_res['station_id'] = stn_id
        mk_res['station_code'] = code
        mk_res['station_name'] = name
        mk_res['parameter'] = par
        mk_list.append(mk_res)
        
        # Sen's slope
        sen_res, slp_df = nivapy.stats.sens_slope(df, par, 'year')
        sslp = sen_res.loc['sslp']['value']
        icpt = sen_res.loc['icpt']['value']
        col = slp_df.columns[0]
    
        sen_res = sen_res.set_index('description').T
        sen_res['station_id'] = stn_id
        sen_res['station_code'] = code
        sen_res['station_name'] = name
        sen_res['parameter'] = par
        sen_list.append(sen_res)
        
        # Plot
        slp_df.plot(ax=axes[idx], style='o-', legend=False)
        axes[idx].plot(slp_df.index.values, slp_df.index.values*sslp + icpt, 'k-')  
        par_name, unit = par.split('_')
        if unit == 'None':
            unit = '-'
        axes[idx].set_title('%s [%s]' % (par_name, unit))
        axes[idx].set_xlabel('')
    
    plt.tight_layout()
    
    # Save
    plt_png = r'../../../Results/trends/plots/to_%s/metal_concs_%s_2004-%s.png' % (year, stn_id, year)
    plt.savefig(plt_png, dpi=200)
    plt.close()    

# Build results df for M-K
mk_df = pd.concat(mk_list).reset_index(drop=True)
mk_df.columns.name = ''
mk_df = mk_df[['station_id', 
               'station_code',
               'station_name',
               'parameter', 
               'Variance of test statistic', 
               'M-K test statistic', 
               'Normalised test statistic', 
               'p-value of the significance test', 
               'Type of trend (if present)']]

# Save
mk_csv = r'../../../Results/trends/mk_metal_concs_2004-%s.csv' % year
mk_df.to_csv(mk_csv, index=False, encoding='utf-8')

# Build results df for Sen's slope
sen_df = pd.concat(sen_list).reset_index(drop=True)
sen_df.columns.name = ''
sen_df = sen_df[['station_id', 
                 'station_code',
                 'station_name',
                 'parameter', 
                 'Median slope estimate', 
                 'Estimated intercept', 
                 'Upper bound on slope estimate at specified alpha', 
                 'Type of trend (if present)']]

# Save
sen_csv = r'../../../Results/trends/sens_slp_metal_concs_2004-%s.csv' % year
sen_df.to_csv(sen_csv, index=False, encoding='utf-8')

### 2.2. Trends in fluxes

In [11]:
# Metal flux columns to test for short-term trends
pars = [
    'Cd_tonnes',
    'Cu_tonnes',
    'Ni_tonnes',
    'Pb_tonnes',
    'Zn_tonnes',
]

# Keep only rows from 2004 onwards.
# NOTE: this replaces flux_df, so the pre-2004 rows are no longer
# available under that name after this cell has run
flux_df = flux_df[flux_df['year'] >= 2004]

In [12]:
%%capture

# Containers for results
mk_list = []
sen_list = []

# Loop over stations
for stn_id in stn_ids:
    # Get data
    df = flux_df.query('station_id == @stn_id')
    code = df['station_code'].values[0]
    name = df['station_name'].values[0]
    
    # Setup plot
    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15,6))
    axes = axes.flatten()
    fig.delaxes(axes[-1])
    
    # Loop over pars
    for idx, par in enumerate(pars):
        # M-K test
        mk_res = nivapy.stats.mk_test(df, par).set_index('description').T
        mk_res['station_id'] = stn_id
        mk_res['station_code'] = code
        mk_res['station_name'] = name
        mk_res['parameter'] = par
        mk_list.append(mk_res)
        
        # Sen's slope
        sen_res, slp_df = nivapy.stats.sens_slope(df, par, 'year')
        sslp = sen_res.loc['sslp']['value']
        icpt = sen_res.loc['icpt']['value']
        col = slp_df.columns[0]
    
        sen_res = sen_res.set_index('description').T
        sen_res['station_id'] = stn_id
        sen_res['station_code'] = code
        sen_res['station_name'] = name
        sen_res['parameter'] = par
        sen_list.append(sen_res)
        
        # Plot
        slp_df.plot(ax=axes[idx], style='o-', legend=False)
        axes[idx].plot(slp_df.index.values, slp_df.index.values*sslp + icpt, 'k-')  
        par_name, unit = par.split('_')
        if unit == 'None':
            unit = '-'
        axes[idx].set_title('%s [%s]' % (par_name, unit))
        axes[idx].set_xlabel('')
    
    plt.tight_layout()
    
    # Save
    plt_png = r'../../../Results/trends/plots/to_%s/metal_fluxes_%s_2004-%s.png' % (year, stn_id, year)
    plt.savefig(plt_png, dpi=200)
    plt.close()    

# Build results df for M-K
mk_df = pd.concat(mk_list).reset_index(drop=True)
mk_df.columns.name = ''
mk_df = mk_df[['station_id', 
               'station_code',
               'station_name',
               'parameter', 
               'Variance of test statistic', 
               'M-K test statistic', 
               'Normalised test statistic', 
               'p-value of the significance test', 
               'Type of trend (if present)']]

# Save
mk_csv = r'../../../Results/trends/mk_metal_fluxes_2004-%s.csv' % year
mk_df.to_csv(mk_csv, index=False, encoding='utf-8')

# Build results df for Sen's slope
sen_df = pd.concat(sen_list).reset_index(drop=True)
sen_df.columns.name = ''
sen_df = sen_df[['station_id', 
                 'station_code',
                 'station_name',
                 'parameter', 
                 'Median slope estimate', 
                 'Estimated intercept', 
                 'Upper bound on slope estimate at specified alpha', 
                 'Type of trend (if present)']]

# Save
sen_csv = r'../../../Results/trends/sens_slp_metal_fluxes_2004-%s.csv' % year
sen_df.to_csv(sen_csv, index=False, encoding='utf-8')