In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sn
import mpld3
import pandas as pd
import imp
import numpy as np
import os
from sqlalchemy import create_engine
# Use seaborn's 'notebook' plotting context for any figures drawn below
sn.set_context('notebook')

# Connect to db
# The RESA2 helper module is loaded straight from an absolute local path, so
# this cell only runs on a machine with the same directory layout.
# NOTE(review): the `imp` module is deprecated in Python 3 and removed in 3.12;
# importlib would be needed if this notebook is ever ported.
resa2_basic_path = (r'C:\Data\James_Work\Staff\Heleen_d_W\ICP_Waters\Upload_Template'
                    r'\useful_resa2_code.py')
resa2_basic = imp.load_source('useful_resa2_code', resa2_basic_path)
# The result of connect_to_resa2() is unpacked into `engine` and `conn`;
# `engine` is what later cells pass to pd.read_sql and to the rid helpers.
engine, conn = resa2_basic.connect_to_resa2()

# Import custom RID functions
# Loaded the same way (absolute path); later cells call rid.estimate_loads
# and rid.get_flow_volumes from this module.
rid_func_path = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
                 r'\Python\rid\notebooks\useful_rid_code.py')
rid = imp.load_source('useful_rid_code', rid_func_path)

# RID long-term averages

Eva needs some long-term averages calculated for the RID programme (see e-mail received 09/10/2017 at 11:32).

## 1. Average annual discharge

**Note:** Eva's Word document specifies annual discharges for the *hydrological* stations, but the values provided as an example for 2015 are actually area-scaled to match the *water chemistry* sites. The code below gets both sets of averages.

In [None]:
# Read stations: keep only rows of the 2016 summary sheet whose rid_group is
# "rid_11". The explicit .copy() makes stn_df an independent frame, so adding
# and deleting columns below cannot trigger pandas' SettingWithCopyWarning.
# NOTE(review): `sheetname` was renamed `sheet_name` in pandas 0.21 - update
# the keyword if the pandas version is upgraded.
in_xlsx = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\rid_conc_and_loads_summaries_2016.xlsx')
stn_df = pd.read_excel(in_xlsx, sheetname='loads_all_sites_2016')
stn_df = stn_df.query('rid_group == "rid_11"')
stn_df = stn_df[['station_id', 'station_code',
                 'station_name', 'mean_q_1000m3/day']].copy()

# Convert q units for chem stations (column is in 1000 m3/day; result in m3/s)
stn_df['chem_mean_q'] = stn_df['mean_q_1000m3/day']*1000/(24*60*60)
del stn_df['mean_q_1000m3/day']

# Water chemistry station IDs used by BOTH queries below. Defined once so the
# discharge query and the linking-table query can never drift apart.
rid_11_ids = [29615, 29821, 29783, 29613, 29614,
              29782, 36225, 29617, 29779, 29612, 29778]
rid_11_id_str = ', '.join(str(stn_id) for stn_id in rid_11_ids)

# Get annual averages: mean 2016 discharge for each hydrological (discharge)
# station linked to one of the chemistry stations
sql = ("SELECT dis_station_id, AVG(xvalue) AS hydrol_mean_q "
       "FROM resa2.discharge_values "
       "WHERE dis_station_id IN ( "
       "  SELECT dis_station_id FROM resa2.default_dis_stations "
       "  WHERE station_id IN (%s)) "
       "AND TO_CHAR(xdate, 'YYYY') = '2016' "
       "GROUP BY dis_station_id, TO_CHAR(xdate, 'YYYY') "
       "ORDER BY AVG(xvalue)" % rid_11_id_str)
q_df = pd.read_sql(sql, engine)

# Linking table (chemistry station_id <-> dis_station_id)
sql = ("SELECT * FROM resa2.default_dis_stations "
       "WHERE station_id IN (%s)" % rid_11_id_str)
lnk_df = pd.read_sql(sql, engine)

# Join: discharge means onto the linking table, then onto the station list.
# Left joins keep every station even if no discharge value was found.
q_df = pd.merge(lnk_df, q_df, how='left', on='dis_station_id')
df = pd.merge(stn_df, q_df, how='left', on='station_id')
del df['dis_station_id']

# Save
out_path = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
            r'\Results\Loads_CSVs\mean_flows_2016.csv')
df.to_csv(out_path, encoding='utf-8')

df

## 2. Long-term discharge, N, P and SPM

In [2]:
# Read stations (every site listed on the 2016 summary sheet)
in_xlsx = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\rid_conc_and_loads_summaries_2016.xlsx')
stn_df = pd.read_excel(in_xlsx, sheetname='loads_all_sites_2016')
stn_df = stn_df[['station_id', 'station_code', 'station_name']]

# Period of interest (both years inclusive)
st_yr, end_yr = 1990, 2015

# Pars of interest
par_list = ['SPM', 'TOTP', 'TOTN']

# Container for results (one dataframe per station-year)
loads_list = []

# Loop over sites
for stn_id in stn_df['station_id'].values:
    # Loop over years
    for year in range(st_yr, end_yr+1):
        # Parenthesised single-argument form: prints exactly the same text on
        # Python 2 and is also valid syntax on Python 3
        print('Processing Station ID %s for %s' % (stn_id, year))

        # Get loads
        l_df = rid.estimate_loads(stn_id, par_list,
                                  year, engine,
                                  infer_missing=False)

        # Station-years for which estimate_loads returns None are skipped
        if l_df is not None:
            # Name and reset index
            l_df.index.name = 'station_id'
            l_df.reset_index(inplace=True)

            # Add year
            l_df['year'] = year

            # Add to output
            loads_list.append(l_df)

# Concatenate to new df
lds_ts = pd.concat(loads_list, axis=0)

# Build multi-index
lds_ts.set_index(['station_id', 'year'], inplace=True)

# Save output
out_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\loads_ts_155_%s-%s.csv' % (st_yr, end_yr))
lds_ts.to_csv(out_csv)

Processing Station ID 29615 for 1990
Processing Station ID 29615 for 1991
Processing Station ID 29615 for 1992
Processing Station ID 29615 for 1993
Processing Station ID 29615 for 1994
Processing Station ID 29615 for 1995
Processing Station ID 29615 for 1996
Processing Station ID 29615 for 1997
Processing Station ID 29615 for 1998
Processing Station ID 29615 for 1999
    The database contains duplicated values for some station-date-parameter combinations.
    Only the most recent values will be used, but you should check the repeated values are not errors.
    The duplicated entries are returned in a separate dataframe.

Processing Station ID 29615 for 2000
    The database contains duplicated values for some station-date-parameter combinations.
    Only the most recent values will be used, but you should check the repeated values are not errors.
    The duplicated entries are returned in a separate dataframe.

Processing Station ID 29615 for 2001
    The database contains duplicated v

In [4]:
# Long-term mean of every column for each station. The (station_id, year)
# index is flattened first, so the 'year' column is averaged as well.
lds_ts = lds_ts.reset_index().groupby('station_id').mean()
lds_ts

Unnamed: 0_level_0,year,SPM_tonnes,TOTN_tonnes,TOTP_tonnes
station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
29612,2002.500000,43390.496262,4641.379860,88.832571
29613,2002.500000,11005.791353,2836.818241,37.981787
29614,2002.500000,7932.805755,1205.479074,20.775758
29615,2002.500000,32704.781486,1658.261572,51.188094
29617,2002.500000,245087.766731,13485.267594,446.006882
29778,2002.500000,7757.686599,614.288590,12.922423
29779,2002.500000,19808.119937,597.548779,33.123952
29781,2002.500000,4136.463297,913.160638,11.338346
29782,2002.500000,17265.586529,859.480705,26.416960
29783,2002.500000,2256.785724,298.091631,13.846385


In [8]:
# Station -> OSPAR region lookup, taken from the 2016 summary sheet
in_xlsx = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\rid_conc_and_loads_summaries_2016.xlsx')
stn_df = pd.read_excel(
    in_xlsx, sheetname='loads_all_sites_2016')[['station_id', 'ospar_region']]

# Restore station_id as an ordinary column, then attach the station means to
# the lookup (left join: every station on the sheet is kept)
lds_ts = lds_ts.reset_index()
df = stn_df.merge(lds_ts, how='left', on='station_id')

Unnamed: 0,station_id,ospar_region,year,SPM_tonnes,TOTN_tonnes,TOTP_tonnes
0,29615,SKAGERAK,2002.5,32704.781486,1658.261572,51.188094
1,29821,NORTH SEA,2002.84,2141.884674,628.25342,12.151533
2,29783,NORTH SEA,2002.5,2256.785724,298.091631,13.846385
3,29613,SKAGERAK,2002.5,11005.791353,2836.818241,37.981787
4,29614,SKAGERAK,2002.5,7932.805755,1205.479074,20.775758


In [9]:
# Total the per-station long-term means within each OSPAR region.
# station_id and year are numeric columns, so they are summed as well.
region_groups = df.groupby('ospar_region')
df = region_groups.sum()
df

Unnamed: 0_level_0,station_id,year,SPM_tonnes,TOTN_tonnes,TOTP_tonnes
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LOFOTEN-BARENTS SEA,840582,55940.707397,86873.330907,4530.438232,215.69018
NORTH SEA,1770205,117935.152028,105733.182398,14099.97255,320.554579
NORWEGIAN SEA2,1169467,77953.431061,185222.376706,9166.511393,305.877012
SKAGERAK,874799,57966.247576,381461.333323,30872.32022,793.024911


In [10]:
# Get flow data for the same period as the loads. st_yr/end_yr are the bounds
# used by the loads loop and in the output file name, so the flow averages
# always cover the years the file name claims (previously hard-coded 1990, 2015).
q_df = rid.get_flow_volumes(stn_df, st_yr, end_yr, engine)

q_df.head()

Unnamed: 0,station_id,year,mean_q_1000m3/day
0,29615,1990,10119.152564
1,29615,1991,7884.893256
2,29615,1992,7838.387948
3,29615,1993,9867.524888
4,29615,1994,10823.288734


In [11]:
# Express flows in m3/s (source column is named 'mean_q_1000m3/day') and
# discard the original column
flow_m3s = q_df['mean_q_1000m3/day']*1000/(24*60*60)
q_df['mean_q_m3/s'] = flow_m3s
q_df.drop('mean_q_1000m3/day', axis=1, inplace=True)

# Long-term mean per station, with station_id restored as an ordinary column
q_df = q_df.groupby('station_id').mean().reset_index()

# Attach OSPAR regions (left join keeps every station in stn_df) ...
q_df = stn_df.merge(q_df, how='left', on='station_id')

# ... and total the station means within each region
q_df = q_df.groupby('ospar_region').sum()

q_df

Unnamed: 0_level_0,station_id,year,mean_q_m3/s
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LOFOTEN-BARENTS SEA,840582,56070.0,934.26817
NORTH SEA,1770205,118147.5,1804.426501
NORWEGIAN SEA2,1169467,78097.5,1622.988498
SKAGERAK,874799,58072.5,2084.599583


In [13]:
# Tidy: the regional sums of station_id and year carry no meaning, so remove
# them from both tables. drop(..., errors='ignore') is used instead of `del`
# so that re-running this cell does not raise KeyError once the columns have
# already been removed.
df = df.drop(['station_id', 'year'], axis=1, errors='ignore')
q_df = q_df.drop(['station_id', 'year'], axis=1, errors='ignore')

# Join regional flows and regional loads on their shared ospar_region index
df2 = q_df.join(df)

df2

Unnamed: 0_level_0,mean_q_m3/s,SPM_tonnes,TOTN_tonnes,TOTP_tonnes
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LOFOTEN-BARENTS SEA,934.26817,86873.330907,4530.438232,215.69018
NORTH SEA,1804.426501,105733.182398,14099.97255,320.554579
NORWEGIAN SEA2,1622.988498,185222.376706,9166.511393,305.877012
SKAGERAK,2084.599583,381461.333323,30872.32022,793.024911


In [14]:
# Write the joined regional table (flow plus loads) to CSV; the file name
# carries the start and end years of the averaging period
yr_span = (st_yr, end_yr)
out_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\long_term_avgs_155_%s-%s.csv' % yr_span)
df2.to_csv(out_csv)

## 3. Long-term metals

In [15]:
# Read stations (every site listed on the 2016 summary sheet)
in_xlsx = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\rid_conc_and_loads_summaries_2016.xlsx')
stn_df = pd.read_excel(in_xlsx, sheetname='loads_all_sites_2016')
stn_df = stn_df[['station_id', 'station_code', 'station_name']]

# Period of interest (both years inclusive)
st_yr, end_yr = 1990, 2015

# Pars of interest
par_list = ['As', 'Pb', 'Cd', 'Cu', 'Zn', 'Ni', 'Cr', 'Hg']

# Container for results (one dataframe per station-year)
loads_list = []

# Loop over sites
for stn_id in stn_df['station_id'].values:
    # Loop over years
    for year in range(st_yr, end_yr+1):
        # Parenthesised single-argument form: prints exactly the same text on
        # Python 2 and is also valid syntax on Python 3
        print('Processing Station ID %s for %s' % (stn_id, year))

        # Get loads
        l_df = rid.estimate_loads(stn_id, par_list,
                                  year, engine,
                                  infer_missing=False)

        # Station-years for which estimate_loads returns None are skipped
        if l_df is not None:
            # Name and reset index
            l_df.index.name = 'station_id'
            l_df.reset_index(inplace=True)

            # Add year
            l_df['year'] = year

            # Add to output
            loads_list.append(l_df)

# Concatenate to new df
lds_ts = pd.concat(loads_list, axis=0)

# Build multi-index
lds_ts.set_index(['station_id', 'year'], inplace=True)

# Save output
out_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\metals_155_%s-%s.csv' % (st_yr, end_yr))
lds_ts.to_csv(out_csv)

Processing Station ID 29615 for 1990
Processing Station ID 29615 for 1991
Processing Station ID 29615 for 1992
Processing Station ID 29615 for 1993
Processing Station ID 29615 for 1994
Processing Station ID 29615 for 1995
Processing Station ID 29615 for 1996
Processing Station ID 29615 for 1997
Processing Station ID 29615 for 1998
Processing Station ID 29615 for 1999
Processing Station ID 29615 for 2000
Processing Station ID 29615 for 2001
Processing Station ID 29615 for 2002
Processing Station ID 29615 for 2003
Processing Station ID 29615 for 2004
    The database contains duplicated values for some station-date-parameter combinations.
    Only the most recent values will be used, but you should check the repeated values are not errors.
    The duplicated entries are returned in a separate dataframe.

Processing Station ID 29615 for 2005
Processing Station ID 29615 for 2006
Processing Station ID 29615 for 2007
Processing Station ID 29615 for 2008
Processing Station ID 29615 for 2009
P

In [16]:
# Long-term mean of every column per station (the 'year' column is averaged
# too), with station_id restored as an ordinary column ready for the join
lds_ts = lds_ts.reset_index().groupby('station_id').mean().reset_index()

# Station -> OSPAR region lookup, taken from the 2016 summary sheet
in_xlsx = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\rid_conc_and_loads_summaries_2016.xlsx')
stn_df = pd.read_excel(
    in_xlsx, sheetname='loads_all_sites_2016')[['station_id', 'ospar_region']]

# Attach the station means to the lookup (left join keeps every station),
# then total them within each region. station_id and year are summed as well.
df = stn_df.merge(lds_ts, how='left', on='station_id')
df = df.groupby('ospar_region').sum()
df

Unnamed: 0_level_0,station_id,year,As_tonnes,Cd_tonnes,Cr_tonnes,Cu_tonnes,Hg_kg,Ni_tonnes,Pb_tonnes,Zn_tonnes
ospar_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
LOFOTEN-BARENTS SEA,840582,55940.707397,4.211349,0.244902,26.416515,36.408584,19.556438,54.807201,3.994287,49.92751
NORTH SEA,1770205,117935.152028,4.381104,0.918467,12.242974,32.924906,33.544577,15.352847,12.0581,163.999114
NORWEGIAN SEA2,1169467,77953.431061,6.128643,0.787241,36.426303,72.788657,41.821386,33.314691,8.901926,189.430761
SKAGERAK,874799,57966.247576,11.954553,1.902621,22.745671,92.659966,182.882437,42.557585,28.222421,391.389614


In [17]:
# Write the regional metals table to CSV; the file name carries the start and
# end years of the averaging period
yr_span = (st_yr, end_yr)
out_csv = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
           r'\Results\Loads_CSVs\metals_lta_155_%s-%s.csv' % yr_span)
df.to_csv(out_csv)