In [1]:
import pandas as pd
import numpy as np
import imp
from sqlalchemy import create_engine

# Add monitored loads to RESA

The code in this notebook takes the loads calculated for monitored stations (from [here](http://nbviewer.jupyter.org/github/JamesSample/rid/blob/master/notebooks/rid_working_2016-17.ipynb)) and stores them in the database for future reference.

## 1. Read data and restructure

In [2]:
# Year of interest. Used to build the input CSV file names and assigned as
# the 'year' of the "all sites" loads
year = 2016

# Time series ID (hangover from Tore's code). Written to the time_series_id
# column of every row prepared for resa2.RID_MONITORED_LOADS
ts_id = 11

In [3]:
# Folder containing the loads/flows CSVs
csv_fold = (r'C:\Data\James_Work\Staff\Oeyvind_K\Elveovervakingsprogrammet'
            r'\Results\Loads_CSVs')

# All sites for year
in_csv = csv_fold + r'\loads_and_flows_all_sites_%s.csv' % year
all_df = pd.read_csv(in_csv)

# RID_11 1990 to year
in_csv = csv_fold + r'\loads_and_flows_rid_11_1990-%s.csv' % year
rid11_df = pd.read_csv(in_csv)

# Check that results for RID_11 are identical in both frames for year.
# Both subsets are indexed and sorted by station_id so that the comparison
# is label-aligned and does not depend on the row order in the CSVs
df1 = (all_df.query('rid_group == "rid_11"')
             .set_index('station_id', drop=False)
             .sort_index())
df2 = (rid11_df.query('year == @year')
               .drop('year', axis=1)
               .set_index('station_id', drop=False)
               .sort_index())

assert set(df1.columns) == set(df2.columns), 'Column names differ between CSVs'
assert df1.index.equals(df2.index), 'Station IDs differ between CSVs'

# NaN != NaN, so cells that are missing in both frames count as a match
df2 = df2[df1.columns]
matches = (df1 == df2) | (df1.isnull() & df2.isnull())
assert matches.all().all(), 'RID_11 results for %s differ between CSVs' % year

# Drop one set of data for RID_11 in year. Take a copy so that adding the
# 'year' column does not write to a view of the original frame
all_df = all_df.query('rid_group != "rid_11"').copy()
all_df['year'] = year

# Concatenate
df = pd.concat([all_df, rid11_df], axis=0)

# Remove metadata and set index
df = (df.drop(['station_code', 'station_name', 'rid_group', 'ospar_region'],
              axis=1)
        .set_index(['station_id', 'year'], drop=True))

# Split into two parts: the '<par>_Est' flag columns and the value columns.
# Slicing ([1:2]) avoids an IndexError for a name without an underscore, and
# the list comprehension keeps the original column order (a set difference
# would make the row order of the final result arbitrary)
est_cols = [i for i in df.columns if i.split('_')[1:2] == ['Est']]
val_cols = [i for i in df.columns if i not in est_cols]
est_df = df[est_cols].copy()
val_df = df[val_cols].copy()

# Convert units
# 1000m3/day to m3/s
val_df['Q_m3/s'] = 1000*val_df['mean_q_1000m3/day']/(24*60*60)
del val_df['mean_q_1000m3/day']

# Hg: the column is relabelled from kg to tonnes, but the values are copied
# unchanged (there is no division by 1000). The unit suffix is stripped from
# the variable names further down, so the relabelling itself does not affect
# the result.
# NOTE(review): confirm which unit the database expects for Hg. If it is
# tonnes, the values need dividing by 1000 here
val_df['Hg_tonnes'] = val_df['Hg_kg']
del val_df['Hg_kg']

# Reset indices and melt to long format
est_df = pd.melt(est_df.reset_index(), id_vars=['station_id', 'year'],
                 value_name='estimated')
val_df = pd.melt(val_df.reset_index(), id_vars=['station_id', 'year'])

# Get pars from col names (the part before the first underscore)
est_df['variable'] = est_df['variable'].str.split('_').str[0]
val_df['variable'] = val_df['variable'].str.split('_').str[0]

# Join. Left join: variables with no matching '<par>_Est' column get NaN
# for 'estimated'
df = pd.merge(val_df, est_df, how='left',
              on=['station_id', 'year', 'variable'])

# Drop Ag and NaN. Rows with a missing value or a missing 'estimated' flag
# are removed. Chaining returns a new frame instead of modifying a filtered
# view in place
df = df.query('variable != "Ag"').dropna(how='any')

# Tidy
df = df.assign(estimated=df['estimated'].astype(int))

df.head()

Unnamed: 0,station_id,year,variable,value,estimated
0,29831,2016,TOTP,4.200404,0
1,29830,2016,TOTP,10.997303,0
2,29843,2016,TOTP,36.475905,0
3,29822,2016,TOTP,6.196935,0
4,29800,2016,TOTP,3.967748,0


## 2. Prepare data for the database

I have created a new table in the database using the following SQL:

    CREATE TABLE resa2.RID_MONITORED_LOADS
    ( time_series_id number(3, 0) NOT NULL,
      station_id number(10, 0) NOT NULL,
      out_pid number(4, 0) NOT NULL,
      year number(4, 0) NOT NULL,
      value number NOT NULL,
      estimated number(1,0) NOT NULL,
      CONSTRAINT RID_MONITORED_LOADS_PK PRIMARY KEY (time_series_id, station_id, out_pid, year),
      CONSTRAINT fk_time_series_id
        FOREIGN KEY (time_series_id)
        REFERENCES RESA2.RID_TIMESERIES_DEFINITIONS(time_series_id),
      CONSTRAINT fk_station_id
        FOREIGN KEY (station_id)
        REFERENCES RESA2.STATIONS(station_id), 
      CONSTRAINT fk_out_pid
        FOREIGN KEY (out_pid)
        REFERENCES RESA2.RID_PUNKTKILDER_OUTPAR_DEF(out_pid)
    );
    
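Note that this table references three other database tables via foreign keys (`RESA2.RID_TIMESERIES_DEFINITIONS`, `RESA2.STATIONS` and `RESA2.RID_PUNKTKILDER_OUTPAR_DEF`), so every `time_series_id`, `station_id` and `out_pid` written to it must already exist in those tables.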

In [4]:
# Dict mapping pars to values in RESA2.RID_PUNKTKILDER_OUTPAR_DEF
par_dict = {'TOTP':14,
            'NO3-N':20,
            'As':15,
            'TOC':16,
            'PO4-P':11,
            'Cr':2,
            'NH4-N':6,
            'Cd':1,
            'Pb':9,
            'TOTN':8,
            'Cu':3,
            'Ni':7,
            'SPM':18,
            'SiO2':19,
            'Zn':13,
            'Hg':4}

# Every parameter must have an ID: out_pid is a NOT NULL numeric column in
# resa2.RID_MONITORED_LOADS, so stop here rather than during the upload
unknown = sorted(set(df['variable']) - set(par_dict))
assert not unknown, 'No out_pid defined for: %s' % unknown

# Map vars to pids and add the time series ID. assign() returns a new frame,
# which avoids a chained in-place replace on a single column (that pattern
# does not reliably modify the parent frame in recent pandas versions)
df = df.assign(time_series_id=ts_id,
               out_pid=df['variable'].map(par_dict))

# Keep only the database columns, in the same order as the table definition
# (this also drops 'variable')
df = df[['time_series_id', 'station_id', 'out_pid', 'year',
         'value', 'estimated']]

df.head()

Unnamed: 0,time_series_id,station_id,out_pid,year,value,estimated
0,11,29831,14,2016,4.200404,0
1,11,29830,14,2016,10.997303,0
2,11,29843,14,2016,36.475905,0
3,11,29822,14,2016,6.196935,0
4,11,29800,14,2016,3.967748,0


## 3. Add to database

In [5]:
# Connect to db
# The connection helper lives in a stand-alone script outside this project,
# so it is loaded directly from its file path.
# NOTE: the 'imp' module is deprecated in Python 3 and removed in 3.12
resa2_basic_path = '\\'.join([
    r'C:\Data\James_Work\Staff\Heleen_d_W\ICP_Waters\Upload_Template',
    r'useful_resa2_code.py',
])
resa2_basic = imp.load_source('useful_resa2_code', resa2_basic_path)

# The helper's result is unpacked into 'engine' and 'conn'
engine, conn = resa2_basic.connect_to_resa2()

In [6]:
# Write to db
# The upload is switched off by default, so running the whole notebook does
# not touch the database. resa2.RID_MONITORED_LOADS has a primary key on
# (time_series_id, station_id, out_pid, year), so appending rows that are
# already there would violate it. Set write_to_db to True for a one-off
# upload, then switch it back
write_to_db = False

if write_to_db:
    df.to_sql('rid_monitored_loads', schema='resa2',
              con=engine, if_exists='append',
              index=False)