In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn
import folium
import imp
from sqlalchemy import create_engine
sn.set_context('notebook')

# Checking RESA2 for Arctic river data

Nikolai has some biological monitoring sites in the Arctic and he'd like to know if we have any additional environmental data for these locations. See e-mail received 04/03/2017 at 16.32 for details. The locations of interest are:

 * Altaelva
 * Halselva
 * Komagelva
 * Neiden
 * Tana
 * Beiearelva
 * Roksdalsvassdraget (Åelva)
 * Saltdalselva
 * Skjoma
 * Målselva
 * Reisaelva
 * Salangselva

Without unique station codes, these rivers may be difficult to identify. This notebook searches the database for any matches (including partial matches) based on the site names and pulls out some basic site properties.

In [2]:
# Create db connection
# The helper module `useful_resa2_code` is loaded directly from its file path.
# NOTE(review): this is a hard-coded absolute Windows path, so the cell only
# runs on a machine where the file exists at exactly this location.
# NOTE(review): the `imp` module is deprecated and was removed in Python 3.12;
# `importlib` is the replacement if this notebook is ever ported.
r2_func_path = r'C:\Data\James_Work\Staff\Heleen_d_W\ICP_Waters\Upload_Template\useful_resa2_code.py'
resa2 = imp.load_source('useful_resa2_code', r2_func_path)

# `connect_to_resa2` lives in the helper module (not shown in this notebook).
# Its result is unpacked into `engine` and `conn`; the query cells below pass
# `engine` to `pd.read_sql_query`.
engine, conn = resa2.connect_to_resa2()

In [3]:
# River names supplied by Nikolai. These are used as search terms against the
# station names in the database.
stn_names = [
    u'Altaelva',
    u'Halselva',
    u'Komagelva',
    u'Neiden',
    u'Tana',
    u'Beiearelva',
    # Roksdalsvassdraget is listed under its combined name and under each
    # of its two component names
    u'Roksdalsvassdraget (Åelva)',
    u'Roksdalsvassdraget',
    u'Åelva',
    u'Saltdalselva',
    u'Skjoma',
    u'Målselva',
    u'Reisaelva',
    u'Salangselva',
]

In [4]:
# Search database
# For every name of interest, find all stations whose name contains it
# (case-insensitive partial match) and collect the basic site properties.


def _decode_cp1252(value):
    """Decode a windows-1252 byte string; return any other value unchanged.

    Only byte strings are decoded, so text that the database driver has
    already returned as unicode (and missing values) is left as-is instead
    of being turned into NaN.
    """
    if isinstance(value, bytes):
        return value.decode('windows-1252')
    return value


# Container for output
df_list = []

# Loop over names
for stn in stn_names:
    # Lower-case the search term to match LOWER(station_name). Single quotes
    # are doubled so a name containing an apostrophe cannot terminate the
    # SQL string literal early
    pattern = stn.lower().replace("'", "''")

    # Get stn matches
    sql = ("SELECT station_id, station_code, station_name, "
           "lake_or_river, latitude, longitude FROM RESA2.STATIONS "
           "WHERE LOWER(station_name) LIKE '%%%s%%'" % pattern)

    df = pd.read_sql_query(sql, engine)

    df_list.append(df)

# Combine results. The search terms overlap (e.g. 'Roksdalsvassdraget (Åelva)',
# 'Roksdalsvassdraget' and 'Åelva'), so the same station can be returned by
# more than one query: keep only the first occurrence of each station_id
stn_df = (pd.concat(df_list, axis=0)
            .drop_duplicates(subset='station_id')
            .reset_index(drop=True))

# Decode special characters
for col in stn_df.columns:
    if stn_df[col].dtype == object:
        stn_df[col] = stn_df[col].map(_decode_cp1252)

# Tidy
stn_df['station_id'] = stn_df['station_id'].astype(int)

stn_df

Unnamed: 0,station_id,station_code,station_name,lake_or_river,latitude,longitude
0,15956,ALTA-E 11,Altaelva v/Gøngæsholmen,R,69.837,23.457
1,15957,ALTA-E 12,Altaelva v/Killistraumen,R,69.913,23.287
2,29779,FINEALT,Altaelva,R,69.900992,23.286977
3,30033,FINENEI,Neidenelva,R,69.691992,29.369979
4,1629,FIN-855,UUTTANA,L,62.062,25.012
5,29820,FINETAN,Tanaelva,R,70.229993,28.173988
6,21082,OVELV 86 1,Tana v/Tana bru,,70.200261,28.19608
7,21077,OVELV 81 1,Saltdalselva,,67.047149,15.388531
8,30118,TROEREI,Reisaelva,R,69.764991,21.017979
9,21080,OVELV 84 1,Reisaelva,,69.747004,21.096134


The search identifies 10 stations, most of which are fairly northern (although `UUTTANA` is not within the Arctic Circle and is probably an incorrect match). There are also some duplicated names with very similar co-ordinates, but different station codes. **Check this**.

The map below shows the locations of these 10 sites. Click on a marker to see the site name. **Ask Nikolai to check which of these match his biological data and which do not**.

In [5]:
# Setup map: initial view centred on 67N, 18E
# NOTE(review): the 'Stamen Terrain' tile set may not be available in recent
# folium releases - confirm against the installed version
map1 = folium.Map(location=[67, 18], zoom_start=4, tiles='Stamen Terrain')

# Add one clickable marker per station; the popup reads "name (code)"
for row_label, site in stn_df.iterrows():
    popup_text = '%s (%s)' % (site['station_name'], site['station_code'])
    position = [site['latitude'], site['longitude']]
    folium.Marker(position, popup=popup_text).add_to(map1)

map1

For each of the sites identified, the code below gets the number of water samples in the database and the date/time of the first and last sampling.

In [6]:
# Get water sample data
# For each station found above, count its water samples and get the date/time
# of the first and last sample, then join the summary to the station table.

# Container for output
df_list = []

# Loop over stations
for cde in stn_df['station_id'].unique():
    # Get sample data. The station id is NOT selected from the table: an
    # aggregate over zero rows would return NULL for it, which would lose
    # the join key for stations that have no samples
    sql = ('SELECT MIN(sample_date) as series_start, '
           'MAX(sample_date) as series_end, '
           'COUNT(*) as n_samples '
           'FROM resa2.water_samples '
           'WHERE station_id = %d' % int(cde))

    df = pd.read_sql_query(sql, engine)

    # Tag the summary with the station it was queried for, so a station with
    # no samples is still reported (n_samples = 0, start/end missing)
    df.insert(0, 'station_id', int(cde))

    df_list.append(df)

# Combine results
if df_list:
    samp_df = pd.concat(df_list, axis=0, ignore_index=True)
else:
    # No stations were found: pd.concat would raise on an empty list, so
    # build an empty summary with the expected columns instead
    samp_df = pd.DataFrame(columns=['station_id', 'series_start',
                                    'series_end', 'n_samples'])
    samp_df['station_id'] = samp_df['station_id'].astype(int)

# Join to stn data
df = pd.merge(stn_df, samp_df, how='left',
              on='station_id')

df

Unnamed: 0,station_id,station_code,station_name,lake_or_river,latitude,longitude,series_start,series_end,n_samples
0,15956,ALTA-E 11,Altaelva v/Gøngæsholmen,R,69.837,23.457,1980-07-04,1981-09-27 00:00:00,9
1,15957,ALTA-E 12,Altaelva v/Killistraumen,R,69.913,23.287,1980-07-04,1981-09-27 00:00:00,9
2,29779,FINEALT,Altaelva,R,69.900992,23.286977,1990-03-15,2016-12-05 11:15:00,261
3,30033,FINENEI,Neidenelva,R,69.691992,29.369979,1990-01-01,2003-01-01 00:00:00,14
4,1629,FIN-855,UUTTANA,L,62.062,25.012,1995-10-11,1995-10-11 00:00:00,1
5,29820,FINETAN,Tanaelva,R,70.229993,28.173988,1990-01-01,2016-10-09 17:00:00,66
6,21082,OVELV 86 1,Tana v/Tana bru,,70.200261,28.19608,1976-06-08,1976-09-01 00:00:00,4
7,21077,OVELV 81 1,Saltdalselva,,67.047149,15.388531,1976-05-31,1976-05-31 00:00:00,1
8,30118,TROEREI,Reisaelva,R,69.764991,21.017979,1990-01-01,2003-01-01 00:00:00,14
9,21080,OVELV 84 1,Reisaelva,,69.747004,21.096134,1976-04-26,1976-11-20 00:00:00,4
