In [1]:
%matplotlib inline
import nivapy3 as nivapy
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import fiona
import numpy as np
import cartopy
import matplotlib
import cartopy.crs as ccrs

plt.style.use('ggplot')

# Preliminary checking of "1000 Lakes" sampling locations

## 1. Read NVE database

NVE has previously supplied an ESRI File Geodatabase of Norwegian lakes.

In [2]:
# Get layers in GDB
gdb_path = r'../../NVEData.gdb'
layer_list = fiona.listlayers(gdb_path)
print(layer_list)

['Innsjo']


In [3]:
# Read lakes dataset
gdf = gpd.read_file(gdb_path, layer='Innsjo')
gdf.head()

Unnamed: 0,objektType,vatnLnr,navn,hoyde_moh,areal_km2,magasinNr,magasinKategori,magasinFormal_liste,vassdragsNr,elvenavnHierarki,...,arealNorge_km2,dybdekart,nedborfeltAreal_km2,dataUttaksdato,eksportType,SHAPE_Length,SHAPE_Area,UTM_Z33_E,UTM_Z33_N,geometry
0,Innsjø,165333,,-8888,0.00616,,,,025.H,KVINA,...,0.00616,,0.01,2017-09-27T00:00:00,NVEs nedlastningsløsning,394.219939,6159.9713,49828.230106,6560515.0,"(POLYGON ((49808.83000000007 6560467.3902, 498..."
1,Innsjø,56888,Juovsajavrit,438,0.217176,,,,234.GBJ,IESJÅKKA/KARASJÅKKA/TANA,...,0.217176,,5.104375,2017-09-27T00:00:00,NVEs nedlastningsløsning,4450.517347,217176.0148,864997.937388,7764739.0,"(POLYGON ((864712.04 7764341.240200002, 864706..."
2,Innsjø,151207,,-8888,0.009965,,,,109.5G,LITLEDALSELVA,...,0.009965,,0.11,2017-09-27T00:00:00,NVEs nedlastningsløsning,549.726466,9965.4771,169206.496645,6939095.0,(POLYGON ((169246.5300000003 6939027.540199999...
3,Innsjø,192275,,-8888,0.010995,,,,019.CF,GJØV/ARENDALSVASSDRAGET,...,0.010995,,0.06,2017-09-27T00:00:00,NVEs nedlastningsløsning,422.604515,10994.76345,98523.076095,6574547.0,(POLYGON ((98582.62999999989 6574576.120200001...
4,Innsjø,97273,,-8888,0.008618,,,,196.A4B,TAKELVA/MÅLSELVVASSDRAGET,...,0.008618,,0.64,2017-09-27T00:00:00,NVEs nedlastningsløsning,369.532207,8618.1475,657612.040353,7673926.0,(POLYGON ((657658.1200000001 7673975.150199998...


## 2a. Read stations from Excel

Atle has supplied the sampling locations from the 2019 survey.

In [4]:
# Read sampling pts
stn_path = r'../../1000_lakes_2019.xlsx'
stn_df = pd.read_excel(stn_path)
del stn_df['comment_fm'], stn_df['comment_niva']
stn_df.head()

Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m
0,525,55.0,1228-1-16,1230 HOH,1230 HOH,1228,Odda kommune,12,Hordaland,12589.0,6628463.0,389081.407241,32,0.073461,1233
1,72,3062.0,434-1-11,Abbortjørna,Abbortjørna,434,Engerdal kommune,4,Hedmark,35719.0,6911424.0,351994.0,33,0.05,739
2,892,3809.0,2011-1-6,Amasjavri,Amasjavri,2011,Guovdageainnu/Kautokeino komm.,20,Finmark,49720.0,7641815.0,572259.0,34,0.09,336
3,71,16132.0,432-603,Arasjøen,,432,Rendalen kommune,4,Hedmark,1363.0,6877000.0,635600.0,32,1.08,690
4,748,3676.0,1718-1-15,Aunbyvatn,Aunbyvatn,1718,Leksvik kommune,17,Nord-Trøndelag,37123.0,7060349.0,570085.0,32,0.08,331


In [5]:
# Convert co-ords to WGS84
stn_df = nivapy.spatial.utm_to_wgs84_dd(stn_df, 
                                        zone='utm_zone',
                                        east='utm_e',
                                        north='utm_n')

stn_df.head()

Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m,lat,lon
0,525,55.0,1228-1-16,1230 HOH,1230 HOH,1228,Odda kommune,12,Hordaland,12589.0,6628463.0,389081.407241,32,0.073461,1233,59.779099,7.024378
1,72,3062.0,434-1-11,Abbortjørna,Abbortjørna,434,Engerdal kommune,4,Hedmark,35719.0,6911424.0,351994.0,33,0.05,739,62.304998,12.144987
2,892,3809.0,2011-1-6,Amasjavri,Amasjavri,2011,Guovdageainnu/Kautokeino komm.,20,Finmark,49720.0,7641815.0,572259.0,34,0.09,336,68.877993,22.796999
3,71,16132.0,432-603,Arasjøen,,432,Rendalen kommune,4,Hedmark,1363.0,6877000.0,635600.0,32,1.08,690,62.00102,11.589488
4,748,3676.0,1718-1-15,Aunbyvatn,Aunbyvatn,1718,Leksvik kommune,17,Nord-Trøndelag,37123.0,7060349.0,570085.0,32,0.08,331,63.663993,10.415984


## 2b. Read stations from Aquamonitor

Roar has created a project in AM (project ID 12433) and added the new stations to it. Liv Bente and I have then corrected the co-ordinates based on my initial screening of Atle's spreadsheet. The aim of this code is to check that everything is now correct.

In [2]:
# Connect to db
eng = nivapy.da.connect()

Username:  ···
Password:  ········


Connection successful.


In [3]:
# Query projects
stn_df = nivapy.da.select_ndb_project_stations([12433],
                                               eng,
                                               drop_dups=True)

# Add UTM Z33
stn_df = nivapy.spatial.wgs84_dd_to_utm(stn_df)

print(len(stn_df))
assert stn_df['station_id'].is_unique
stn_df.head()

1005


Unnamed: 0,station_id,station_code,station_name,station_type,longitude,latitude,utm_north,utm_east,utm_zone
0,26070,221-1-2,Langtjern,Innsjø,11.850274,59.808643,6634299.0,323345.999797,33
1,26071,101-2-7,Hokksjøen,Innsjø,11.563586,59.004423,6545620.0,302651.000302,33
2,26072,402-2-13,Sætertjern,Innsjø,12.446711,60.060222,6660863.0,357866.0,33
3,26073,419-1-25,Mjøgsjøen,Innsjø,11.842186,60.328578,6692181.0,325659.000153,33
4,26074,425-2-2,Kottern,Innsjø,12.517008,60.590729,6719770.0,364003.000014,33


In [4]:
# Get vatn_nrs from db
stn_ids = stn_df['station_id'].values
sql = ("SELECT station_id, value_t AS am_vatn_nr "
       "FROM nivadatabase.station_attributes "
       "WHERE attribute_id = 4 "
       "AND (1, station_id) IN (%s)" % ','.join('(1, %d)' % i for i in stn_ids))
nve_df = pd.read_sql(sql, eng)
nve_df.head()

Unnamed: 0,station_id,am_vatn_nr
0,27040,1296
1,27052,1482
2,27055,26509
3,27075,63606
4,45764,10026


In [9]:
stn_df = pd.merge(stn_df, nve_df, how='left', on='station_id')
stn_df.head()

Unnamed: 0,station_id,station_code,station_name,station_type,longitude,latitude,utm_north,utm_east,utm_zone,am_vatn_nr
0,26070,221-1-2,Langtjern,Innsjø,11.850274,59.808643,6634299.0,323345.999797,33,3208
1,26071,101-2-7,Hokksjøen,Innsjø,11.563586,59.004423,6545620.0,302651.000302,33,3608
2,26072,402-2-13,Sætertjern,Innsjø,12.446711,60.060222,6660863.0,357866.0,33,4332
3,26073,419-1-25,Mjøgsjøen,Innsjø,11.842186,60.328578,6692181.0,325659.000153,33,4055
4,26074,425-2-2,Kottern,Innsjø,12.517008,60.590729,6719770.0,364003.000014,33,3794


In [10]:
# Link pts to polys
df = nivapy.spatial.identify_point_in_polygon(stn_df,
                                              gdf,
                                              pt_col='station_id',
                                              poly_col='vatnLnr',
                                              lat_col='latitude',
                                              lon_col='longitude')

# Join NVE name
df = pd.merge(df, gdf[['vatnLnr', 'navn']],
              how='left', on='vatnLnr')
df.head()

Unnamed: 0,station_id,station_code,station_name,station_type,longitude,latitude,utm_north,utm_east,utm_zone,am_vatn_nr,vatnLnr,navn
0,26070,221-1-2,Langtjern,Innsjø,11.850274,59.808643,6634299.0,323345.999797,33,3208,3208.0,Langtjenn
1,26071,101-2-7,Hokksjøen,Innsjø,11.563586,59.004423,6545620.0,302651.000302,33,3608,3608.0,Hokksjøen
2,26072,402-2-13,Sætertjern,Innsjø,12.446711,60.060222,6660863.0,357866.0,33,4332,4332.0,Sætertjenn
3,26073,419-1-25,Mjøgsjøen,Innsjø,11.842186,60.328578,6692181.0,325659.000153,33,4055,4055.0,Mjøgsjøen
4,26074,425-2-2,Kottern,Innsjø,12.517008,60.590729,6719770.0,364003.000014,33,3794,3794.0,Kottern


## Which sampling locations are *not* within a lake?

In [11]:
# Which locations are not in a lake?
not_in_lake = df[pd.isna(df['vatnLnr'])]
print(len(not_in_lake), 'station locations are not within a lake.')
not_in_lake.to_csv(r'../../not_in_lake_round2.csv', encoding='utf-8')
not_in_lake

4 station locations are not within a lake.


Unnamed: 0,station_id,station_code,station_name,station_type,longitude,latitude,utm_north,utm_east,utm_zone,am_vatn_nr,vatnLnr,navn
865,26971,2021-1-2,HOH 450,Innsjø,24.664647,69.647796,7756241.0,873779.2586,33,,,
906,27014,1711-3-1,Skurdalsvatnet,Innsjø,12.122,63.372,7030269.0,356116.256054,33,736.0,,
967,45671,2030-623,ST.SAMETTI,Elv,27.084386,69.488029,7755411.0,969921.591065,33,,,
999,71720,807-x,Surtetjørn,Innsjø,9.125968,59.774258,6640888.0,170416.07159,33,,,


## Mismatches between NVE "vatn" numbers

Atle's spreadsheet has an NVE "Vatn Nr" for each lake. The dpo not alweays match the "Vatn Nr" for the polygon enclosing the sampling point in NVE's lakes dataset.

In [16]:
# The following "vatn_nrs" do not agree
vatn_mismatch = df.dropna(subset=['vatnLnr']) 
vatn_mismatch['vatnLnr'] = vatn_mismatch['vatnLnr'].astype(int).astype(str)
vatn_mismatch = vatn_mismatch[vatn_mismatch['am_vatn_nr'] != vatn_mismatch['vatnLnr']]
vatn_mismatch.to_csv(r'../../vatn_mismatch_round2.csv', encoding='utf-8')
vatn_mismatch.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,station_id,station_code,station_name,station_type,longitude,latitude,utm_north,utm_east,utm_zone,am_vatn_nr,vatnLnr,navn


In [None]:
## Update vatn_nrs
#with eng.begin() as conn:
#    for idx, row in df.iterrows():
#        if not pd.isna(row['vatnLnr']):
#            nve_id = str(int(row['vatnLnr']))
#            stn_id = row['station_id']
#            
#            sql = ("SELECT * FROM nivadatabase.station_attributes "
#                   "WHERE station_id = %s "
#                   "AND attribute_id = 4" % stn_id)
#            attr_df = pd.read_sql(sql, eng)
#            
#            if len(attr_df) == 1:                      
#                sql = ("UPDATE nivadatabase.station_attributes "
#                       "SET value_t = %s "
#                       "WHERE station_id = %s "
#                       "AND attribute_id = 4" % (nve_id, stn_id))
#                conn.execute(sql)   
#                
#            elif len(attr_df) == 0:
#                sql = ("INSERT INTO nivadatabase.station_attributes "
#                       "(station_id, attribute_id, value_t) "
#                       "VALUES (%s, 4, %s) " % (stn_id, nve_id))
#                conn.execute(sql)  
#            
#            else:
#                raise ValueError

In [None]:
def chunker(df, size):
    """ Split a dataframe into chunks of length 'size'.
    """
    return (df[pos:pos + size] for pos in range(0, len(df), size))

In [None]:
# Size of square buffer around monitoring point
buff = 500

# Loop over chunks
for df_idx, df_part in enumerate(chunker(not_in_lake.reset_index(drop=True), 20)):
    fig = plt.figure(figsize=(15,20))

    # Loop over stations
    for idx, row in df_part.reset_index(drop=True).iterrows():    
        zone = row['utm_zone']
        north = row['utm_north']
        east = row['utm_east']

        # Define co-ord system
        crs = ccrs.UTM(zone)  

        # Plot
        ax = fig.add_subplot(5, 4, idx+1, projection=crs)
        ax.set_extent([east - buff, east + buff, north - buff, north + buff], crs=crs)
        
        ax.add_wms(wms='https://openwms.statkart.no/skwms1/wms.topo4',
                   layers=['topo4_WMS'])

        ax.scatter(east, 
                   north, 
                   s=50,
                   c='r', 
                   edgecolors='k', 
                   transform=crs)
   
        ax.set_title('%s (%s)' % (row['station_name'], row['station_code']), fontsize=12)
     
    # Save 
    out_png = r'../../grid_maps/not_in_lake_round2/grid_plot_%03d.png' % (df_idx + 1)
    plt.savefig(out_png, dpi=150)
    plt.clf()
    plt.close()

In [None]:
# Size of square buffer around monitoring point
buff = 1000

# Loop over chunks
for df_idx, df_part in enumerate(chunker(vatn_mismatch.reset_index(drop=True), 20)):
    fig = plt.figure(figsize=(15,20))

    # Loop over stations
    for idx, row in df_part.reset_index(drop=True).iterrows():    
        zone = row['utm_zone']
        north = row['utm_n']
        east = row['utm_e']

        # Define co-ord system
        crs = ccrs.UTM(zone)  

        # Plot
        ax = fig.add_subplot(5, 4, idx+1, projection=crs)
        ax.set_extent([east - buff, east + buff, north - buff, north + buff], crs=crs)
        
        ax.add_wms(wms='https://openwms.statkart.no/skwms1/wms.topo4',
                   layers=['topo4_WMS'])

        ax.scatter(east, 
                   north, 
                   s=50,
                   c='r', 
                   edgecolors='k', 
                   transform=crs)

        ax.set_title('%s (%s)' % (row['station_name'], row['station_code']), fontsize=12)
     
    # Save 
    out_png = r'../../grid_maps/vatn_nr_mismatch/grid_plot_%03d.png' % (df_idx + 1)
    plt.savefig(out_png, dpi=150)
    plt.clf()
    plt.close()

In [None]:
df_agg = df.groupby('komm_nr')

# Size of square buffer around monitoring point
buff = 1000

# Loop over kommune
for komm_nr, grp_df in df_agg:
    # Loop over chunks
    for df_idx, df_part in enumerate(chunker(grp_df.reset_index(drop=True), 20)):
        fig = plt.figure(figsize=(15,20))

        # Loop over stations
        for idx, row in df_part.reset_index(drop=True).iterrows():    
            zone = row['utm_zone']
            north = row['utm_n']
            east = row['utm_e']

            # Define co-ord system
            crs = ccrs.UTM(zone)  

            # Plot
            ax = fig.add_subplot(5, 4, idx+1, projection=crs)
            ax.set_extent([east - buff, east + buff, north - buff, north + buff], crs=crs)

            ax.add_wms(wms='https://openwms.statkart.no/skwms1/wms.topo4',
                       layers=['topo4_WMS'])

            ax.scatter(east, 
                       north, 
                       s=50,
                       c='r', 
                       edgecolors='k', 
                       transform=crs)

            ax.set_title('%s (%s)' % (row['station_name'], row['station_code']), fontsize=12)

        # Save 
        out_png = r'../../grid_maps/all_sites/grid_plot_komm%04d_plot%03d.png' % (komm_nr, df_idx + 1)
        plt.savefig(out_png, dpi=150)
        plt.clf()
        plt.close()