In [1]:
%matplotlib inline
import nivapy3 as nivapy
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import fiona

# Use matplotlib's 'ggplot' style sheet for any figures drawn in this notebook
plt.style.use('ggplot')

# Preliminary checking of "1000 Lakes" sampling locations

NVE has previously supplied an ESRI File Geodatabase of Norwegian lakes.

In [2]:
# List the layers available in the NVE File Geodatabase.
# The path is relative to the notebook's working directory.
gdb_path = r'../../NVEData.gdb'
layer_list = fiona.listlayers(gdb_path)
print(layer_list)

['Innsjo']


In [3]:
# Read the 'Innsjo' lakes layer from the geodatabase and preview the
# first few rows (attribute columns plus polygon geometry)
gdf = gpd.read_file(gdb_path, layer='Innsjo')
gdf.head()

Unnamed: 0,objektType,vatnLnr,navn,hoyde_moh,areal_km2,magasinNr,magasinKategori,magasinFormal_liste,vassdragsNr,elvenavnHierarki,...,arealNorge_km2,dybdekart,nedborfeltAreal_km2,dataUttaksdato,eksportType,SHAPE_Length,SHAPE_Area,UTM_Z33_E,UTM_Z33_N,geometry
0,Innsjø,165333,,-8888,0.00616,,,,025.H,KVINA,...,0.00616,,0.01,2017-09-27T00:00:00,NVEs nedlastningsløsning,394.219939,6159.9713,49828.230106,6560515.0,"(POLYGON ((49808.83000000007 6560467.3902, 498..."
1,Innsjø,56888,Juovsajavrit,438,0.217176,,,,234.GBJ,IESJÅKKA/KARASJÅKKA/TANA,...,0.217176,,5.104375,2017-09-27T00:00:00,NVEs nedlastningsløsning,4450.517347,217176.0148,864997.937388,7764739.0,"(POLYGON ((864712.04 7764341.240200002, 864706..."
2,Innsjø,151207,,-8888,0.009965,,,,109.5G,LITLEDALSELVA,...,0.009965,,0.11,2017-09-27T00:00:00,NVEs nedlastningsløsning,549.726466,9965.4771,169206.496645,6939095.0,(POLYGON ((169246.5300000003 6939027.540199999...
3,Innsjø,192275,,-8888,0.010995,,,,019.CF,GJØV/ARENDALSVASSDRAGET,...,0.010995,,0.06,2017-09-27T00:00:00,NVEs nedlastningsløsning,422.604515,10994.76345,98523.076095,6574547.0,(POLYGON ((98582.62999999989 6574576.120200001...
4,Innsjø,97273,,-8888,0.008618,,,,196.A4B,TAKELVA/MÅLSELVVASSDRAGET,...,0.008618,,0.64,2017-09-27T00:00:00,NVEs nedlastningsløsning,369.532207,8618.1475,657612.040353,7673926.0,(POLYGON ((657658.1200000001 7673975.150199998...


Atle has also supplied the sampling locations from the 2019 survey.

In [9]:
# Load the 2019 sampling locations from Excel, discarding the two
# free-text comment columns, which are not needed for the checks below
stn_path = r'../../1000_lakes_2019.xlsx'
stn_df = pd.read_excel(stn_path).drop(columns=['comment_fm', 'comment_niva'])
stn_df.head()

Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m
0,1,2999.0,101-4-1,Femsjøen,Femsjøen,101,Halden kommune,1,Østfold,316.0,6559050.0,642460.0,32,10.64,79
1,2,9.0,101-2-7,Hokksjøen,Hokksjøen,101,Halden kommune,1,Østfold,3608.0,6543369.0,647241.906048,32,0.133879,148
2,3,2998.0,101-2-2,Steinsvatnet,Steinsvatnet,101,Halden kommune,1,Østfold,3562.0,6554004.0,652477.0,32,0.21,178
3,4,3000.0,105-3-6,Isesjøen,Isesjøen,105,Sarpsborg kommune,1,Østfold,133.0,6572400.0,626400.0,32,6.2,38
4,5,3001.0,105-3-10,Tunevatnet,Tunevatnet,105,Sarpsborg kommune,1,Østfold,3451.0,6576676.0,619486.0,32,2.25,40


In [10]:
# Convert the UTM co-ordinates to WGS84 decimal degrees.
# The keyword arguments name the columns of stn_df holding the
# easting, northing and UTM zone for each station.
stn_df = nivapy.spatial.utm_to_wgs84_dd(
    stn_df,
    east='utm_e',
    north='utm_n',
    zone='utm_zone',
)

stn_df.head()

Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m,lat,lon
0,1,2999.0,101-4-1,Femsjøen,Femsjøen,101,Halden kommune,1,Østfold,316.0,6559050.0,642460.0,32,10.64,79,59.146735,11.490597
1,2,9.0,101-2-7,Hokksjøen,Hokksjøen,101,Halden kommune,1,Østfold,3608.0,6543369.0,647241.906048,32,0.133879,148,59.004421,11.563584
2,3,2998.0,101-2-2,Steinsvatnet,Steinsvatnet,101,Halden kommune,1,Østfold,3562.0,6554004.0,652477.0,32,0.21,178,59.097995,11.661994
3,4,3000.0,105-3-6,Isesjøen,Isesjøen,105,Sarpsborg kommune,1,Østfold,133.0,6572400.0,626400.0,32,6.2,38,59.271611,11.21784
4,5,3001.0,105-3-10,Tunevatnet,Tunevatnet,105,Sarpsborg kommune,1,Østfold,3451.0,6576676.0,619486.0,32,2.25,40,59.311994,11.098985


In [11]:
# Link each sampling point (identified by 'index') to a lake polygon
# (identified by 'vatnLnr') using the point's lat/lon
df = nivapy.spatial.identify_point_in_polygon(
    stn_df,
    gdf,
    pt_col='index',
    poly_col='vatnLnr',
    lat_col='lat',
    lon_col='lon',
)

# Attach NVE's name for the matched lake; a left join keeps every station
nve_names = gdf[['vatnLnr', 'navn']]
df = df.merge(nve_names, how='left', on='vatnLnr')
df.head()

Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m,lat,lon,vatnLnr,navn
0,1,2999.0,101-4-1,Femsjøen,Femsjøen,101,Halden kommune,1,Østfold,316.0,6559050.0,642460.0,32,10.64,79,59.146735,11.490597,316.0,Femsjøen
1,2,9.0,101-2-7,Hokksjøen,Hokksjøen,101,Halden kommune,1,Østfold,3608.0,6543369.0,647241.906048,32,0.133879,148,59.004421,11.563584,3608.0,Hokksjøen
2,3,2998.0,101-2-2,Steinsvatnet,Steinsvatnet,101,Halden kommune,1,Østfold,3562.0,6554004.0,652477.0,32,0.21,178,59.097995,11.661994,3562.0,Stensvannet
3,4,3000.0,105-3-6,Isesjøen,Isesjøen,105,Sarpsborg kommune,1,Østfold,133.0,6572400.0,626400.0,32,6.2,38,59.271611,11.21784,133.0,Isesjøen
4,5,3001.0,105-3-10,Tunevatnet,Tunevatnet,105,Sarpsborg kommune,1,Østfold,3451.0,6576676.0,619486.0,32,2.25,40,59.311994,11.098985,3451.0,Tunevannet


## Which sampling locations are *not* within a lake?

In [12]:
# Stations with no 'vatnLnr' were not matched to any lake polygon
unmatched = df['vatnLnr'].isna()
not_in_lake = df.loc[unmatched]
print(len(not_in_lake), 'station locations are not within a lake.')

# Save the unmatched stations for follow-up
not_in_lake.to_csv(r'../../not_in_lake.csv', encoding='utf-8')
not_in_lake.head()

64 station locations are not within a lake.


Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m,lat,lon,vatnLnr,navn
58,59,,,Gløtsjøen,Gløtsjøen,428,Trysil kommune,4,Hedmark,33445.0,6827269.0,665778.0,32,0.22,823,61.543256,12.119332,,
141,142,3123.0,544-1-27,HOH 1198,HOH 1198,544,Øystre Slidre kommune,5,Oppland,32848.0,6795656.0,491748.0,32,0.05,1198,61.294998,8.845997,,
144,145,3134.0,545-4-1,Bygdin,Bygdin,545,Vang kommune,5,Oppland,146.0,6801900.0,467900.0,32,39.89,1058,61.349816,8.399882,,
156,157,26.0,604-3-3,Hengsvatnet,Hengsvatnet,604,Kongsberg kommune,6,Buskerud,398.0,6613285.0,525000.0,32,1.01,452,59.656896,9.443625,,
162,163,3141.0,612-5-1,Tyrifjorden,Tyrifjorden,612,Hole kommune,6,Buskerud,522.0,6642673.0,554069.0,32,138.56,63,59.917992,9.966997,,


## Mismatches between NVE "vatn" numbers

Atle's spreadsheet has an NVE "Vatn Nr" for each lake. These do not always match the "Vatn Nr" for the polygon enclosing the sampling point in NVE's lakes dataset.

In [13]:
# Stations where the spreadsheet's "vatn nr" differs from that of the
# enclosing NVE polygon. A missing spreadsheet value also compares as
# "different", so stations with no 'nve_vatn_nr' are included; stations
# with no enclosing polygon (missing 'vatnLnr') are dropped.
numbers_differ = df['nve_vatn_nr'].ne(df['vatnLnr'])
vatn_mismatch = df.loc[numbers_differ].dropna(subset=['vatnLnr'])

# Save the mismatches for follow-up
vatn_mismatch.to_csv(r'../../vatn_mismatch.csv', encoding='utf-8')
vatn_mismatch.head()

Unnamed: 0,index,station_id,station_code,station_name,lake_name,komm_nr,komm,fylke_nr,fylke,nve_vatn_nr,utm_n,utm_e,utm_zone,lake_area_km2,altitude_m,lat,lon,vatnLnr,navn
31,32,3027.0,402-1-28,Eintjenn,Eintjenn,402,Kongsvinger kommune,4,Hedmark,4313.0,6662390.0,353550.0,33,0.61,175,60.0724,12.368182,4321.0,Eintjenn
47,48,3042.0,419-3-2,Svartbørja,Svarttbørja,419,Sør-Odal kommune,4,Hedmark,4267.0,6665114.0,661943.0,32,1.19,301,60.090999,11.911995,368.0,Storbørja
55,56,17809.0,418-601,Nøklevatn,,425,Åsnes kommune,4,Hedmark,205.0,6708596.0,653876.0,32,1.07,424,60.484068,11.800313,235.0,Nøklevatnet
271,272,15705.0,830-26,Måvatn,,830,Nissedal kommune,8,Telemark,,6563000.0,478400.0,32,0.63,665,59.205534,8.621778,1254.0,Måvatn
305,306,15712.0,904-12,Snøløsvatn,,904,Grimstad kommune,9,Aust-Agder,,6481832.0,468564.0,32,1.24,109,58.475993,8.460996,10651.0,Snøløsvannet
