In [1]:
%matplotlib inline

import pandas as pd
import nivapy3 as nivapy
import geopandas as gpd
import fiona
import matplotlib.pyplot as plt

# Extract AM data

Getting station properties for the 2019 1000 Lakes project for Øyvind.

In [2]:
# Connect to db
eng = nivapy.da.connect()

Username:  ···
Password:  ········


Connection successful.


In [3]:
# Query projects
stn_df = nivapy.da.select_ndb_project_stations([12433],
                                               eng,
                                               drop_dups=True)
del stn_df['station_code'], stn_df['station_name'], stn_df['station_type']

# Add UTM Z33
stn_df = nivapy.spatial.wgs84_dd_to_utm(stn_df)

print(len(stn_df))
assert stn_df['station_id'].is_unique
stn_df.head()

1002


Unnamed: 0,station_id,longitude,latitude,utm_north,utm_east,utm_zone
0,26070,11.850274,59.808643,6634299.0,323345.999797,33
1,26071,11.563586,59.004423,6545620.0,302651.000302,33
2,26072,12.446711,60.060222,6660863.0,357866.0,33
3,26073,11.842186,60.328578,6692181.0,325659.000153,33
4,26074,12.517008,60.590729,6719770.0,364003.000014,33


In [4]:
# Get stations properties
sql = """SELECT c.station_id,
           c.station_code,
           c.station_name,
           b.name,
           a.value_t
         FROM NIVADATABASE.STATION_ATTRIBUTES a,
           NIVADATABASE.STATION_ATTRIBUTE_DEFINITIONS b,
           NIVADATABASE.PROJECTS_STATIONS c
         WHERE c.project_id = 12433
         AND a.attribute_id  = b.attribute_id
         AND a.station_id    = c.station_id
         AND a.attribute_id IN (1,3,9,12,13,14,15,16,17,113,34,133,213,214)
      """

df = pd.read_sql(sql, eng)

# Reshape
df.set_index(['station_id', 'station_code', 'station_name', 'name'], inplace=True)
df = df.unstack('name')
df.reset_index(inplace=True)

# Tidy
df.sort_values('station_id', inplace=True)
df.index.name = ''
df.columns = list(df.columns.get_level_values(0)[:3]) + list(df.columns.get_level_values(1)[3:])
df.rename({'Areal':'Areal_km2'}, inplace=True, axis='columns')
df['Innsjønummer'] = df['Innsjønummer'].astype(int)

df.head()

Unnamed: 0,station_id,station_code,station_name,Areal_km2,Fylke,Fylkenummer,Innsjønavn,Innsjønummer,Kommunenavn,Kommunenummer,Naturvern,Naturvernform,VannforekomstNavn
,,,,,,,,,,,,,
0.0,26070.0,221-1-2,Langtjern,0.0497,Akershus,200.0,LANGTJERN,3208.0,Aurskog-Høland,221.0,,,
1.0,26071.0,101-2-7,Hokksjøen,0.1339,Østfold,100.0,HOKKSJØEN,3608.0,Halden,101.0,,,
2.0,26072.0,402-2-13,Sætertjern,0.1196,Hedmark,400.0,SÆTERTJERN,4332.0,Kongsvinger,402.0,,,
3.0,26073.0,419-1-25,Mjøgsjøen,0.0482,Hedmark,400.0,MJØGSJØEN,4055.0,Sør-Odal,419.0,,,
4.0,26074.0,425-2-2,Kottern,0.1384,Hedmark,400.0,KOTTERN,3794.0,Åsnes,425.0,,,


In [5]:
# Read lakes dataset
gdb_path = r'../../NVEData.gdb'
gdf = gpd.read_file(gdb_path, layer='Innsjo')
gdf = gdf[['vatnLnr', 'hoyde_moh']]
gdf.rename({'vatnLnr':'Innsjønummer'}, inplace=True, axis='columns')
gdf.head()

Unnamed: 0,Innsjønummer,hoyde_moh
0,165333,-8888
1,56888,438
2,151207,-8888
3,192275,-8888
4,97273,-8888


In [6]:
df = pd.merge(df, gdf, how='left', on='Innsjønummer')
df.head()

Unnamed: 0,station_id,station_code,station_name,Areal_km2,Fylke,Fylkenummer,Innsjønavn,Innsjønummer,Kommunenavn,Kommunenummer,Naturvern,Naturvernform,VannforekomstNavn,hoyde_moh
0,26070,221-1-2,Langtjern,0.0497,Akershus,200,LANGTJERN,3208,Aurskog-Høland,221,,,,273.0
1,26071,101-2-7,Hokksjøen,0.1339,Østfold,100,HOKKSJØEN,3608,Halden,101,,,,148.0
2,26072,402-2-13,Sætertjern,0.1196,Hedmark,400,SÆTERTJERN,4332,Kongsvinger,402,,,,252.0
3,26073,419-1-25,Mjøgsjøen,0.0482,Hedmark,400,MJØGSJØEN,4055,Sør-Odal,419,,,,489.0
4,26074,425-2-2,Kottern,0.1384,Hedmark,400,KOTTERN,3794,Åsnes,425,,,,480.0


In [7]:
df = pd.merge(df, stn_df, how='left', on='station_id')
df.head()

Unnamed: 0,station_id,station_code,station_name,Areal_km2,Fylke,Fylkenummer,Innsjønavn,Innsjønummer,Kommunenavn,Kommunenummer,Naturvern,Naturvernform,VannforekomstNavn,hoyde_moh,longitude,latitude,utm_north,utm_east,utm_zone
0,26070,221-1-2,Langtjern,0.0497,Akershus,200,LANGTJERN,3208,Aurskog-Høland,221,,,,273.0,11.850274,59.808643,6634299.0,323345.999797,33
1,26071,101-2-7,Hokksjøen,0.1339,Østfold,100,HOKKSJØEN,3608,Halden,101,,,,148.0,11.563586,59.004423,6545620.0,302651.000302,33
2,26072,402-2-13,Sætertjern,0.1196,Hedmark,400,SÆTERTJERN,4332,Kongsvinger,402,,,,252.0,12.446711,60.060222,6660863.0,357866.0,33
3,26073,419-1-25,Mjøgsjøen,0.0482,Hedmark,400,MJØGSJØEN,4055,Sør-Odal,419,,,,489.0,11.842186,60.328578,6692181.0,325659.000153,33
4,26074,425-2-2,Kottern,0.1384,Hedmark,400,KOTTERN,3794,Åsnes,425,,,,480.0,12.517008,60.590729,6719770.0,364003.000014,33


In [8]:
# Tidy
df = df[['station_id', 'station_code', 'station_name', 'longitude', 'latitude', 
         'utm_north', 'utm_east', 'utm_zone', 'Areal_km2', 'hoyde_moh', 
         'Innsjønavn', 'Innsjønummer', 'Fylke', 'Fylkenummer', 'Kommunenavn', 
         'Kommunenummer', 'Naturvern', 'Naturvernform', 'VannforekomstNavn']]
df.columns = [i.lower() for i in df.columns]

# Save 
df.to_csv('../../1000_Lakes_AM_Export_2019-09-09.csv', encoding='utf-8', index=False)
df.head()

Unnamed: 0,station_id,station_code,station_name,longitude,latitude,utm_north,utm_east,utm_zone,areal_km2,hoyde_moh,innsjønavn,innsjønummer,fylke,fylkenummer,kommunenavn,kommunenummer,naturvern,naturvernform,vannforekomstnavn
0,26070,221-1-2,Langtjern,11.850274,59.808643,6634299.0,323345.999797,33,0.0497,273.0,LANGTJERN,3208,Akershus,200,Aurskog-Høland,221,,,
1,26071,101-2-7,Hokksjøen,11.563586,59.004423,6545620.0,302651.000302,33,0.1339,148.0,HOKKSJØEN,3608,Østfold,100,Halden,101,,,
2,26072,402-2-13,Sætertjern,12.446711,60.060222,6660863.0,357866.0,33,0.1196,252.0,SÆTERTJERN,4332,Hedmark,400,Kongsvinger,402,,,
3,26073,419-1-25,Mjøgsjøen,11.842186,60.328578,6692181.0,325659.000153,33,0.0482,489.0,MJØGSJØEN,4055,Hedmark,400,Sør-Odal,419,,,
4,26074,425-2-2,Kottern,12.517008,60.590729,6719770.0,364003.000014,33,0.1384,480.0,KOTTERN,3794,Hedmark,400,Åsnes,425,,,
