# Getting basins for all NVE discharge-measuring stations
This notebook assumes that the NVE station information has already been downloaded using the `get_nve_data.ipynb` notebook in this folder.

## Loading required packages and getting credentials to connect to the database

In [1]:
import requests
import json
import matplotlib.pyplot as plt
import datetime
import pandas as pd
from datetime import datetime
from datetime import timedelta
from fabric import Connection
import psycopg2
import gmaps
import pandas.io.sql as sqlio
import getpass
from io import StringIO
import paramiko
import sys
from time import sleep

sys.path.insert(0, "/home/jovyan/watexr/PROGNOS/")
from prognos_tools.gce_light import gce_api as gce

# Both secrets are prompted for interactively so they never appear in the notebook source.
key = getpass.getpass('mobiserver password: ')
cloudKey = getpass.getpass('vault password: ')


#Querying necessary tokens
def query(query, fetch=True, params=None):
    """Run one SQL statement against the ``vault`` database on mobiserver.

    Parameters
    ----------
    query : str
        SQL text. Use ``%s`` placeholders together with ``params`` instead of
        formatting values into the string.
    fetch : bool, default True
        When True the statement is executed once through pandas and the
        result is returned as a DataFrame. When False the statement is only
        executed (and committed) and nothing is returned.
    params : sequence or mapping, optional
        Values bound to the placeholders in ``query`` by the database driver.

    Returns
    -------
    pandas.DataFrame or None
    """
    db = psycopg2.connect(user='jose-luis', host='mobiserver.niva.no', port=5432,
                          database='vault', password=key)
    try:
        # `with db` only wraps the transaction (commit on success, rollback on
        # error); it does not close the connection, hence the explicit close below.
        with db:
            if fetch:
                # Execute exactly once: the original ran the statement through a
                # cursor and then a second time through pandas.
                return sqlio.read_sql_query(query, db, params=params)
            with db.cursor() as cursor:
                cursor.execute(query, params)
            return None
    finally:
        db.close()


# The vault password is passed as a bound parameter, so quotes or other special
# characters in it cannot break (or alter) the SQL statement.
_token_sql = '''select niva.getToken(%s, %s);'''
gmapsKey = query(_token_sql, params=('gmaps', cloudKey)).iloc[0,0]
sshKey = query(_token_sql, params=('geonorway_ssh_key', cloudKey)).iloc[0,0]
# From here on `cloudKey` no longer holds the vault password but the decoded
# 'gce_access' token, which is what the next cell hands to the GCE client.
cloudKey = json.loads(query(_token_sql, params=('gce_access', cloudKey)).iloc[0,0])
gmaps.configure(gmapsKey)
# paramiko reads private keys from file-like objects, so the key text is wrapped in a StringIO.
not_really_a_file = StringIO(sshKey)
private_key = paramiko.RSAKey.from_private_key(not_really_a_file)
# Drop the plain-text secrets that are no longer needed.
del key,sshKey

mobiserver password:  ·······
vault password:  ···············


## Starting the instance containing the database using the google compute engine api

In [2]:
#Check status of instance
properties = {'project'      : 'nivacatchment',
             'zone'         : 'europe-north1-a',
             'instanceType' : "n1-standard-4",
             'instanceName' : "dtm10",
             'username'     : "jose-luis",
             }

cloud = gce(properties, cloudKey)
del cloudKey

# URL template of the instance resource; the {placeholders} are left unformatted here
# (presumably filled in from `properties` by the gce helper -- confirm in prognos_tools).
instance_url = '''https://compute.googleapis.com/compute/v1/projects/{project}/zones/{zone}/instances/{instanceName}'''
poll_seconds = 2     # pause between two status requests
max_polls = 150      # give up after roughly 5 minutes instead of polling forever

#Getting instance info
cloud.CommonCalls['custom'] = instance_url
info = cloud.get('custom')
display(info['status'])
#If instance is stopped, start it
if info['status'] != 'RUNNING':
    cloud.CommonCalls['custom'] = instance_url + '/start'
    info = cloud.post('custom')
    # Status contained in the response to the start request.
    display(info['status'])
    cloud.CommonCalls['custom'] = instance_url
    info = cloud.get('custom')
    polls = 0
    while info['status'] != 'RUNNING':
        if polls >= max_polls:
            raise TimeoutError(
                "Instance {} is still '{}' after {} seconds".format(
                    properties['instanceName'], info['status'], polls * poll_seconds))
        sleep(poll_seconds)
        info = cloud.get('custom')
        polls += 1

# External IP of the first access config of the first network interface.
geonorway = info['networkInterfaces'][0]['accessConfigs'][0]['natIP']

display("IP of the instance containing the database {}:".format(geonorway))

#The config below can be used to connect to the instance using fabric's Connection
config =  {'host' : geonorway, 'user': properties['username'], 'connect_kwargs': {'pkey': private_key } }

'RUNNING'

'IP of the instance containing the database 35.228.213.48:'

## Getting the coordinates of NVE's discharge-measuring stations and the basin they belong to

In [3]:
#Helper function to query the database and return a pandas dataframe if required.
def query(query, fetch=True, params=None):
    """Run SQL against the ``geonorway`` database on the cloud instance.

    Parameters
    ----------
    query : str
        SQL text; may contain several ``;``-separated statements when
        ``fetch`` is False.
    fetch : bool, default True
        When True the SQL is executed once through pandas and the result is
        returned as a DataFrame. When False it is only executed and committed.
    params : sequence or mapping, optional
        Values bound to ``%s`` / ``%(name)s`` placeholders by the driver.

    Returns
    -------
    pandas.DataFrame or None
    """
    db = psycopg2.connect(user='jose-luis', host=geonorway, port=5432, database='geonorway')
    try:
        # `with db` commits on success and rolls back on error, but leaves the
        # connection open, so it is closed explicitly in the `finally` clause.
        with db:
            if fetch:
                # Execute exactly once. The original executed the statement
                # through a cursor first and then again through pandas, which
                # doubles the work and repeats any side effect of the SQL.
                return sqlio.read_sql_query(query, db, params=params)
            with db.cursor() as cursor:
                cursor.execute(query, params)
            return None
    finally:
        db.close()
        
#Schema where the nve data resides
schema = 'nveq'

#Querying, for every station, the basin polygon (nedborfelt.vassdragsomr) it falls in.
#The station points are reprojected with st_transform(geom,3045) before the intersection test.
#Returned columns: stationid, basin gid, basin name (vassomr) and st_area(geom)/1e6
#(km2 if the units of that CRS are metres -- TODO confirm).
sql = '''with bla as
(select stationid,stationname,st_transform(geom,3045) as geom from {}.stations)
select b.stationid,a.gid,a.vassomr,st_area(a.geom)/1e6 as area
from nedborfelt.vassdragsomr as a, bla as b 
where st_intersects(a.geom,b.geom);'''.format(schema)

#One row per (station, intersecting basin) pair
a = query(sql)
display(a)

Unnamed: 0,stationid,gid,vassomr,area
0,1.198.0,1,Haldenvassdraget/Iddefjorden,2507.380211
1,1.200.0,1,Haldenvassdraget/Iddefjorden,2507.380211
2,1.48.0,1,Haldenvassdraget/Iddefjorden,2507.380211
3,1.49.0,1,Haldenvassdraget/Iddefjorden,2507.380211
4,1.50.0,1,Haldenvassdraget/Iddefjorden,2507.380211
...,...,...,...,...
597,97.5.0,97,"Storfjorden sør, Hjørundfjorden og Sykkylvsfjo...",868.995031
598,98.4.0,98,"Storfjorden sør, Sunnylvsfjorden og Geirangerf...",1003.641202
599,99.1.0,99,Tafjordvassdraget/Tafjorden og Norddalsfjorden...,643.801323
600,99.17.0,99,Tafjordvassdraget/Tafjorden og Norddalsfjorden...,643.801323


In [4]:
# Checking if all stations fall inside a basin. The query should be empty if all stations fall inside a basin
# It lists the stations whose id is not among those that intersect a polygon of nedborfelt.vassdragsomr.
sql = '''with bla as
(select stationid,stationname,st_transform(geom,3045) as geom from {}.stations)
select b.stationid,b.stationname from bla as b 
where b.stationid not in (select b.stationid from bla as b, nedborfelt.vassdragsomr as a where st_intersects(a.geom,b.geom));'''.format(schema)

display(query(sql))

Unnamed: 0,stationid,stationname
0,19.244.0,Nye Jørundland


In [5]:
#There is a problem with Nye Jørundland (station 19.244.0): it appears that the latitude is wrong. This will need fixing
#First show the row as it is stored in the stations table
display(query('''select * from nveq.stations where stationid='19.244.0'  '''))

#This query gets the coordinates directly from the metadata that was obtained from HydAPI.
#json_array_elements expands the 'data' array into one row per element; `->` keeps the
#selected fields as JSON values while `->>` extracts text for the comparison in the where clause.
a = query('''with bla as
(
select json_array_elements(metadata->'data') as singleelement from nveq.metadata
)
select singleelement->'stationId' as sid, singleelement->'stationName' as name ,singleelement->'parameterName' as parameter,
singleelement->'longitude' as longitude,singleelement->'latitude' as latitude,
singleelement->'utmEast_Z33' as utme,singleelement->'utmNorth_Z33' as utmn
from  bla as a 
where (a.singleelement->>'stationId') = '19.244.0'
''')
#`a` is reused by the next cell to reproject the UTM coordinates
display(a)

Unnamed: 0,sid,stationname,stationid,latitude,longitude,geom
0,241,Nye Jørundland,19.244.0,1.11498,5.61744,0101000020E61000000EF8FC3042781640F41ABB44F5D6...


Unnamed: 0,sid,name,parameter,longitude,latitude,utme,utmn
0,19.244.0,Nye Jørundland,Vannstand,5.61744,1.11498,-548573,124922
1,19.244.0,Nye Jørundland,Vannføring,5.61744,1.11498,-548573,124922


In [6]:
# Cross-check of the suspicious station: convert the UTM zone 33 coordinates found in the
# metadata (utme, utmn; EPSG:25833) to EPSG:4326 and compare with the stored latitude/longitude.
# NOTE(review): the recorded output reproduces the same latitude/longitude as the stations
# table, so the UTM values in the metadata look equally wrong and this does not recover
# the real position.
if not a.empty:
    from pyproj import Transformer
    transformer = Transformer.from_crs("epsg:25833", "epsg:4326")
    # Only the first metadata row is used.
    new_coord = transformer.transform(a.loc[0,'utme'],a.loc[0,'utmn'])
    display(new_coord)

(1.1149882510510696, 5.617439450536286)

## Creating table with outlet coordinates
A common problem with station coordinates is that they do not fall on an actual "river". For the basin delineation algorithm to work, the given coordinates must fall inside a cell that is considered a river in the flow accumulation raster. 
To guarantee this we will nudge the given coordinates, if necessary, so that they fall in a cell considered to be a river. The process is not foolproof and the results should be quality controlled. 

In [7]:
#We will not include the Nye Jørundland station (19.244.0) until we get the right coordinates for it
#Please note that the geometry of the outlet table is reprojected with st_transform(geom,25833).
#NOTE(review): this comment previously said the srid was set to 3035 "since that is the epsg of
#the rasters we use to compute the basins", but the SQL below uses 25833 -- confirm which one the rasters use.
#The update statement overwrites the latitude/longitude columns with st_y/st_x of the reprojected
#geometry, so after this cell they hold projected coordinates rather than degrees.
sql = '''drop table if exists {schema}.outlet;
create table {schema}.outlet as select sid, stationname, stationid, latitude, longitude, st_transform(geom,25833) as geom from {schema}.stations as a
where a.stationid != '19.244.0';
update {schema}.outlet
set latitude=st_y(geom),
longitude=st_x(geom);
'''.format(schema=schema)
#fetch=False: these statements return no rows, they only need to be executed
query(sql,fetch=False)
display(query('''select * from {schema}.outlet;'''.format(schema=schema)))

Unnamed: 0,sid,stationname,stationid,latitude,longitude,geom
0,235,Songedalsåi,19.104.0,6.594485e+06,100966.613073,0101000020E9640000B44525CF69A6F840C8C08352ED27...
1,1,Berg,1.198.0,6.649056e+06,299270.663130,0101000020E9640000798B0BA71A441241F067B407385D...
2,2,Lierelv,1.200.0,6.647623e+06,306146.133013,0101000020E9640000A09E348888AF124171F317DFD15B...
3,3,Ørje,1.48.0,6.598578e+06,310319.289430,0101000020E964000030666028BDF01241800E367AEC2B...
4,4,Brekke,1.49.0,6.561574e+06,303059.849291,0101000020E9640000929AAC654F7F12414BD08182C907...
...,...,...,...,...,...,...
597,599,Sleddalen,97.5.0,6.911719e+06,54298.943804,0101000020E964000050F7A3335E83EA402FB342DAB95D...
598,600,Øye ndf.,98.4.0,6.908019e+06,79130.031111,0101000020E964000054E66D7FA051F340EA7CDACE1C5A...
599,601,Onilsavatn,99.1.0,6.921633e+06,106860.032109,0101000020E9640000DC478483C016FA40399B10326867...
600,602,Rødøla,99.17.0,6.918836e+06,110112.804136,0101000020E96400008441BDDD0CE2FA404CD863F8AC64...


In [8]:
#Listing, for every basin (gid), the ids (sid) of the stations that fall inside it. The flow accumulation
#rasters are downloaded in a later cell and the computation is done one basin at a time.
#We exclude the stations in Svalbard, for which we have no elevation data: this is the
#`st_y(b.geom) < 8e6` filter on the reprojected station geometry.
#Note that the stations are read from {schema}.stations, not from the {schema}.outlet table.
sql = '''with bla as
(select sid,stationid,stationname,st_transform(geom,3045) as geom from {schema}.stations)
select distinct a.gid, array_agg(sid) as sids
from nedborfelt.vassdragsomr as a, bla as b 
where st_intersects(a.geom,b.geom)
and st_y(b.geom) < 8e6
group by a.gid;'''.format(schema = schema)

#`a` (one row per basin: gid, sids) is what the download loop in the next cell iterates over
a = query(sql)
display(a)

Unnamed: 0,gid,sids
0,1,"[5, 4, 3, 2, 1]"
1,2,"[336, 338, 339, 340, 313, 312, 311, 310, 309, ..."
2,3,"[434, 433]"
3,5,"[480, 483, 481, 482]"
4,6,"[500, 501, 499, 498, 497, 496, 495, 494, 493, ..."
...,...,...
160,250,[436]
161,254,"[438, 437]"
162,255,[439]
163,258,"[440, 443, 444, 441]"


In [9]:
#Processing all stations within one basin

#Creating temporary directory to download data and place intermediary files
#(any existing ./geodata directory is deleted first)
with Connection('localhost') as c:
    c.local('rm -rf geodata && mkdir geodata')

#A single SSH connection to the instance is opened once and reused for every download,
#instead of opening a new one on each iteration of the loop.
with Connection(**config) as c:
    #a[:1]: only the first basin is processed for now
    for _, basin in a[:1].iterrows():
        gid = basin['gid']
        sids = basin['sids']
        display(gid, sids)
        #Fetch the flow accumulation raster of this basin into ./geodata/
        c.get('/home/jose-luis/flatLake/basin_{}_flow_acc.tif'.format(gid), './geodata/')

1

[5, 4, 3, 2, 1]

In [10]:
#Sanity check: SRID of the basin polygons, i.e. the CRS the station points are transformed to in the queries above
display(query('''select st_srid(geom) from nedborfelt.vassdragsomr  limit 1;'''))

Unnamed: 0,st_srid
0,3045


In [12]:
import tifffile as tif

# Read the downloaded flow accumulation raster of basin 1 into memory.
# NOTE: from here on `a` is the raster array and no longer the basin/station table.
a = tif.imread('geodata/basin_1_flow_acc.tif')
# (rows, columns) of the raster
display(a.shape)


(16050, 4150)

In [13]:
# Exploratory scratch cell: list the attributes of the array returned by tif.imread.
# TODO: remove before sharing, the output is long and not part of the workflow.
dir(a)


['T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_finalize__',
 '__array_function__',
 '__array_interface__',
 '__array_prepare__',
 '__array_priority__',
 '__array_struct__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__complex__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__ilshift__',
 '__imatmul__',
 '__imod__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__

In [17]:
# Exploratory scratch cell: shape of the imaginary part of the raster array (same as a.shape).
a.imag.shape

(16050, 4150)

In [18]:
# Exploratory scratch cell: only shows the bound method, it is not called.
a.itemset

<function ndarray.itemset>

In [19]:
# Exploratory scratch cell: only shows the bound method, it is not called.
a.tolist

<function ndarray.tolist>

In [28]:
import tifffile as tif

# Display the names of the TIFF / GeoTIFF tags stored in every page of the raster file.
# The open file is bound to its own name: binding it to `tif` (as before) replaced the
# tifffile module alias with a file object that is closed once the block ends, so any
# later `tif.imread(...)` failed until the module was imported again.
with tif.TiffFile('geodata/basin_1_flow_acc.tif') as tiff_file:
    for page in tiff_file.pages:
        for tag in page.tags:
            display(tag.name)


'ImageWidth'

'ImageLength'

'BitsPerSample'

'Compression'

'PhotometricInterpretation'

'StripOffsets'

'SamplesPerPixel'

'RowsPerStrip'

'StripByteCounts'

'PlanarConfiguration'

'Predictor'

'SampleFormat'

'ModelPixelScaleTag'

'ModelTiepointTag'

'GeoKeyDirectoryTag'

'GeoDoubleParamsTag'

'GeoAsciiParamsTag'

'GDAL_NODATA'

In [34]:
import glob
# Upload the local ERA5 NetCDF files to the cloud instance.
with Connection(**config) as c:
    # Start from an empty ./era5 directory on the remote side (an existing one is deleted).
    c.run('rm -rf era5 && mkdir era5')
    # Every *.nc file in the shared folder is copied, one transfer per file.
    for file in glob.glob('/home/jovyan/shared/era5/*.nc') :
        c.put(file,'./era5/')
    


