In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
import nivapy3 as nivapy
import matplotlib.pyplot as plt
import fiona
import numpy as np
import cartopy
import matplotlib
import cartopy.crs as ccrs
from geopy.distance import geodesic

plt.style.use("ggplot")

In [2]:
eng = nivapy.da.connect()

Username:  ···
Password:  ········


Connection successful.


# 1000 Lakes 2019: Linking Aquamonitor to RESA2

Now that most/all analyses are complete, we want to link the data in Aquamnonitor to RESA, and do some cleaning & checking in the process.

## 1. Check existing AM - RESA links

Most of the 2019 lakes have been sampled previously in 1995, so connections already exist in most cases. These need checking.

### 1.1. Match AM IDs to RESA

In [3]:
# Read latest station details from AM
am_xls = r"../../1000_Lakes_AM_Export_2020-05-14.xlsx"
am_df = pd.read_excel(am_xls, sheet_name="StationPoint")
am_df = am_df[["StationId", "StationCode", "StationName", "Latitude", "Longitude"]]
am_df.columns = ["am_id", "am_code", "am_name", "am_lat", "am_lon"]

print(len(am_df["am_id"].unique()), "stations in AM.")

am_df.head()

1002 stations in AM.


Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon
0,26070,221-1-2,Langtjern,59.808643,11.850274
1,26071,101-2-7,Hokksjøen,59.004423,11.563586
2,26072,402-2-13,Sætertjern,60.060222,12.446711
3,26073,419-1-25,Mjøgsjøen,60.328578,11.842186
4,26074,425-2-2,Kottern,60.590729,12.517008


In [4]:
# Get RESA IDs for these stations where links already exist
bind_stns = ",".join("(1, %d)" % i for i in am_df["am_id"].unique())
sql = (
    "SELECT station_id as am_id, "
    "  local_pk as resa_id "
    " FROM nivadatabase.datasource_station "
    "WHERE datasource_id = 11 "
    "AND (1, station_id) in (%s)" % bind_stns
)
df = pd.read_sql(sql, eng)

print(len(df), "stations are already linked to RESA.")

df.head()

981 stations are already linked to RESA.


Unnamed: 0,am_id,resa_id
0,46105,17957
1,46120,18073
2,46323,17854
3,46550,18013
4,46620,17809


In [5]:
# Get details for matching stations in RESA
resa_df = nivapy.da.select_resa_stations(eng)
resa_df = resa_df.query("station_id in @df.resa_id.unique()")

resa_df = resa_df[
    ["station_id", "station_code", "station_name", "latitude", "longitude"]
]
resa_df.columns = ["resa_id", "resa_code", "resa_name", "resa_lat", "resa_lon"]

print(len(resa_df), "matching stations found in RESA.")

resa_df.head()

19688 stations in the RESA database.
981 matching stations found in RESA.


Unnamed: 0,resa_id,resa_code,resa_name,resa_lat,resa_lon
5,9,101-2-7,Hokksjøen,59.004421,11.563584
6,10,221-1-2,Langtjern,59.808647,11.850279
7,11,402-2-13,Sætertjern,60.06022,12.446706
8,12,419-1-25,Mjøgsjøen,60.328576,11.842177
9,13,425-2-2,Kottern,60.590732,12.517008


In [6]:
# Merge results
df = pd.merge(am_df, df, how="left", on="am_id")
df = pd.merge(df, resa_df, how="left", on="resa_id")

print(df["resa_id"].isna().sum(), "stations in AM are not yet linked to RESA.")
print(
    df["resa_lat"].isna().sum(),
    "stations in RESA do not have complete co-ordinate information.",
)

df.head()

21 stations in AM are not yet linked to RESA.
25 stations in RESA do not have complete co-ordinate information.


Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon
0,26070,221-1-2,Langtjern,59.808643,11.850274,10.0,221-1-2,Langtjern,59.808647,11.850279
1,26071,101-2-7,Hokksjøen,59.004423,11.563586,9.0,101-2-7,Hokksjøen,59.004421,11.563584
2,26072,402-2-13,Sætertjern,60.060222,12.446711,11.0,402-2-13,Sætertjern,60.06022,12.446706
3,26073,419-1-25,Mjøgsjøen,60.328578,11.842186,12.0,419-1-25,Mjøgsjøen,60.328576,11.842177
4,26074,425-2-2,Kottern,60.590729,12.517008,13.0,425-2-2,Kottern,60.590732,12.517008


### 1.2. Compare AM and RESA co-ordinates

In [7]:
def f(row):
    """ Calculate geodesic distance between locations in AM and RESA.
    """
    am_coords = (row["am_lat"], row["am_lon"])
    resa_coords = (row["resa_lat"], row["resa_lon"])
    return geodesic(am_coords, resa_coords).meters

In [8]:
# Add distances between points in AM and RESA
coord_df = df.dropna(subset=["am_lat", "am_lon", "resa_lat", "resa_lon"])
coord_df["distance_m"] = coord_df.apply(f, axis=1)

coord_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m
0,26070,221-1-2,Langtjern,59.808643,11.850274,10.0,221-1-2,Langtjern,59.808647,11.850279,0.566628
1,26071,101-2-7,Hokksjøen,59.004423,11.563586,9.0,101-2-7,Hokksjøen,59.004421,11.563584,0.316569
2,26072,402-2-13,Sætertjern,60.060222,12.446711,11.0,402-2-13,Sætertjern,60.06022,12.446706,0.388633
3,26073,419-1-25,Mjøgsjøen,60.328578,11.842186,12.0,419-1-25,Mjøgsjøen,60.328576,11.842177,0.547998
4,26074,425-2-2,Kottern,60.590729,12.517008,13.0,425-2-2,Kottern,60.590732,12.517008,0.263258


### 1.3. Compare AM and RESA lake IDs from NVE

In [9]:
# Get layers in NVE lakes GDB
gdb_path = r"../../NVEData.gdb"
layer_list = fiona.listlayers(gdb_path)
print(layer_list)

['Innsjo']


In [10]:
# Read lakes dataset
gdf = gpd.read_file(gdb_path, layer="Innsjo")

# Link AM coords to lakes
coord_df = nivapy.spatial.identify_point_in_polygon(
    coord_df,
    gdf,
    pt_col="am_id",
    poly_col="vatnLnr",
    lat_col="am_lat",
    lon_col="am_lon",
)

# Join NVE name
coord_df = pd.merge(coord_df, gdf[["vatnLnr", "navn"]], how="left", on="vatnLnr")

coord_df.rename({"vatnLnr": "am_nve_id", "navn": "am_nve_name"}, axis=1, inplace=True)

# Link RESA coords to lakes
coord_df = nivapy.spatial.identify_point_in_polygon(
    coord_df,
    gdf,
    pt_col="resa_id",
    poly_col="vatnLnr",
    lat_col="resa_lat",
    lon_col="resa_lon",
)

# Join NVE name
coord_df = pd.merge(coord_df, gdf[["vatnLnr", "navn"]], how="left", on="vatnLnr")

coord_df.rename(
    {"vatnLnr": "resa_nve_id", "navn": "resa_nve_name"}, axis=1, inplace=True
)

coord_df.head()

Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m,am_nve_id,am_nve_name,resa_nve_id,resa_nve_name
0,26070,221-1-2,Langtjern,59.808643,11.850274,10.0,221-1-2,Langtjern,59.808647,11.850279,0.566628,3208.0,Langtjenn,3208.0,Langtjenn
1,26071,101-2-7,Hokksjøen,59.004423,11.563586,9.0,101-2-7,Hokksjøen,59.004421,11.563584,0.316569,3608.0,Hokksjøen,3608.0,Hokksjøen
2,26072,402-2-13,Sætertjern,60.060222,12.446711,11.0,402-2-13,Sætertjern,60.06022,12.446706,0.388633,4332.0,Sætertjenn,4332.0,Sætertjenn
3,26073,419-1-25,Mjøgsjøen,60.328578,11.842186,12.0,419-1-25,Mjøgsjøen,60.328576,11.842177,0.547998,4055.0,Mjøgsjøen,4055.0,Mjøgsjøen
4,26074,425-2-2,Kottern,60.590729,12.517008,13.0,425-2-2,Kottern,60.590732,12.517008,0.263258,3794.0,Kottern,3794.0,Kottern


## 2. Quality assess existing AM - RESA links

### 2.1. Identify inconsistent stations

In [11]:
# Stations where codes don't match
coord_df.query("am_code != resa_code")

Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m,am_nve_id,am_nve_name,resa_nve_id,resa_nve_name
119,26199,419-3-2,Storbørja,60.091,11.912,191.0,402-604,Storbørja,60.091292,11.927417,858.498227,368.0,Storbørja,,


In [12]:
# Stations where NVE lake IDs don't match
coord_df.query("am_nve_id != resa_nve_id").dropna(subset=["resa_nve_id"])

Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m,am_nve_id,am_nve_name,resa_nve_id,resa_nve_name
109,26186,402-1-28,Eintjern,60.0724,12.368182,3027.0,402-1-28,Eintjenn,60.074,12.345,1302.937084,4321.0,Eintjenn,4313.0,Kolbjørnsrudsjøen
328,26423,912-2-27,Ljøsvannet,61.13243,10.870015,3280.0,912-2-27,Ljøsvannet,58.757,8.901,286588.882538,33055.0,Store Ljøsvatnet,9183.0,Ljøsvannet
709,26821,1740-2-9,Storskorovatn,64.647186,13.060158,3707.0,1740-2-9,Storskorovatn,64.67,13.024,3074.629718,39432.0,Storskorovatnet,39394.0,Åttatjønna
748,26860,1836-2-14,Storvatnet,66.656207,13.226925,3749.0,1836-2-14,Storevatnet,66.663,13.247,1167.165665,44357.0,Storvatnet,44350.0,Litlvatnet
770,26882,1854-2-16,Stopålvatnet,68.362866,16.590626,3772.0,1854-2-16,Storpålvatnet,68.37,16.646,2414.06529,48666.0,Stopålvatnet,48662.0,Holmvatnet
944,27064,1825-1-9,Sivtjønna,65.176841,13.240496,3735.0,1825-1-9,Sivtjørna,65.179,13.252,590.397703,43247.0,Sivtjønna,43245.0,


In [13]:
# Stations where NVE lake IDs don't match because resa_nve_id is NaN
coord_df.query(
    "(am_nve_id != resa_nve_id) and (resa_nve_id != resa_nve_id)"
).sort_values("distance_m", ascending=False)

Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m,am_nve_id,am_nve_name,resa_nve_id,resa_nve_name
962,45671,2030-623,St. Sametti,69.471997,29.632787,15860.0,2030-623,ST.SAMETTI,69.488029,27.084386,99743.917606,2447.0,Sámetjávri,,
300,26390,829-4-6,Kviteseidvatnet,59.349741,8.580646,3244.0,829-4-6,Kviteseidvatnet,59.382,8.465,7494.002681,8.0,Kviteseidvatnet,,
900,27014,1711-3-1,Skurdalsvatnet,63.364094,12.090835,3918.0,1711-3-1,Skurdalsvatnet,63.372,12.122,1791.074048,736.0,Skurdalssjøen,,
202,26286,545-4-1,Bygdin,61.363,8.386,3134.0,545-4-1,Bygdin,61.349816,8.399882,1646.212866,146.0,Bygdin,,
75,26151,1640-603,Tufsingen,62.606772,11.89418,116.0,1640-603,Tufsingen,62.612294,11.876558,1094.271435,35326.0,Tufsingen,,
498,26606,1228-1-12,HOH 1367,60.028612,6.822722,3474.0,1228-1-12,HOH 1367,60.029401,6.840693,1005.730756,27782.0,,,
119,26199,419-3-2,Storbørja,60.091,11.912,191.0,402-604,Storbørja,60.091292,11.927417,858.498227,368.0,Storbørja,,
74,26150,831-501,Brårvatn,59.29747,7.714567,115.0,831-501,Brårvatn,59.294921,7.727118,769.458227,14277.0,Brårvatn,,
76,26152,1228-501,Steinavatn,59.856352,6.571981,135.0,1228-501,Steinavatn,59.859877,6.578783,547.361873,1705.0,Steinavatnet,,
956,27076,1640-3-7,Midtre Muggsjøen,62.438933,12.172615,3917.0,1640-3-7,Midtre Muggsjøen,62.441,12.18,445.589577,1360.0,Midtre Muggsjøen,,


### 2.2. Map inconsistent sites

In [14]:
# All sites where code or NVE IDs do not match
err_df = coord_df.query("(am_code != resa_code) or (am_nve_id != resa_nve_id)")

# Convert co-ords to UTM Zone 33N
# AM
err_df = nivapy.spatial.wgs84_dd_to_utm(err_df, lat="am_lat", lon="am_lon")
err_df.rename({"utm_north": "am_north", "utm_east": "am_east"}, axis=1, inplace=True)

# RESA
err_df = nivapy.spatial.wgs84_dd_to_utm(err_df, lat="resa_lat", lon="resa_lon")
err_df.rename(
    {"utm_north": "resa_north", "utm_east": "resa_east"}, axis=1, inplace=True
)

err_df.head()

Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m,am_nve_id,am_nve_name,resa_nve_id,resa_nve_name,am_north,am_east,utm_zone,resa_north,resa_east
16,26087,604-3-3,Hengsvatnet,59.656185,9.441521,26.0,604-3-3,Hengsvatnet,59.656902,9.443625,143.010561,398.0,Hengsvannet,,,6626241.0,186990.580571,33,6626311.0,187115.547496
37,26110,1201-1-23,Knappatjørna,60.329958,5.262102,53.0,1201-1-23,HOH 60,60.329992,5.262997,49.602367,26786.0,Knappatjørna,,,6727909.0,-36411.253686,33,6727905.0,-36361.622781
74,26150,831-501,Brårvatn,59.29747,7.714567,115.0,831-501,Brårvatn,59.294921,7.727118,769.458227,14277.0,Brårvatn,,,6595877.0,85588.002216,33,6595516.0,86269.002008
75,26151,1640-603,Tufsingen,62.606772,11.89418,116.0,1640-603,Tufsingen,62.612294,11.876558,1094.271435,35326.0,Tufsingen,,,6945615.0,340615.000056,33,6946273.0,339740.999859
76,26152,1228-501,Steinavatn,59.856352,6.571981,135.0,1228-501,Steinavatn,59.859877,6.578783,547.361873,1705.0,Steinavatnet,,,6665470.0,28716.007085,33,6665812.0,29145.007789


In [15]:
def chunker(df, size):
    """ Split a dataframe into chunks of length 'size'.
    """
    return (df[pos: pos + size] for pos in range(0, len(df), size))

In [16]:
# Size of square buffer around monitoring points
buff = 500

# Loop over chunks
for df_idx, df_part in enumerate(chunker(err_df.reset_index(drop=True), 20)):
    fig = plt.figure(figsize=(15, 20))

    # Loop over stations
    for idx, row in df_part.reset_index(drop=True).iterrows():
        zone = row["utm_zone"]
        am_north = row["am_north"]
        am_east = row["am_east"]
        resa_north = row["resa_north"]
        resa_east = row["resa_east"]

        # Get bounding box
        xmin = min(resa_east, am_east)
        xmax = max(resa_east, am_east)
        ymin = min(resa_north, am_north)
        ymax = max(resa_north, am_north)

        # Define co-ord system
        crs = ccrs.UTM(zone)

        # Plot
        ax = fig.add_subplot(5, 4, idx + 1, projection=crs)
        ax.set_extent([xmin - buff, xmax + buff, ymin - buff, ymax + buff], crs=crs)

        ax.add_wms(
            wms="https://openwms.statkart.no/skwms1/wms.topo4", layers=["topo4_WMS"]
        )

        ax.scatter(am_east, am_north, s=50, c="r", edgecolors="k", transform=crs)
        ax.scatter(
            resa_east, resa_north, s=50, c="yellow", edgecolors="k", transform=crs
        )

        ax.set_title(
            f"AM code {row['am_code']}\nSeparation: {int(row['distance_m'])} m",
            fontsize=12,
        )

    # Save
    out_png = r"../../grid_maps/resa_am_mismatch/grid_plot_%03d.png" % (df_idx + 1)
    plt.savefig(out_png, dpi=150)
    plt.clf()
    plt.close()

In [17]:
err_df.query('am_code =="402-1-28"')

Unnamed: 0,am_id,am_code,am_name,am_lat,am_lon,resa_id,resa_code,resa_name,resa_lat,resa_lon,distance_m,am_nve_id,am_nve_name,resa_nve_id,resa_nve_name,am_north,am_east,utm_zone,resa_north,resa_east
109,26186,402-1-28,Eintjern,60.0724,12.368182,3027.0,402-1-28,Eintjenn,60.074,12.345,1302.937084,4321.0,Eintjenn,4313.0,Kolbjørnsrudsjøen,6662390.0,353549.999798,33,6662620.0,352267.65293
