In [1]:
%matplotlib inline
import geopandas as gpd
import geopandas.tools
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import numpy as np
import pandas as pd
import pyproj
import seaborn as sn
import useful_rid_code as rid
from shapely.geometry import Point
from sqlalchemy import text, types

In [2]:
# Connect to db. The call prompts interactively for a username and password
# (see the cell output), so no credentials are stored in the notebook.
# `eng` is the connection object passed to `pd.read_sql` and used to run the
# UPDATE statements in the cells below.
eng = nivapy.da.connect()

Username:  ···
Password:  ········


Connection successful.


# Updating co-ordinates and regine IDs for point sources already in the database

`RESA2.RID_PUNKTKILDER` contains major industrial point sources (industry and sewage treatment). Some of these locations were originally reported and added to the database without co-ordinates, and they therefore do not have regine IDs assigned. Subsequent data submissions have provided co-ordinates, but these are not updated if the site ID is already in the database. This code reads the "store anlegg" data for 2016 - 2019 and updates the co-ordinates (and regine IDs) where they are available.

In [3]:
# Get all sites with co-ords submitted 2016 - 2019 (both years inclusive)
first_year, last_year = 2016, 2019

# Column names in the raw "store anlegg" workbooks -> names used in this notebook
col_map = {
    "ANLEGGSNR": "anlegg_nr",
    "ANLEGGSNAVN": "anlegg_navn",
    "Kommunenr": "komm_no",
    "Sone": "zone",
    "UTM_E": "east",
    "UTM_N": "north",
}

df_list = []
# range() excludes its stop value, so add 1 to include `last_year`
for year in range(first_year, last_year + 1):
    xl_path = f"../../../Data/point_data_{year}/avlop_stor_anlegg_{year}_raw.xlsx"

    # Keep only rows with a complete UTM position, then the columns of interest
    yr_df = (
        pd.read_excel(xl_path, sheet_name=f"store_anlegg_{year}")
        .dropna(subset=["Sone", "UTM_E", "UTM_N"])[list(col_map)]
        .rename(columns=col_map)
        .drop_duplicates()
    )

    # Project helper; presumably adds the "lat"/"lon" columns (WGS84 decimal
    # degrees) that are used below - TODO confirm against useful_rid_code
    yr_df = rid.utm_to_wgs84_dd(yr_df, "zone", "east", "north")
    yr_df = yr_df.drop(columns=["zone", "east", "north"])
    df_list.append(yr_df)

# Combine all years; identical rows reported in several years collapse to one
df = pd.concat(df_list, axis="rows").drop_duplicates()

# Assign regine ID
# NOTE(review): assumes the helper returns the input rows with a "VASSDRAGNR"
# column taken from the polygon containing each point - verify in useful_rid_code
reg_shp_path = r"../../../Data/gis/shapefiles/reg_minste_f_wgs84.shp"
df = rid.identify_point_in_polygon(
    df, reg_shp_path, "anlegg_nr", "VASSDRAGNR", "lat", "lon"
)

# One row per (site, regine) pair, with cols renamed to match RESA2
df = df.drop_duplicates(subset=["anlegg_nr", "VASSDRAGNR"]).rename(
    columns={
        "komm_no": "kno",
        "VASSDRAGNR": "regine",
        "lon": "lon_utl",
        "lat": "lat_utl",
    }
)

df.head()

Unnamed: 0,anlegg_nr,anlegg_navn,kno,lat_utl,lon_utl,regine
0,0101AL02,Bakke,101.0,59.019598,11.443762,001.2220
2,0101AL06,Kornsjø,101.0,58.935184,11.668959,001.1J
3,0101AL07,Remmendalen avløpsanlegg,101.0,59.120864,11.360106,001.31Z
4,0104AL01,Kambo,104.0,59.474488,10.686496,003.20
6,0105AL00,Alvim Renseanlegg,105.0,59.273056,11.075773,002.A4


In [4]:
# Fetch the point sources already stored in RESA2 that have no regine ID
# assigned (the query filters on `regine IS NULL`)
sql = "SELECT * from resa2.rid_punktkilder WHERE regine IS NULL"
db_df = pd.read_sql(sql, con=eng)

# Report how many records need updating, then preview them
print(db_df.shape[0])
db_df.head()

129


Unnamed: 0,anlegg_nr,anlegg_navn,etat,opprettet,nedlagt,type,kno,adresse,postnr,regine,lon_utl,lat_utl,lon_anl,lat_anl,status
0,0215AL39,Båtstø vann og avløp,,,,RENSEANLEGG,,,,,,,,,
1,0301AL33,Wyllerløypa RA,,,,RENSEANLEGG,,,,,,,,,
2,0511AL71,Snøheim turisthytte,,,,RENSEANLEGG,,,,,,,,,
3,0513AL64,Billingen renseanlegg,,,,RENSEANLEGG,,,,,,,,,
4,0544AL41,Bjødnaholet RA,,,,RENSEANLEGG,,,,,,,,,


In [7]:
# Update db records if possible
# The values come from externally submitted spreadsheets, so they are passed as
# bind parameters instead of being formatted into the SQL string. This way a
# quote or other special character in the data cannot break or alter the statement.
sql = text(
    "UPDATE resa2.rid_punktkilder "
    "SET regine = :regine, "
    "  lon_utl = :lon, "
    "  lat_utl = :lat "
    "WHERE anlegg_nr = :an_nr"
)

for idx, row in db_df.iterrows():
    an_nr = row["anlegg_nr"]
    up_df = df.query("anlegg_nr == @an_nr")

    # More than one candidate location/regine for a site is ambiguous: stop
    # rather than write an arbitrary one to the database
    assert len(up_df) <= 1, f"Multiple candidate locations found for '{an_nr}'"

    if len(up_df) == 1:
        new_vals = up_df.iloc[0]

        # NOTE(review): if the point-in-polygon step can leave regine missing
        # (not verifiable from this notebook), store NULL rather than the
        # literal text 'nan' that string formatting would have produced
        regine = new_vals["regine"]
        regine = None if pd.isna(regine) else str(regine)

        eng.execute(
            sql,
            {
                "regine": regine,
                "lon": float(new_vals["lon_utl"]),
                "lat": float(new_vals["lat_utl"]),
                "an_nr": an_nr,
            },
        )