In [1]:
%matplotlib inline

import geopandas as gpd
import matplotlib.pyplot as plt
import nivapy3 as nivapy
import pandas as pd

# Apply matplotlib's "ggplot" style sheet to any figures drawn in this notebook
plt.style.use("ggplot")

# TEOTIL2: Update Regine-Kommune links

Kommune definitions in Norway change regularly. This notebook creates `regine_{year}.csv` by updating the kommune numbers in the previous year's file (`regine_{year - 1}.csv`). It is one of the "core" input datasets for TEOTIL2, located here:

    ../../../teotil2/data/core_input_data
    
**This notebook will need running most years**. To find the kommune boundaries for the year of interest, search Geonorge for "[Administrative enheter kommuner](https://kartkatalog.geonorge.no/metadata?text=Administrative%20enheter%20kommuner)" and choose your year of interest. It is best to download the data as a File Geodatabase (in ETRS89 UTM Zone 33) and then export just the kommune polygons as a shapefile. Previous versions are here:

    ../../../Data/gis/shapefiles

Note that regine codes and properties **remain unchanged** in this analysis - the only difference is that I will update the kommune numbers associated with each regine in the `komnr` column.

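It looks as though in TEOTIL, each regine should be associated with one *and only one* kommune ID. I will therefore assign the kommune ID that covers the largest part of each catchment by area. Regines for which the intersection yields no kommune ID (e.g. `001.`) keep the kommune ID from the previous year's file.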

In [2]:
# Year of interest. This selects the kommune shapefile to read (kommune_{year}.shp)
# and the output file to write (regine_{year}.csv). The previous year's file
# (regine_{year - 1}.csv) is used as the starting point
year = 2022

In [3]:
# Load the kommune polygons for the chosen year (downloaded from Geonorge),
# keeping only the kommune number and the geometry
kom_shp = f"/home/jovyan/shared/common/JES/teotil2_data/gis/shapefiles/kommune_{year}.shp"
kom_gdf = gpd.read_file(kom_shp)[["kommunenum", "geometry"]]
kom_gdf.head()

Unnamed: 0,kommunenum,geometry
0,5031,"POLYGON ((292666.430 7030892.790, 292665.550 7..."
1,4636,"POLYGON ((-83202.730 6837229.930, -68694.430 6..."
2,3019,"POLYGON ((253411.310 6612055.150, 253424.050 6..."
3,3032,"POLYGON ((273757.930 6667604.770, 273744.930 6..."
4,3043,"POLYGON ((132165.210 6747704.890, 132188.050 6..."


In [4]:
# Load the regine catchment polygons used by TEOTIL, reproject them to
# EPSG:25833 and keep only the catchment code and the geometry
reg_shp = "/home/jovyan/shared/common/JES/teotil2_data/gis/shapefiles/RegMinsteF.shp"
reg_gdf = gpd.read_file(reg_shp)
reg_gdf = reg_gdf.to_crs("epsg:25833")[["VASSDRAGNR", "geometry"]]
reg_gdf.head()

Unnamed: 0,VASSDRAGNR,geometry
0,067.62,"POLYGON ((-38501.560 6783604.030, -37807.890 6..."
1,019.F211,"POLYGON ((127984.120 6573462.090, 128145.600 6..."
2,077.1E,"POLYGON ((87091.190 6815443.640, 87094.910 681..."
3,196.2A2A,"POLYGON ((630710.870 7690571.190, 630714.570 7..."
4,152.2D2B3,"POLYGON ((424201.310 7329879.500, 424075.940 7..."


In [5]:
# Intersect the regine catchments with the kommune polygons, giving one row per
# (regine, kommune) overlap.
# gpd.overlay does not reproject its inputs, so first make sure the kommune
# layer is in the same CRS as the regine layer (the regine layer is explicitly
# reprojected when it is loaded, but the kommune layer is used as downloaded).
# If either layer has no CRS defined, the kommune layer is passed through
# unchanged, as before.
kom_for_overlay = kom_gdf
if (
    kom_gdf.crs is not None
    and reg_gdf.crs is not None
    and kom_gdf.crs != reg_gdf.crs
):
    kom_for_overlay = kom_gdf.to_crs(reg_gdf.crs)

int_gdf = gpd.overlay(reg_gdf, kom_for_overlay, how="intersection")
int_gdf.head()

Unnamed: 0,VASSDRAGNR,kommunenum,geometry
0,067.62,4631,"POLYGON ((-33266.550 6778409.630, -34690.214 6..."
1,067.2B1B,4631,"POLYGON ((-6745.040 6780008.500, -6788.940 677..."
2,059.4,4631,"MULTIPOLYGON (((-48468.010 6759307.750, -48422..."
3,059.1,4631,"POLYGON ((-45256.660 6762302.620, -44845.060 6..."
4,068.10,4631,"POLYGON ((-38816.950 6783832.480, -39361.440 6..."


In [6]:
# Row counts, one per line: kommuner, regine catchments, intersection polygons
print(len(kom_gdf), len(reg_gdf), len(int_gdf), sep="\n")

363
20203
27438


In [7]:
# Area of each intersection polygon in km2, measured after reprojecting to a
# cylindrical equal-area ("cea") projection
areas_m2 = int_gdf.to_crs({"proj": "cea"})["geometry"].area
int_gdf["area_km2"] = areas_m2 / 1e6

# For each regine, keep the kommune with the largest overlapping area:
# sort ascending by area so the biggest overlap is the last row for each
# catchment, keep that last row, then drop the helper column and rename
df = (
    int_gdf[["VASSDRAGNR", "kommunenum", "area_km2"]]
    .sort_values("area_km2")
    .drop_duplicates("VASSDRAGNR", keep="last")
    .drop(columns="area_km2")
    .rename(columns={"VASSDRAGNR": "regine", "kommunenum": "komnr2"})
)

df.head()

Unnamed: 0,regine,komnr2
25093,001.1A6,3001
1553,246.A6,5444
6363,031.AB21A50,1108
13900,159.8AAAAA,1837
6210,173.F,1806


In [8]:
# Read the previous year's TEOTIL input dataset, which is the starting point for
# the new file.
# The regine codes (e.g. "001.10") are identifiers rather than numbers, and
# "regine" is the join key used later, so read both code columns explicitly as
# strings instead of relying on dtype inference
teo_csv = f"/home/jovyan/shared/common/JES/teotil2/data/core_input_data/regine_{year - 1}.csv"
teo_df = pd.read_csv(
    teo_csv,
    sep=";",
    encoding="utf-8",
    dtype={"regine": str, "regine_ned": str},
)
teo_df.head()

Unnamed: 0,regine,regine_ned,a_reg_km2,q_sp_m3/s/km2,runoff_mm/yr,q_reg_m3/s,vassom,komnr,fylke,ospar_region
0,001.,1_2,0.0,0.0,0.0,0.0,0,0,0,0
1,001.10,001.,1.41,0.014,441.504,0.01974,1,3001,1,Skagerrak
2,001.1A1,001.10,1.16,0.014,441.504,0.01624,1,3001,1,Skagerrak
3,001.1A20,001.1A1,0.35,0.01,315.36,0.0035,1,3001,1,Skagerrak
4,001.1A2A,001.1A20,17.4,0.012,378.432,0.2088,1,3001,1,Skagerrak


In [9]:
# Left join the newly calculated kommune numbers (komnr2) onto the previous
# dataset, so every row of the old file is kept; regines without a match get
# NaN in komnr2.
# validate="many_to_one" makes pandas raise a MergeError if "regine" is
# duplicated in df, instead of silently duplicating rows in the result
new_df = pd.merge(teo_df, df, how="left", on="regine", validate="many_to_one")
new_df.head()

Unnamed: 0,regine,regine_ned,a_reg_km2,q_sp_m3/s/km2,runoff_mm/yr,q_reg_m3/s,vassom,komnr,fylke,ospar_region,komnr2
0,001.,1_2,0.0,0.0,0.0,0.0,0,0,0,0,
1,001.10,001.,1.41,0.014,441.504,0.01974,1,3001,1,Skagerrak,3001.0
2,001.1A1,001.10,1.16,0.014,441.504,0.01624,1,3001,1,Skagerrak,3001.0
3,001.1A20,001.1A1,0.35,0.01,315.36,0.0035,1,3001,1,Skagerrak,3001.0
4,001.1A2A,001.1A20,17.4,0.012,378.432,0.2088,1,3001,1,Skagerrak,3001.0


In [10]:
# Take the newly calculated kommune number where there is one, fall back to the
# old value otherwise, and cast to int for consistency with the old dataset
patched_komnr = new_df["komnr2"].combine_first(new_df["komnr"]).astype(int)

# Overwrite the old column with the patched values and drop the helper column
new_df = new_df.assign(komnr=patched_komnr).drop(columns="komnr2")

new_df.head()

Unnamed: 0,regine,regine_ned,a_reg_km2,q_sp_m3/s/km2,runoff_mm/yr,q_reg_m3/s,vassom,komnr,fylke,ospar_region
0,001.,1_2,0.0,0.0,0.0,0.0,0,0,0,0
1,001.10,001.,1.41,0.014,441.504,0.01974,1,3001,1,Skagerrak
2,001.1A1,001.10,1.16,0.014,441.504,0.01624,1,3001,1,Skagerrak
3,001.1A20,001.1A1,0.35,0.01,315.36,0.0035,1,3001,1,Skagerrak
4,001.1A2A,001.1A20,17.4,0.012,378.432,0.2088,1,3001,1,Skagerrak


In [11]:
# Write the updated dataset for the year of interest, using the same
# semicolon-separated, UTF-8 format as the input file
out_csv = f"/home/jovyan/shared/common/JES/teotil2/data/core_input_data/regine_{year}.csv"
new_df.to_csv(
    out_csv,
    sep=";",
    encoding="utf-8",
    index=False,
)