In [1]:
%matplotlib inline

import pandas as pd
import nivapy3 as nivapy
import geopandas as gpd
import matplotlib.pyplot as plt

plt.style.use('ggplot')

# NOPE: Update Regine-Kommune links

Kommune defintiions in Norway seem to change quite regularly. For example, in 2018 Nord-Trøndelag and Sør-Trøndelag merged to create Trøndelag, and many kommune numbers were reassigned. This notebook updates `regine.csv`, which is one of the "core" input datasets for NOPE, located here

    ./NOPE/NOPE_Core_Input_Data
    
Regine codes and properties **remain unchanged** in this analysis - the only difference is that I will update the kommuner numbers associated with each regine in the `komnr` column.

It looks as though in TEOTIL/NOPE, each regine should be associated with one *and only one* kommune ID. I will therefore assign the kommune ID that covers the largest part of each catchment by area.

In [2]:
# 2019 kommune data from https://kartkatalog.geonorge.no/metadata/041f1e6e-bdbc-4091-b48f-8a5990f3cc5b
kom_shp = r'../../../Data/gis/shapefiles/kommune_2019.shp'
kom_gdf = gpd.read_file(kom_shp)
kom_gdf = kom_gdf[['kommunenum', 'geometry']]
kom_gdf.head()

Unnamed: 0,kommunenum,geometry
0,2003,"POLYGON ((1032759.490 7861936.150, 1032760.490..."
1,1244,"POLYGON ((-79575.130 6709023.740, -70967.350 6..."
2,1848,"POLYGON ((463360.000 7536890.000, 490980.000 7..."
3,1845,"POLYGON ((503140.000 7500160.000, 504830.000 7..."
4,2002,"POLYGON ((1070382.500 7881474.170, 1075852.120..."


In [3]:
# Regine catchment datasets for NOPE
reg_shp = r'../../../Data/gis/shapefiles/RegMinsteF.shp'
reg_gdf = gpd.read_file(reg_shp)
reg_gdf = reg_gdf[['VASSDRAGNR', 'geometry']]
reg_gdf.head()

Unnamed: 0,VASSDRAGNR,geometry
0,067.62,"POLYGON ((-38501.560 6783604.030, -37807.890 6..."
1,019.F211,"POLYGON ((127984.120 6573462.090, 128145.600 6..."
2,077.1E,"POLYGON ((87091.190 6815443.640, 87094.910 681..."
3,196.2A2A,"POLYGON ((630710.870 7690571.190, 630714.570 7..."
4,152.2D2B3,"POLYGON ((424201.310 7329879.500, 424075.940 7..."


In [4]:
# Intersect
int_gdf = gpd.overlay(reg_gdf, kom_gdf, how='intersection')
int_gdf.head()

Unnamed: 0,VASSDRAGNR,kommunenum,geometry
0,067.62,1263,"POLYGON ((-33251.257 6778434.435, -33266.550 6..."
1,067.2B1B,1263,"POLYGON ((-6688.550 6780123.350, -6745.040 678..."
2,059.1,1263,"MULTIPOLYGON (((-32445.341 6752051.538, -32432..."
3,068.10,1263,"POLYGON ((-38712.340 6783756.707, -38816.950 6..."
4,066.2,1263,"POLYGON ((-42044.054 6775027.609, -41935.110 6..."


In [5]:
print(len(kom_gdf))
print(len(reg_gdf))
print(len(int_gdf))

428
20203
28091


In [6]:
# Calculate area 
int_gdf['area_km2'] = int_gdf['geometry'].area / 1E6

# Get cols of interest
df = int_gdf[['VASSDRAGNR', 'kommunenum', 'area_km2']]

# Sort ascending by area
df.sort_values('area_km2', inplace=True)

# Drop duplicates, keeping the biggest area for each catchment
df.drop_duplicates('VASSDRAGNR', keep='last', inplace=True)

# Tidy
df.rename({'VASSDRAGNR':'regine',
           'kommunenum':'komnr2'},
          axis='columns',
          inplace=True)
del df['area_km2']

df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(**kwargs)


Unnamed: 0,regine,komnr2
25143,001.1A6,101
1408,246.A6,2030
9462,031.AB21A50,1129
13143,159.8AAAAA,1837
5728,173.F,1805


In [7]:
# Read original NOPE input dataset
nope_csv = r'../../../NOPE/NOPE_Core_Input_Data/regine.csv'
nope_df = pd.read_csv(nope_csv, sep=';', encoding='utf-8')
nope_df.head()

Unnamed: 0,regine,regine_ned,a_reg_km2,q_sp_m3/s/km2,runoff_mm/yr,q_reg_m3/s,vassom,komnr,fylke
0,001.,1_2,0.0,0.0,0.0,0.0,0,0,0
1,001.10,001.,1.41,0.014,441.504,0.01974,1,101,1
2,001.1A1,001.10,1.16,0.014,441.504,0.01624,1,101,1
3,001.1A20,001.1A1,0.35,0.01,315.36,0.0035,1,101,1
4,001.1A2A,001.1A20,17.4,0.012,378.432,0.2088,1,101,1


In [8]:
# Join to new result
new_df = pd.merge(nope_df, df, how='left', on='regine')
new_df.head()

Unnamed: 0,regine,regine_ned,a_reg_km2,q_sp_m3/s/km2,runoff_mm/yr,q_reg_m3/s,vassom,komnr,fylke,komnr2
0,001.,1_2,0.0,0.0,0.0,0.0,0,0,0,
1,001.10,001.,1.41,0.014,441.504,0.01974,1,101,1,101.0
2,001.1A1,001.10,1.16,0.014,441.504,0.01624,1,101,1,101.0
3,001.1A20,001.1A1,0.35,0.01,315.36,0.0035,1,101,1,101.0
4,001.1A2A,001.1A20,17.4,0.012,378.432,0.2088,1,101,1,101.0


In [9]:
# Patch gaps in the newly calculated values with the old values
new_df['komnr2'] = new_df['komnr2'].combine_first(new_df['komnr'])

# Convert to int for consistency with old dataset
new_df['komnr2'] = new_df['komnr2'].astype(int)

# Replace old col with new and tidy
new_df['komnr'] = new_df['komnr2']
del new_df['komnr2']

new_df.head()

Unnamed: 0,regine,regine_ned,a_reg_km2,q_sp_m3/s/km2,runoff_mm/yr,q_reg_m3/s,vassom,komnr,fylke
0,001.,1_2,0.0,0.0,0.0,0.0,0,0,0
1,001.10,001.,1.41,0.014,441.504,0.01974,1,101,1
2,001.1A1,001.10,1.16,0.014,441.504,0.01624,1,101,1
3,001.1A20,001.1A1,0.35,0.01,315.36,0.0035,1,101,1
4,001.1A2A,001.1A20,17.4,0.012,378.432,0.2088,1,101,1


In [11]:
out_csv = r'../../../NOPE/NOPE_Core_Input_Data/regine_2018_onwards.csv'
new_df.to_csv(out_csv, index=False, sep=';', encoding='utf-8')