#### Spring

In [None]:
import os
from pathlib import Path

import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Star import kept last so any names it exports still take precedence, as before.
from _config import *

# Spring section input: the season_1 / seasonYear_2022 KMeans result (per the file name).
# NOTE(review): absolute, user-specific path -- this only resolves on the machine that
# has this directory; consider deriving it from a configurable output directory.
cluster_file = r'C:\Users\kscar\Documents\GitHub\02807_Project\output\season_clustering\KMeans_clustering_season_1_seasonYear_2022.csv'
# os.environ[...] instead of os.getenv(...): an unset variable now fails right here with
# a KeyError naming it, rather than as an opaque None-path error in read_csv / Path.
# Every column of the station mapping is read as a string.
station_mapping = pd.read_csv(os.environ['STATION_MAPPING'], dtype=str)
shape_file = Path(os.environ['SHAPE_FILE'])

In [None]:
# Read all catchment geometries from the shapefile, then report its
# (rows, columns) shape and its column index before previewing the first rows.
all_catchments_gdf = geopandas.read_file(shape_file)
print(all_catchments_gdf.shape, all_catchments_gdf.columns, sep="\n")
all_catchments_gdf.head()

In [None]:
# Cluster assignments, every value kept as a string. The columns are renamed
# positionally to 'id' (matched against 'obsstednr' further down) and 'cluster'.
cluster_mapping = pd.read_csv(cluster_file, dtype=str).set_axis(
    ['id', 'cluster'], axis="columns"
)
cluster_mapping.head()

In [None]:
# Replace the station number in 'id' with the catchment id ('Id15_v30') found in
# station_mapping. Guarded on the 'id' column so that re-running this cell is a
# no-op instead of a KeyError on the already-dropped column.
if 'id' in cluster_mapping.columns:
    # One lookup table instead of scanning station_mapping once per station.
    # drop_duplicates keeps the first row per station, i.e. the same row a
    # first-match scan would pick, and gives map() the unique index it needs.
    station_to_catchment = (
        station_mapping
        .dropna(subset=["obsstednr"])
        .drop_duplicates(subset="obsstednr")
        .set_index("obsstednr")["Id15_v30"]
    )
    catchment_ids = cluster_mapping['id'].map(station_to_catchment)

    # Fail loudly, naming the offending stations, rather than with a bare IndexError.
    unmapped = cluster_mapping.loc[catchment_ids.isna(), 'id'].unique().tolist()
    if unmapped:
        raise KeyError(f"No 'Id15_v30' entry in station_mapping for obsstednr: {unmapped}")

    # Ids were read as strings; the catchment table is matched on int64 values.
    cluster_mapping = (
        cluster_mapping
        .assign(Id15_v30=catchment_ids.astype(np.int64))
        .drop(columns=['id'])
    )
display(cluster_mapping.head())

In [None]:
# Split the catchments in two: those whose 'Id15_v30' appears in the cluster
# mapping (gauged) and all the others (ungauged).
idxs = all_catchments_gdf['Id15_v30'].isin(cluster_mapping['Id15_v30'].astype(np.int64))
gauged_catchments_gdf = all_catchments_gdf[idxs]
ungauged_catchments_gdf = all_catchments_gdf[~idxs]
print(gauged_catchments_gdf.shape, ungauged_catchments_gdf.shape, sep="\n")

In [None]:
# Attach the cluster label to each gauged catchment and record each geometry's
# area in an 'area' column (the plotting cell sorts on it).
merged_gdf = (
    gauged_catchments_gdf
    .merge(cluster_mapping, on='Id15_v30')
    .assign(area=lambda gdf: gdf.geometry.area)
)
print(merged_gdf.shape)
merged_gdf.head()

In [None]:
# Shared view window for both maps, in the shapefile's coordinates (values unchanged;
# presumably a Denmark-wide extent in a projected CRS -- TODO confirm).
X_LIMITS = dict(left=440000, right=730000)
Y_LIMITS = dict(top=6.4*1e6, bottom=6.05*1e6)

fig, ax = plt.subplots(1, 2, figsize=(8, 5))

# Outline of every catchment geometry (internal borders included, not just the
# outer coastline), drawn as a hairline on top of the coloured clusters.
denmark_boundary = all_catchments_gdf.boundary

# Same data in both panels; only the area sort order differs, which per the
# titles decides which catchments end up drawn on top.
panels = [
    (ax[0], False, "Smallest catchments on top"),
    (ax[1], True, "Largest catchments on top"),
]
for axis, ascending, title in panels:
    # No figsize here: geopandas ignores it when an existing ax is passed.
    merged_gdf.sort_values(by="area", ascending=ascending).plot(
        column="cluster", ax=axis, legend=True, cmap='Set1'
    )
    denmark_boundary.plot(ax=axis, color='black', linewidth=0.02)
    axis.set_title(title)
    axis.set_xlim(**X_LIMITS)
    axis.set_ylim(**Y_LIMITS)
    axis.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Lay out only once titles, limits and tick settings are final.
plt.tight_layout()

# plt.savefig(str(Path(cluster_file).parent / Path(cluster_file).stem) +'.png', dpi=300, bbox_inches='tight')
plt.show()

#### Summer

In [None]:
# Summer section input: the season_2 / seasonYear_2022 KMeans result (per the file name).
# NOTE(review): absolute, user-specific path -- this only resolves on the machine that
# has this directory; consider deriving it from a configurable output directory.
cluster_file = r'C:\Users\kscar\Documents\GitHub\02807_Project\output\season_clustering\KMeans_clustering_season_2_seasonYear_2022.csv'
# os.environ[...] instead of os.getenv(...): an unset variable now fails right here with
# a KeyError naming it, rather than as an opaque None-path error in read_csv / Path.
# Every column of the station mapping is read as a string.
station_mapping = pd.read_csv(os.environ['STATION_MAPPING'], dtype=str)
shape_file = Path(os.environ['SHAPE_FILE'])

In [None]:
# Read all catchment geometries from the shapefile (shape_file comes from the same
# SHAPE_FILE variable in every season section), then report its shape and columns.
all_catchments_gdf = geopandas.read_file(shape_file)
print(all_catchments_gdf.shape, all_catchments_gdf.columns, sep="\n")
all_catchments_gdf.head()

In [None]:
# Cluster assignments, every value kept as a string. The columns are renamed
# positionally to 'id' (matched against 'obsstednr' further down) and 'cluster'.
cluster_mapping = pd.read_csv(cluster_file, dtype=str).set_axis(
    ['id', 'cluster'], axis="columns"
)
cluster_mapping.head()

In [None]:
# Replace the station number in 'id' with the catchment id ('Id15_v30') found in
# station_mapping. Guarded on the 'id' column so that re-running this cell is a
# no-op instead of a KeyError on the already-dropped column.
if 'id' in cluster_mapping.columns:
    # One lookup table instead of scanning station_mapping once per station.
    # drop_duplicates keeps the first row per station, i.e. the same row a
    # first-match scan would pick, and gives map() the unique index it needs.
    station_to_catchment = (
        station_mapping
        .dropna(subset=["obsstednr"])
        .drop_duplicates(subset="obsstednr")
        .set_index("obsstednr")["Id15_v30"]
    )
    catchment_ids = cluster_mapping['id'].map(station_to_catchment)

    # Fail loudly, naming the offending stations, rather than with a bare IndexError.
    unmapped = cluster_mapping.loc[catchment_ids.isna(), 'id'].unique().tolist()
    if unmapped:
        raise KeyError(f"No 'Id15_v30' entry in station_mapping for obsstednr: {unmapped}")

    # Ids were read as strings; the catchment table is matched on int64 values.
    cluster_mapping = (
        cluster_mapping
        .assign(Id15_v30=catchment_ids.astype(np.int64))
        .drop(columns=['id'])
    )
display(cluster_mapping.head())

In [None]:
# Split the catchments in two: those whose 'Id15_v30' appears in the cluster
# mapping (gauged) and all the others (ungauged).
idxs = all_catchments_gdf['Id15_v30'].isin(cluster_mapping['Id15_v30'].astype(np.int64))
gauged_catchments_gdf = all_catchments_gdf[idxs]
ungauged_catchments_gdf = all_catchments_gdf[~idxs]
print(gauged_catchments_gdf.shape, ungauged_catchments_gdf.shape, sep="\n")

In [None]:
# Attach the cluster label to each gauged catchment and record each geometry's
# area in an 'area' column (the plotting cell sorts on it).
merged_gdf = (
    gauged_catchments_gdf
    .merge(cluster_mapping, on='Id15_v30')
    .assign(area=lambda gdf: gdf.geometry.area)
)
print(merged_gdf.shape)
merged_gdf.head()

In [None]:
# Shared view window for both maps, in the shapefile's coordinates (values unchanged;
# presumably a Denmark-wide extent in a projected CRS -- TODO confirm).
X_LIMITS = dict(left=440000, right=730000)
Y_LIMITS = dict(top=6.4*1e6, bottom=6.05*1e6)

fig, ax = plt.subplots(1, 2, figsize=(8, 5))

# Outline of every catchment geometry (internal borders included, not just the
# outer coastline), drawn as a hairline on top of the coloured clusters.
denmark_boundary = all_catchments_gdf.boundary

# Same data in both panels; only the area sort order differs, which per the
# titles decides which catchments end up drawn on top.
panels = [
    (ax[0], False, "Smallest catchments on top"),
    (ax[1], True, "Largest catchments on top"),
]
for axis, ascending, title in panels:
    # No figsize here: geopandas ignores it when an existing ax is passed.
    merged_gdf.sort_values(by="area", ascending=ascending).plot(
        column="cluster", ax=axis, legend=True, cmap='Set1'
    )
    denmark_boundary.plot(ax=axis, color='black', linewidth=0.02)
    axis.set_title(title)
    axis.set_xlim(**X_LIMITS)
    axis.set_ylim(**Y_LIMITS)
    axis.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Lay out only once titles, limits and tick settings are final.
plt.tight_layout()

# plt.savefig(str(Path(cluster_file).parent / Path(cluster_file).stem) +'.png', dpi=300, bbox_inches='tight')
plt.show()

#### Fall

In [None]:
# Fall section input: the season_3 / seasonYear_2022 KMeans result (per the file name).
# NOTE(review): absolute, user-specific path -- this only resolves on the machine that
# has this directory; consider deriving it from a configurable output directory.
cluster_file = r'C:\Users\kscar\Documents\GitHub\02807_Project\output\season_clustering\KMeans_clustering_season_3_seasonYear_2022.csv'
# os.environ[...] instead of os.getenv(...): an unset variable now fails right here with
# a KeyError naming it, rather than as an opaque None-path error in read_csv / Path.
# Every column of the station mapping is read as a string.
station_mapping = pd.read_csv(os.environ['STATION_MAPPING'], dtype=str)
shape_file = Path(os.environ['SHAPE_FILE'])

In [None]:
# Read all catchment geometries from the shapefile (shape_file comes from the same
# SHAPE_FILE variable in every season section), then report its shape and columns.
all_catchments_gdf = geopandas.read_file(shape_file)
print(all_catchments_gdf.shape, all_catchments_gdf.columns, sep="\n")
all_catchments_gdf.head()

In [None]:
# Cluster assignments, every value kept as a string. The columns are renamed
# positionally to 'id' (matched against 'obsstednr' further down) and 'cluster'.
cluster_mapping = pd.read_csv(cluster_file, dtype=str).set_axis(
    ['id', 'cluster'], axis="columns"
)
cluster_mapping.head()

In [None]:
# Replace the station number in 'id' with the catchment id ('Id15_v30') found in
# station_mapping. Guarded on the 'id' column so that re-running this cell is a
# no-op instead of a KeyError on the already-dropped column.
if 'id' in cluster_mapping.columns:
    # One lookup table instead of scanning station_mapping once per station.
    # drop_duplicates keeps the first row per station, i.e. the same row a
    # first-match scan would pick, and gives map() the unique index it needs.
    station_to_catchment = (
        station_mapping
        .dropna(subset=["obsstednr"])
        .drop_duplicates(subset="obsstednr")
        .set_index("obsstednr")["Id15_v30"]
    )
    catchment_ids = cluster_mapping['id'].map(station_to_catchment)

    # Fail loudly, naming the offending stations, rather than with a bare IndexError.
    unmapped = cluster_mapping.loc[catchment_ids.isna(), 'id'].unique().tolist()
    if unmapped:
        raise KeyError(f"No 'Id15_v30' entry in station_mapping for obsstednr: {unmapped}")

    # Ids were read as strings; the catchment table is matched on int64 values.
    cluster_mapping = (
        cluster_mapping
        .assign(Id15_v30=catchment_ids.astype(np.int64))
        .drop(columns=['id'])
    )
display(cluster_mapping.head())

In [None]:
# Split the catchments in two: those whose 'Id15_v30' appears in the cluster
# mapping (gauged) and all the others (ungauged).
idxs = all_catchments_gdf['Id15_v30'].isin(cluster_mapping['Id15_v30'].astype(np.int64))
gauged_catchments_gdf = all_catchments_gdf[idxs]
ungauged_catchments_gdf = all_catchments_gdf[~idxs]
print(gauged_catchments_gdf.shape, ungauged_catchments_gdf.shape, sep="\n")

In [None]:
# Attach the cluster label to each gauged catchment and record each geometry's
# area in an 'area' column (the plotting cell sorts on it).
merged_gdf = (
    gauged_catchments_gdf
    .merge(cluster_mapping, on='Id15_v30')
    .assign(area=lambda gdf: gdf.geometry.area)
)
print(merged_gdf.shape)
merged_gdf.head()

In [None]:
# Shared view window for both maps, in the shapefile's coordinates (values unchanged;
# presumably a Denmark-wide extent in a projected CRS -- TODO confirm).
X_LIMITS = dict(left=440000, right=730000)
Y_LIMITS = dict(top=6.4*1e6, bottom=6.05*1e6)

fig, ax = plt.subplots(1, 2, figsize=(8, 5))

# Outline of every catchment geometry (internal borders included, not just the
# outer coastline), drawn as a hairline on top of the coloured clusters.
denmark_boundary = all_catchments_gdf.boundary

# Same data in both panels; only the area sort order differs, which per the
# titles decides which catchments end up drawn on top.
panels = [
    (ax[0], False, "Smallest catchments on top"),
    (ax[1], True, "Largest catchments on top"),
]
for axis, ascending, title in panels:
    # No figsize here: geopandas ignores it when an existing ax is passed.
    merged_gdf.sort_values(by="area", ascending=ascending).plot(
        column="cluster", ax=axis, legend=True, cmap='Set1'
    )
    denmark_boundary.plot(ax=axis, color='black', linewidth=0.02)
    axis.set_title(title)
    axis.set_xlim(**X_LIMITS)
    axis.set_ylim(**Y_LIMITS)
    axis.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Lay out only once titles, limits and tick settings are final.
plt.tight_layout()

# plt.savefig(str(Path(cluster_file).parent / Path(cluster_file).stem) +'.png', dpi=300, bbox_inches='tight')
plt.show()

#### Winter

In [None]:
# Winter section input: the season_4 / seasonYear_2022 KMeans result (per the file name).
# NOTE(review): absolute, user-specific path -- this only resolves on the machine that
# has this directory; consider deriving it from a configurable output directory.
cluster_file = r'C:\Users\kscar\Documents\GitHub\02807_Project\output\season_clustering\KMeans_clustering_season_4_seasonYear_2022.csv'
# os.environ[...] instead of os.getenv(...): an unset variable now fails right here with
# a KeyError naming it, rather than as an opaque None-path error in read_csv / Path.
# Every column of the station mapping is read as a string.
station_mapping = pd.read_csv(os.environ['STATION_MAPPING'], dtype=str)
shape_file = Path(os.environ['SHAPE_FILE'])

In [None]:
# Read all catchment geometries from the shapefile (shape_file comes from the same
# SHAPE_FILE variable in every season section), then report its shape and columns.
all_catchments_gdf = geopandas.read_file(shape_file)
print(all_catchments_gdf.shape, all_catchments_gdf.columns, sep="\n")
all_catchments_gdf.head()

In [None]:
# Cluster assignments, every value kept as a string. The columns are renamed
# positionally to 'id' (matched against 'obsstednr' further down) and 'cluster'.
cluster_mapping = pd.read_csv(cluster_file, dtype=str).set_axis(
    ['id', 'cluster'], axis="columns"
)
cluster_mapping.head()

In [None]:
# Replace the station number in 'id' with the catchment id ('Id15_v30') found in
# station_mapping. Guarded on the 'id' column so that re-running this cell is a
# no-op instead of a KeyError on the already-dropped column.
if 'id' in cluster_mapping.columns:
    # One lookup table instead of scanning station_mapping once per station.
    # drop_duplicates keeps the first row per station, i.e. the same row a
    # first-match scan would pick, and gives map() the unique index it needs.
    station_to_catchment = (
        station_mapping
        .dropna(subset=["obsstednr"])
        .drop_duplicates(subset="obsstednr")
        .set_index("obsstednr")["Id15_v30"]
    )
    catchment_ids = cluster_mapping['id'].map(station_to_catchment)

    # Fail loudly, naming the offending stations, rather than with a bare IndexError.
    unmapped = cluster_mapping.loc[catchment_ids.isna(), 'id'].unique().tolist()
    if unmapped:
        raise KeyError(f"No 'Id15_v30' entry in station_mapping for obsstednr: {unmapped}")

    # Ids were read as strings; the catchment table is matched on int64 values.
    cluster_mapping = (
        cluster_mapping
        .assign(Id15_v30=catchment_ids.astype(np.int64))
        .drop(columns=['id'])
    )
display(cluster_mapping.head())

In [None]:
# Split the catchments in two: those whose 'Id15_v30' appears in the cluster
# mapping (gauged) and all the others (ungauged).
idxs = all_catchments_gdf['Id15_v30'].isin(cluster_mapping['Id15_v30'].astype(np.int64))
gauged_catchments_gdf = all_catchments_gdf[idxs]
ungauged_catchments_gdf = all_catchments_gdf[~idxs]
print(gauged_catchments_gdf.shape, ungauged_catchments_gdf.shape, sep="\n")

In [None]:
# Attach the cluster label to each gauged catchment and record each geometry's
# area in an 'area' column (the plotting cell sorts on it).
merged_gdf = (
    gauged_catchments_gdf
    .merge(cluster_mapping, on='Id15_v30')
    .assign(area=lambda gdf: gdf.geometry.area)
)
print(merged_gdf.shape)
merged_gdf.head()

In [None]:
# Shared view window for both maps, in the shapefile's coordinates (values unchanged;
# presumably a Denmark-wide extent in a projected CRS -- TODO confirm).
X_LIMITS = dict(left=440000, right=730000)
Y_LIMITS = dict(top=6.4*1e6, bottom=6.05*1e6)

fig, ax = plt.subplots(1, 2, figsize=(8, 5))

# Outline of every catchment geometry (internal borders included, not just the
# outer coastline), drawn as a hairline on top of the coloured clusters.
denmark_boundary = all_catchments_gdf.boundary

# Same data in both panels; only the area sort order differs, which per the
# titles decides which catchments end up drawn on top.
panels = [
    (ax[0], False, "Smallest catchments on top"),
    (ax[1], True, "Largest catchments on top"),
]
for axis, ascending, title in panels:
    # No figsize here: geopandas ignores it when an existing ax is passed.
    merged_gdf.sort_values(by="area", ascending=ascending).plot(
        column="cluster", ax=axis, legend=True, cmap='Set1'
    )
    denmark_boundary.plot(ax=axis, color='black', linewidth=0.02)
    axis.set_title(title)
    axis.set_xlim(**X_LIMITS)
    axis.set_ylim(**Y_LIMITS)
    axis.tick_params(left=False, bottom=False, labelleft=False, labelbottom=False)

# Lay out only once titles, limits and tick settings are final.
plt.tight_layout()

# plt.savefig(str(Path(cluster_file).parent / Path(cluster_file).stem) +'.png', dpi=300, bbox_inches='tight')
plt.show()