# Develop climate clusters for the study sites

In [None]:
import pandas as pd
import geopandas as gpd
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import xarray as xr
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import cdist
from sklearn.metrics import silhouette_score
import warnings
warnings.filterwarnings("ignore")

## Load ERA data and glacier boundaries for all sites

In [None]:
# Load the compiled ERA time series for all sites
scm_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/'
era_fn = os.path.join(scm_path, 'compiled_data', 'all_era_data.csv')
era = pd.read_csv(era_fn)
# Format dates as datetimes
era['Date'] = pd.to_datetime(era['Date'])
# Add a calendar-year column used for the annual groupings below.
# NOTE: .dt.year is used instead of .dt.isocalendar().year. The ISO 8601
# week-numbering year assigns the days around New Year to the neighbouring
# year (e.g. 2018-12-31 belongs to ISO year 2019), which would move end-of-year
# samples into the wrong annual group.
era['Year'] = era['Date'].dt.year

# Load the AOIs (one geometry per site)
aois_fn = os.path.join(scm_path, 'compiled_data', 'all_aois.shp')
aois = gpd.read_file(aois_fn)
# Add centroid coordinate columns for plotting (in the CRS the file was read in);
# the centroid is computed once per geometry and reused for both columns
centroids = [geom.centroid for geom in aois['geometry']]
aois['centroid_x'] = [pt.x for pt in centroids]
aois['centroid_y'] = [pt.y for pt in centroids]

## Calculate the mean annual maximum cumulative precipitation and the mean annual air temperature range for each site

In [None]:
# Per-site climate summary: the mean over years of (a) the annual maximum of
# cumulative precipitation and (b) the annual air temperature range (max - min).
# The result is cached on disk and re-loaded on later runs.

# Define output file name
mean_climate_fn = os.path.join(scm_path, 'compiled_data', 'mean_climate.csv')

# Check if file already exists in directory
if os.path.exists(mean_climate_fn):
    # Load from file
    mean_climate_df = pd.read_csv(mean_climate_fn)
    print('Mean climate conditions loaded from file.')

else:
    # Collect one record per site and build the dataframe once at the end
    # (repeated pd.concat inside the loop re-copies all previous rows each time)
    records = []

    # Iterate over site names, in order of first appearance
    for site_name in tqdm(era['site_name'].drop_duplicates().values):
        era_site = era.loc[era['site_name'] == site_name]
        # Group by year once and reuse it for every annual statistic
        by_year = era_site.groupby(by='Year')
        annual_max_precip = by_year['Cumulative_Precipitation_mwe'].max()
        annual_temp = by_year['Temperature_Celsius_Adjusted']
        annual_temp_range = annual_temp.max() - annual_temp.min()
        records.append({'site_name': site_name,
                        'max_annual_precip_mean': annual_max_precip.mean(),
                        'max_annual_temp_range_mean': annual_temp_range.mean()})
    # Explicit columns keep the expected schema even if there are no sites
    mean_climate_df = pd.DataFrame(records, columns=['site_name',
                                                     'max_annual_precip_mean',
                                                     'max_annual_temp_range_mean'])

    # Save to file
    mean_climate_df.to_csv(mean_climate_fn, index=False)
    print('Mean climate conditions saved to file:', mean_climate_fn)

# Plot
fig, ax = plt.subplots()
ax.plot(mean_climate_df['max_annual_precip_mean'], mean_climate_df['max_annual_temp_range_mean'], '.')
ax.grid()
# Raw string: '\S' is not a valid Python escape sequence, and the backslash
# must reach matplotlib's mathtext unchanged
ax.set_xlabel(r'Mean annual $\Sigma$(Precipitation) [m.w.e.]')
ax.set_ylabel('Mean annual air temperature range [$^o$C]')
plt.show()

## Scale the data variables to range from 0 to 1

In [None]:
# Rescale each clustering feature to [0, 1] so that neither variable dominates
# the Euclidean distances used by k-means.

# Identify columns to use for clustering
feature_cols = ['max_annual_precip_mean', 'max_annual_temp_range_mean']

# Take an explicit copy of the feature columns: X is written to in a later cell,
# and writing into a slice of mean_climate_df would trigger chained-assignment warnings
X = mean_climate_df[feature_cols].copy()
# Normalize columns (min-max scaling, fitted and applied on the same data)
X_norm = X.copy(deep=True)
X_norm[feature_cols] = MinMaxScaler().fit_transform(X)

# Plot
fig, ax = plt.subplots()
ax.plot(X_norm['max_annual_precip_mean'], X_norm['max_annual_temp_range_mean'], '.')
ax.grid()
# Raw string: '\S' is not a valid Python escape sequence, and the backslash
# must reach matplotlib's mathtext unchanged
ax.set_xlabel(r'Norm. mean annual $\Sigma$(Precipitation)')
# NOTE(review): the normalized values are unitless, so the unit in this label
# looks like a leftover from the unscaled plot; text left unchanged
ax.set_ylabel('Norm. mean annual air temperature range [$^o$C]')
plt.show()

## Estimate the optimal number of clusters

In [None]:
# Scan candidate cluster counts and score each k-means partition with the
# silhouette coefficient (higher = better separated clusters).

# Candidate numbers of clusters: 2 through 10
K = np.arange(2, 11)

# Iterate over number of clusters
sil_coefs = []
for k in K:
    # Fix the seed and the number of initializations so the curve is
    # reproducible between runs and across scikit-learn versions (the default
    # n_init changed between releases)
    model = KMeans(n_clusters=k, random_state=0, n_init=10)
    labels = model.fit_predict(X_norm)
    sil_coefs.append(silhouette_score(X_norm, labels))

# Plot the silhouette coefficients
fig, ax = plt.subplots(1, 1, figsize=(6,4))
ax.plot(K, sil_coefs, '-b')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('Silhouette score')
ax.grid()
plt.show()


## Cluster the results

In [None]:
# Cluster the sites in normalized climate space with k-means, attach the cluster
# number to the climate table and to the AOIs, and plot both views.

# Define number of clusters and colors for clusters
n = 5
color_palette = sns.color_palette("hls", n)

# Fit clusterer to data. The seed and number of initializations are fixed so
# that cluster numbers (and therefore colors and saved outputs) are
# reproducible between runs and across scikit-learn versions.
clusterer = KMeans(n_clusters=n, random_state=0, n_init=10)
# Add 1 so first cluster = 1
cluster_ids = clusterer.fit_predict(X_norm[feature_cols]) + 1

# Add to results dataframes (rows of X and mean_climate_df are in the same order)
X = X.assign(y_pred=cluster_ids)
mean_climate_df['cluster'] = cluster_ids

# Add cluster number to AOIs by looking up each RGIId in the site -> cluster table.
# If a site name is duplicated, the first occurrence is used.
site_clusters = (mean_climate_df.dropna(subset=['site_name'])
                 .drop_duplicates(subset='site_name')
                 .set_index('site_name')['cluster'])
aois['cluster'] = aois['RGIId'].map(site_clusters)
# Keep only AOIs that have climate data; copy so later writes do not target a slice
aois = aois.dropna(subset=['cluster']).copy()
# map() yields floats when some AOIs are unmatched; restore integer cluster numbers
aois['cluster'] = aois['cluster'].astype(int)
aois = aois.sort_values(by='cluster')

# Plot: climate space (left) and map of AOI centroids (right)
fig, ax = plt.subplots(1, 2, figsize=(12,6))
sns.scatterplot(data=X, x='max_annual_precip_mean', y='max_annual_temp_range_mean', hue='y_pred',
                palette=color_palette, legend=False, ax=ax[0])
# Raw string: '\S' is not a valid Python escape sequence, and the backslash
# must reach matplotlib's mathtext unchanged
ax[0].set_xlabel(r'Mean annual $\Sigma$(Precipitation) [m.w.e.]')
ax[0].set_ylabel('Mean annual air temperature range [$^o$C]')
sns.scatterplot(data=aois, x='centroid_x', y='centroid_y', hue='cluster',
                palette=color_palette, legend=True, ax=ax[1])
fig.tight_layout()
plt.show()

## Assign code names to each cluster based on climate

In [None]:
def _climate_cluster_name(precip_mean, air_temp_range_mean):
    """Return the climate code name for a cluster from its mean conditions.

    precip_mean: cluster mean of 'max_annual_precip_mean'.
    air_temp_range_mean: cluster mean of 'max_annual_temp_range_mean'.

    The branches are checked in order, so the first matching rule wins and
    anything that matches none of them is labelled 'Transitional-Maritime'.
    """
    if precip_mean < 0.75 and air_temp_range_mean < 30:
        return 'W. Aleutians'
    if precip_mean < 0.75 and air_temp_range_mean > 40:
        return 'Continental'
    if precip_mean > 1.5 and air_temp_range_mean > 25:
        return 'Maritime'
    if precip_mean < 1.0 and air_temp_range_mean > 30:
        return 'Transitional-Continental'
    return 'Transitional-Maritime'


# Initialize columns for cluster name
mean_climate_df['clustName'] = ''
aois['clustName'] = ''

# Iterate over cluster numbers
for cluster_id in sorted(mean_climate_df['cluster'].drop_duplicates().values):
    # Subset dataframe
    in_cluster = mean_climate_df['cluster'] == cluster_id
    cluster_climate = mean_climate_df.loc[in_cluster]
    # Calculate mean precip. and air temp. range
    precip_mean = np.nanmean(cluster_climate['max_annual_precip_mean'])
    air_temp_range_mean = np.nanmean(cluster_climate['max_annual_temp_range_mean'])
    # Determine cluster name
    cluster_name = _climate_cluster_name(precip_mean, air_temp_range_mean)
    print(cluster_id, cluster_name)
    # Add to dataframes
    mean_climate_df.loc[in_cluster, 'clustName'] = cluster_name
    # Compare as strings: this cell casts aois['cluster'] to str before saving
    # (below), so an int comparison would match nothing if the cell is re-run
    aois.loc[aois['cluster'].astype(str) == str(cluster_id), 'clustName'] = cluster_name

# Save CSV and figure to file
mean_climate_fn = os.path.join(scm_path, 'compiled_data', 'climate_clusters.csv')
mean_climate_df.to_csv(mean_climate_fn, index=False)
print('Results saved to file:', mean_climate_fn)
fig_fn = os.path.join(scm_path, 'compiled_data', 'climate_clusters.png')
# NOTE(review): `fig` is whichever figure was created last; presumably the
# cluster plot from the clustering cell -- confirm cells are run in order
fig.savefig(fig_fn, dpi=250)
print('Figure saved to file:', fig_fn)

# Save AOIs with cluster variable (cluster number stored as a string)
aois_cluster_fn = os.path.join(scm_path, 'compiled_data', 'all_aois_climate_cluster.shp')
aois['cluster'] = aois['cluster'].astype(str)
aois.to_file(aois_cluster_fn, index=False)
print('AOIs re-saved to file with climate cluster:', aois_cluster_fn)


## Apply Koppen-Geiger climate zone classification to sites 

### _Too broad, almost all sites in one category_

From Beck et al. (2023): https://doi.org/10.1038/s41597-023-02549-6

In [None]:
# Sample the Koppen-Geiger climate class at the centroid of every AOI and
# attach the class code, name and colour to the AOIs.

# -----Load Koppen-Geiger climate zones
kg_fn = os.path.join(scm_path, 'koppen_geiger_nc', '1991_2020', 'koppen_geiger_0p01.nc')
kg = xr.open_dataset(kg_fn)
kg = xr.where(kg==0, np.nan, kg) # set no data values to NaN

# -----Sample KG at each AOI
# add centroid columns to aois (lon/lat after reprojecting to EPSG:4326)
aois = aois.to_crs('EPSG:4326')
aois['centroid'] = [geom.centroid for geom in aois['geometry']]
aois['centroid_lon'] = [pt.x for pt in aois['centroid']]
aois['centroid_lat'] = [pt.y for pt in aois['centroid']]
# sample KG at all centroid points in one vectorized nearest-neighbour lookup:
# indexers that share a dimension ('site') select point-by-point
sampled_kg = kg['kg_class'].sel(lon=xr.DataArray(aois['centroid_lon'].values, dims='site'),
                                lat=xr.DataArray(aois['centroid_lat'].values, dims='site'),
                                method='nearest').values
# Centroids that fall on a no-data cell sample NaN, which int() cannot convert;
# encode them as class 0 (no data) so the column stays integer-typed
aois['kg_class'] = np.nan_to_num(sampled_kg, nan=0).astype(int)

# -----Add KG columns to AOIs
kg_dict = {19: {'name': 'Dsc: Cold, dry summer, cold summer',
                'color': '#969696'},
           27: {'name': 'Dfc: Cold, no dry season, cold summer',
                'color': '#007d7d'},
           29: {'name': 'ET: Polar, tundra',
                'color': '#b2b2b2'},
           30: {'name': 'EF: Polar, frost',
                'color': '#666666'}
          }
# Classes that are not listed in kg_dict get an empty name and colour
kg_names = {code: info['name'] for code, info in kg_dict.items()}
kg_colors = {code: info['color'] for code, info in kg_dict.items()}
aois['kg_class_name'] = aois['kg_class'].map(kg_names).fillna('')
aois['kg_class_color'] = aois['kg_class'].map(kg_colors).fillna('')
aois


In [None]:
# -----Plot
# Two stacked panels: map of AOI centroids coloured by Koppen-Geiger class (top)
# and number of AOIs per class (bottom)
plt.rcParams.update({'font.sans-serif':'Arial', 'font.size':12})
fig, ax = plt.subplots(2, 1, figsize=(8, 10), gridspec_kw={'height_ratios':[3,1]})
map_ax, hist_ax = ax

# Top panel: map of centroids
sns.scatterplot(
    ax=map_ax,
    data=aois,
    x='centroid_lon',
    y='centroid_lat',
    hue='kg_class',
    s=10,
    palette='tab10',
    legend=True,
)
map_ax.grid()
map_ax.set_xlabel('')
map_ax.set_ylabel('')

# Bottom panel: counts per class, ordered from least to most frequent
counts = aois['kg_class'].value_counts().sort_values()
hist_ax.bar(counts.index, counts.values, width=1)
# Write each count just above its bar
for kg_code, n_sites in zip(counts.index, counts.values):
    hist_ax.text(kg_code - 0.25, n_sites + 2, str(n_sites))
hist_ax.set_xlabel('KG class')
hist_ax.set_ylabel('Count')
hist_ax.set_xticks(counts.index)
plt.show()