In [1]:
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import pyplot as plt

# Using SciPy.cluster for clustering
from scipy.cluster import hierarchy
from scipy.cluster import vq

# Using Scikit Learn for clustering
#from sklearn.cluster import KMeans
import sklearn.cluster as skc
from sklearn import metrics

import FINE.spagat.utils as spu
import FINE.spagat.dataset as spd

import FINE.spagat.grouping_utils as gu  
from ipynb.fs.full import grouping_utils 

## test dataset - (test_dataset2)

In [2]:
# Coordinate labels for every dimension of the toy dataset.
space = ['01_reg', '02_reg', '03_reg']
space_2 = list(space)
TimeStep = ['T0', 'T1']
component = ['c1', 'c2', 'c3', 'c4']
Period = [0]

# Time-series variable with dims (component, Period, TimeStep, space).
# Components c1 and c3 are filled entirely with NaN.
missing_ts = np.full((len(Period), len(TimeStep), len(space)), np.nan)
demand_values = np.stack([
    missing_ts,
    [[[1, 0.9, 2],
      [1, 0, 0.9]]],
    missing_ts,
    [[[0, 1, 1],
      [0.3, 2, 1]]],
])
demand = xr.DataArray(
    demand_values,
    coords=[component, Period, TimeStep, space],
    dims=['component', 'Period', 'TimeStep', 'space'],
)

# 1d variable with dims (component, space); component c4 is all NaN.
cap_values = np.array([
    [0.9, 1, 0.9],
    [0, 0, 0],
    [0.9, 1, 0.9],
    [np.nan, np.nan, np.nan],
])
cap_1d = xr.DataArray(cap_values, coords=[component, space], dims=['component', 'space'])

# 2d variable with dims (component, space, space_2); components c3 and c4 are all NaN.
missing_2d = np.full((len(space), len(space_2)), np.nan)
dist_values = np.stack([
    [[0, 1, 2], [1, 0, 10], [2, 10, 0]],
    [[0, 0.1, 0.2], [0.1, 0, 1], [0.2, 1, 0]],
    missing_2d,
    missing_2d,
])
dist_2d = xr.DataArray(
    dist_values,
    coords=[component, space, space_2],
    dims=['component', 'space', 'space_2'],
)

# Bundle the three variables into one dataset.
ds = xr.Dataset({
    'operationFixRate': demand,
    '1d_capacity': cap_1d,
    '2d_distance': dist_2d,
})

sds = spd.SpagatDataset()
sds.xr_dataset = ds

sds.xr_dataset

# all_variable_based_clustering()

In [3]:
verbose=False
ax_illustration=None 
save_fig='test' 
dimension_description='space' 
weighting=None

In [4]:
# Original region list
regions_list = sds.xr_dataset[dimension_description].values
n_regions = len(regions_list)

In [5]:
aggregation_dict = {}
aggregation_dict[n_regions] = {region_id: [region_id] for region_id in regions_list}
aggregation_dict

{3: {'01_reg': ['01_reg'], '02_reg': ['02_reg'], '03_reg': ['03_reg']}}

## Option 1.  agg_mode = 'hierarchical' 
#### Clustering method via SciPy.cluster module

In [None]:
agg_mode = 'hierarchical' 

In [None]:
#sds
handle_mode = 'toDissimilarity'
vars='all'
dims='all'
var_weightings=None

#### STEP 1.  Preprocess the whole dataset (grouping_utils - preprocessDataset())

In [None]:
# Obtain the data dictionaries for three var categories after preprocessing
dict_ts, dict_1d, dict_2d = gu.preprocessDataset(sds, handle_mode='toDissimilarity')

#### STEP 2.  Calculate the overall distance between each region pair (uses custom distance)

In [None]:
# Apply clustering methods based on the Custom Distance Function
squared_dist_matrix = gu.selfDistanceMatrix(dict_ts, dict_1d, dict_2d, n_regions)
squared_dist_matrix

#### STEP 3. Clustering

#### STEP 3a.  Hierarchical clustering with average linkage

In [None]:
distance_matrix = hierarchy.distance.squareform(squared_dist_matrix)
Z = hierarchy.linkage(distance_matrix, method='average')
Z

In [None]:
print('The cophenetic correlation coefficient of the hiearchical clustering is ', hierarchy.cophenet(Z, distance_matrix)[0])
 

#### STEP 3b.  Figure for inconsistency check 

In [None]:
fig, ax = plt.subplots(figsize=(18,7))
inconsistency = hierarchy.inconsistent(Z)
ax.plot(range(1,len(Z)+1),list(inconsistency[:,3]),'go-')
ax.set_title('Inconsistency of each Link with the Links Below', fontsize=14)
ax.set_xlabel('Number of disjoint clusters under this link', fontsize=12)
ax.set_ylabel('Inconsistency Coefficients', fontsize=12)

plt.xticks(range(1,len(Z)+1), np.arange(len(Z)+1,1, -1))
plt.show()

#### STEP 3c.  If specified, figure for resulting dendrogram

In [None]:

# Pick the axes that receives the dendrogram:
#   - a caller-supplied axes (ax_illustration) is used as is;
#   - otherwise a new figure is created, but only when the plot is going to be saved.
if ax_illustration is None and save_fig is not None:
    fig, ax = spu.plt.subplots(figsize=(25, 12))
    dendrogram_ax = ax
    created_figure = True
else:
    dendrogram_ax = ax_illustration
    created_figure = False

# Nothing is drawn when there is neither an axes nor a save name.
if dendrogram_ax is not None:
    R = hierarchy.dendrogram(
        Z,
        orientation="top",
        labels=sds.xr_dataset[dimension_description].values,
        ax=dendrogram_ax,
        leaf_font_size=14,
    )

    if created_figure:
        # The figure created above is passed explicitly to the save helper
        spu.plt_savefig(fig=fig, save_name=save_fig)
    elif save_fig is not None:
        # Caller-supplied axes: no figure handle is passed to the save helper
        spu.plt_savefig(save_name=save_fig)
 

#### STEP 4.  find the sub_to_sup_region_id_dict for every level in the hierarchy

In [None]:
# regions_dict to record the newest region set after each merging step, regions_dict_complete for all regions appearing during clustering
regions_dict = {region_id: [region_id] for region_id in regions_list}
regions_dict

In [None]:
regions_dict_complete = regions_dict.copy()
regions_dict_complete

In [None]:
Z

In [None]:
range(len(Z))

In [None]:
int(Z[0][1])

In [None]:
# Identify, which regions are merged together (new_merged_region_id_list)
# Replay the merges recorded in the linkage matrix Z, one row per merge.
# regions_dict_complete keeps insertion order: the original regions come first and every merge
# appends one entry. Position k in its keys/values is therefore addressed by the cluster
# index stored in Z (scipy numbers the cluster created by row i as n_observations + i).
for i in range(len(Z)):
    print(i) 
    print('-------------------------------------------------------------------')
    # Positions of the two clusters merged in this step
    idx_1 = int(Z[i][0])
    idx_2 = int(Z[i][1])

    # identify the keys of the sub regions that will be merged
    key_list = list(regions_dict_complete.keys())
    key_1 = key_list[idx_1]
    key_2 = key_list[idx_2]
    print(key_1)
    print(key_2)
    print('------------------------------------------------------------------')
    # get the region_id_list_s of the sub regions
    value_list = list(regions_dict_complete.values())
    sub_region_id_list_1 = value_list[idx_1]
    sub_region_id_list_2 = value_list[idx_2]
    print(sub_region_id_list_1)
    print(sub_region_id_list_2)
    print('------------------------------------------------------------------')
    # add the new region to the dict by merging the two region_id_lists
    sup_region_id = f'{key_1}_{key_2}'
    sup_region_id_list = sub_region_id_list_1.copy()
    sup_region_id_list.extend(sub_region_id_list_2)
    # Show the merged list (previously the second sub list was printed again by mistake)
    print(sup_region_id_list)
    print('------------------------------------------------------------------')
    regions_dict_complete[sup_region_id] = sup_region_id_list
    # regions_dict only holds the regions that exist after this merge:
    # the new super region replaces its two parts
    regions_dict[sup_region_id] = sup_region_id_list
    del regions_dict[key_1]
    del regions_dict[key_2]
    
    if verbose:
        print(i)
        print('\t', 'keys:', key_1, key_2)
        print('\t', 'list_1', sub_region_id_list_1)
        print('\t', 'list_2', sub_region_id_list_2)
        print('\t', 'sup_region_id', sup_region_id)
        print('\t', 'sup_region_id_list', sup_region_id_list)

    # After merge i there are n_regions - i - 1 regions left; store a snapshot for that level
    aggregation_dict[n_regions - i - 1] = regions_dict.copy()
    


#### STEP 5.  Get Silhouette Coefficient scores

In [None]:
# Silhouette Coefficient scores
silhouette_scores = gu.computeSilhouetteCoefficient(list(regions_list), squared_dist_matrix, aggregation_dict)
print(silhouette_scores)

# computeSilhouetteCoefficient()

In [None]:
# Local stand-ins for the three arguments passed to gu.computeSilhouetteCoefficient() above.
# NOTE(review): this rebinds the global `regions_list` to a plain Python list (the loop below
# uses `regions_list.index(...)`). A plain list does not support boolean-mask indexing, so any
# later code that indexes `regions_list` with a mask needs an array — confirm execution order.
regions_list = list(regions_list)
distanceMatrix = squared_dist_matrix
# No-op self-assignment; presumably kept only to list all three arguments in one place.
aggregation_dict = aggregation_dict

In [None]:
n_regions = len(regions_list)

In [None]:
# Silhouette Coefficient scores
scores = [0 for i in range(1, n_regions-1)]
scores

In [None]:
# Labels for each region object
labels = [0 for i in range(n_regions)]
labels

In [None]:
aggregation_dict

In [None]:
regions_list

#### STEP 1. Obtain Silhouette scores for all intermediate levels in the hierarchy

In [None]:
# Compute one silhouette score per intermediate aggregation level.
# `scores` has n_regions - 2 slots, one for each level k = 2 .. n_regions - 1 (stored at k - 2).
for k, regions_dict in aggregation_dict.items():

    # Skip the trivial levels (one single cluster, or every region on its own):
    # silhouette_score requires between 2 and n_samples - 1 distinct labels and raises
    # otherwise, and scores[k-2] would be out of range for these two levels.
    if k == 1 or k == n_regions:
        continue

    # Obtain labels list for this clustering results
    label = 0
    for sup_region in regions_dict.values():
        #### STEP 1.
        print(sup_region)
        print('-----------------------------------------')
        for reg in sup_region:
            print(reg)
            print('-----------------------------------------')
            # Position of the region in the original list = its row in the distance matrix
            ind = regions_list.index(reg)  
            print(ind)
            print('-----------------------------------------')
            labels[ind] = label
            
        label += 1
    print(label)
    print(labels)
    print('--------------------------------------------------')
    # Silhouette score of this clustering
    s = metrics.silhouette_score(distanceMatrix, labels, metric='precomputed')
    scores[k-2] = s



In [None]:
scores

# END - computeSilhouetteCoefficient()

In [None]:
aggregation_dict

## Option 2.  agg_mode = 'hierarchical2'

#### Clustering method via Scikit Learn module

In [None]:
agg_mode = 'hierarchical2' 

#### STEP 1.  Preprocess the whole dataset (grouping_utils - preprocessDataset())

In [None]:
# Obtain the data dictionaries for three var categories after preprocessing
ds_ts, ds_1d, ds_2d = gu.preprocessDataset(sds, handle_mode='toDissimilarity')

#### STEP 2.  Calculate the overall distance between each region pair (uses custom distance)

In [None]:
# Precompute the distance matrix according to the Custom Distance Function
squared_distMatrix = gu.selfDistanceMatrix(ds_ts, ds_1d, ds_2d, n_regions)

# generateConnectivityMatrix()

In [None]:
ds_extracted = sds.xr_dataset

In [None]:
ds_extracted

#### STEP 1. obtain a dict of all 2d components 

In [None]:
vars_2d = {}

In [None]:
for varname, da in ds_extracted.data_vars.items():
    if da.dims == ('component','space','space_2'):
        vars_2d[varname] = da

In [None]:
vars_2d

In [None]:
n_regions = len(ds_extracted['space'].values)
component_list = list(ds_extracted['component'].values)
n_components = len(component_list)

#### STEP 2. Preprocess the variables ( calls preprocess2dVariables()  )

In [None]:
# Square matrices for each 2d variable and each valid component
ds_2d = grouping_utils.preprocess2dVariables(vars_2d, component_list, handle_mode='toAffinity')

In [None]:
ds_2d

#### STEP 3. If 'pipeline' is among the list of 2d components, only use that. Else use all components having 2d variables

In [None]:
# The neighboring information is based on the 2d vars with components related to pipeline
connect_components = []

In [None]:
for i in range(len(component_list)):
    if 'pipeline' in component_list[i].lower():
        connect_components.append(i)

In [None]:
connect_components

In [None]:
# If there is no components related to pipelines, then consider all existing components.
if not connect_components:
    connect_components = list(range(len(component_list)))

In [None]:
connect_components

In [None]:
adjacencyMatrix = np.zeros((n_regions,n_regions))
adjacencyMatrix

#### STEP 4. for each region pair, check if they are connected (calls checkConnectivity function)

In [None]:
# Check each index pair of regions to verify, if the two regions are connected to each other
for i in range(n_regions):
    for j in range(i+1,n_regions):
        if grouping_utils.checkConnectivity(i,j, ds_2d, connect_components):
            adjacencyMatrix[i,j] = 1

#### STEP 5. Convert the upper triangular matrix into a symmetric matrix.

In [None]:
adjacencyMatrix

In [None]:
np.diag(adjacencyMatrix.diagonal())

In [None]:
# Mirror the upper triangle into the lower one. The element-wise maximum with the transpose
# gives the same result as "A + A.T - diag(A)" for the non-negative upper-triangular matrix
# built above, and it is idempotent: re-running this cell no longer doubles the entries.
adjacencyMatrix = np.maximum(adjacencyMatrix, adjacencyMatrix.T)

In [None]:
adjacencyMatrix

In [None]:
# Set the diagonal values as 1
np.fill_diagonal(adjacencyMatrix, 1)

In [None]:
adjacencyMatrix

# END - generateConnectivityMatrix()

# checkConnectivity()

In [None]:
connect_components

In [None]:
ds_2d

In [None]:
i = 1
j = 2

#### STEP. Return True if, for the given region pair, at least one 2d variable has a non-zero value at [i, j] for at least one of the components in connect_components

In [None]:
# Inline version of checkConnectivity(i, j, ds_2d, connect_components):
# the pair (i, j) counts as connected as soon as one matrix of a component listed in
# connect_components holds a non-zero value at [i, j].
# The result is printed exactly once (previously 'False' was printed unconditionally).
is_connected = any(
    (c in connect_components) and (data[i, j] != 0)
    for var_dict in ds_2d.values()
    for c, data in var_dict.items()
)
print(is_connected)

In [None]:
def checkConnectivity(i,j, ds_2d, connect_components):
    '''Tell whether region i and region j are connected.

    ds_2d maps each 2d variable to a dict {component: matrix}. Only the matrices whose
    component key is contained in connect_components are inspected (the caller passes the
    pipeline-related components or, if there are none, all components).

    Returns True if at least one inspected matrix has a non-zero entry at [i, j],
    otherwise False.
    '''
    return any(
        data[i, j] != 0
        for var_dict in ds_2d.values()
        for c, data in var_dict.items()
        if c in connect_components
    )

In [None]:
checkConnectivity(0,1, ds_2d, connect_components)

# END - checkConnectivity()

#### STEP 3.  Obtain a matrix where 1 means two regions are connected and 0 means they are not (at least one 2d variable of at least one component must have a non-zero value)

In [None]:
# Connectivity matrix for neighboring structure
connectMatrix = gu.generateConnectivityMatrix(sds)

In [None]:
connectMatrix

In [None]:
# Silhouette Coefficient scores
silhouette_scores = []

In [None]:
aggregation_dict

#### STEP 3. Clustering for every number of regions from 1 to one less than n_regions 

In [None]:
# Boolean-mask indexing below needs an array. `regions_list` may already have been rebound
# to a plain list by an earlier cell, so convert defensively.
region_names = np.asarray(regions_list)

for i in range(1,n_regions):
    # Computing hierarchical clustering
    # NOTE(review): newer scikit-learn releases replaced the `affinity` argument of
    # AgglomerativeClustering by `metric` — confirm against the installed version.
    model = skc.AgglomerativeClustering(n_clusters=i,affinity='precomputed',linkage='average',connectivity=connectMatrix).fit(squared_distMatrix)
    regions_label_list = model.labels_

    # Silhouette Coefficient score for this clustering results
    # (skipped for a single cluster, where the score is not computed)
    if i != 1:
        s = metrics.silhouette_score(squared_distMatrix, regions_label_list, metric='precomputed')
        silhouette_scores.append(s)

    # Create a regions dictionary for the aggregated regions
    regions_dict = {}
    for label in range(i):
        # Group the regions of this regions label
        sup_region_list = list(region_names[regions_label_list == label])
        sup_region_id = '_'.join(sup_region_list)
        regions_dict[sup_region_id] = sup_region_list.copy()
    print(regions_dict)
    print('===============================================================')
    aggregation_dict[i] = regions_dict.copy()
    print(aggregation_dict)
    print('===============================================================')

#### STEP 4. cophenetic correlation coefficient

In [None]:
# Plot the hierarchical tree dendrogram
clustering_tree = skc.AgglomerativeClustering(distance_threshold=0, 
                                              n_clusters=None, 
                                              affinity='precomputed', 
                                              linkage='average',
                                              connectivity=connectMatrix).fit(squared_distMatrix)
# Create the counts of samples under each node

In [None]:
clustering_tree

In [None]:
counts = np.zeros(clustering_tree.children_.shape[0])
n_samples = len(clustering_tree.labels_)
for i, merge in enumerate(clustering_tree.children_):
    current_count = 0
    for child_idx in merge:
        if child_idx < n_samples:
            current_count += 1  # leaf node
        else:
            current_count += counts[child_idx - n_samples]
    counts[i] = current_count

In [None]:
            
linkage_matrix = np.column_stack([clustering_tree.children_, clustering_tree.distances_, counts]).astype(float)   
# Plot the corresponding dendrogram
#hierarchy.dendrogram(linkage_matrix)

In [None]:
linkage_matrix

In [None]:
distance_matrix = hierarchy.distance.squareform(squared_distMatrix)
print('The cophenetic correlation coefficient of the hiearchical clustering is ', hierarchy.cophenet(linkage_matrix, distance_matrix)[0])
 

In [None]:
       
#fig, ax = plt.subplots(figsize=(18,7))
inconsistency = hierarchy.inconsistent(linkage_matrix)
print('Inconsistencies:',list(inconsistency[:,3]))
# ax.plot(range(1,len(linkage_matrix)+1),list(inconsistency[:,3]),'go-')
# ax.set_title('Inconsistency of each Link with the Links Below', fontsize=14)
# ax.set_xlabel('Number of disjoint clusters under this link', fontsize=12)
# ax.set_ylabel('Inconsistencies', fontsize=12)

# plt.xticks(range(1,len(linkage_matrix)+1), np.arange(len(linkage_matrix)+1,1, -1))
# plt.show()

#### STEP 5. Print a list of Silhouette scores (Only for the intermediate levels of merges (2 to one less than n_regions))

In [None]:
print('Silhouette scores: ',silhouette_scores)       

In [None]:
aggregation_dict

## Option 3.  agg_mode = 'spectral'

In [6]:
agg_mode = 'spectral'

#### STEP 1.  Preprocess the whole dataset (grouping_utils - preprocessDataset())

In [None]:
# Obtain the matrices for three var categories after preprocessing
feature_matrix_ts, feature_matrix_1d, adjacency_matrix_2d = gu.preprocessDataset(sds, handle_mode='toAffinity')

In [None]:
feature_matrix_ts

In [None]:
# List of weighting factors for 3 categories
# Keep a user-supplied weighting; fall back to equal weights for the three categories
# when it is None or empty.
weighting = weighting if weighting else [1, 1, 1]

In [None]:
# Using RBF kernel to construct affinity matrix
delta = 1

#### STEP 2a. (i) Obtain distance matrix for time series variable set (used pdist, which in turn uses default euclidean distance)

In [None]:
hierarchy.distance.pdist(feature_matrix_ts)

In [None]:
##### Obtain affinity matrix for TimeSeries part via RBF kernel applied on distance matrix
distance_matrix_ts = hierarchy.distance.squareform(hierarchy.distance.pdist(feature_matrix_ts))

In [None]:
distance_matrix_ts

#### STEP 2a. (ii) Use RBF kernel to construct affinity matrix based on distance matrix of time series variable set

In [None]:
affinity_ts = np.exp(- distance_matrix_ts ** 2 / (2. * delta ** 2))

In [None]:
affinity_ts

#### STEP 2b. Obtain distance matrix for 1d variable set (used pdist, which in turn uses default euclidean distance)

In [None]:
##### Obtain affinity matrix for 1d-Vars part via RBF kernel applied on distance matrix
distance_matrix_1d = hierarchy.distance.squareform(hierarchy.distance.pdist(feature_matrix_1d))

#### STEP 2b. (ii) Use RBF kernel to construct affinity matrix based on distance matrix of 1d variable set

In [None]:
affinity_1d = np.exp(- distance_matrix_1d ** 2 / (2. * delta ** 2))

#### STEP 2c. (i) Obtain distance matrix for 2d variable set (element-wise reciprocal of the affinity matrix; infinite entries are replaced by the largest finite value + 10 and the diagonal is set to 0)

In [None]:
adjacency_matrix_2d

In [None]:
#adjacency_matrix_2d is affinity matrix, convert it into distance matrix by taking it's reciprocal 
adjacency_2d_adverse = 1.0 / adjacency_matrix_2d
adjacency_2d_adverse

In [None]:
max_value = adjacency_2d_adverse[np.isfinite(adjacency_2d_adverse)].max()
max_value

In [None]:
adjacency_2d_adverse[np.isinf(adjacency_2d_adverse)] = max_value + 10
adjacency_2d_adverse

In [None]:
np.fill_diagonal(adjacency_2d_adverse,0)

In [None]:
adjacency_2d_adverse

#### STEP 2c. (ii) Use RBF kernel to construct affinity matrix based on distance matrix of 2d variable set

In [None]:
# Construct the affinity matrix by applying RBF on the dissimilarity matrix
affinity_2d = np.exp(- adjacency_2d_adverse ** 2 / (2. * delta ** 2))
affinity_2d

#### STEP 3. Compute a single affinity matrix

In [None]:
##### The precomputed affinity matrix for spectral clustering
affinity_matrix = (affinity_ts * weighting[0] + affinity_1d * weighting[1] + affinity_2d * weighting[2]) 
affinity_matrix

In [None]:
# ##### Solve the spatial contiguity problem with the connectivity condition
# # Connectivity matrix for neighboring structure
# connectMatrix = gu.generateConnectivityMatrix(sds)
# # Cut down the edges that have zero value in connectivity matrix
# affinity_matrix[connectMatrix==0] = 0

In [None]:
# Evaluation indicators
modularities = []

#### STEP 4. For 1 to one less than n regions: Perform the following sub steps 

In [None]:
# Boolean-mask indexing below needs an array. `regions_list` may already have been rebound
# to a plain list by an earlier cell, so convert defensively.
region_names = np.asarray(regions_list)

for i in range(1,n_regions):
    #### STEP 4a. clustering
    model = skc.SpectralClustering(n_clusters=i,affinity='precomputed').fit(affinity_matrix)
    regions_label_list = model.labels_
    
    #### STEP 4b. compute modularity (calls computeModularity() )
    modularity = gu.computeModularity(affinity_matrix, regions_label_list)
    modularities.append(modularity)

    #### STEP 4c. form resulting sub_to_sup_region_id_dict 
    regions_dict = {}
    for label in range(i):
        # Group the regions of this regions label
        sup_region_list = list(region_names[regions_label_list == label])
        sup_region_id = '_'.join(sup_region_list)
        regions_dict[sup_region_id] = sup_region_list.copy()

    # Store the grouping for this number of aggregated regions
    aggregation_dict[i] = regions_dict.copy()
    
# Plotting the modularites according to increase of k values, check if there exists an inflection point
# fig, ax = spu.plt.subplots(figsize=(25, 12))
# ax.plot(range(1,n_regions),modularities,'go-')
# ax.set_title('Impact of aggregated regions on modularity')
# ax.set_xlabel('number of aggregated regions')
# ax.set_ylabel('Modularity')
# plt.show()

print('Modularities',modularities)

#### STEP 5. Obtain Silhouette scores

In [None]:
# Silhouette Coefficient scores
ds_ts, ds_1d, ds_2d = gu.preprocessDataset(sds, handle_mode='toDissimilarity')
distances = gu.selfDistanceMatrix(ds_ts, ds_1d, ds_2d, n_regions)
silhouette_scores = gu.computeSilhouetteCoefficient(list(regions_list), distances, aggregation_dict)
print('Silhouette scores: ',silhouette_scores)

In [None]:
aggregation_dict

# computeModularity()

In [None]:
# Work on a copy: the diagonal of `adjacency` is zeroed in place right after this cell, and a
# plain alias would silently modify `affinity_matrix` as well.
adjacency = affinity_matrix.copy()
# The labels come from `regions_label_list`, as left behind by the last clustering run
# (presumably what the original "#rions_label_list" note referred to).
adjacency

#### STEP 1. set affinity of same region pairs (diagonal elements) to 0 in adjacency matrix

In [None]:
np.fill_diagonal(adjacency, 0)  

In [None]:

n_regions = len(regions_label_list)

In [None]:
# Values in the adjacency matrix as edge weights
edge_weights_sum = np.sum(adjacency)

In [None]:
modularity = 0

#### STEP 2. For every region pair calculate the modularity and add it all up

In [None]:
# Accumulate the modularity contribution of every unordered region pair (v, w), v < w.
# NOTE(review): only pairs with v < w are summed, while edge_weights_sum is the sum over the
# full matrix — confirm that this normalisation matches the intended modularity definition.
for v in range(n_regions):
    # The weighted degree of node v: sum of its incident edge weights (constant for all w)
    d_v = np.sum(adjacency[v])

    for w in range(v+1, n_regions):

        # The weighted degree of node w
        d_w = np.sum(adjacency[w])
        # 1 if the two nodes belong to the same cluster, else 0.
        # Deliberately not named `delta`: that name holds the RBF kernel width used by the
        # affinity cells and must not be overwritten here.
        same_cluster = 1 if regions_label_list[v] == regions_label_list[w] else 0

        # Sum up the actual fraction of the edges minus the expected fraction of edges inside of each cluster
        modularity += (adjacency[v,w] - (d_v * d_w) / (2 * edge_weights_sum)) * same_cluster




#### STEP 3. Modify the obtained modularity further

In [None]:
modularity = modularity / (2 * edge_weights_sum)

In [None]:
modularity

# END - computeModularity()

## agg_mode ='spectral2' 

#### Affinity matrix: construct a distance matrix based on selfDistanceMatrix function, transform it to similarity matrix


In [7]:
agg_mode ='spectral2' 

#### STEP 1.  Preprocess the whole dataset (grouping_utils - preprocessDataset())

In [8]:
# Obtain the data dictionaries for three var categories after preprocessing
ds_ts, ds_1d, ds_2d = gu.preprocessDataset(sds, handle_mode='toDissimilarity')

In [9]:
ds_2d

{'2d_distance': {0: array([0.9, 0.8, 0. ]), 1: array([0.9, 0.8, 0. ])}}

#### STEP 2.  Calculate the overall distance between each region pair (uses custom distance)

In [10]:
# Precompute the distance matrix according to the Custom Distance Function
distMatrix = gu.selfDistanceMatrix(ds_ts, ds_1d, ds_2d, n_regions)
distMatrix

array([[0.   , 4.845, 1.905],
       [4.845, 0.   , 2.755],
       [1.905, 2.755, 0.   ]])

#### STEP 3. Scale the distance matrix between 0 and 1

In [11]:
# Rescaling the matrix in order to generate valid affinity_matrix
distMatrix = gu.matrix_MinMaxScaler(distMatrix)
distMatrix

array([[0.        , 1.        , 0.39318885],
       [1.        , 0.        , 0.56862745],
       [0.39318885, 0.56862745, 0.        ]])

#### STEP 4. Use RBF kernel to construct affinity matrix based on distance matrix

In [12]:
# Obtain affinity matrix for part_1 via RBF kernel applied on distance matrix
delta = 1
affinity_matrix = np.exp(- distMatrix ** 2 / (2. * delta ** 2))
affinity_matrix

array([[1.        , 0.60653066, 0.9256133 ],
       [0.60653066, 1.        , 0.8507231 ],
       [0.9256133 , 0.8507231 , 1.        ]])

In [13]:
# # Connectivity matrix for neighboring structure
# connectMatrix = gu.generateConnectivityMatrix(sds)
# # Cut down the edges that have zero value in connectivity matrix
# affinity_matrix[connectMatrix==0] = 0

# Evaluation indicators
modularities = []

In [14]:
# Silhouette Coefficient scores
silhouette_scores = []

#### STEP 5. For 1 to one less than n regions: Perform the following sub steps 

In [15]:
# Boolean-mask indexing below needs an array. `regions_list` may already have been rebound
# to a plain list by an earlier cell, so convert defensively.
region_names = np.asarray(regions_list)

for i in range(1,n_regions):
    #### STEP 5a. clustering
    # Perform the spectral clustering with the precomputed affinity matrix (adjacency matrix)
    model = skc.SpectralClustering(n_clusters=i,affinity='precomputed').fit(affinity_matrix)
    regions_label_list = model.labels_
    
    #### STEP 5b. compute modularity (calls computeModularity() )
    # Compute the modularity for evaluation, using affinity matrix as adjacency matrix of a graph
    modularity = gu.computeModularity(affinity_matrix, regions_label_list)
    modularities.append(modularity)
    
    #### STEP 5c. Obtain Silhouette Coefficient score (skipped for a single cluster; only computed for 2 .. n_regions-1 clusters)
    if i != 1:
        s = metrics.silhouette_score(distMatrix, regions_label_list, metric='precomputed')
        silhouette_scores.append(s)
    
    #### STEP 5d. form resulting sub_to_sup_region_id_dict 
    # Create a regions dictionary for the aggregated regions
    regions_dict = {}
    for label in range(i):
        # Group the regions of this regions label
        sup_region_list = list(region_names[regions_label_list == label])
        sup_region_id = '_'.join(sup_region_list)
        regions_dict[sup_region_id] = sup_region_list.copy()

    # Store the grouping for this number of aggregated regions
    aggregation_dict[i] = regions_dict.copy()

# Plotting the modularites according to increase of k values, check if there exists an inflection point
# fig, ax = spu.plt.subplots(figsize=(25, 12))
# ax.plot(range(1,n_regions),modularities,'go-')
# ax.set_title('Impact of aggregated regions on modularity')
# ax.set_xlabel('number of aggregated regions')
# ax.set_ylabel('Modularity')

In [16]:
       
print('Modularites: ',modularities)

Modularites:  [0.16697313202259687, 0.06715382977433099]


In [17]:
print('Silhouette scores: ',silhouette_scores)

Silhouette scores:  [0.30511369702145824]


In [18]:
aggregation_dict

{3: {'01_reg': ['01_reg'], '02_reg': ['02_reg'], '03_reg': ['03_reg']},
 1: {'01_reg_02_reg_03_reg': ['01_reg', '02_reg', '03_reg']},
 2: {'01_reg_03_reg': ['01_reg', '03_reg'], '02_reg': ['02_reg']}}