In [9]:
import pathlib
import numpy as np
import pytest
import xarray as xr 

import FINE.spagat.dataset as spd
import FINE.spagat.representation as spr
import FINE.spagat.grouping as spg

## dataset - test_dataset1

In [3]:
# Toy fixture: three regions and two time steps.
space = ['01_reg', '02_reg', '03_reg']
timestep = ['T0', 'T1']
space_2 = space.copy()  # independent list labelling the second spatial axis

# Variable over (space, TimeStep): one row per region, one column per time step.
opFix = xr.DataArray(
    np.array([[1, 1], [0.9, 1], [2, 2]]),
    coords={'space': space, 'TimeStep': timestep},
    dims=['space', 'TimeStep'],
)

# Variable over space only: one value per region.
cap_1d = xr.DataArray(
    np.array([0.9, 1, 0.9]),
    coords={'space': space},
    dims=['space'],
)

# Variable over (space, space_2): symmetric region-to-region matrix, zero diagonal.
dist_2d = xr.DataArray(
    np.array([[0, 1, 2], [1, 0, 10], [2, 10, 0]]),
    coords={'space': space, 'space_2': space_2},
    dims=['space', 'space_2'],
)

# Bundle the three variables into a single dataset and display it.
ds = xr.Dataset(
    {
        'operationFixRate': opFix,
        '1d_capacity': cap_1d,
        '2d_distance': dist_2d,
    }
)

ds

In [5]:
# Create a fresh SpagatDataset, attach the xarray dataset `ds` to it, and keep
# it reachable under both `sds` and `test_dataset1`.
test_dataset1 = sds = spd.SpagatDataset()
test_dataset1.xr_dataset = ds

test_dataset1

<FINE.spagat.dataset.SpagatDataset at 0x120543776c8>

## dataset - test_dataset2

In [6]:
# Fixture with an extra leading `component` axis: four components, one period,
# two time steps and three regions.
space = ['01_reg', '02_reg', '03_reg']
TimeStep = ['T0', 'T1']
space_2 = space.copy()
component = ['c1', 'c2', 'c3', 'c4']
Period = [0]

# (component, Period, TimeStep, space): c1 and c3 are entirely NaN.
demand = xr.DataArray(
    np.stack([
        np.full((1, 2, 3), np.nan),
        [[[1, 0.9, 2],
          [1, 0, 0.9]]],
        np.full((1, 2, 3), np.nan),
        [[[0, 1, 1],
          [0.3, 2, 1]]],
    ]),
    coords={
        'component': component,
        'Period': Period,
        'TimeStep': TimeStep,
        'space': space,
    },
    dims=['component', 'Period', 'TimeStep', 'space'],
)

# (component, space): c4 is entirely NaN.
cap_1d = xr.DataArray(
    np.stack([
        [0.9, 1, 0.9],
        [0, 0, 0],
        [0.9, 1, 0.9],
        np.full(3, np.nan),
    ]),
    coords={'component': component, 'space': space},
    dims=['component', 'space'],
)

# (component, space, space_2): c3 and c4 are entirely NaN.
dist_2d = xr.DataArray(
    np.stack([
        [[0, 1, 2], [1, 0, 10], [2, 10, 0]],
        [[0, 0.1, 0.2], [0.1, 0, 1], [0.2, 1, 0]],
        np.full((3, 3), np.nan),
        np.full((3, 3), np.nan),
    ]),
    coords={'component': component, 'space': space, 'space_2': space_2},
    dims=['component', 'space', 'space_2'],
)

# Bundle the three variables into a single dataset and display it.
ds = xr.Dataset(
    {
        'operationFixRate': demand,
        '1d_capacity': cap_1d,
        '2d_distance': dist_2d,
    }
)

ds

In [7]:
# Create a fresh SpagatDataset, attach the xarray dataset `ds` to it, and keep
# it reachable under both `sds` and `test_dataset2`.
test_dataset2 = sds = spd.SpagatDataset()
test_dataset2.xr_dataset = ds

## test_all_variable_based_clustering_hierarchical()

In [10]:
# Cluster the regions of the second fixture using the 'hierarchical2' mode.
clustered_regions1 = spg.all_variable_based_clustering(
    test_dataset2,
    agg_mode='hierarchical2',
)

The cophenetic correlation coefficient of the hiearchical clustering is  0.7231402390092978
Inconsistencies: [0.0, 0.7071067811865472]
Silhouette scores:  [0.30511369702145824]
elapsed time for all_variable_based_clustering: 0.00 minutes


In [11]:
# Display how many entries the clustering result holds.
len(clustered_regions1) # expected: 3

3

In [12]:
# Display the entry stored under key 3 (presumably the grouping into three
# regions, i.e. every region on its own -- confirm against the spg module).
clustered_regions1.get(3) # expected: {'01_reg': ['01_reg'], '02_reg': ['02_reg'], '03_reg': ['03_reg']}
     

{'01_reg': ['01_reg'], '02_reg': ['02_reg'], '03_reg': ['03_reg']}

In [13]:
# Fetch and display the entry stored under key 2. The displayed result maps
# each merged region name to the list of original regions it contains.
dict2 = clustered_regions1.get(2)
dict2

{'01_reg_03_reg': ['01_reg', '03_reg'], '02_reg': ['02_reg']}

In [15]:
# Diagnostic: iterating a dict yields its keys, so len() here is the character
# count of each merged region name, not the number of member regions.
for sup_reg in dict2:
    print(sup_reg, len(sup_reg), sep=', ')

01_reg_03_reg, 13
02_reg, 6


In [None]:
# Verify the two-region grouping produced by the hierarchical clustering.
#
# The loop must run over the member lists (the dict values). Iterating the
# dict itself yields the merged region names, which are strings: their len()
# is a character count, so neither size check would ever match and the
# assertions would silently never run.
#
# First make sure the grouping really consists of one single-region group and
# one two-region group, so the checks below cannot pass vacuously.
assert sorted(len(sub_regions) for sub_regions in dict2.values()) == [1, 2]

for sup_reg in dict2.values():
    if len(sup_reg) == 2:
        assert sorted(sup_reg) == ['01_reg', '03_reg']
    if len(sup_reg) == 1:
        assert sorted(sup_reg) == ['02_reg']

In [16]:
# Fetch and display the entry stored under key 1. The displayed result holds
# a single merged region containing all three original regions.
dict1 = clustered_regions1.get(1)
dict1

{'01_reg_02_reg_03_reg': ['01_reg', '02_reg', '03_reg']}

In [17]:
# Show the members of the single merged region.
#
# Iterate over the member lists (the dict values): the dict keys are merged
# region names (strings), so len() of a key is a character count and would
# never equal 3, leaving this cell without any output.
for sup_reg in dict1.values():
    if len(sup_reg) == 3:
        print(sorted(sup_reg))  # expected: ['01_reg', '02_reg', '03_reg']

## test_all_variable_based_clustering_spectral()

In [18]:
# Spectral clustering with a non-uniform weighting (first entry dominant).
# The weights presumably map one-to-one onto the dataset variables -- confirm
# against the spg module.
# NOTE: this rebinds `clustered_regions1`, replacing the hierarchical result
# assigned to the same name earlier in the notebook.
clustered_regions1 = spg.all_variable_based_clustering(
    test_dataset2,
    agg_mode='spectral',
    weighting=[10, 1, 1],
)

Modularities [0.16692023623072105, 0.06451653722956459]
Silhouette scores:  [0.30511369702145824]
elapsed time for all_variable_based_clustering: 0.01 minutes


In [19]:
# Display how many entries the spectral clustering result holds.
len(clustered_regions1) # expected: 3

3

In [20]:
# Fetch and display the entry stored under key 2 of the spectral result
# obtained with weighting=[10, 1, 1].
dict1_2 = clustered_regions1.get(2)
dict1_2

{'02_reg': ['02_reg'], '01_reg_03_reg': ['01_reg', '03_reg']}

In [24]:
# Print the sorted members of every group that contains exactly two regions.
for sup_region in dict1_2.values():
    if len(sup_region) != 2:
        continue
    print(sorted(sup_region))  # expected: ['01_reg', '03_reg']

['01_reg', '03_reg']


In [25]:
# Spectral clustering again, this time with the last weight dominant.
clustered_regions2 = spg.all_variable_based_clustering(
    test_dataset2,
    agg_mode='spectral',
    weighting=[1, 1, 10],
)

Modularities [0.17543875915620316, 0.142189364184016]
Silhouette scores:  [0.04094753448868963]
elapsed time for all_variable_based_clustering: 0.00 minutes


In [26]:
# Displays True when the result holds three entries (shown, not asserted).
len(clustered_regions2) == 3

True

In [27]:
# Fetch and display the entry stored under key 2 of the spectral result
# obtained with weighting=[1, 1, 10].
dict2_2 = clustered_regions2.get(2)
dict2_2

{'02_reg_03_reg': ['02_reg', '03_reg'], '01_reg': ['01_reg']}

In [28]:
# Print the sorted members of every group that contains exactly two regions.
for sup_region in dict2_2.values():
    if len(sup_region) != 2:
        continue
    print(sorted(sup_region))  # expected: ['02_reg', '03_reg']

['02_reg', '03_reg']


In [29]:
# Cluster using the 'spectral2' mode, without passing an explicit weighting.
clustered_regions3 = spg.all_variable_based_clustering(
    test_dataset2,
    agg_mode='spectral2',
)

Modularites:  [0.16697313202259687, 0.06715382977433099]
Silhouette scores:  [0.30511369702145824]
elapsed time for all_variable_based_clustering: 0.00 minutes


In [30]:
# Display how many entries the 'spectral2' clustering result holds.
len(clustered_regions3) # expected: 3

3

In [31]:
# Fetch and display the entry stored under key 2 of the 'spectral2' result.
dict3_2 = clustered_regions3.get(2)
dict3_2

{'01_reg_03_reg': ['01_reg', '03_reg'], '02_reg': ['02_reg']}

In [32]:
# Print the sorted members of every group that contains exactly two regions.
for sup_region in dict3_2.values():
    if len(sup_region) != 2:
        continue
    print(sorted(sup_region))  # expected: ['01_reg', '03_reg']

['01_reg', '03_reg']
