In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas
import warnings

from sklearn import preprocessing as prep
from scipy.cluster import hierarchy
from sklearn import metrics

import FINE.spagat.dataset as spd
from ipynb.fs.full import grouping_utils

## test dataset - (test_dataset2)

In [2]:
# Coordinate labels of the toy dataset
space = ['01_reg', '02_reg', '03_reg']
space_2 = list(space)
TimeStep = ['T0', 'T1']
Period = [0]
component = ['c1', 'c2', 'c3', 'c4']

# Time series variable laid out as (component, Period, TimeStep, space);
# components c1 and c3 hold no data (all NaN)
demand = xr.DataArray(
    np.stack([
        np.full((1, 2, 3), np.nan),
        [[[1, 0.9, 2],
          [1, 0,   0.9]]],
        np.full((1, 2, 3), np.nan),
        [[[0,   1, 1],
          [0.3, 2, 1]]],
    ]),
    dims=['component', 'Period', 'TimeStep', 'space'],
    coords=[component, Period, TimeStep, space],
)

# One value per (component, space); component c4 holds no data (all NaN)
cap_1d = xr.DataArray(
    np.stack([
        [0.9, 1, 0.9],
        [0,   0, 0],
        [0.9, 1, 0.9],
        np.full(3, np.nan),
    ]),
    dims=['component', 'space'],
    coords=[component, space],
)

# One value per (component, space, space_2); components c3 and c4 hold no data (all NaN)
dist_2d = xr.DataArray(
    np.stack([
        [[0, 1, 2], [1, 0, 10], [2, 10, 0]],
        [[0, 0.1, 0.2], [0.1, 0, 1], [0.2, 1, 0]],
        np.full((3, 3), np.nan),
        np.full((3, 3), np.nan),
    ]),
    dims=['component', 'space', 'space_2'],
    coords=[component, space, space_2],
)

ds = xr.Dataset({
    'operationFixRate': demand,
    '1d_capacity': cap_1d,
    '2d_distance': dist_2d,
})

# Wrap the xarray dataset in the container that grouping_utils works on
sds = spd.SpagatDataset()
sds.xr_dataset = ds

sds.xr_dataset

In [3]:
# Preprocess the dataset and unpack the three returned dictionaries, one per
# variable category: time series, 1d variables and 2d variables.
# NOTE(review): handle_mode='toDissimilarity' presumably turns the 2d values into
# dissimilarities - confirm against grouping_utils.preprocessDataset.
dict_ts, dict_1d, dict_2d = grouping_utils.preprocessDataset(sds, handle_mode='toDissimilarity')

In [4]:
# Time series features per variable: one row per region, one column per
# [valid component & timestep] pair
dict_ts

{'operationFixRate': array([[0.5 , 0.5 , 0.  , 0.15],
        [0.45, 0.  , 0.5 , 1.  ],
        [1.  , 0.45, 0.5 , 0.5 ]])}

In [5]:
# 1d features per variable: one row per region
dict_1d

{'1d_capacity': array([[0., 0., 0.],
        [1., 0., 1.],
        [0., 0., 0.]])}

In [6]:
# 2d features per variable: for each component key, a vector with one value per pair of regions
dict_2d

{'2d_distance': {0: array([0.9, 0.8, 0. ]), 1: array([0.9, 0.8, 0. ])}}

In [7]:
# Original region list: the coordinate values of the spatial dimension of the dataset
dimension_description='space' 
regions_list = sds.xr_dataset[dimension_description].values
# Number of regions, used below as the size of the distance matrix
n_regions = len(regions_list)



In [8]:
# Reference result: pairwise distance matrix between all regions, computed by the
# library's custom distance function. The cells further down rebuild it step by step.
squared_dist_matrix = grouping_utils.selfDistanceMatrix(dict_ts, dict_1d, dict_2d, n_regions)

In [9]:
# Symmetric matrix of pairwise distances between the regions, with a zero diagonal
squared_dist_matrix

array([[0.   , 4.845, 1.905],
       [4.845, 0.   , 2.755],
       [1.905, 2.755, 0.   ]])

# selfDistanceMatrix()

In [10]:
# Bind the inputs under the names used in the call to grouping_utils.selfDistance
# below, so that the matrix construction can be followed cell by cell.
ds_ts = dict_ts
ds_1d = dict_1d
ds_2d = dict_2d
n_regions = n_regions  # self-assignment: has no effect, n_regions is already defined
var_weightings=None  # handed to selfDistance unchanged

In [11]:
# Square, zero-filled matrix with one row and one column per region
distMatrix = np.zeros(shape=(n_regions, n_regions), dtype=float)
distMatrix

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [12]:
# Rebuild the matrix from scratch: the mirroring step at the end adds the
# transpose in place, so running this cell a second time on an already
# mirrored matrix would double every off-diagonal entry.
distMatrix = np.zeros((n_regions, n_regions))

# Fill the strict upper triangle with the pairwise custom distances
for i in range(n_regions):
    for j in range(i+1,n_regions):
        print(f'regions are {i} and {j}')
        distMatrix[i,j] = grouping_utils.selfDistance(ds_ts,ds_1d, ds_2d, n_regions, i,j, var_weightings=var_weightings)
        print(distMatrix[i,j])
        print('-----------------------------------------------------------------------------------------')
print(distMatrix)
print('-----------------------------------------------------------------------------------------')
# Mirror the upper triangle into the lower one; the diagonal is subtracted so
# that it is not counted twice
distMatrix += distMatrix.T - np.diag(distMatrix.diagonal())

regions are 0 and 1
4.845
-----------------------------------------------------------------------------------------
regions are 0 and 2
1.9050000000000002
-----------------------------------------------------------------------------------------
regions are 1 and 2
2.755
-----------------------------------------------------------------------------------------
[[0.    4.845 1.905]
 [0.    0.    2.755]
 [0.    0.    0.   ]]
-----------------------------------------------------------------------------------------


In [13]:
# Distance matrix after the upper triangle has been mirrored into the lower one
distMatrix

array([[0.   , 4.845, 1.905],
       [4.845, 0.   , 2.755],
       [1.905, 2.755, 0.   ]])

# selfDistance()

In [14]:
# Inputs for the step-by-step walk-through of the distance between one pair of regions
ds_ts = dict_ts
ds_1d = dict_1d
ds_2d = dict_2d
n_regions = n_regions  # self-assignment: has no effect, n_regions is already defined
a = 0  # index of the first region of the pair
b = 1  # index of the second region of the pair
var_weightings=None  # None -> replaced by a weight of 1 per variable in STEP 1
part_weightings=None  # None -> replaced by [1,1,1] in STEP 2

#### STEP 1. If weights for the variables are not passed (via var_weightings), assign a default weight of 1 to each variable

In [15]:
# Weighting factor of each variable: fall back to a weight of 1 for every
# time series, 1d and 2d variable when no weights were supplied
if not var_weightings:
    vars_list = [*ds_ts.keys(), *ds_1d.keys(), *ds_2d.keys()]
    var_weightings = {name: 1 for name in vars_list}

#### STEP 2. If weights for the 3 variable categories are not passed (via part_weightings), assign a default weight of 1 to each category

In [16]:

# Weights of the three categories, in the order time series, 1d, 2d;
# all three count equally when nothing was supplied
part_weightings = part_weightings or [1, 1, 1]

#### STEP 3. Find distance for each variable category separately 

#### STEP 3a. Distance of Time Series Part

In [17]:
# Accumulator for the time series part of the distance between regions a and b
distance_ts = 0

In [14]:
# Reset the accumulator in this cell so that re-running it does not add the
# same contribution to distance_ts a second time.
distance_ts = 0

# dist_ts(a,b) = sum_var{var_weight * sum_features( [value(a) - value(b)]^2 ) }
for var, var_matr in ds_ts.items():
    var_weight_factor = var_weightings[var]
    # Vectors for the two data points (regions), each feature refers to [one valid component & one timestep] for this var
    reg_a = var_matr[a]
    reg_b = var_matr[b]
    print(reg_a)
    print(reg_b)
    print('---------------------------------------------------------------------')
    print(reg_a - reg_b)
    print('---------------------------------------------------------------------')
    # Squared feature-wise difference, computed once and reused for the
    # trace output and for the accumulation
    squared_diff = np.power((reg_a - reg_b),2)
    print(squared_diff)
    print('---------------------------------------------------------------------')
    print(sum(squared_diff))
    print('---------------------------------------------------------------------')
    distance_ts += sum(squared_diff) * var_weight_factor
    print(distance_ts)

[0.5  0.5  0.   0.15]
[0.45 0.   0.5  1.  ]
---------------------------------------------------------------------
[ 0.05  0.5  -0.5  -0.85]
---------------------------------------------------------------------
[0.0025 0.25   0.25   0.7225]
---------------------------------------------------------------------
1.2249999999999999
---------------------------------------------------------------------
1.2249999999999999


In [15]:
# Time series part of the distance between regions a and b
distance_ts

1.2249999999999999

#### STEP 3b. Distance of 1d Variables Part

In [None]:
# Accumulator for the 1d-variable part of the distance between regions a and b
distance_1d = 0

In [None]:
# Reset the accumulator in this cell so that re-running it does not add the
# same contribution to distance_1d a second time.
distance_1d = 0

for var, var_matr in ds_1d.items():
    print(var)
    print(var_matr)
    print('---------------------------------------------------------------------')
    var_weight_factor = var_weightings[var]

    # Vectors for the two data points (regions), each feature refers to one valid component for this var
    reg_a = var_matr[a]
    reg_b = var_matr[b]
    print(reg_a)
    print(reg_b)
    print('---------------------------------------------------------------------')
    # dist_1d(a,b) = sum_var{var_weight * sum_c( [value_var_c(a) - value_var_c(b)]^2 ) }
    distance_1d += sum(np.power((reg_a - reg_b),2)) * var_weight_factor

#### STEP 3c. Distance of 2d Variables Part

In [None]:
# Show the 2d data again: the following cells index into these per-component vectors
dict_2d

In [None]:
# Accumulator for the 2d-variable part of the distance between regions a and b
distance_2d = 0

In [None]:
# The index of the value for the pair (region a, region b), a < b, in the distance vectors.
# Assumes the vectors list the strict upper triangle row by row, i.e.
# (0,1), (0,2), ..., (0,n-1), (1,2), ... as in SciPy's condensed distance form;
# the 3-region example data is consistent with this - TODO confirm in preprocessDataset.
# Rows 0..a-1 hold (n-1) + (n-2) + ... + (n-a) = a*n - a*(a+1)/2 entries, and
# within row a the pair (a, b) sits at offset b - a - 1.
# The earlier expression a*(n-a) + (b-a) - 1 only agrees with this for n <= 3:
# with 4 regions it maps both (1,3) and (2,3) to index 4.
# NOTE(review): if grouping_utils.selfDistance uses the earlier expression, it needs the same fix.
index_regA_regB = a * n_regions - a * (a + 1) // 2 + (b - a - 1)
index_regA_regB

In [None]:
# Reset the accumulator in this cell so that re-running it does not add the
# same contribution to distance_2d a second time.
distance_2d = 0

for var, var_dict in ds_2d.items():
    var_weight_factor = var_weightings[var]
    # Loop variable is comp_key rather than `component`, so that the loop does
    # not overwrite the coordinate list of that name defined with the test dataset
    for comp_key, data in var_dict.items():
        # Find the corresponding distance value for region_a and region_b
        value_var_c = data[index_regA_regB]
        print(value_var_c)
        print('--------------------------------------------------------------')
        # NaN entries are skipped and therefore contribute nothing
        if not np.isnan(value_var_c):
            distance_2d += (value_var_c*value_var_c) * var_weight_factor



#### STEP 4. Add up the three partial distances, each weighted by the part_weightings entry of its category

In [None]:
# Total distance between regions a and b: category-weighted sum of the
# time series, 1d and 2d parts
(
    part_weightings[0] * distance_ts
    + part_weightings[1] * distance_1d
    + part_weightings[2] * distance_2d
)