In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import geopandas
import warnings

from sklearn import preprocessing as prep
from scipy.cluster import hierarchy
from sklearn import metrics

import FINE.spagat.dataset as spd
from ipynb.fs.full import grouping_utils

## test dataset - (test_dataset2)

In [2]:
# --- Coordinate labels ---------------------------------------------------
component = ['c1', 'c2', 'c3', 'c4']
Period = [0]
TimeStep = ['T0', 'T1']
space = ['01_reg', '02_reg', '03_reg']
space_2 = space.copy()  # same region labels, used for the second region axis

# --- Time-series variable, axes: (component, Period, TimeStep, space) -----
# c1 and c3 are entirely NaN; c2 and c4 hold one row per time step.
demand = np.stack([
    np.full((len(Period), len(TimeStep), len(space)), np.nan),
    [[[1, 0.9, 2],
      [1, 0,   0.9]]],
    np.full((len(Period), len(TimeStep), len(space)), np.nan),
    [[[0,   1, 1],
      [0.3, 2, 1]]],
])
demand = xr.DataArray(
    demand,
    coords=[component, Period, TimeStep, space],
    dims=['component', 'Period', 'TimeStep', 'space'],
)

# --- 1d variable, axes: (component, space) --------------------------------
# c4 is entirely NaN.
cap_1d = np.array([
    [0.9, 1, 0.9],
    [0, 0, 0],
    [0.9, 1, 0.9],
    [np.nan, np.nan, np.nan],
])
cap_1d = xr.DataArray(
    cap_1d,
    coords=[component, space],
    dims=['component', 'space'],
)

# --- 2d variable, axes: (component, space, space_2) -----------------------
# c1 and c2 hold symmetric matrices with a zero diagonal; c3 and c4 are entirely NaN.
dist_2d = np.stack([
    [[0, 1, 2], [1, 0, 10], [2, 10, 0]],
    [[0, 0.1, 0.2], [0.1, 0, 1], [0.2, 1, 0]],
    np.full((len(space), len(space_2)), np.nan),
    np.full((len(space), len(space_2)), np.nan),
])
dist_2d = xr.DataArray(
    dist_2d,
    coords=[component, space, space_2],
    dims=['component', 'space', 'space_2'],
)

# --- Bundle the three variables and attach them to a SpagatDataset --------
ds = xr.Dataset({
    'operationFixRate': demand,
    '1d_capacity': cap_1d,
    '2d_distance': dist_2d,
})

sds = spd.SpagatDataset()
sds.xr_dataset = ds

# Last expression of the cell: display the assembled dataset.
sds.xr_dataset

In [3]:
#sds
#handle_mode
# Module-level stand-ins for the remaining parameters of the routine being
# prototyped here. `vars` and `dims` are not read by any later visible cell.
# NOTE(review): the name `vars` shadows Python's builtin `vars()` for the rest
# of the session.
vars = 'all'
dims = 'all'
var_weightings = None

In [4]:
# Short alias for the xarray dataset attached to the SpagatDataset above.
dataset = sds.xr_dataset

#### STEP 1. Traverse all variables in the dataset, and put them in separate categories

In [5]:
# Three separate (distinct) dicts, keyed by variable name, one per category:
# time-series variables, 1d variables and 2d variables.
vars_ts, vars_1d, vars_2d = {}, {}, {}

In [6]:
# for varname, da in dataset.data_vars.items():
#     #print(f'variable is {varname}')
#     #print(f'corresponding data is {da}')
    
#     if sorted(da.dims) == sorted(('component','Period','TimeStep', 'space')):  
# #         print(f'variable is {varname}')
# #         print(f'corresponding data is {da}')
        
#         da = da.transpose('Period','component','space','TimeStep')[0]  
#         #print(f'transposed da is {da}')
#         vars_ts[varname] = da
# vars_ts

In [7]:
# Sort every data variable into one of three buckets, decided purely by the
# *set* of its dimension names (dimension order does not matter):
#   {component, Period, TimeStep, space} -> vars_ts
#   {component, space}                   -> vars_1d
#   {component, space, space_2}          -> vars_2d
# Anything else is reported with a warning and left out.
for varname, da in dataset.data_vars.items():
    dim_names = set(da.dims)

    if dim_names == {'component', 'Period', 'TimeStep', 'space'}:   #TODO: maybe space should be generalized with additional variable - dimension_description ?
        # Period is not considered -> TODO: consider the Period dimension.
        # Move Period to the front and keep only its first entry, leaving the
        # axes in the order (component, space, TimeStep).
        vars_ts[varname] = da.transpose('Period', 'component', 'space', 'TimeStep')[0]

    elif dim_names == {'component', 'space'}:
        vars_1d[varname] = da

    elif dim_names == {'component', 'space', 'space_2'}:
        vars_2d[varname] = da

    else:
        # The original message had a stray "+ '" inside the string literal,
        # which showed up verbatim in the warning text.
        warnings.warn(
            f"Variable '{varname}' has dimensions '{da.dims}' "
            "which are not considered for spatial aggregation."
        )

In [8]:
# Number of regions = number of labels on the 'space' coordinate.
n_regions = len(dataset['space'].values)

# Component labels as a plain Python list, in dataset order.
component_list = list(dataset['component'].values)

In [9]:
# Inspect the time-series bucket filled by the classification loop.
vars_ts

{'operationFixRate': <xarray.DataArray 'operationFixRate' (component: 4, space: 3, TimeStep: 2)>
 array([[[nan, nan],
         [nan, nan],
         [nan, nan]],
 
        [[1. , 1. ],
         [0.9, 0. ],
         [2. , 0.9]],
 
        [[nan, nan],
         [nan, nan],
         [nan, nan]],
 
        [[0. , 0.3],
         [1. , 2. ],
         [1. , 1. ]]])
 Coordinates:
   * component  (component) <U2 'c1' 'c2' 'c3' 'c4'
     Period     int32 0
   * TimeStep   (TimeStep) <U2 'T0' 'T1'
   * space      (space) <U6 '01_reg' '02_reg' '03_reg'}

#### STEP 2. preprocess Time Series

In [10]:
# Preprocess the time-series variables with the project helper, passing the
# number of regions and the number of components.
# NOTE(review): in the recorded run the helper returns a dict mapping the
# variable name to a 2-D array (3 x 4 here; presumably one row per region) -
# confirm the exact layout and scaling in grouping_utils.
ds_timeseries = grouping_utils.preprocessTimeSeries(vars_ts, n_regions, len(component_list))
ds_timeseries

{'operationFixRate': array([[0.5 , 0.5 , 0.  , 0.15],
        [0.45, 0.  , 0.5 , 1.  ],
        [1.  , 0.45, 0.5 , 0.5 ]])}

#### STEP 3. preprocess 1d Variables

In [11]:
# Inspect the 1d bucket filled by the classification loop.
vars_1d

{'1d_capacity': <xarray.DataArray '1d_capacity' (component: 4, space: 3)>
 array([[0.9, 1. , 0.9],
        [0. , 0. , 0. ],
        [0.9, 1. , 0.9],
        [nan, nan, nan]])
 Coordinates:
   * component  (component) <U2 'c1' 'c2' 'c3' 'c4'
   * space      (space) <U6 '01_reg' '02_reg' '03_reg'}

In [12]:
# Preprocess the 1d variables with the project helper, passing the number of
# components.
# NOTE(review): in the recorded run the helper returns a dict mapping the
# variable name to a 2-D array (3 x 3 here; presumably one row per region) -
# confirm the exact layout and scaling in grouping_utils.
ds_1d_vars = grouping_utils.preprocess1dVariables(vars_1d, len(component_list))
ds_1d_vars

{'1d_capacity': array([[0., 0., 0.],
        [1., 0., 1.],
        [0., 0., 0.]])}

#### STEP 4. Varies based on handle_mode

##### STEP 4a. if handle_mode == 'toDissimilarity', call preprocess2dVariables directly

In [13]:
# Marks the start of the 'toDissimilarity' branch (STEP 4a).
# NOTE(review): no visible cell reads `handle_mode`; the following
# preprocess2dVariables call passes the mode as a string literal instead.
handle_mode = 'toDissimilarity'

In [14]:
# Inspect the 2d bucket filled by the classification loop.
vars_2d

{'2d_distance': <xarray.DataArray '2d_distance' (component: 4, space: 3, space_2: 3)>
 array([[[ 0. ,  1. ,  2. ],
         [ 1. ,  0. , 10. ],
         [ 2. , 10. ,  0. ]],
 
        [[ 0. ,  0.1,  0.2],
         [ 0.1,  0. ,  1. ],
         [ 0.2,  1. ,  0. ]],
 
        [[ nan,  nan,  nan],
         [ nan,  nan,  nan],
         [ nan,  nan,  nan]],
 
        [[ nan,  nan,  nan],
         [ nan,  nan,  nan],
         [ nan,  nan,  nan]]])
 Coordinates:
   * component  (component) <U2 'c1' 'c2' 'c3' 'c4'
   * space      (space) <U6 '01_reg' '02_reg' '03_reg'
   * space_2    (space_2) <U6 '01_reg' '02_reg' '03_reg'}

In [15]:
# Preprocess the 2d variables in 'toDissimilarity' mode with the project helper.
# NOTE(review): in the recorded run the result is a nested dict
# {variable name: {integer key: 1-D array}}; the integer keys presumably index
# the components that carry data - confirm in grouping_utils.
ds_2d_vars = grouping_utils.preprocess2dVariables(vars_2d, component_list, handle_mode='toDissimilarity')
ds_2d_vars

{'2d_distance': {0: array([0.9, 0.8, 0. ]), 1: array([0.9, 0.8, 0. ])}}

In [16]:
#return ds_timeseries, ds_1d_vars, ds_2d_vars

##### STEP 4b. if handle_mode == 'toAffinity' -> convert each matrix into a weighted matrix based on var_weightings

In [17]:
# Marks the start of the 'toAffinity' branch (STEP 4b).
# NOTE(review): no visible cell reads `handle_mode`; the later
# preprocess2dVariables call passes the mode as a string literal instead.
handle_mode = 'toAffinity'

In [18]:
# Weighting factors of each variable.
# A non-empty mapping supplied earlier is kept untouched. When nothing usable
# was supplied (None or an empty mapping), every collected variable - time
# series, 1d and 2d - gets the weight 1.
# Note: the check is deliberately a truthiness test rather than `is None`, so
# an empty mapping is still replaced by the defaults.
if not var_weightings:
    vars_list = [*vars_ts, *vars_1d, *vars_2d]
    var_weightings = dict.fromkeys(vars_list, 1)

In [19]:
# Inspect the weight assigned to each variable.
var_weightings

{'operationFixRate': 1, '1d_capacity': 1, '2d_distance': 1}

##### STEP 4b. (i)  For each time series variable: convert its corresponding matrix into a weighted matrix (weights are taken from var_weightings)

In [20]:
# Seed matrix for the time-series block: a single column of zeros with
# n_regions rows.
matrix_ts = np.zeros((n_regions, 1))
matrix_ts

array([[0.],
       [0.],
       [0.]])

In [21]:
# Scale each time-series matrix by the weight of its variable and place all of
# them side by side, to the right of one leading zero column, in a single
# concatenation.
# The leading zero column is kept on purpose: the next cell removes column 0.
# The matrix is built from a fresh placeholder instead of extending the current
# `matrix_ts`, so re-running this cell no longer appends the same columns again.
matrix_ts = np.concatenate(
    [np.zeros((n_regions, 1))]
    + [var_matrix * var_weightings[var] for var, var_matrix in ds_timeseries.items()],
    axis=1,
)

In [22]:
# Remove column 0 (the leading zero placeholder column).
# NOTE: run this exactly once after the concatenation; a second run would
# remove a real data column.
matrix_ts = np.delete(matrix_ts, obj=0, axis=1)


In [23]:
# Inspect the weighted time-series matrix.
matrix_ts

array([[0.5 , 0.5 , 0.  , 0.15],
       [0.45, 0.  , 0.5 , 1.  ],
       [1.  , 0.45, 0.5 , 0.5 ]])

In [24]:
# Show the preprocessed matrix of the only time-series variable, for comparison.
ds_timeseries.get('operationFixRate')


array([[0.5 , 0.5 , 0.  , 0.15],
       [0.45, 0.  , 0.5 , 1.  ],
       [1.  , 0.45, 0.5 , 0.5 ]])

In [25]:
# Element-wise sanity check: with a single time-series variable of weight 1 the
# weighted matrix should equal the preprocessed matrix.
# NOTE(review): `==` compares floats exactly and yields False wherever a value
# is NaN; an assertion such as np.testing.assert_allclose would make this check
# fail loudly instead of relying on visual inspection.
matrix_ts == ds_timeseries.get('operationFixRate')

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

##### STEP 4b. (ii)  For each 1d variable: convert its corresponding matrix into a weighted matrix (weights are taken from var_weightings)

In [27]:
###### For 1d vars: obtain the single matrix - matrix_1d
# Seed matrix for the 1d block: a single column of zeros with n_regions rows.
matrix_1d = np.zeros((n_regions, 1))
matrix_1d

array([[0.],
       [0.],
       [0.]])

In [28]:
# Build the weighted 1d matrix in one step: scale each variable's matrix by the
# weight of that variable and place the results side by side.
# The zero-width float seed applies the same dtype promotion as the former zero
# placeholder column and makes an empty `ds_1d_vars` yield an (n_regions, 0)
# array, which is what "append, then delete column 0" produced.
# Because the matrix is rebuilt from scratch, re-running this cell is safe; the
# previous version dropped a real data column and appended duplicates on a
# second run.
matrix_1d = np.concatenate(
    [np.zeros((n_regions, 0))]
    + [var_matrix * var_weightings[var] for var, var_matrix in ds_1d_vars.items()],
    axis=1,
)

In [29]:
# Element-wise sanity check: with a single 1d variable of weight 1 the weighted
# matrix should equal the preprocessed matrix.
# NOTE(review): `==` compares floats exactly and yields False wherever a value
# is NaN; an assertion such as np.testing.assert_allclose would make this check
# fail loudly instead of relying on visual inspection.
matrix_1d == ds_1d_vars.get('1d_capacity')

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])

##### STEP 4b. (iii)  a. Preprocess 2d variables

In [30]:
# Preprocess the 2d variables again, this time in 'toAffinity' mode; this
# replaces the `ds_2d_vars` produced earlier in 'toDissimilarity' mode.
# NOTE(review): in the recorded run the result is a nested dict
# {variable name: {integer key: square matrix}} - confirm the meaning of the
# integer keys and the scaling in grouping_utils.
ds_2d_vars = grouping_utils.preprocess2dVariables(vars_2d, component_list, handle_mode='toAffinity')

In [31]:
# Inspect the per-component matrices returned in 'toAffinity' mode.
ds_2d_vars

{'2d_distance': {0: array([[0. , 0.1, 0.2],
         [0.1, 0. , 1. ],
         [0.2, 1. , 0. ]]),
  1: array([[0. , 0.1, 0.2],
         [0.1, 0. , 1. ],
         [0.2, 1. , 0. ]])}}

##### STEP 4b. (iii)  b. For each variable, convert the matrix of each component into a weighted matrix, then add the weighted matrices of all components to obtain one single weighted affinity matrix

In [32]:
###### For 2d vars: obtain a single square matrix of size n_regions x n_regions
# Starts as all zeros.
matrix_2d = np.zeros((n_regions,n_regions))
matrix_2d

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [33]:
# Sum the weighted per-component matrices of every 2d variable into one
# n_regions x n_regions matrix.
# After adding, the value in matrix_2d is not in the range [0,1] any more
#
# The accumulator is (re)created here so that re-running this cell does not add
# onto the result of a previous run.
matrix_2d = np.zeros((n_regions, n_regions))

for var, var_dict in ds_2d_vars.items():

    weight = var_weightings[var]

    # Only the matrices are needed, so iterate over the values. The previous
    # loop variable `component` overwrote the `component` label list defined
    # while building the test dataset.
    for data in var_dict.values():
        matrix_2d += data * weight


In [34]:
# Inspect the summed, weighted 2d matrix.
matrix_2d

array([[0. , 0.2, 0.4],
       [0.2, 0. , 2. ],
       [0.4, 2. , 0. ]])

In [None]:

###### Return 3 separate matrices
#return matrix_ts, matrix_1d, matrix_2d