In [1]:
from geopy.distance import geodesic
import numpy as np

In [None]:
# Three sample points used to try out geopy below. Presumably (latitude, longitude)
# pairs in decimal degrees -- TODO confirm the order against the data source.
vl_madalena = (-23.553039, -46.686074)
urubici = (-27.999667, -49.586892)
mamangua = (-23.233871, -44.613234)

In [None]:
geodesic(vl_madalena, urubici, ellipsoid='WGS-84').km  # author's recorded result: 573.37 km

In [None]:
geodesic(vl_madalena, mamangua, ellipsoid='WGS-84').km  # author's recorded result: 214.49 km

In [None]:
geodesic(mamangua, urubici, ellipsoid='WGS-84').km  # author's recorded result: 727.50 km

In [2]:
from typing import Dict, Iterable, Tuple


def geodistance(nodeA: str, nodeB: str, nodes_coords: Dict[str, Tuple]) -> float:
    """Return the WGS-84 geodesic distance, in kilometres, between two named nodes.

    Args:
        nodeA: Key of the first node in ``nodes_coords``.
        nodeB: Key of the second node in ``nodes_coords``.
        nodes_coords: Mapping from node key to the coordinate tuple that is handed
            to ``geodesic`` unchanged (presumably ``(lat, lon)`` -- confirm against
            whatever builds the mapping).

    Returns:
        The ``.km`` value of the ``geodesic`` result for the two coordinate tuples.

    Raises:
        KeyError: If either key is missing from ``nodes_coords``.
    """
    start, end = nodes_coords[nodeA], nodes_coords[nodeB]
    return geodesic(start, end, ellipsoid='WGS-84').km


def geodistance_from_pair(nodes_pair: Tuple[str, str], nodes_coord: Dict[str, Tuple]) -> float:
    """Return the distance between the two nodes named in ``nodes_pair``.

    Thin adapter around ``geodistance`` for callers that carry the two node keys
    as one indexable pair (for example the tuples produced by
    ``itertools.combinations``).

    Args:
        nodes_pair: Indexable holding the first node key at position 0 and the
            second at position 1. Any further entries are ignored.
        nodes_coord: Mapping from node key to coordinate tuple, forwarded to
            ``geodistance`` as its ``nodes_coords`` argument.

    Returns:
        Whatever ``geodistance`` returns for the two keys.
    """
    first, second = nodes_pair[0], nodes_pair[1]
    return geodistance(first, second, nodes_coord)

In [None]:
# NOTE(review): `nodes_coords` is first assigned in a later cell of this notebook,
# so this call raises NameError on a fresh top-to-bottom run.
geodistance(nodeA='DE111', nodeB='DEF0C', nodes_coords=nodes_coords)

In [None]:
# NOTE(review): `nodes_coords` is first assigned in a later cell of this notebook,
# so this call raises NameError on a fresh top-to-bottom run.
geodistance_from_pair(('DE111', 'DEF0C'), nodes_coords)

# Getting Specific

In [3]:
import pandas as pd

In [80]:
# Load the centroid-positions table. Later cells read a 'nuts_id' level from its
# columns and slice its rows with date strings, so it is presumably a date-indexed
# frame whose columns are (nuts_id, coords) pairs -- TODO confirm.
centroids = pd.read_hdf('../data/04_feature/power-centroids-positions-2000-2015.hdf')

In [85]:
# Distinct district ids taken from the 'nuts_id' level of the column index.
all_districts = centroids.columns.get_level_values('nuts_id').unique()

In [5]:
# District ids of interest. The same list is declared again in the cells that
# build `A_sparse_short` further down.
targets = ['DEF07', 'DEF0C', 'DEF05', 'DEF0E', 'DEF0B']

In [86]:
# Per-column median over the rows labelled 2000-01-01 through 2015-12-31 (label
# slices in .loc include both endpoints), restricted to the columns of the
# districts in `all_districts`.
centroids_median = centroids.loc['2000-01-01':'2015-12-31', all_districts].median()

centroids_median

nuts_id  coords
DEF0C    lat       54.631738
         lon        9.395936
DEF08    lat       54.304763
         lon       10.990861
DEA43    lat       52.170185
                     ...    
DE239    lon       12.259649
DE21L    lat       47.985235
         lon       11.406016
DE715    lat       49.454318
         lon        8.836523
Length: 606, dtype: float64

In [87]:
# Map each district id to its median coordinates as a plain tuple, in the order
# they are stored under that district in `centroids_median` (lat, then lon).
nodes_coords = {
    nuts_id: tuple(centroids_median[nuts_id])
    for nuts_id in all_districts
}

nodes_coords

{'DEF0C': (54.63173774352925, 9.39593596801835),
 'DEF08': (54.30476287841092, 10.990861264961348),
 'DEA43': (52.170185478656535, 8.675836018274389),
 'DEF07': (54.65531672517318, 8.974126972180605),
 'DEA5B': (51.54057330543451, 8.1774564896163),
 'DEA34': (51.97389408568155, 6.943760876165726),
 'DEF0B': (54.29644448857303, 9.789379082515651),
 'DEE0E': (51.800884515654175, 12.750215577808452),
 'DE40F': (53.162051904748246, 12.046425301321092),
 'DEF05': (54.110200194884904, 9.007904152162004),
 'DEA5C': (51.60168946653989, 7.667751904576686),
 'DE40H': (51.97492529106579, 13.224487478722464),
 'DEE09': (51.91511807627348, 11.102788802606984),
 'DED2D': (51.115737851512314, 14.859680901113709),
 'DED42': (50.68797941250948, 13.231246863058036),
 'DED52': (51.209025585453276, 12.600709023858782),
 'DEA47': (51.63658212198467, 8.814360703467148),
 'DEA44': (51.65643033890103, 9.23240677217417),
 'DE716': (49.82354299157895, 8.848172902631578),
 'DEA46': (52.37722697985635, 8.75400003

In [88]:
from itertools import combinations

In [90]:
# Every unordered pair of distinct districts, in the order itertools yields them.
node_pairs = [*combinations(all_districts, 2)]

node_pairs

[('DEF0C', 'DEF08'),
 ('DEF0C', 'DEA43'),
 ('DEF0C', 'DEF07'),
 ('DEF0C', 'DEA5B'),
 ('DEF0C', 'DEA34'),
 ('DEF0C', 'DEF0B'),
 ('DEF0C', 'DEE0E'),
 ('DEF0C', 'DE40F'),
 ('DEF0C', 'DEF05'),
 ('DEF0C', 'DEA5C'),
 ('DEF0C', 'DE40H'),
 ('DEF0C', 'DEE09'),
 ('DEF0C', 'DED2D'),
 ('DEF0C', 'DED42'),
 ('DEF0C', 'DED52'),
 ('DEF0C', 'DEA47'),
 ('DEF0C', 'DEA44'),
 ('DEF0C', 'DE716'),
 ('DEF0C', 'DEA46'),
 ('DEF0C', 'DED43'),
 ('DEF0C', 'DE40I'),
 ('DEF0C', 'DE137'),
 ('DEF0C', 'DE405'),
 ('DEF0C', 'DEA57'),
 ('DEF0C', 'DE725'),
 ('DEF0C', 'DEA35'),
 ('DEF0C', 'DEA45'),
 ('DEF0C', 'DE409'),
 ('DEF0C', 'DE932'),
 ('DEF0C', 'DE925'),
 ('DEF0C', 'DE91B'),
 ('DEF0C', 'DEA58'),
 ('DEF0C', 'DE80L'),
 ('DEF0C', 'DEA2A'),
 ('DEF0C', 'DE40D'),
 ('DEF0C', 'DEE07'),
 ('DEF0C', 'DE941'),
 ('DEF0C', 'DEB1B'),
 ('DEF0C', 'DE736'),
 ('DEF0C', 'DE948'),
 ('DEF0C', 'DEE0B'),
 ('DEF0C', 'DEE08'),
 ('DEF0C', 'DE149'),
 ('DEF0C', 'DEC06'),
 ('DEF0C', 'DE722'),
 ('DEF0C', 'DE916'),
 ('DEF0C', 'DE939'),
 ('DEF0C', 'D

In [91]:
# Distance in km for every district pair, keyed by the pair itself.
# NOTE(review): no later cell in this notebook reads this dict.
nodes_pairwise_distances = {
    node_pair: geodistance_from_pair(node_pair, nodes_coords)
    for node_pair in node_pairs
}

nodes_pairwise_distances

{('DEF0C', 'DEF08'): 109.62639937584876,
 ('DEF0C', 'DEA43'): 278.10669265481124,
 ('DEF0C', 'DEF07'): 27.358242338513392,
 ('DEF0C', 'DEA5B'): 353.54949148169624,
 ('DEF0C', 'DEA34'): 337.9160964957726,
 ('DEF0C', 'DEF0B'): 45.20905335272591,
 ('DEF0C', 'DEE0E'): 386.50645820982436,
 ('DEF0C', 'DE40F'): 238.9614022743313,
 ('DEF0C', 'DEF05'): 63.29408872529291,
 ('DEF0C', 'DEA5C'): 356.48137200737796,
 ('DEF0C', 'DE40H'): 390.4885247927443,
 ('DEF0C', 'DEE09'): 323.04335074941224,
 ('DEF0C', 'DED2D'): 536.7566615359514,
 ('DEF0C', 'DED42'): 509.6711184058335,
 ('DEF0C', 'DED52'): 437.55124849811705,
 ('DEF0C', 'DEA47'): 335.58876270582283,
 ('DEF0C', 'DEA44'): 331.29921900656507,
 ('DEF0C', 'DE716'): 536.3180906371344,
 ('DEF0C', 'DEA46'): 254.5038689912445,
 ('DEF0C', 'DED43'): 474.2940551572856,
 ('DEF0C', 'DE40I'): 334.04951254095346,
 ('DEF0C', 'DE137'): 741.2884899347122,
 ('DEF0C', 'DE405'): 355.078600286902,
 ('DEF0C', 'DEA57'): 356.96475982969173,
 ('DEF0C', 'DE725'): 448.0899

In [128]:
def build_distances_mx(
    node_pairs: Iterable[Tuple[str, str]],
    nodes_coords: Dict[str, Tuple[float, float]],
) -> pd.DataFrame:
    """Build the symmetric node-to-node distance matrix.

    Args:
        node_pairs: Iterable of ``(nodeA, nodeB)`` key pairs whose distance should
            be computed. Each pair fills both ``[nodeA, nodeB]`` and
            ``[nodeB, nodeA]``.
        nodes_coords: Mapping from node key to coordinate tuple. Its key order
            defines the row and column order of the result.

    Returns:
        A float DataFrame indexed and columned by the keys of ``nodes_coords``.
        The diagonal is 0.0, listed pairs hold the value returned by
        ``geodistance_from_pair``, and pairs that were not listed stay NaN.

    Raises:
        KeyError: If a pair names a node that is missing from ``nodes_coords``.
    """
    labels = list(nodes_coords)
    position = {label: i for i, label in enumerate(labels)}

    # Fill a plain ndarray and wrap it once at the end. Writing through
    # `DataFrame.values` only works while pandas hands back a writable view,
    # which it does not under Copy-on-Write, and one `.loc` assignment per pair
    # is far slower than direct array indexing.
    distances = np.full((len(labels), len(labels)), np.nan)

    for pair in node_pairs:
        distance = geodistance_from_pair(pair, nodes_coords)
        row, col = position[pair[0]], position[pair[1]]
        distances[row, col] = distance
        distances[col, row] = distance  # distance B-A = distance A-B

    np.fill_diagonal(distances, 0.0)  # distance A-A = 0.0

    return pd.DataFrame(distances, index=labels, columns=labels)

In [95]:
def build_adjacency_mx(dist_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Turn a distance matrix into Gaussian-kernel adjacency weights.

    Each distance ``d`` becomes ``exp(-(d / sigma) ** 2)``, where ``sigma`` is the
    standard deviation taken over every entry of the distance matrix.

    Args:
        dist_dataframe: Matrix of distances. It is not modified.

    Returns:
        A frame with the same index and columns holding the weights; a distance
        of zero maps to a weight of 1.
    """
    sigma = dist_dataframe.values.std()
    scaled = dist_dataframe / sigma
    return np.exp(-np.square(scaled))

In [129]:
# Pairwise distance matrix over all districts; values are in km, the unit that
# `geodistance` returns.
D = build_distances_mx(node_pairs, nodes_coords)

# Distance matrix should be square

In [130]:
# Guard: the distance matrix must have as many rows as columns.
if len(D.index) != len(D.columns):
    raise RuntimeError('D is not squared')

# Building the adjacency matrix

In [131]:
# Dense adjacency weights derived from the distance matrix by build_adjacency_mx.
A = build_adjacency_mx(D)

A

Unnamed: 0,DEF0C,DEF08,DEA43,DEF07,DEA5B,DEA34,DEF0B,DEE0E,DE40F,DEF05,...,DE21C,DE251,DE943,DE24C,DE266,DEA2C,DE221,DE239,DE21L,DE715
DEF0C,1.000000e+00,5.782925e-01,0.029462,9.664661e-01,0.003358,0.005496,9.110645e-01,0.001105,7.410687e-02,8.331299e-01,...,3.505591e-11,5.413260e-08,2.406099e-01,0.000003,0.000015,0.000036,1.388038e-10,7.122642e-08,6.194547e-12,2.528066e-07
DEF08,5.782925e-01,1.000000e+00,0.025741,4.282425e-01,0.002628,0.001653,7.566526e-01,0.015421,3.835672e-01,4.564739e-01,...,7.090379e-10,5.046979e-07,1.139285e-01,0.000035,0.000082,0.000015,5.016839e-09,1.796044e-06,1.587894e-10,6.332084e-07
DEA43,2.946214e-02,2.574083e-02,1.000000,3.007769e-02,0.757773,0.514536,6.059482e-02,0.026083,5.371451e-02,1.168640e-01,...,3.137553e-05,3.296193e-03,5.373967e-01,0.017079,0.064208,0.133613,3.259842e-05,1.067177e-03,9.067702e-06,1.552041e-02
DEF07,9.664661e-01,4.282425e-01,0.030078,1.000000e+00,0.003679,0.007508,8.187296e-01,0.000557,4.434712e-02,8.453489e-01,...,2.042012e-11,3.606270e-08,2.695392e-01,0.000002,0.000011,0.000048,6.778841e-11,3.579243e-08,3.437431e-12,2.344678e-07
DEA5B,3.358343e-03,2.627570e-03,0.757773,3.678641e-03,1.000000,0.646228,8.055945e-03,0.010079,9.573807e-03,2.087801e-02,...,2.170159e-04,1.256918e-02,2.152464e-01,0.031665,0.129617,0.438504,1.374152e-04,2.236376e-03,6.858609e-05,7.777320e-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DEA2C,3.554162e-05,1.547189e-05,0.133613,4.807033e-05,0.438504,0.376018,9.464884e-05,0.000218,8.932602e-05,4.559938e-04,...,3.758774e-04,1.144748e-02,1.760329e-02,0.008840,0.053593,1.000000,8.802313e-05,5.480497e-04,1.250414e-04,1.781956e-01
DE221,1.388038e-10,5.016839e-09,0.000033,6.778841e-11,0.000137,0.000002,2.029197e-09,0.002275,5.640347e-06,2.523427e-09,...,7.198661e-01,4.117775e-01,1.488303e-07,0.224653,0.086540,0.000088,1.000000e+00,5.739143e-01,7.177267e-01,3.868299e-02
DE239,7.122642e-08,1.796044e-06,0.001067,3.579243e-08,0.002236,0.000056,7.148553e-07,0.051141,5.713864e-04,7.233706e-07,...,2.769469e-01,5.068495e-01,1.532733e-05,0.687294,0.324831,0.000548,5.739143e-01,1.000000e+00,2.192873e-01,6.047967e-02
DE21L,6.194547e-12,1.587894e-10,0.000009,3.437431e-12,0.000069,0.000001,9.801442e-11,0.000178,2.489346e-07,1.795323e-10,...,9.633638e-01,3.555798e-01,2.472880e-08,0.090896,0.044151,0.000125,7.177267e-01,2.192873e-01,1.000000e+00,5.815257e-02


In [132]:
# Largest distance, in km (the unit `geodistance` returns), at which two districts
# still count as linked; weights of pairs farther apart are set to zero.
MAX_LINK_DISTANCE_KM = 100

# `mask` returns a new frame, so the dense matrix `A` is left untouched.
A_sparse = A.mask(D > MAX_LINK_DISTANCE_KM, 0)

A_sparse

Unnamed: 0,DEF0C,DEF08,DEA43,DEF07,DEA5B,DEA34,DEF0B,DEE0E,DE40F,DEF05,...,DE21C,DE251,DE943,DE24C,DE266,DEA2C,DE221,DE239,DE21L,DE715
DEF0C,1.000000,0.0,0.000000,0.966466,0.000000,0.000000,0.911064,0.0,0.0,0.833130,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEF08,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.756653,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEA43,0.000000,0.0,1.000000,0.000000,0.757773,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEF07,0.966466,0.0,0.000000,1.000000,0.000000,0.000000,0.818730,0.0,0.0,0.845349,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEA5B,0.000000,0.0,0.757773,0.000000,1.000000,0.646228,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DEA2C,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,1.0,0.000000,0.0,0.000000,0.0
DE221,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.719866,0.0,0.0,0.000000,0.0,0.0,1.000000,0.0,0.717727,0.0
DE239,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.687294,0.0,0.0,0.000000,1.0,0.000000,0.0
DE21L,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.963364,0.0,0.0,0.000000,0.0,0.0,0.717727,0.0,1.000000,0.0


In [139]:
# Number of non-zero entries in the dense adjacency matrix.
(A != 0).sum().sum()

91809

In [138]:
# Number of non-zero entries left after thresholding.
(A_sparse != 0).sum().sum()

7645

In [140]:
# Strictly lower triangle of A_sparse (diagonal excluded). k=-1 drops the diagonal
# directly; subtracting an identity matrix only leaves zeros there when every
# self-weight is exactly 1.
A_tril = pd.DataFrame(
    data=np.tril(A_sparse, k=-1),
    columns=A_sparse.columns,
    index=A_sparse.index,
)

A_tril

Unnamed: 0,DEF0C,DEF08,DEA43,DEF07,DEA5B,DEA34,DEF0B,DEE0E,DE40F,DEF05,...,DE21C,DE251,DE943,DE24C,DE266,DEA2C,DE221,DE239,DE21L,DE715
DEF0C,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEF08,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEA43,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEF07,0.966466,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DEA5B,0.000000,0.0,0.757773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DEA2C,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DE221,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.719866,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0
DE239,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.687294,0.0,0.0,0.000000,0.0,0.0,0.0
DE21L,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.963364,0.0,0.0,0.000000,0.0,0.0,0.717727,0.0,0.0,0.0


In [2]:
import pickle

In [None]:

# Persist the thresholded adjacency matrix. The context manager closes the file
# handle even if pickling fails.
with open('../data/05_model_input/adj_mx.pkl', 'wb') as adj_file:
    pickle.dump(A_sparse, adj_file)

In [None]:
# Reload the stored adjacency matrix; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code -- only load files this project wrote.
with open('../data/05_model_input/adj_mx.pkl', 'rb') as adj_file:
    A_sparse = pickle.load(adj_file)

A_sparse

In [None]:
targets = ['DEF07', 'DEF0C', 'DEF05', 'DEF0E', 'DEF0B']

# The row mask is built from the index and the column mask from the columns, so
# the selection stays correct even if the two axes are ever labelled or ordered
# differently. Rows and columns keep the order they have in A_sparse, not the
# order of `targets`.
A_sparse_short = A_sparse.loc[A_sparse.index.isin(targets), A_sparse.columns.isin(targets)]

# The context manager closes the file handle even if pickling fails.
with open('../data/05_model_input/adj_mx_short.pkl', 'wb') as adj_short_file:
    pickle.dump(A_sparse_short, adj_short_file)

In [None]:
A_sparse_short

In [5]:
# Reload the stored adjacency matrix; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code -- only load files this project wrote.
with open('../data/05_model_input/adj_mx.pkl', 'rb') as adj_file:
    A_sparse = pickle.load(adj_file)

A_sparse

Unnamed: 0,DEF0C,DEF08,DEA43,DEF07,DEA5B,DEA34,DEF0B,DEE0E,DE40F,DEF05,...,DE21C,DE251,DE943,DE24C,DE266,DEA2C,DE221,DE239,DE21L,DE715
DEF0C,1.000000,0.0,0.000000,0.966466,0.000000,0.000000,0.911064,0.0,0.0,0.833130,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEF08,0.000000,1.0,0.000000,0.000000,0.000000,0.000000,0.756653,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEA43,0.000000,0.0,1.000000,0.000000,0.757773,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEF07,0.966466,0.0,0.000000,1.000000,0.000000,0.000000,0.818730,0.0,0.0,0.845349,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
DEA5B,0.000000,0.0,0.757773,0.000000,1.000000,0.646228,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DEA2C,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.000000,0.0,1.0,0.000000,0.0,0.000000,0.0
DE221,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.719866,0.0,0.0,0.000000,0.0,0.0,1.000000,0.0,0.717727,0.0
DE239,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.687294,0.0,0.0,0.000000,1.0,0.000000,0.0
DE21L,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,...,0.963364,0.0,0.0,0.000000,0.0,0.0,0.717727,0.0,1.000000,0.0


In [8]:
targets = ['DEF07', 'DEF0C', 'DEF05', 'DEF0E', 'DEF0B']

# The row mask is built from the index and the column mask from the columns, so
# the selection stays correct even if the two axes are ever labelled or ordered
# differently. Rows and columns keep the order they have in A_sparse, not the
# order of `targets`.
A_sparse_short = A_sparse.loc[A_sparse.index.isin(targets), A_sparse.columns.isin(targets)]

# The context manager closes the file handle even if pickling fails.
with open('../data/05_model_input/adj_mx_short.pkl', 'wb') as adj_short_file:
    pickle.dump(A_sparse_short, adj_short_file)

In [9]:
A_sparse_short

Unnamed: 0,DEF0C,DEF07,DEF0B,DEF05,DEF0E
DEF0C,1.0,0.966466,0.911064,0.83313,0.764974
DEF07,0.966466,1.0,0.81873,0.845349,0.715571
DEF0B,0.911064,0.81873,1.0,0.871016,0.915358
DEF05,0.83313,0.845349,0.871016,1.0,0.942527
DEF0E,0.764974,0.715571,0.915358,0.942527,1.0
