In [13]:
import networkx as nx
import pandas as pd
import fastdtw
import numpy as np

In [2]:
# Load the full edge list. The cells below read its "origin", "destination" and "weight_scaled" columns.
df_main = pd.read_excel("../../Data/edgelist/UN_full.xlsx")

In [3]:
# One (source, target, attribute-dict) triple per row, the form networkx accepts for weighted edges.
edge_rows = df_main[["origin", "destination", "weight_scaled"]].values
edgelist = [(src, dst, {"weight": w}) for src, dst, w in edge_rows]

I will use the directed graph to calculate in- and out-degree, but I will use an undirected graph to retrieve the neighbourhoods of each node.

In [4]:
# Directed graph: keeps the origin -> destination direction so in- and out-degree can be told apart.
G = nx.DiGraph(edgelist)

In [5]:
# Undirected graph built from the same edge list; used for the diameter and the hop-distance neighbourhoods.
G_UD = nx.Graph(edgelist)

In [50]:
# Diameter of the undirected graph: the largest hop distance between any two nodes,
# i.e. the maximum number of neighbourhood layers a node can have.
nx.diameter(G_UD)

3

From the adjacency matrix I can calculate each node's in-degree from the corresponding column and its out-degree from the corresponding row.

In [6]:
# Unweighted adjacency matrix of the directed graph: entry (i, j) is 1 when there is an edge from node i to node j.
# Rows and columns follow the order of G.nodes() (the networkx default when no nodelist is given).
A = nx.adjacency_matrix(G, weight=None) # All cells are either 0 or 1 to support counting for degree

In [7]:
# Matrix index -> node label, in the same order as the rows/columns of the adjacency matrix.
ind2node = dict(enumerate(G.nodes()))

In [8]:
# For every node: in-degree is the sum of its column, out-degree the sum of its row
# of the 0/1 adjacency matrix.
degreeDict = {
    ind2node[idx]: {
        "inDegree": A[:, [idx]].sum(),
        "outDegree": A[[idx], :].sum(),
    }
    for idx in range(A.shape[0])
}

As a preprocessing step, I will create a dictionary with the neighborhoods of all nodes.

The format is {origin node : {layer_1:\[neighbors\], ... , layer_n:\[neighbors\]}}

This makes it possible to quickly get all neighbors within a given layer. This can also be converted to the degree vectors for in and out degree.

In [9]:
# {source node: {target node: hop distance}} for every pair of connected nodes in the undirected graph.
all_shortest_paths = dict(nx.all_pairs_shortest_path_length(G_UD))

In [10]:
# Group every node's reachable nodes by their hop distance (layer) from that node.
neighborhood_by_layer = {}
for origin_node, neighbors in all_shortest_paths.items():
    layers = neighborhood_by_layer[origin_node] = {}
    for node, layer in neighbors.items():
        if layer != 0:  # distance 0 is the origin node itself, which is not its own neighbor
            layers.setdefault(layer, []).append(node)

Using the same structure as above, we replace each neighborhood with an in-degree array and an out-degree array. The arrays of a layer always incorporate the arrays of the previous layers, so that every array includes all nodes _within_ a given layer.

In [19]:
# Cumulative degree vectors: degree_vectors[node][k] holds the in-/out-degrees of all
# nodes at hop distance 1..k from `node` (layer k-1's vectors followed by layer k's own).
degree_vectors = {}

for origin_node, neighborhoods in neighborhood_by_layer.items():
    per_layer = degree_vectors[origin_node] = {}
    for layer, nodes in neighborhoods.items():
        in_degrees = [degreeDict[node]["inDegree"] for node in nodes]
        out_degrees = [degreeDict[node]["outDegree"] for node in nodes]
        if layer != 1:
            # Prepend everything collected up to the previous layer; this relies on
            # the layers being visited in increasing order.
            previous = per_layer[layer - 1]
            in_degrees = previous["in"] + in_degrees
            out_degrees = previous["out"] + out_degrees
        per_layer[layer] = {"in": in_degrees, "out": out_degrees}

## Calculating distances between node pairs combining in- and out-degree

In [117]:
# I'll define two nodes as an example
# The two nodes whose neighbourhoods are compared in this example
v0 = "SWE"
v1 = "DNK"
# Layer (number of hops) whose cumulative degree vectors are compared
n_steps = 1

In [118]:
# Degree sequences of both nodes' n_steps-neighbourhoods, ordered from largest to smallest degree.
layer_v0 = degree_vectors[v0][n_steps]
layer_v1 = degree_vectors[v1][n_steps]

arr0_in = np.array(sorted(layer_v0["in"], reverse=True))
arr0_out = np.array(sorted(layer_v0["out"], reverse=True))
arr1_in = np.array(sorted(layer_v1["in"], reverse=True))
arr1_out = np.array(sorted(layer_v1["out"], reverse=True))

In [119]:
def d_func(a,b):
    '''
    Relative distance between two positive degree values.

    Computes ``max(a, b) / min(a, b) - 1``: 0.0 when both values are equal,
    and growing with the ratio between the larger and the smaller value.

    Parameters
    ----------
    a, b : scalar or one-element array
        The values to compare. fastdtw hands each observation over as a
        one-element array, so both forms are accepted. Values must be
        positive for the ratio to be meaningful.

    Returns
    -------
    float
        The relative distance as a plain Python float.

    Raises
    ------
    ValueError
        If ``a`` or ``b`` holds more than one element.
    ZeroDivisionError
        If the smaller of the two values is 0.
    '''
    # Unwrap to Python scalars first: calling float() on an array with ndim > 0
    # is deprecated in recent NumPy versions and emits a warning on every call.
    a = np.asarray(a, dtype=float).item()
    b = np.asarray(b, dtype=float).item()
    return max(a, b) / min(a, b) - 1

In [120]:
# Turn every degree sequence into a column vector of shape (n, 1).
arr0_in = arr0_in.reshape(-1, 1)
arr1_in = arr1_in.reshape(-1, 1)
arr0_out = arr0_out.reshape(-1, 1)
arr1_out = arr1_out.reshape(-1, 1)

In [121]:
# Shift every degree by one (result is a float array) so that d_func never divides by
# a degree of zero. NOTE: this overwrites the arrays, so re-running this cell shifts them again.
arr0_in = arr0_in + 1.0
arr1_in = arr1_in + 1.0
arr0_out = arr0_out + 1.0
arr1_out = arr1_out + 1.0

In [122]:
# Dynamic time warping between the two in-degree sequences, with d_func as the point-wise cost.
# The call returns two values: the accumulated distance and (per the fastdtw README) the warping path.
dist_in, conv_vect = fastdtw.fastdtw(arr0_in, arr1_in, dist=d_func)

  return float((max(a,b)/min(a,b))-1)


In [123]:
# Same comparison for the out-degree sequences. conv_vect is overwritten here and
# is not used again in the cells shown.
dist_out, conv_vect = fastdtw.fastdtw(arr0_out, arr1_out, dist=d_func)

  return float((max(a,b)/min(a,b))-1)


In [124]:
# Warping distance between the in-degree sequences of v0 and v1
dist_in

0.1494588744588743

In [125]:
# Warping distance between the out-degree sequences of v0 and v1
dist_out

0.32415240343629104

In [126]:
# Combine both distances into a single similarity score: average them and map through exp(-x),
# so a mean distance of 0 gives 1 and larger distances decay towards 0.
np.exp(-np.mean([dist_in, dist_out]))

0.7891446520982162