In [1]:
import trees as t
from collections import deque 
import math
from tqdm import tqdm

In [2]:
f = 'a'
graph = t.build(f, '2016-01-01', '2016-01-02')
print(graph)




# Functionality 2

First, we define shortest_path function, using the Dijkstra’s Algorithm:

<p>1) We set a the starting node.<p>
<p>2) We set the distances between starting node and all other nodes to infinity, except for the distance between starting node and itself, which we set to 0.<p>
<p>3) We choose the node with the smallest value as the “current node” and visit all of its neighboring nodes. As we visit each neighbor, we update their tentative distance from the starting node.<p>
<p>4) Once we visit all of the current node’s neighbors and update their distances, we mark the current node as “visited.” Marking a node as “visited” means that we’ve arrived at its final cost.<p>
<p>5) We go back to step one. The algorithm loops until it visits all the nodes in the graph.<p> 

In [3]:
def shortest_path(graph, s, t):
    
    if s == t:
        return "There is no path, source and target nodes are the same", -1
    
    visited, shortest_path, predecessor = list(), dict(), dict()
    
    # second point
    for node in graph.nodes():
        shortest_path[node] = math.inf
        visited.append(node)
    shortest_path[s] = 0
    
    while visited:                          
        current_node = None
        for node in visited: 
            if current_node == None:
                current_node = node
            elif shortest_path[node] < shortest_path[current_node]:
                current_node = node
        for neighbor in graph.neighbors(current_node):
            value = shortest_path[current_node] + graph[current_node][neighbor]['weight']
            if value < shortest_path[neighbor]:
                shortest_path[neighbor] = value
                predecessor[neighbor] = current_node
 
        visited.remove(current_node)
    
    # now we have to return the path using predecessor dictionary
    if t not in predecessor:
        return "Not possible, there is no path between target and source", -1
    last = t
    path = list([last])
    while last != s:
        path.append(predecessor[last])
        last = predecessor[last]
        
    return path, shortest_path[t]  

In [4]:
print(shortest_path(graph, 5730934, 1507691))
print(shortest_path(graph, 5730934, 5730934))
print(shortest_path(graph, 5730934, 3519202))
print(shortest_path(graph, 1507691, 5730934))

([1507691, 1172798, 5734198, 5730934], 4.0)
('There is no path, source and target nodes are the same', -1)
('Not possible, there is no path between target and source', -1)
([5730934, 1172798, 4046367, 1507691], 5.0)


### Betweenness centrality
Let $n_{s,t}^{i}$ be the number of shortest paths from $s$ to $t$ that pass through $i$ and let $n_{s,t}$ be the total number of shortest paths from $s$ to $t$. Then the betweenness centrality of vertex $i$ is:

$\displaystyle{b_i = \sum_{s, t} w_{s,t}^{i} = \sum_{s, t} \frac{n_{s,t}^{i}}{n_{s,t}}}$

In [5]:
def betweenness(v, graph):
    num = 0
    den = 0
    for source in tqdm(graph.nodes()):
        for target in graph.nodes():
            if source != target and source != v:
                path, dist = shortest_path(graph, source, target)
                if dist != -1:
                    den += 1
                    if v in path:
                        num += 1
    if den == 0:
        den = 1
    betweenness_centrality = num/den
    return betweenness_centrality          

In [None]:
print(betweenness(5730934, graph))

<p>I add this function betweenness_prova to understand if the code was right without running the betweenness centrality on all the graph, because this takes a lot of time! I run the betweenness considering only six nodes of the graph, so pretending in the graph there are only these six nodes: $[1507691, 1172798, 5734198, 5730934, 4046367, 3519202]$<p>


In [8]:
nodes = list([1507691, 1172798, 5734198, 5730934, 4046367, 3519202])
def betweenness_prova(v, graph):
    num = 0
    den = 0
    for source in tqdm(nodes):
        for target in nodes:
            if source != target and source != v:
                path, dist = shortest_path(graph, source, target)
                if dist != -1:
                    den += 1
                    if v in path:
                        num += 1
    if den == 0:
        den = 1 
    betweenness_centrality = num/den
    return betweenness_centrality 

In [9]:
print(betweenness_prova(3519202, graph))

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [03:03<00:00, 30.65s/it]

0.0





In [10]:
print(betweenness_prova(1507691, graph))

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [03:00<00:00, 30.09s/it]

0.25





In [11]:
print(betweenness_prova(5734198, graph))

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:57<00:00, 29.51s/it]

0.4375





In [12]:
print(betweenness_prova(1172798, graph))

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:56<00:00, 29.49s/it]

0.8125





In [13]:
print(betweenness_prova(5730934, graph))

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [02:58<00:00, 29.72s/it]

0.25





### Closeness centrality
$C(v)={\frac  {N-1}{\sum _{u}d(u,v)}}$
- N is the number of total nodes in the graph
- $d(u,v)$ is the distance between the nodes u and v

In [16]:
N = len(list(graph.nodes()))

In [24]:
def closeness(v, graph):
    count = 0
    for node in tqdm(graph.nodes()):
        path, dist = shortest_path(graph, v, node)
        if dist != -1:
            count += dist
    if count == 0:
        return "There is no path between the node in input and all other nodes in the graph!"
    
    closeness_centrality = (N - 1) / count
    return closeness_centrality

In [None]:
print(closeness(5730934, graph))

In [25]:
def closeness_prova(v, graph):
    count = 0
    for node in tqdm(nodes):
        path, dist = shortest_path(graph, v, node)
        if dist != -1:
            count += dist
    if count == 0:
        return "There is no path between the node in input and all other nodes in the graph!"
    
    closeness_centrality = (6 - 1) / count
    return closeness_centrality

In [26]:
print(closeness_prova(5730934, graph))

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:37<00:00,  6.20s/it]

0.3333333333333333





### Degree Centrality

$D(v)={\frac  {degree(v)}{N-1}}$

In [20]:
def degree_centrality(v, graph):
    degree_centrality = graph.degree(v) / (N-1)
    return degree_centrality

In [21]:
print(degree_centrality(5730934, graph))

0.0005077688636132832


### Page rank
The algorithm steps are:
- Initialize the PageRank of every node with a value of 1
- For each iteration, update the PageRank of every node in the graph
- The new PageRank is the sum of the proportional rank of all of its parents
- Apply random walk to the new PageRank
- PageRank value will converge after enough iterations

In [None]:
def pagerank_centrality(v, graph, alpha, max_iter):
    for node in tqdm(graph.nodes()):
        
    