In [None]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from operator import itemgetter
import random
from collections import OrderedDict 
from tqdm import trange
%matplotlib inline

# Network Robustness Analysis

Questions: 
- How many nodes can we remove while the network preserves its functioning condition?
- How many nodes do we need to remove to fragment the network into isolated components?

**Notes:**
- The attack order is computed once beforehand instead of being recomputed on the fly after every removal, because recomputing it drastically increased computation time.
- Open question: should we take avg. or max. values?

In [None]:
G = nx.read_gml('Graphs/airlines.gml')
# number of removal steps (upper bound % of nodes to be removed)
REMOVAL_COUNT = 100
# nodes removed per step: 1% of the network, rounded down
# (the original note reports 14 leftover nodes for this dataset)
REMOVAL_SIZE = len(G) // REMOVAL_COUNT
# The last "batch" also has to take the nodes lost to rounding down, i.e.
# len(G) - REMOVAL_COUNT * REMOVAL_SIZE == len(G) % REMOVAL_COUNT.
# `len(G) % REMOVAL_SIZE` only equals that remainder while it is smaller than
# REMOVAL_SIZE, and it divides by zero for graphs with fewer than
# REMOVAL_COUNT nodes.
LAST_REMOVAL_SIZE = REMOVAL_SIZE + len(G) % REMOVAL_COUNT

In [None]:
def percentage_of_removed_links(G):
    """Return one percent of ``len(G)``, truncated to an integer.

    Note: despite the name, the value is derived from ``len(G)`` (for a
    networkx graph that is the number of nodes), not from the edge count.
    """
    one_percent = len(G) / 100
    return int(one_percent)

def get_by_index(sorted_dict, i):
    """Return the first element (the key) of the ``i``-th entry of ``sorted_dict``.

    ``sorted_dict`` is indexed by position, so it is expected to be a sequence
    of ``(key, value)`` pairs such as the result of ``sorted(d.items(), ...)``.
    """
    entry = sorted_dict[i]
    return entry[0]
 

def _network_statistics(graph, node_size):
    """Compute the summary statistics tracked during an attack.

    Parameters
    ----------
    graph : networkx graph
        Current (partially attacked) network. In/out degree and strongly
        connected components are queried, so a directed graph is expected.
    node_size : int
        Node count of the original network; the size of the largest strongly
        connected component is reported relative to it.

    Returns
    -------
    tuple
        ``(avg. in-degree, avg. out-degree, relative largest SCC size,
        avg. closeness, avg. betweenness, avg. clustering,
        avg. eigenvector centrality)``. All zeros for an empty graph.
    """
    n_nodes = graph.number_of_nodes()
    if n_nodes == 0:
        # avoid division by zero (and max() over an empty sequence)
        return 0, 0, 0, 0, 0, 0, 0

    # compute avg. degree
    in_degree = sum(d for _, d in graph.in_degree()) / float(n_nodes)
    out_degree = sum(d for _, d in graph.out_degree()) / float(n_nodes)
    # clustering
    clustering = nx.average_clustering(graph)
    # largest strongly connected component (relative to the original number of nodes)
    scc = len(max(nx.strongly_connected_components(graph), key=len)) / node_size
    # closeness
    closeness = np.average(list(nx.closeness_centrality(graph).values()))
    # betweenness
    betweenness = np.average(list(nx.betweenness_centrality(graph).values()))
    # eigenvector centrality: because of convergence issues we use the numpy method
    eigvec = np.average(list(nx.eigenvector_centrality_numpy(graph).values()))
    return in_degree, out_degree, scc, closeness, betweenness, clustering, eigvec


def robustness_attack(g, sorted_dict, removal_size, img_name):
    """Remove nodes from a copy of ``g`` in a fixed order and track network statistics.

    The attack runs for ``REMOVAL_COUNT`` steps. Every step removes
    ``removal_size`` nodes, except the final one, which removes
    ``LAST_REMOVAL_SIZE`` nodes. Statistics are recorded once before the first
    removal (step 0) and once after every step. The three-panel figure is
    saved to ``'Figures/robustness/' + img_name``.

    Parameters
    ----------
    g : networkx graph
        Network to attack; it is copied, the caller's graph is not modified.
    sorted_dict : sequence of (node, value) pairs
        Attack order. Only the node of each pair is used; nodes are removed
        from index 0 upwards.
    removal_size : int
        Number of nodes removed in every step but the last.
    img_name : str
        File name of the saved figure inside ``Figures/robustness/``.

    Returns
    -------
    tuple of lists
        ``(avg_in_degree, avg_out_degree, SCC, avg_closeness, avg_betweenness,
        avg_clustering, avg_eigvec_centrality, num_of_steps)``, each holding
        ``REMOVAL_COUNT + 1`` entries.

    Raises
    ------
    IndexError
        If the configured batch sizes ask for more nodes than ``sorted_dict``
        contains.
    """
    import os  # local import: only needed to make sure the output folder exists

    graph = g.copy()
    node_idx = 0
    node_size = len(g)

    # network metrics, kept in the order returned by _network_statistics
    avg_in_degree = []
    avg_out_degree = []
    SCC = []
    avg_closeness = []
    avg_betweenness = []
    avg_clustering = []
    avg_eigvec_centrality = []
    num_of_steps = []
    all_series = (avg_in_degree, avg_out_degree, SCC, avg_closeness,
                  avg_betweenness, avg_clustering, avg_eigvec_centrality)

    def record(step):
        # append the current statistics of `graph` to every metric series
        for series, value in zip(all_series, _network_statistics(graph, node_size)):
            series.append(value)
        num_of_steps.append(step)

    # initial network statistics (step 0, nothing removed yet)
    record(0)

    for percentage in trange(1, REMOVAL_COUNT + 1):
        # last batch needs to remove all remaining nodes; compare against
        # REMOVAL_COUNT (the loop bound) rather than a literal 100
        if percentage == REMOVAL_COUNT:
            delete_size = LAST_REMOVAL_SIZE
        else:
            delete_size = removal_size

        # remove nodes in attack order
        for _ in range(delete_size):
            graph.remove_node(get_by_index(sorted_dict, node_idx))
            node_idx += 1

        # compute network statistics and keep track of steps
        record(percentage)

    # plot statistics
    fig, axs = plt.subplots(3, figsize=(6,12))
    fig.suptitle('Network statistics after attack', y=0.9)
    axs[0].plot(num_of_steps, avg_in_degree, color='blue', label='avg. in degree')
    axs[0].plot(num_of_steps, avg_out_degree, color='cyan', linestyle='--', label='avg. out degree')
    axs[0].set_ylabel('<k>')
    axs[1].plot(num_of_steps, SCC, color='red', label='SCC')
    axs[1].set_ylabel('norm. SCC size')
    axs[2].plot(num_of_steps, avg_closeness, color='green', label='avg. closeness')
    axs[2].plot(num_of_steps, avg_betweenness, color='orange', label='avg. betweenness')
    axs[2].plot(num_of_steps, avg_clustering, color='purple', label='avg. clustering')
    axs[2].plot(num_of_steps, avg_eigvec_centrality, color='brown', label='avg. eigvec. centr.')
    axs[2].set_ylabel('CC,CB,C,EC')
    # settings shared by all three panels
    for ax in axs:
        ax.set_xlabel('% of removed nodes')
        ax.set_yscale('log')
        ax.legend()
    # make sure the target folder exists so the figure is not lost after the
    # (long) computation above
    os.makedirs('Figures/robustness/', exist_ok=True)
    fig.savefig('Figures/robustness/' + img_name)
    fig.show()
    return avg_in_degree, avg_out_degree, SCC, avg_closeness, avg_betweenness, avg_clustering, avg_eigvec_centrality, num_of_steps

## Random Attacks

In [None]:
# load network
G = nx.read_gml('Graphs/airlines.gml')
# load node dict (node -> node data as exposed by G.nodes())
G_dict = dict(G.nodes())
# Fixed seed so the random removal order, and therefore the saved figure,
# is reproducible across kernel restarts. A dedicated Random instance keeps
# the global `random` state untouched.
RANDOM_ATTACK_SEED = 42
# shuffle the node keys
key_list = list(G_dict)
random.Random(RANDOM_ATTACK_SEED).shuffle(key_list)
# (node, data) pairs in shuffled order; this is the attack order
sorted_list = [(key, G_dict[key]) for key in key_list]
# random attack
rdm_id, rdm_od, rdm_SCC, rdm_cls, rdm_bet, rdm_clst, rdm_ev, rdm_steps = robustness_attack(G, sorted_list, REMOVAL_SIZE, 'random.pdf')

## Targeted Attack

### Betweenness Centrality

In [None]:
# (re)load the network from disk
G = nx.read_gml('Graphs/airlines.gml')
# Optimisation: rank the nodes by betweenness centrality once, up front,
# instead of recomputing the ranking continuously on the fly
sorted_betweenness = sorted(
    nx.betweenness_centrality(G).items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)
# betweenness attack: nodes are removed from highest to lowest betweenness
(bet_id, bet_od, bet_SCC, bet_cls,
 bet_bet, bet_clst, bet_ev, bet_steps) = robustness_attack(
    G, sorted_betweenness, REMOVAL_SIZE, 'betweenness.pdf')

### Closeness Centrality

In [None]:
# (re)load the network from disk
G = nx.read_gml('Graphs/airlines.gml')
# Optimisation: rank the nodes by closeness centrality once, up front,
# instead of recomputing the ranking continuously on the fly
sorted_closeness = sorted(
    nx.closeness_centrality(G).items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)
# closeness attack: nodes are removed from highest to lowest closeness
(clo_id, clo_od, clo_SCC, clo_cls,
 clo_bet, clo_clst, clo_ev, clo_steps) = robustness_attack(
    G, sorted_closeness, REMOVAL_SIZE, 'closeness.pdf')

### Degree

In [None]:
# (re)load the network from disk
G = nx.read_gml('Graphs/airlines.gml')
# Optimisation: rank the nodes by degree once, up front,
# instead of recomputing the ranking continuously on the fly
sorted_degree_dict = sorted(
    dict(nx.degree(G)).items(),
    key=lambda node_degree: node_degree[1],
    reverse=True,
)
# degree attack: nodes are removed from highest to lowest degree
(deg_id, deg_od, deg_SCC, deg_cls,
 deg_bet, deg_clst, deg_ev, deg_steps) = robustness_attack(
    G, sorted_degree_dict, REMOVAL_SIZE, 'degree.pdf')

### Eigenvector Centrality

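*Note: NetworkX's default power-iteration solver (`nx.eigenvector_centrality`) does not converge on this network, so the NumPy-based `nx.eigenvector_centrality_numpy` is used instead.*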

In [None]:
# (re)load the network from disk
G = nx.read_gml('Graphs/airlines.gml')
# Optimisation: rank the nodes by eigenvector centrality once, up front,
# instead of recomputing the ranking continuously on the fly
sorted_eigvec_dict = sorted(
    nx.eigenvector_centrality_numpy(G).items(),
    key=lambda node_score: node_score[1],
    reverse=True,
)
# eigenvector centrality attack: nodes are removed from highest to lowest centrality
(evc_id, evc_od, evc_SCC, evc_cls,
 evc_bet, evc_clst, evc_ev, evc_steps) = robustness_attack(
    G, sorted_eigvec_dict, REMOVAL_SIZE, 'eigvec.pdf')

# Attack Comparison

In [None]:
# One entry per attack strategy: (legend label, line colour, x values, metric series).
# The metric series follow the return order of robustness_attack:
# in degree, out degree, SCC, closeness, betweenness, clustering, eigenvector centrality.
attack_results = [
    ('random', 'blue', rdm_steps,
     (rdm_id, rdm_od, rdm_SCC, rdm_cls, rdm_bet, rdm_clst, rdm_ev)),
    ('betweenness', 'green', bet_steps,
     (bet_id, bet_od, bet_SCC, bet_cls, bet_bet, bet_clst, bet_ev)),
    ('closeness', 'red', clo_steps,
     (clo_id, clo_od, clo_SCC, clo_cls, clo_bet, clo_clst, clo_ev)),
    ('degree', 'purple', deg_steps,
     (deg_id, deg_od, deg_SCC, deg_cls, deg_bet, deg_clst, deg_ev)),
    ('eigenvector', 'orange', evc_steps,
     (evc_id, evc_od, evc_SCC, evc_cls, evc_bet, evc_clst, evc_ev)),
]
# y-axis label of each panel, in the same metric order as above.
# Clustering is labelled 'C' (as in the per-attack figure) so that it is not
# confused with the closeness panel, which is labelled 'CC'.
panel_ylabels = ['$<k_{in}>$', '$<k_{out}>$', 'SCC', 'CC', 'BC', 'C', 'EC']

fig, axs = plt.subplots(len(panel_ylabels), figsize=(8,16))
fig.suptitle('Network attack comparison', y=0.90)
for metric_idx, (ax, ylabel) in enumerate(zip(axs, panel_ylabels)):
    for attack_label, attack_color, attack_steps, attack_series in attack_results:
        ax.plot(attack_steps, attack_series[metric_idx], color=attack_color, label=attack_label)
    ax.set_ylabel(ylabel)
    ax.set_yscale('log')
    # shrink the panel horizontally to leave room for the shared legend on the right
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# only the bottom panel carries the x-axis label and the (shared) legend
axs[-1].set_xlabel('% of removed nodes')
axs[-1].legend(bbox_to_anchor=(1.05, 4.4), loc=2, borderaxespad=0.)
fig.savefig('Figures/robustness/comparison.pdf')
fig.show()