In [95]:
import networkx as nx
import numpy as np
import os
from joblib import Parallel, delayed

In [96]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Import Data

In [97]:
data_path = '../data/facebook/facebook_combined.txt'
graph = nx.read_edgelist(data_path)

print(f'Graph with {len(graph.nodes)} nodes and {len(graph.edges)} edges was correctly loaded')

Graph with 4039 nodes and 88234 edges was correctly loaded


# Network characteristics

In [98]:
print('Graph main characteristics :')

print(f'Nodes : {len(graph.nodes)}')
print(f'Edges : {len(graph.edges)}')
print(f'Diameter : {nx.diameter(graph)}')

Graph main characteristics :
Nodes : 4039
Edges : 88234
Diameter : 8


# Metrics

In [99]:
metrics_to_functions = {
    'degree': lambda graph : graph.degree, 
    'eigenvector_centrality': nx.eigenvector_centrality, 
    'page_rank': nx.pagerank,
    'clustering_coef': nx.clustering, 
    'closeness': nx.closeness_centrality, 
    'betweenness' : nx.betweenness_centrality
    }
## neighborhood_connectivity missing

def compute_centrality(metric, graph):
    print(f'Metric {metric} is being measured')
    return {metric: metrics_to_functions[metric](graph)}

def get_centralities(graph, metrics_to_functions):
    metric_list = Parallel(n_jobs=4)(delayed(compute_centrality)(metric, graph) for metric in metrics_to_functions.keys())
    metric_dict = {}
    for item in metric_list:
        key = list(item.keys())[0]
        metric_dict[key] = dict(item[key])
    return metric_dict

In [100]:
graph_metrics = get_centralities(graph, metrics_to_functions)

In [101]:
print(graph_metrics.keys())

dict_keys(['degree', 'eigenvector_centrality', 'page_rank', 'clustering_coef', 'closeness', 'betweenness'])


In [102]:
def get_n_maxima_for_metric(n, metric, graph_metrics) :
    temp = graph_metrics[metric].copy()
    max_keys = []
    for i in range(n):
        key = max(temp, key=temp.get)
        max_keys.append(key)
        temp[key] = 0
    return max_keys

max_degree_nodes = get_n_maxima_for_metric(10, "degree", graph_metrics)
max_eigenvector_centrality_nodes = get_n_maxima_for_metric(10, "eigenvector_centrality", graph_metrics)
max_page_rank_nodes = get_n_maxima_for_metric(10, "page_rank", graph_metrics)
max_clustering_coef_nodes = get_n_maxima_for_metric(10, "clustering_coef", graph_metrics)
max_closeness_nodes = get_n_maxima_for_metric(10, "closeness", graph_metrics)
max_betweenness_nodes = get_n_maxima_for_metric(10, "betweenness", graph_metrics)

print(f"Maximum degree nodes : {max_degree_nodes}")
print(f"Maximum eigenvector_centrality nodes : {max_eigenvector_centrality_nodes}")
print(f"Maximum page_rank nodes : {max_page_rank_nodes}")
print(f"Maximum clustering_coef nodes : {max_clustering_coef_nodes}")
print(f"Maximum closeness nodes : {max_closeness_nodes}")
print(f"Maximum betweenness nodes : {max_betweenness_nodes}")

Maximum degree nodes : ['107', '1684', '1912', '3437', '0', '2543', '2347', '1888', '1800', '1663']
Maximum eigenvector_centrality nodes : ['1912', '2266', '2206', '2233', '2464', '2142', '2218', '2078', '2123', '1993']
Maximum page_rank nodes : ['3437', '107', '1684', '0', '1912', '348', '686', '3980', '414', '698']
Maximum clustering_coef nodes : ['32', '33', '35', '42', '44', '46', '47', '52', '63', '70']
Maximum closeness nodes : ['107', '58', '428', '563', '1684', '171', '348', '483', '414', '376']
Maximum betweenness nodes : ['107', '1684', '3437', '1912', '1085', '0', '698', '567', '58', '428']


# Propagation d'une rumeur

In [112]:
def label_propagation(graph, labeled_nodes, max_iter=2):
    """
    Label propagation using the random walk method.

    Parameters
    ----------
    G : NetworkX graph
        The graph to run label propagation on.
    labeled_nodes : dict
        A dictionary containing the labels of some nodes. The keys are the node indices and the values are the labels.
    max_iter : int, optional (default=1000)
        The maximum number of iterations to run the algorithm.
    alpha : float, optional (default=0.1)
        The weight given to the original label during the propagation.

    Returns
    -------
    labels : dict
        A dictionary containing the labels of all nodes. The keys are the node indices and the values are the labels.
    """
    # Create a dictionary of node indices to their corresponding row indices in the transition matrix.
    node_to_row = {n: i for i, n in enumerate(graph.nodes())}

    # Create transition matrix
    adjacency_matrix = nx.to_numpy_array(graph)
    row_sums = adjacency_matrix.sum(axis=1)
    transition_matrix = adjacency_matrix / row_sums[:, np.newaxis]

    # Initialize the label matrix.
    Y = np.zeros((len(graph.nodes()), len(labeled_nodes)))
    for i, (node, label) in enumerate(labeled_nodes.items()):
        Y[node_to_row[node], i] = label

    # Propagate the labels using the transition matrix.
    for i in range(max_iter):
        Y_new = transition_matrix.dot(Y)
        if np.allclose(Y, Y_new):
            break
        Y = Y_new

    # Create a dictionary of node indices to their labels.
    labels = {node: np.argmax(Y[node_to_row[node]]) for node in graph.nodes()}

    return labels

In [113]:
node_0 = max_eigenvector_centrality_nodes[0]
node_1 = max_eigenvector_centrality_nodes[1]
labels = label_propagation(graph, {node_0: 0, node_1: 1})

In [114]:
np.sum(list(labels.values()))

766