In [16]:
import networkx as nx
import numpy as np
import os
from joblib import Parallel, delayed

In [35]:
from sklearn.semi_supervised import LabelPropagation
from sklearn.neighbors import KNeighborsClassifier

In [4]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Import Data

In [5]:
data_path = '../data/facebook/facebook_combined.txt'
graph = nx.read_edgelist(data_path)

print(f'Graph with {len(graph.nodes)} nodes and {len(graph.edges)} edges was correctly loaded')

Graph with 4039 nodes and 88234 edges was correctly loaded


# Network characteristics

In [6]:
print('Graph main characteristics :')

print(f'Nodes : {len(graph.nodes)}')
print(f'Edges : {len(graph.edges)}')
print(f'Diameter : {nx.diameter(graph)}')

Graph main characteristics :
Nodes : 4039
Edges : 88234
Diameter : 8


# Metrics

In [7]:
metrics_to_functions = {
    'degree': lambda graph : graph.degree, 
    'eigenvector_centrality': nx.eigenvector_centrality, 
    'page_rank': nx.pagerank,
    'clustering_coef': nx.clustering, 
    'closeness': nx.closeness_centrality, 
    'betweenness' : nx.betweenness_centrality
    }
## neighborhood_connectivity missing

def compute_centrality(metric, graph):
    print(f'Metric {metric} is being measured')
    return {metric: metrics_to_functions[metric](graph)}

def get_centralities(graph, metrics_to_functions):
    metric_list = Parallel(n_jobs=4)(delayed(compute_centrality)(metric, graph) for metric in metrics_to_functions.keys())
    metric_dict = {}
    for item in metric_list:
        key = list(item.keys())[0]
        metric_dict[key] = dict(item[key])
    return metric_dict

In [8]:
graph_metrics = get_centralities(graph, metrics_to_functions)

In [9]:
print(graph_metrics.keys())

dict_keys(['degree', 'eigenvector_centrality', 'page_rank', 'clustering_coef', 'closeness', 'betweenness'])


In [10]:
def get_n_maxima_for_metric(n, metric, graph_metrics) :
    temp = graph_metrics[metric].copy()
    max_keys = []
    for i in range(n):
        key = max(temp, key=temp.get)
        max_keys.append(key)
        temp[key] = 0
    return max_keys

max_degree_nodes = get_n_maxima_for_metric(10, "degree", graph_metrics)
max_eigenvector_centrality_nodes = get_n_maxima_for_metric(10, "eigenvector_centrality", graph_metrics)
max_page_rank_nodes = get_n_maxima_for_metric(10, "page_rank", graph_metrics)
max_clustering_coef_nodes = get_n_maxima_for_metric(10, "clustering_coef", graph_metrics)
max_closeness_nodes = get_n_maxima_for_metric(10, "closeness", graph_metrics)
max_betweenness_nodes = get_n_maxima_for_metric(10, "betweenness", graph_metrics)

print(f"Maximum degree nodes : {max_degree_nodes}")
print(f"Maximum eigenvector_centrality nodes : {max_eigenvector_centrality_nodes}")
print(f"Maximum page_rank nodes : {max_page_rank_nodes}")
print(f"Maximum clustering_coef nodes : {max_clustering_coef_nodes}")
print(f"Maximum closeness nodes : {max_closeness_nodes}")
print(f"Maximum betweenness nodes : {max_betweenness_nodes}")

Maximum degree nodes : ['107', '1684', '1912', '3437', '0', '2543', '2347', '1888', '1800', '1663']
Maximum eigenvector_centrality nodes : ['1912', '2266', '2206', '2233', '2464', '2142', '2218', '2078', '2123', '1993']
Maximum page_rank nodes : ['3437', '107', '1684', '0', '1912', '348', '686', '3980', '414', '698']
Maximum clustering_coef nodes : ['32', '33', '35', '42', '44', '46', '47', '52', '63', '70']
Maximum closeness nodes : ['107', '58', '428', '563', '1684', '171', '348', '483', '414', '376']
Maximum betweenness nodes : ['107', '1684', '3437', '1912', '1085', '0', '698', '567', '58', '428']


# Propagation d'une rumeur

## 1. Par random walk

In [11]:
def label_propagation_rw(graph, labeled_nodes, max_iter=100):
    """
    Label propagation using the random walk method.
    """
    # Create a dictionary of node indices to their corresponding row indices in the transition matrix.
    node_to_row = {n: i for i, n in enumerate(graph.nodes())}

    # Create transition matrix
    adjacency_matrix = nx.to_numpy_array(graph)
    row_sums = adjacency_matrix.sum(axis=1)
    transition_matrix = adjacency_matrix / row_sums[:, np.newaxis]

    # Initialize the label matrix and a mask
    Y = np.zeros((len(graph.nodes()), len(labeled_nodes)))
    mask = np.ones((len(graph.nodes()), len(labeled_nodes))) * False
    for label, nodes in labeled_nodes.items():
        for node in nodes:
            # One hot encode each label
            Y[node_to_row[node], label] = 1
            mask[node_to_row[node]] = True

    # Propagate the labels using the transition matrix.
    for i in range(max_iter):
        Y_new = transition_matrix.dot(Y)
        if np.allclose(Y, Y_new):
            break
        Y = Y_new * (1 - mask) + Y * (mask)

        # Create a dictionary of node indices to their labels.
        labels = {node: np.argmax(Y[node_to_row[node]]) for node in graph.nodes()}

        print(np.sum(list(labels.values())))

    return labels

In [12]:
labels = label_propagation_rw(
    graph, 
    {0: np.array(max_eigenvector_centrality_nodes)[0], 1: np.array(max_eigenvector_centrality_nodes)[1:]}
    )

263
765
1891
1837
2541
2659
2658
2650
2645
2645
2649
2650
2652
2660
2668
2677
2686
2694
2715
2737
2754
2777
2828
2873
2912
2926
2948
2987
3019
3037
3055
3094
3172
3196
3211
3226
3231
3235
3240
3247
3251
3252
3254
3255
3261
3265
3269
3270
3271
3272
3274
3276
3278
3286
3288
3295
3302
3314
3329
3348
3372
3391
3407
3436
3472
3581
3605
3607
3607
3608
3609
3610
3610
3612
3612
3614
3618
3628
3631
3632
3635
3639
3639
3640
3641
3641
3642
3664
3664
3664
3664
3664
3664
3664
3664
3664
3665
3665
3665
3665


In [13]:
np.sum(list(labels.values()))

3665

## 2. Par Supervised learning

In [57]:
def label_propagation_lp(graph, labeled_nodes, max_iter=1000):
    """
    Label propagation using a supervised learning approach.
    """
    # Create the feature matrix X and label vector y.
    X = np.array([graph.degree[node] for node in graph.nodes()]).reshape(-1, 1)
    y = np.array([labeled_nodes.get(node, -1) for node in graph.nodes()])

    # Propagate the labels using the label propagation algorithm.
    for i in range(max_iter):
        lp = LabelPropagation(kernel='knn', n_neighbors=5)
        lp.fit(X, y)
        y_new = lp.predict(X)
        if np.allclose(y, y_new):
            break
        y = y_new

    # Create a dictionary of node indices to their labels.
    labels = {node: lp.transduction_[i] for i, node in enumerate(graph.nodes())}

    return labels

In [53]:
def label_propagation_knn(graph, graph_metrics, labeled_nodes, max_iter=100):
    """
    Label propagation using a supervised learning approach.
    """
    # Create a dictionary of node indices to their corresponding row indices in the transition matrix.
    node_to_row = {n: i for i, n in enumerate(graph.nodes())}

    # Create the feature matrix X and label vector y and mask
    X = np.array([[graph_metrics[metric][node]  for metric in graph_metrics.keys()] for node in graph.nodes()])
    y = np.zeros((len(graph.nodes()), len(labeled_nodes)))
    mask_X = np.zeros((len(graph.nodes())), dtype=bool)
    mask_y = np.zeros((len(graph.nodes())), dtype=bool)
    for label, nodes in labeled_nodes.items():
        for node in nodes:
            y[node_to_row[node], label] = 1
            mask_X[node_to_row[node]] = True
            mask_y[node_to_row[node]] = True

    # First learning iteration is specific so we do it outside the loop
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X[mask_X], y[mask_y])
    y = model.predict(X) * ~np.transpose(np.array([mask_y, mask_y])) + y * np.transpose(np.array([mask_y, mask_y]))

    # Propagate the labels using the label propagation algorithm.
    for i in range(1, max_iter):
        model = KNeighborsClassifier(n_neighbors=5)
        model.fit(X, y)
        y_new = model.predict(X)
        if np.allclose(y, y_new):
            break
        y = y_new * ~np.transpose(np.array([mask_y, mask_y])) + y * np.transpose(np.array([mask_y, mask_y]))

    # Create a dictionary of node indices to their labels.
    labels = {node: y[i] for i, node in enumerate(graph.nodes())}

    return labels

In [54]:
labels = label_propagation_knn(
    graph,
    graph_metrics, 
    {0: np.array(max_eigenvector_centrality_nodes)[[0, 9, 8, 7, 6]], 1: np.array(max_eigenvector_centrality_nodes)[[1, 2, 3, 4, 5]]}
    )

In [55]:
np.sum(list(labels.values()))

4039.0

In [56]:
print(labels)

{'0': array([0., 1.]), '1': array([1., 0.]), '2': array([1., 0.]), '3': array([1., 0.]), '4': array([1., 0.]), '5': array([1., 0.]), '6': array([1., 0.]), '7': array([1., 0.]), '8': array([1., 0.]), '9': array([1., 0.]), '10': array([1., 0.]), '11': array([1., 0.]), '12': array([1., 0.]), '13': array([1., 0.]), '14': array([1., 0.]), '15': array([1., 0.]), '16': array([1., 0.]), '17': array([1., 0.]), '18': array([1., 0.]), '19': array([1., 0.]), '20': array([1., 0.]), '21': array([1., 0.]), '22': array([1., 0.]), '23': array([1., 0.]), '24': array([1., 0.]), '25': array([1., 0.]), '26': array([1., 0.]), '27': array([1., 0.]), '28': array([1., 0.]), '29': array([1., 0.]), '30': array([1., 0.]), '31': array([1., 0.]), '32': array([1., 0.]), '33': array([1., 0.]), '34': array([1., 0.]), '35': array([1., 0.]), '36': array([1., 0.]), '37': array([1., 0.]), '38': array([1., 0.]), '39': array([1., 0.]), '40': array([1., 0.]), '41': array([1., 0.]), '42': array([1., 0.]), '43': array([1., 0.]