In [1]:
import pandas as pd
import networkx as nx
import random
import plotly.graph_objects as go
from collections import Counter
import numpy as np

In [2]:
def _load_layer(csv_path, column_names):
    """Read a headerless, ';'-separated CSV and label its columns.

    Args:
        csv_path: Location of the CSV file.
        column_names: Names assigned to the columns, in file order.

    Returns:
        The loaded DataFrame with `column_names` as its columns.
    """
    frame = pd.read_csv(csv_path, sep=';', header=None)
    frame.columns = column_names
    return frame


# One raw event table per layer; campaigns carries an extra leading id
# column and transactions an extra trailing amount column.
campaigns = _load_layer('data/campaigns.csv', ['campaign_id', 'timestamp', 'sender_id', 'receiver_id'])
friends = _load_layer('data/friends.csv', ['timestamp', 'sender_id', 'receiver_id'])
messages = _load_layer('data/messages.csv', ['timestamp', 'sender_id', 'receiver_id'])
transactions = _load_layer('data/transactions.csv', ['timestamp', 'sender_id', 'receiver_id', 'amount'])
visits = _load_layer('data/visits.csv', ['timestamp', 'sender_id', 'receiver_id'])

In [3]:
# Source CSV for each interaction layer, keyed by layer name.
file_paths = dict([
    ("friends", "./data/friends.csv"),
    ("messages", "./data/messages.csv"),
    ("transactions", "./data/transactions.csv"),
    ("visits", "./data/visits.csv"),
    ("campaigns", "./data/campaigns.csv"),
])

# Tunable analysis parameters.
THRESHOLD = 1         # minimum occurrence count for an edge to be kept
SAMPLE_SIZE = 10_000  # upper bound on the number of nodes sampled from the graph
BC_K = 1_000          # `k` handed to the betweenness-centrality computation

In [4]:
def extract_edges(path, directed=True):
    """Read one layer's CSV and return its (sender_id, receiver_id) pairs.

    The layer, and therefore the column layout, is recognised from the
    file name only. Directory components are ignored so that a parent
    folder whose name happens to contain a layer keyword cannot select
    the wrong layout.

    Args:
        path: Location of a headerless, ';'-separated CSV (str or
            os.PathLike). Its file name must contain one of 'campaigns',
            'friends', 'messages', 'visits' or 'transactions'.
        directed: Only consulted for 'friends' files. When False, every
            pair is also emitted reversed so both directions are present.

    Returns:
        A list of (sender_id, receiver_id) tuples in file order, followed
        by the reversed pairs when they are requested. An empty list when
        the file name matches no known layer.
    """
    import os  # local import keeps this cell runnable on its own

    df = pd.read_csv(path, sep=';', header=None)

    base_columns = ['timestamp', 'sender_id', 'receiver_id']
    # Checked in this order, so a name matching several keywords resolves
    # to the first one listed.
    layer_columns = {
        'campaigns': ['campaign_id'] + base_columns,
        'friends': base_columns,
        'messages': base_columns,
        'visits': base_columns,
        'transactions': base_columns + ['amount'],
    }

    file_name = os.path.basename(os.fspath(path))
    layer = next((name for name in layer_columns if name in file_name), None)
    if layer is None:
        return []

    df.columns = layer_columns[layer]
    edges = list(zip(df['sender_id'], df['receiver_id']))
    if layer == 'friends' and not directed:
        edges += [(b, a) for (a, b) in edges]

    return edges

In [5]:
def _tally_layer_edges(layer_paths):
    """Count how often each (sender, receiver) pair occurs across all layers.

    Args:
        layer_paths: Mapping of layer name to the CSV path for that layer.

    Returns:
        A Counter keyed by (sender, receiver) tuple.
    """
    tally = Counter()
    for layer_name, csv_path in layer_paths.items():
        # Every layer except "friends" is requested as directed.
        tally.update(extract_edges(csv_path, layer_name != "friends"))
    return tally


edge_counter = _tally_layer_edges(file_paths)

# Keep the pairs that were seen at least THRESHOLD times.
filtered_edges = [pair for pair, count in edge_counter.items() if count >= THRESHOLD]

In [6]:
# Fuse every retained (sender, receiver) pair into one undirected graph.
G = nx.Graph()
G.add_edges_from(filtered_edges)

# Sample with a dedicated, seeded generator so Restart & Run All picks the
# same nodes every time, and so the module-level `random` state that other
# cells draw from is left untouched. The sample size is capped at the node
# count because random.sample rejects a request larger than the population.
_node_sample_rng = random.Random(42)
sampled_nodes = _node_sample_rng.sample(
    list(G.nodes()), min(SAMPLE_SIZE, G.number_of_nodes())
)

# .copy() detaches the induced subgraph from G so it can be used on its own.
G_sampled = G.subgraph(sampled_nodes).copy()

In [7]:
# Approximate betweenness centrality from a sample of k pivot nodes.
# k is capped at the graph size, because requesting more pivots than there
# are nodes makes the pivot sampling fail on small graphs. The seed fixes
# which pivots are drawn so the scores are reproducible across runs.
bc = nx.betweenness_centrality(
    G_sampled,
    k=min(BC_K, G_sampled.number_of_nodes()),
    normalized=True,
    seed=42,
)

In [8]:
def _centrality_of(entry):
    """Return the score from a (node, score) pair."""
    return entry[1]


# Highest score first; ties keep the order in which `bc` yields them.
_ranked_by_centrality = sorted(bc.items(), key=_centrality_of, reverse=True)
top_100 = _ranked_by_centrality[:100]
top_100_nodes = [user_id for user_id, _score in top_100]

# Persist the ranking as a two-column table without the index column.
df_top100 = pd.DataFrame(data=top_100, columns=["UserID", "BetweennessCentrality"])
df_top100.to_csv("./table/top100_inter_between.csv", index=False)

In [49]:
# Rank every node by betweenness and keep the 100 highest-scoring
# (node, score) pairs; nodes_to_plot holds just their ids, in rank order.
top_100_nodes = sorted(bc.items(), key=lambda x: x[1], reverse=True)[:100]
nodes_to_plot = [node for node, _ in top_100_nodes]

centrality_values = np.array([bc[node] for node in nodes_to_plot])
# The 1e-5 offset keeps log10 finite for nodes whose centrality is 0.
centrality_values_log = np.log10(centrality_values + 1e-5)

# Min-max scale the squared log values to [0, 1] for the colour map.
# np.ptp() is called as a function because the ndarray.ptp() method was
# removed in NumPy 2.0; the 1e-9 term avoids dividing by zero when all
# values are equal.
# NOTE(review): for centralities below 1 the logs are negative, so squaring
# reverses their order and the highest-centrality nodes get the lowest
# colour values -- confirm this is the intended mapping.
centrality_log_sq = centrality_values_log**2
colors = (centrality_log_sq - centrality_log_sq.min()) / (
    np.ptp(centrality_log_sq) + 1e-9
)

# 3D force-directed layout of the whole sampled graph, seeded so node
# positions are stable between runs.
pos = nx.spring_layout(G_sampled, dim=3, k=0.3, iterations=100, seed=42)

In [None]:
# Coordinates and hover labels for the nodes being plotted.
node_x, node_y, node_z, hover_text = [], [], [], []
for node in nodes_to_plot:
    x, y, z = pos[node]
    node_x.append(x)
    node_y.append(y)
    node_z.append(z)
    hover_text.append(f"User ID: {node}<br>Centrality: {bc[node]:.4f}")

# Must be built before the edge filter below reads it; previously it was
# assigned after its first use, which raised NameError on a fresh kernel.
top_100_set = set(nodes_to_plot)

# Edges of the sampled graph whose two endpoints are both plotted nodes.
top_100_edges = [
    (u, v) for (u, v) in G_sampled.edges()
    if u in top_100_set and v in top_100_set
]
# NOTE(review): this capped sample is kept for compatibility but is not used
# for the traces below, which draw every edge in top_100_edges -- confirm
# whether the 500-edge cap was meant to apply to the plot.
sampled_edges = random.sample(top_100_edges, min(500, len(top_100_edges)))

# Each edge contributes its two endpoints followed by a None separator.
edge_x, edge_y, edge_z = [], [], []
for u, v in top_100_edges:
    x0, y0, z0 = pos[u]
    x1, y1, z1 = pos[v]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]
    edge_z += [z0, z1, None]

In [63]:
# Edge layer: thin light-grey lines with hover disabled.
edge_line_style = dict(color='lightgray', width=1)
edge_trace = go.Scatter3d(
    x=edge_x,
    y=edge_y,
    z=edge_z,
    mode='lines',
    line=edge_line_style,
    hoverinfo='none',
)

# Node layer: markers coloured by the precomputed `colors` values, with the
# prepared hover labels.
node_marker_style = dict(
    size=5,
    color=colors,
    colorscale='Viridis',
    colorbar=dict(title='Betweenness'),
    opacity=0.9,
)
node_trace = go.Scatter3d(
    x=node_x,
    y=node_y,
    z=node_z,
    mode='markers',
    marker=node_marker_style,
    hovertext=hover_text,
    hoverinfo='text',
)

# Edges are added first, then nodes.
fig = go.Figure()
for trace in (edge_trace, node_trace):
    fig.add_trace(trace)

# Kept as the final expression so the cell still displays the figure.
fig.update_layout(
    title="Top 100 Betweenness Centrality Nodes in Fused Multilayer Network",
    margin=dict(l=0, r=0, b=0, t=30),
)

In [57]:
# Save the current `fig` through Plotly's write_html to the path below.
fig.write_html("./html/top100_network_plot.html")

In [78]:
# Ranking tables to visualise, keyed by the metric name shown in the plots.
files = {
    "Betweenness Centrality": "./table/top100_multilayer_between.csv",
    "Degree Centrality": "./table/top100_multi_degree.csv",
    "Eigenvector Centrality": "./table/top100_multilayer_eigen.csv"
}


def _log_sq_colors(values):
    """Min-max scale the squared log10 of `values` to the range [0, 1].

    A 1e-5 offset is added before the log so zero values stay finite, and
    1e-9 is added to the range so constant input does not divide by zero.
    np.ptp() is called as a function because the ndarray.ptp() method was
    removed in NumPy 2.0.
    """
    log_sq = np.log10(values + 1e-5) ** 2
    return (log_sq - log_sq.min()) / (np.ptp(log_sq) + 1e-9)


def _plot_top_nodes(title, filepath, rng):
    """Render one ranking CSV as a 3D scatter plot and save it as HTML.

    Everything is kept local so the notebook-level `G`, `pos`, `colors` and
    `fig` from earlier cells are not overwritten.

    Args:
        title: Metric name used in the hover text, colour bar, figure title
            and output file name.
        filepath: CSV with a header row and two columns (user id, score).
        rng: random.Random instance used to pick the edges that are drawn.

    Returns:
        The path of the HTML file that was written.
    """
    ranking = pd.read_csv(filepath, header=0)
    ranking.columns = ['UserID', 'Centrality']
    ranking['Centrality'] = ranking['Centrality'].astype(float)
    node_ids = ranking['UserID'].astype(str).tolist()
    centrality_values = ranking['Centrality'].values

    layout_graph = nx.Graph()
    layout_graph.add_nodes_from(node_ids)

    # NOTE(review): these edges are sampled from all possible node pairs,
    # not read from the interaction data -- confirm that is intended.
    all_possible_edges = [(u, v) for i, u in enumerate(node_ids) for v in node_ids[i + 1:]]
    sampled_edges = rng.sample(all_possible_edges, min(500, len(all_possible_edges)))
    layout_graph.add_edges_from(sampled_edges)

    colors = _log_sq_colors(centrality_values)

    pos = nx.spring_layout(layout_graph, dim=3, k=0.3, iterations=100, seed=42)

    node_x, node_y, node_z, hover_text = [], [], [], []
    for node, cent in zip(node_ids, centrality_values):
        x, y, z = pos[node]
        node_x.append(x)
        node_y.append(y)
        node_z.append(z)
        hover_text.append(f"User ID: {node}<br>{title}: {cent:.4f}")

    # Each edge contributes its two endpoints followed by a None separator.
    edge_x, edge_y, edge_z = [], [], []
    for u, v in sampled_edges:
        x0, y0, z0 = pos[u]
        x1, y1, z1 = pos[v]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]
        edge_z += [z0, z1, None]

    fig = go.Figure()

    fig.add_trace(go.Scatter3d(
        x=edge_x, y=edge_y, z=edge_z,
        mode='lines',
        line=dict(color='lightgray', width=1),
        hoverinfo='none'
    ))

    fig.add_trace(go.Scatter3d(
        x=node_x, y=node_y, z=node_z,
        mode='markers',
        marker=dict(
            size=5,
            color=colors,
            colorscale='Viridis',
            colorbar=dict(title=title),
            opacity=0.9
        ),
        hovertext=hover_text,
        hoverinfo='text'
    ))

    fig.update_layout(
        title=f"Top 100 Nodes by {title} with 500 Sampled Edges",
        margin=dict(l=0, r=0, b=0, t=30)
    )

    output_file = f"./html/top100_{title.replace(' ', '_').lower()}.html"
    fig.write_html(output_file)
    return output_file


# One seeded generator shared by all plots: each figure gets its own edge
# sample, and the whole set is reproducible from run to run.
_edge_sample_rng = random.Random(42)

for title, filepath in files.items():
    output_file = _plot_top_nodes(title, filepath, _edge_sample_rng)
    print(f"{title} Saved as：{output_file}")


Betweenness Centrality Saved as：./html/top100_betweenness_centrality.html
Degree Centrality Saved as：./html/top100_degree_centrality.html
Eigenvector Centrality Saved as：./html/top100_eigenvector_centrality.html


In [79]:
# IFrame is imported here, right before its first use; the two display
# cells that follow rely on this import.
from IPython.display import IFrame
# Final expression of the cell: a 900x600 IFrame pointing at the saved
# betweenness-centrality page.
IFrame('./html/top100_betweenness_centrality.html', width=900, height=600)

In [80]:
# A 900x600 IFrame pointing at the saved degree-centrality page; needs
# IFrame to have been imported by an earlier cell.
IFrame('./html/top100_degree_centrality.html', width=900, height=600)

In [81]:
# A 900x600 IFrame pointing at the saved eigenvector-centrality page; needs
# IFrame to have been imported by an earlier cell.
IFrame('./html/top100_eigenvector_centrality.html', width=900, height=600)