In [3]:
import random
import networkx as nx
import numpy as np
import karateclub
from tkinter import _flatten
import community
import MGCF

In [5]:
import pandas as pd

### 1~4. Data Generation

In [1]:
# Number of graph nodes used in the experiment: the node list is range(sample_num)
# and every loaded matrix is cut down to its leading sample_num rows/columns.
sample_num = 2000

In [None]:
# Build the node set; the same nodes_list is reused by the other graphs (G2, G).
nodes_list = list(range(sample_num))
G1 = nx.Graph()
G1.add_nodes_from(nodes_list)

# Read the adjacency matrix.
# This file is the weekday (Monday) morning-peak period, used as the example.
adj_modul = pd.read_csv('../../data/adj/adj_20151109_0002.csv')
# Drop the first CSV column (presumably a saved index -- verify against the file)
# and force a float dtype: if the CSV parses as integers, the 0.0001 fill below
# would be silently truncated to 0 on assignment and isolated nodes would stay
# isolated.
adj_modul = adj_modul.iloc[:, 1:].values.astype(float)
adj_modul = adj_modul[:sample_num, :sample_num]
# Symmetrise, then remove self-connections.
adj_modul = adj_modul + adj_modul.T
adj_modul = adj_modul - np.diag(adj_modul.diagonal())
# Give a node with an all-zero row a tiny uniform weight on its row and column.
# NOTE(review): this also sets the diagonal entry [i, i], and once one node has
# been filled no later row is all-zero any more, so the pass is order-dependent.
for i in range(sample_num):
    if (adj_modul[i] == 0).all():
        adj_modul[i, :] = 0.0001
        adj_modul[:, i] = 0.0001

# Symmetric degree normalisation D^(-1/2) * A * D^(-1/2); the 0.00001 offset keeps
# the power finite when a column sum is zero.
D_12 = np.diag(np.power(np.sum(adj_modul, axis=0) + 0.00001, -1/2))
adj_modul_norm = np.dot(np.dot(D_12, adj_modul), D_12)

# Add one weighted edge per strictly positive entry. np.nonzero walks the matrix
# in row-major order, i.e. the same (i, j) sequence as a nested i/j loop, but
# avoids sample_num**2 Python-level iterations.
edge_rows, edge_cols = np.nonzero(adj_modul_norm > 0)
G1.add_weighted_edges_from(
    zip(edge_rows.tolist(), edge_cols.tolist(), adj_modul_norm[edge_rows, edge_cols])
)

In [None]:
# Read the reachability adjacency matrix.
adj_reach = pd.read_csv('../../data/adj_reachability/adj_reachability.csv')
# Drop the first CSV column (presumably a saved index -- verify against the file)
# and force a float dtype: if the CSV parses as integers, the 0.0001 fill below
# would be silently truncated to 0 on assignment.
adj_reach = adj_reach.iloc[:, 1:].values.astype(float)
adj_reach = adj_reach[:sample_num, :sample_num]
# Remove self-connections. Unlike the first graph, this matrix is not symmetrised
# here; for an asymmetric pair the entry visited last sets the undirected weight.
adj_reach = adj_reach - np.diag(adj_reach.diagonal())
# Give a node with an all-zero row a tiny uniform weight on its row and column.
# NOTE(review): this also sets the diagonal entry [i, i], and once one node has
# been filled no later row is all-zero any more, so the pass is order-dependent.
for i in range(sample_num):
    if (adj_reach[i] == 0).all():
        adj_reach[i, :] = 0.0001
        adj_reach[:, i] = 0.0001

G2 = nx.Graph()
G2.add_nodes_from(nodes_list)

# Degree normalisation D^(-1/2) * A * D^(-1/2) using column sums; the 0.00001
# offset keeps the power finite when a column sum is zero.
D_12 = np.diag(np.power(np.sum(adj_reach, axis=0) + 0.00001, -1/2))
adj_reach_norm = np.dot(np.dot(D_12, adj_reach), D_12)

# Add one weighted edge per strictly positive entry, in row-major order (the same
# (i, j) sequence as a nested i/j loop) without sample_num**2 Python iterations.
edge_rows, edge_cols = np.nonzero(adj_reach_norm > 0)
G2.add_weighted_edges_from(
    zip(edge_rows.tolist(), edge_cols.tolist(), adj_reach_norm[edge_rows, edge_cols])
)

In [8]:
# Load the node feature table, drop its first CSV column and keep only the
# leading sample_num rows as a NumPy array.
# NOTE(review): later cells read a variable named `X`, which is not assigned in
# any visible cell -- presumably it is meant to be this array; confirm.
feature_table = pd.read_csv('../../data/features/features_wf_new.csv')
feature = feature_table.iloc[:, 1:].values[:sample_num]

### 5. Combine Multi-Graph

In [None]:
# Combined graph: sum of the two normalised adjacency matrices, re-normalised.
G = nx.Graph()
G.add_nodes_from(nodes_list)

adj_combine = adj_modul_norm + adj_reach_norm

# Degree normalisation D^(-1/2) * A * D^(-1/2) using column sums; the 0.00001
# offset keeps the power finite when a column sum is zero.
D_12 = np.diag(np.power(np.sum(adj_combine, axis=0) + 0.00001, -1/2))
adj_combine_norm = np.dot(np.dot(D_12, adj_combine), D_12)

# Add one weighted edge per strictly positive entry, in row-major order.
# The edges must go into G: writing them into G2 would leave G without any edge
# (so every later fit/modularity call on G has nothing to work with) and would
# also overwrite the reachability graph G2 with combined weights.
edge_rows, edge_cols = np.nonzero(adj_combine_norm > 0)
G.add_weighted_edges_from(
    zip(edge_rows.tolist(), edge_cols.tolist(), adj_combine_norm[edge_rows, edge_cols])
)

### 6. Algorithm Process & Evaluation

In [None]:
# 1. DANMF

# Fit DANMF with its default settings on the combined graph G. The values of the
# returned membership mapping are taken as one label per node
# (assumes the mapping is ordered by node id 0..n-1 -- TODO confirm).
model1 = karateclub.DANMF()
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1, as the other baseline cells
# do. Labels index the rows of U below, and U only has one row per non-empty
# community, so a gap in the raw label set would raise an IndexError.
remap = sorted(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn (0 would mean a perfectly even split).
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 2. NNSED

# Fit NNSED with K dimensions on the combined graph G. The values of the returned
# membership mapping are taken as one label per node
# (assumes the mapping is ordered by node id 0..n-1 -- TODO confirm).
# NOTE(review): K is not assigned in any visible cell; confirm where it is set.
model1 = karateclub.NNSED(dimensions=K)
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1, as the other baseline cells
# do. Labels index the rows of U below, and U only has one row per non-empty
# community, so a gap in the raw label set would raise an IndexError.
remap = sorted(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn (0 would mean a perfectly even split).
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 3. MNMF

# Fit MNMF with K clusters on the combined graph G and take the values of the
# returned membership mapping as one label per node.
# NOTE(review): K is not assigned in any visible cell; confirm where it is set.
model1 = karateclub.MNMF(clusters=K)
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1 so they can index rows of U.
remap = list(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 4. BigClam

# Fit BigClam with K dimensions on the combined graph G and take the values of
# the returned membership mapping as one label per node.
# NOTE(review): K is not assigned in any visible cell; confirm where it is set.
model1 = karateclub.BigClam(dimensions=K)
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1 so they can index rows of U.
remap = list(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 5. SNMF

# Fit SymmNMF with K dimensions on the combined graph G and take the values of
# the returned membership mapping as one label per node.
# NOTE(review): K is not assigned in any visible cell; confirm where it is set.
model1 = karateclub.SymmNMF(dimensions=K)
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1 so they can index rows of U.
remap = list(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 6. GEMSEC

# Fit GEMSEC with K dimensions on the combined graph G and take the values of
# the returned membership mapping as one label per node.
# NOTE(review): K is not assigned in any visible cell; confirm where it is set.
model1 = karateclub.GEMSEC(dimensions=K)
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1 so they can index rows of U.
remap = list(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 7. SCD

# Fit SCD with its default settings on the combined graph G and take the values
# of the returned membership mapping as one label per node.
model1 = karateclub.SCD()
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1 so they can index rows of U.
remap = list(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 8. LPA

# Fit label propagation (seed=10, 100 iterations) on the combined graph G and
# take the values of the returned membership mapping as one label per node.
model1 = karateclub.LabelPropagation(seed=10, iterations=100)
model1.fit(G)
cluster_membership = model1.get_memberships()
clusters = cluster_membership
labels_pred = list(_flatten(list(clusters.values())))

# Relabel the communities to consecutive ids 0..cn-1 so they can index rows of U.
remap = list(set(labels_pred))
labels_pred = [remap.index(label) for label in labels_pred]

# partition: node -> community id; partition_2: community id -> list of nodes.
partition = dict(enumerate(labels_pred))
partition_2 = {}
for node, label in enumerate(labels_pred):
    partition_2.setdefault(label, []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# 9. Louvain

# The result of community.best_partition on the weighted graph G is used directly
# as the node -> community id mapping; no model object or relabelling is involved.
partition = community.best_partition(G, weight='weight')
labels_pred = partition

# partition_2: community id -> list of nodes, looked up by node id 0..n-1.
partition_2 = {}
for node in range(len(labels_pred)):
    partition_2.setdefault(labels_pred[node], []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))

In [None]:
# Ours
# Run MGCF on the two single-view graphs (G1, G2) together with the feature
# matrix X; the result is used by the next cell as a node -> community mapping.
# NOTE(review): K and X are not assigned in any visible cell; confirm.
partition = MGCF.main(
    clusters_number=K,
    feature=X,
    multi_graph=[G1, G2],
    alpha_1=1,
    alpha_2=0.1,
    alpha_3=0.01,
    weight_list=[1],
    alpha=2,
    epoch_num=700,
)

In [None]:
# Evaluate the MGCF partition with the same two metrics as the baselines.
labels_pred = partition

# partition_2: community id -> list of nodes, looked up by node id 0..n-1.
partition_2 = {}
for node in range(len(labels_pred)):
    partition_2.setdefault(labels_pred[node], []).append(node)

# One-hot membership matrix: U[c, node] = 1 when node belongs to community c.
# NOTE(review): N, X and beta are not assigned in any visible cell -- presumably
# the node count, the node feature matrix and a feature weight vector; confirm.
cn = len(partition_2)
U = np.zeros((cn, N))
for node, label in partition.items():
    U[label, node] = 1

# Metric 1: weighted modularity of the partition on G.
print(round(community.modularity(partition, G, weight='weight'), 3))
# Metric 2: Euclidean distance between each community's share of U.X.beta and
# the uniform share 1/cn.
community_load = np.dot(np.dot(U, X), beta)
print(round(np.sqrt(np.sum(np.square(community_load / np.sum(community_load) - np.ones(cn)/cn))), 3))