In [0]:
import pandas as pd
import os

In [4]:
datapath = 'gdrive/My Drive/data/'
data_folder = 'rdyn_data_6/results/'
output_folder = datapath+'facetnet/'
algorithm_name = 'facetnet'
!pip install nf1


Collecting nf1
Installing collected packages: nf1
Successfully installed nf1-0.0.3


In [0]:
#coding=utf-8
import networkx as nx
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
#from util import *
from sklearn.metrics import mutual_info_score

def param_update(X, A, Y, W, alpha):
    W_apprx = X * A * X.T
    N, M = Y.shape
    X_new, A_new = np.zeros(X.shape), np.zeros(A.shape)
    for k in range(M):
        for i in range(N):
            for j in range(N):
                X_new[i, k] += W[i, j] * A[k, k] * X[j, k] / W_apprx[i, j]
                A_new[k, k] += W[i, j] * X[i, k] * X[j, k] / W_apprx[i, j]
            X_new[i, k] *= (2 * alpha * X[i, k])
            A_new[k, k] *= (alpha * A[k, k])
            X_new[i, k] += (1 - alpha) * Y[i, k]
            A_new[k, k] += (1 - alpha) * Y[i, k]
    X_new, A_new = np.matrix(X_new / np.sum(X_new, axis=0).reshape(1, M)), np.matrix(A / np.sum(A_new))
    Y = X_new * A_new
    return X_new, A_new, Y

def read_edgelist(filename,weighted = False):
    idmap = set()
    edge_cache = {}
    with open(filename) as f:
        for line in f:
            if weighted:
                u,v,w = [int(x) for x in line.strip().split()]
            else:
                tmp = [int(x) for x in line.strip().split()]
                u,v,w  = tmp[0],tmp[1],1.0
            edge_cache[(u,v)] = w
            idmap.add(u)
            idmap.add(v)
    idmap = list(idmap)                                   # 数组下标与结点唯一id标识的映射
    idmap_inv = {nid: i for i,nid in enumerate(idmap)}   # 结点唯一id标识与数组下标的映射
    N = len(idmap)
    adj_mat = np.zeros((N,N))
    for (u,v),w in edge_cache.items():
        adj_mat[idmap_inv[u],idmap_inv[v]] = w
    adj_mat += adj_mat.T
    return idmap, idmap_inv, adj_mat

def alg(net_path, alpha,tsteps,N,M,with_truth=True):        # FacetNet with # of nodes and communities fixed
    X, A = np.random.rand(N, M), np.diag(np.random.rand(M))
    X, A = np.matrix(X / np.sum(X, axis=0).reshape(1, M)), np.matrix(A / np.sum(A))
    Y = X * A
    for t in range(tsteps):
        # G = nx.read_weighted_edgelist(net_path+"%d.edgelist" % t)
        # idmap, mapping: nodeid → array_id
        idmap, idmap_inv, adj_mat = read_edgelist(net_path + "%d.edgelist" % t, weighted=False)
        if with_truth:
            with open(net_path+"%d.comm" % t) as f:
                comm_map = {}                       # mapping: nodeid → its community
                for line in f:
                    id0, comm0 = line.strip().split()
                    comm_map[int(id0)] = int(comm0)
        W = Sim(adj_mat, weighted=False)
        X_new, A_new, Y = param_update(X, A, Y, W, alpha)
        D = np.zeros((N,))
        for i in range(N):
            D[i] = np.sum(Y[i, :])
        D = np.matrix(np.diag(D))
        soft_comm = D.I * X_new * A_new
        comm_pred = np.array(np.argmax(soft_comm, axis=1)).ravel()
        print("time:", t)
        if with_truth:
            comm = np.array([comm_map[idmap_inv[i]] for i in range(N)])
            print("mutual_info:", mutual_info_score(comm, comm_pred))
        print("soft_modularity:", soft_modularity(soft_comm, W))
        # community_net = A_new * X_new.T * soft_comm
        # print("community_net")
        # print(community_net)
        # evolution_net = X.T * soft_comm
        # print("evolution_net")
        # print(evolution_net)
        X, A = X_new, A_new

# do experiment with network stated in 4.1.2
def exp1():
    tsteps = 15
    from synthetic import generate_evolution
    print("generating synthetic graph")
    generate_evolution("./data/syntetic1/", tsteps=tsteps)
    print("start the algorithm")
    alpha = 0.9
    N, M = 128, 4
    np.random.seed(0)
    alg("./data/syntetic1/",alpha,tsteps,N,M)

# FacetNet with # of nodes and communities changed
def alg_extended(net_path, alpha,tsteps,M,with_truth=True):
    idmap0,idmap_inv0 = [],{}
    for t in range(tsteps):
        print("time:", t)
        idmap, idmap_inv, adj_mat = read_edgelist(net_path+"%d.edgelist" % t, weighted=False)
        if with_truth:
            with open(net_path+"%d.comm" % t) as f:
                comm_map = {}
                for line in f:
                    id0, comm0 = line.strip().split()
                    comm_map[int(id0)] = int(comm0)
        N = len(idmap)
        W = Sim(adj_mat, weighted=False)
        if t == 0:
            X, A = np.random.rand(N, M), np.diag(np.random.rand(M))
            X, A = np.matrix(X / np.sum(X, axis=0).reshape(1, M)), np.matrix(A / np.sum(A))
            Y = X * A
        else:                    # adjustment for changing of nodes
            reserved_rows = [idmap_inv0[x] for x in idmap0 if x in idmap]
            num_new,num_old = len(set(idmap) - set(idmap0)),len(reserved_rows)
            Y = Y[reserved_rows,:]
            Y /= np.sum(Y)
            Y = np.pad(Y,((0,num_new),(0,0)),mode='constant',constant_values=(0,0))
            # not mentioned on the paper, but are necessary for node changing
            X = X[reserved_rows,:]
            X = np.matrix(X / np.sum(X, axis=0).reshape(1, M))
            X *= num_old/(num_old+num_new)
            if num_new != 0:
                X = np.pad(X, ((0, num_new), (0, 0)), mode='constant', constant_values=(1/num_new, 1/num_new))
            print ("this is x: ", X)
            #else:
                #X = np.pad(X, ((0, num_new), (0, 0)), mode='constant', constant_values=(1/num_new, 1/num_new))
        X_new, A_new, Y = param_update(X, A, Y, W, alpha)
        print ("xnew: ", X_new)
        D = np.zeros((N,))
        for i in range(N):
            D[i] = np.sum(Y[i, :])
        D = np.matrix(np.diag(D))
        soft_comm = D.I * X_new * A_new

        comm_pred = np.array(np.argmax(soft_comm, axis=1)).ravel()
        if with_truth:
            comm = np.array([comm_map[idmap[i]] for i in range(N)])
            print("mutual_info:", mutual_info_score(comm, comm_pred))
        s_modu = soft_modularity(soft_comm, W)
        print("soft_modularity: %f" % s_modu)
        #community_net = A_new * X_new.T * soft_comm
        #print("community_net")
        #print(community_net)
        # evolution_net = X.T * soft_comm
        # print("evolution_net")
        # print(evolution_net)
        X, A = X_new, A_new
        idmap0, idmap_inv0 = idmap, idmap_inv

# do experiment with adding and removing nodes
def exp2():
    tsteps = 15
    from synthetic import generate_evolution2
    print("generating synthetic graph")
    generate_evolution2("./data/syntetic2/", tsteps=tsteps)
    print("start the algorithm")
    alpha = 0.5
    np.random.seed(0)
    alg_extended("./data/syntetic2/",alpha,tsteps,4, with_truth = False)




In [0]:
import numpy as np
import networkx as nx

def Sim(adj_mat, weighted,gamma=0.2):
    N = adj_mat.shape[0]
    sim = np.matrix(np.eye(N,N))
    if weighted:                           # as stated in Section 4.2
        sim = np.exp(-1.0/(gamma*adj_mat))
    else:
        # similarity measure for unweighted graph is not mentioned in the paper, below is my assumption, which seems to work.
        sim = adj_mat/2.0                   # if linked, sim=0.5
    for i in range(N):
        sim[i,i] = 1.0
    sim /= np.sum(sim)
    return sim

def KL(A,B):
    ret = 0.0
    for a in np.ravel(A):
        for b in np.ravel(B):
            ret += a*np.log(a/b) - a + b
    return ret

def soft_modularity(soft_comm,W):
    N = W.shape[0]
    ret = np.trace(soft_comm.T*W*soft_comm)
    one = np.matrix(np.ones((N,1)))
    ret -= np.array(one.T*W.T*soft_comm*soft_comm.T*W*one).squeeze()
    return ret

In [20]:
print(os.getcwd())
tsteps = 100
alpha = 0.5
alg_extended('gdrive/My Drive/data/facetnet/',alpha,tsteps,4, with_truth = False)


/content
('time:', 0)
('xnew: ', matrix([[8.51674756e-04, 4.49288859e-04, 5.86733181e-04, 5.01225147e-05],
        [7.82834634e-05, 7.04346042e-04, 1.36419085e-04, 6.83685551e-04],
        [3.79975816e-04, 2.71252274e-04, 2.14411360e-04, 8.37665088e-04],
        ...,
        [2.23863278e-04, 8.02358096e-04, 5.67867780e-04, 1.19560244e-04],
        [1.00059500e-03, 1.58882630e-04, 5.02945269e-04, 5.61273827e-04],
        [3.48892829e-04, 6.68619391e-05, 6.98231762e-04, 3.81993141e-04]]))
soft_modularity: 0.067920
('time:', 1)
('this is x: ', matrix([[0.20030289, 0.04078083, 0.04303731, 0.04877106],
        [0.15654533, 0.08289088, 0.21471058, 0.09041502],
        [0.13911255, 0.20825436, 0.0022954 , 0.09439209],
        [0.10689196, 0.1334898 , 0.018195  , 0.0676523 ],
        [0.04302193, 0.12065349, 0.12652033, 0.02804483],
        [0.01550724, 0.02602588, 0.09510936, 0.25594125],
        [0.06357428, 0.11393334, 0.12708345, 0.07394813],
        [0.03134152, 0.03387248, 0.15183627, 0.

  app.launch_new_instance()


ValueError: ignored

In [0]:
data = pd.read_csv('gdrive/My Drive/data/rdyn_data_6/results/2000_100_15_0.6_0.8_0.2_1_2.5_0/interactions.txt', sep="\t", header=None)
data.columns = ["iteration", "timestamp", "action", "node_id1", "node_id2"]
data['node_id1'] = data['node_id1'].astype(str)
data["placeholder"] = " "
data['node_id2'] = data['node_id2'].astype(str)
data['edge'] = data[['node_id1','placeholder', 'node_id2']].apply(lambda x: ''.join(x), axis=1)
data['edge2'] = data[['node_id2','placeholder', 'node_id1']].apply(lambda x: ''.join(x), axis=1)

count = 0
res = []
changes = {}
for el in range(len(data["edge"])):
    #print (data["iteration"][el], data["edge"][el], data["edge2"][el])


    if data["iteration"][el] not in changes.keys():
        changes[data["iteration"][el]] = []
        changes[data["iteration"][el]].append("{0}".format(data["edge"][el]))
    else:
        changes[data["iteration"][el]].append("{0}".format(data["edge"][el]))

#os.mkdir(datapath + 'facetnet/')#)+rdyn_instance)

for k in changes.keys():
    print(k, changes[k])
    
    with open(datapath + 'facetnet/'+ "/{0}.edgelist".format(k), 'w') as f:
        for ed in changes[k]:
            f.write("{0}\n".format(ed))

