In [1]:
import numpy as np
import scipy as sp
import functools

In [2]:
# Module-level SimRank state. initParam() fills these in and iterate()
# rewrites sim_matrix in place of the previous iteration's result.
nodes = []  # node labels; a label's position in this list is its integer id
nodesnum = 0  # number of distinct nodes, i.e. len(nodes) once initParam() has run
nodes_index = {}  # node label -> integer id (inverse of `nodes`)
damp = 0.8  # damping factor applied to the propagated term of the SimRank update
# Empty 0x0 ndarray placeholders: consistent with nodesnum == 0 and with the
# plain ndarrays that initParam() assigns later (np.matrix is discouraged by NumPy).
trans_matrix = np.zeros((0, 0))  # trans_matrix[u, v] = 1/|in(v)| when u links to v
sim_matrix = np.zeros((0, 0))  # pairwise similarity scores, indexed by node id
 
 
def initParam(graphFile):
    """Load a graph file and (re)build the global SimRank state.

    Each non-empty line of ``graphFile`` is split on whitespace: the first
    token is a node label and every following token is the label of a node it
    links to. Labels are assigned consecutive integer ids in order of first
    appearance.

    After the call the module globals hold:
      * ``nodes`` / ``nodes_index`` -- label list and label -> id mapping
      * ``nodesnum``                -- number of distinct labels
      * ``trans_matrix``            -- (nodesnum x nodesnum) array where
        ``trans_matrix[u, v] = 1 / (number of distinct in-neighbours of v)``
        for every edge u -> v, and 0 elsewhere
      * ``sim_matrix``              -- ``(1 - damp) * I`` as the starting point

    Args:
        graphFile: path of the adjacency-list text file to read.

    Raises:
        IOError/OSError: if ``graphFile`` cannot be opened.
    """
    global nodes
    global nodes_index
    global trans_matrix
    global sim_matrix
    global damp
    global nodesnum

    # Start from a clean slate: without this, calling initParam() again (e.g.
    # re-running a notebook cell on another file) would keep stale labels and
    # produce phantom all-zero rows/columns in the matrices.
    nodes = []
    nodes_index = {}

    def node_id(label):
        """Return the id of `label`, allocating the next free id if it is new."""
        idx = nodes_index.get(label)
        if idx is None:
            idx = len(nodes)
            nodes_index[label] = idx
            nodes.append(label)
        return idx

    # target id -> set of source ids. A set keeps a repeated edge from
    # inflating the in-degree, which would leave the column un-normalised.
    link_in = {}
    with open(graphFile, "r") as graph:  # `with` guarantees the handle is closed
        for line in graph:
            arr = line.split()
            if not arr:
                # Blank line: nothing to record (previously an IndexError).
                continue
            nodeid = node_id(arr[0])
            for outneighbor in arr[1:]:
                link_in.setdefault(node_id(outneighbor), set()).add(nodeid)

    nodesnum = len(nodes)
    trans_matrix = np.zeros((nodesnum, nodesnum))
    for node, inneighbors in link_in.items():
        # Every in-neighbour of `node` gets an equal share of the column.
        prob = 1.0 / len(inneighbors)
        for neighbor in inneighbors:
            trans_matrix[neighbor, node] = prob

    sim_matrix = np.identity(nodesnum) * (1 - damp)
 
 
def iterate():
    """Run one SimRank update step on the global similarity matrix.

    Replaces ``sim_matrix`` with
    ``damp * (trans_matrix^T . sim_matrix . trans_matrix) + (1 - damp) * I``,
    where ``I`` is the ``nodesnum`` x ``nodesnum`` identity. Reads
    ``trans_matrix``, ``damp`` and ``nodesnum`` from module scope; returns
    nothing.
    """
    global trans_matrix, sim_matrix, damp, nodesnum

    # Similarity propagated one hop back along in-links on both sides.
    left_product = np.dot(trans_matrix.transpose(), sim_matrix)
    propagated = np.dot(left_product, trans_matrix)

    # Constant diagonal term added on every step.
    diagonal_term = (1 - damp) * np.identity(nodesnum)

    sim_matrix = damp * propagated + diagonal_term
 
 
def printResult(sim_node_file):
    """Write every positive off-diagonal similarity score to a text file.

    For each ordered pair of distinct node ids (i, j), in row-major order,
    whose ``sim_matrix[i, j]`` is greater than zero, one line of the form
    ``<label_i>,<label_j>,<score>\\t`` is written (the trailing tab is kept
    for compatibility with the existing output format).

    Args:
        sim_node_file: path of the output file; it is created or truncated.

    Raises:
        IOError/OSError: if the file cannot be opened or written.
    """
    global sim_matrix
    global nodes
    global nodesnum

    # `with` closes the file even if a write fails part-way through.
    with open(sim_node_file, "w") as f_out_user:
        for i in range(nodesnum):
            for j in range(nodesnum):
                if i == j:
                    continue  # self-similarity is not reported
                sim = sim_matrix[i, j]
                # Identity check instead of `== None`; a missing score is
                # treated like zero and therefore skipped.
                if sim is not None and sim > 0:
                    f_out_user.write(nodes[i] + "," + nodes[j] + "," + str(sim) + "\t")
                    f_out_user.write("\n")
 
 
def simrank(graphFile, maxIteration):
    """Load a graph and run a fixed number of SimRank iterations, logging each.

    Calls ``initParam(graphFile)``, prints the label -> id mapping and the
    transition matrix, then calls ``iterate()`` ``maxIteration`` times and
    prints ``sim_matrix`` after every step. The result is left in the module
    global ``sim_matrix``; nothing is returned.

    Args:
        graphFile: path of the graph file handed to ``initParam``.
        maxIteration: number of update steps to perform.
    """
    global nodes_index, trans_matrix, sim_matrix

    initParam(graphFile)

    # Dump the initial state before iterating.
    print("nodes:")
    print(nodes_index)
    print("trans ratio:")
    print(trans_matrix)

    # Steps are numbered from 1 in the log output.
    for step in range(1, maxIteration + 1):
        print("iteration %d:" % step)
        iterate()
        print(sim_matrix)


In [3]:
if __name__ == '__main__':
    # Driver: compute SimRank on the sample graph and dump the scores.
    # NOTE(review): both paths are absolute and machine-specific; point them
    # at your own data directory before re-running this cell.
    maxIteration = 5  # number of SimRank update steps
    sim_node_file = "/Users/ninashao/Downloads/data_sample/MS_sample/nodesim_naive.txt"
    graphFile = "/Users/ninashao/Downloads/data_sample/MS_sample/relationship_total_update.txt"

    simrank(graphFile=graphFile, maxIteration=maxIteration)
    printResult(sim_node_file=sim_node_file)

nodes:
{'18722': 1343, '16044': 1026, '19202': 1397, '1178': 298, '17715': 1237, '36494': 3624, '32922': 3189, '37082': 3687, '1094': 97, '1096': 99, '23877': 2001, '1152': 353, '16934': 1142, '27096': 2429, '29863': 2808, '13643': 709, '24384': 2057, '1164': 248, '18375': 1301, '1044': 179, '36443': 3616, '16379': 1075, '26602': 2364, '36445': 3617, '10021': 19, '22367': 1804, '1212': 534, '28284': 2594, '35234': 3476, '32652': 3160, '1027': 35, '35320': 3481, '16976': 1149, '26196': 2301, '1215': 751, '42501': 4392, '25770': 2240, '30746': 2932, '25070': 2144, '22343': 1801, '27797': 2525, '30415': 2892, '25406': 2183, '16239': 1051, '25905': 2259, '17255': 1178, '24653': 2088, '31445': 2994, '12863': 600, '29049': 2692, '1055': 105, '25531': 2205, '42523': 4395, '32252': 3109, '29901': 2813, '18445': 1315, '11904': 462, '42445': 4385, '26420': 2338, '32462': 3132, '20644': 1584, '24613': 2081, '11544': 402, '13135': 633, '21725': 1715, '36050': 3560, '17388': 1194, '23630': 1970, '2

[[  2.14545455e-01   0.00000000e+00   0.00000000e+00 ...,   9.69696970e-03
    2.42424242e-03   2.42424242e-03]
 [  0.00000000e+00   2.00567376e-01   2.57248513e-05 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 [  0.00000000e+00   2.57248513e-05   2.00403023e-01 ...,   0.00000000e+00
    0.00000000e+00   0.00000000e+00]
 ..., 
 [  9.69696970e-03   0.00000000e+00   0.00000000e+00 ...,   2.26666667e-01
    4.44444444e-03   4.44444444e-03]
 [  2.42424242e-03   0.00000000e+00   0.00000000e+00 ...,   4.44444444e-03
    2.26666667e-01   2.66666667e-02]
 [  2.42424242e-03   0.00000000e+00   0.00000000e+00 ...,   4.44444444e-03
    2.66666667e-02   2.26666667e-01]]
iteration 2:
[[ 0.21470479  0.          0.         ...,  0.00973384  0.00244713
   0.00244713]
 [ 0.          0.20471143  0.00408524 ...,  0.          0.          0.        ]
 [ 0.          0.00408524  0.20444465 ...,  0.          0.          0.        ]
 ..., 
 [ 0.00973384  0.          0.         ...,  0.22672014  0.