In [4]:
import pandas as pd
import numpy as np

In [5]:
import networkx as nx

In [6]:
import csv

In [7]:
def PageRank(M, alpha, root):
    """
    Personal Rank in matrix formation
    :param M: transfer probability matrix
    :param index2node: index2node dictionary
    :param node2index: node2index dictionary
    :return:type of list of tuple, ex.
    [(node1, prob1),(node2, prob2),...]
    """
    result = []
    n = len(M)
    v = np.zeros(n)
    v[node2index[root]] = 1
    while np.sum(abs(v - (alpha*np.matmul(M,v) + (1-alpha)*v))) > 0.0001:
        v = alpha * np.matmul(M, v) + (1-alpha)*v
    for ind, prob in enumerate(v):
        result.append((index2node[ind], prob))
    result = sorted(result, key=lambda x:x[1], reverse=True)[:num_candidates]
    return result


In [8]:
def Generate_Transfer_Matrix(G):
    """generate transfer matrix given graph"""
    index2node = dict()
    node2index = dict()
    for index,node in enumerate(G.keys()):
        node2index[node] = index
        index2node[index] = node
    # num of nodes
    n = len(node2index)
    # generate Transfer probability matrix M, shape of (n,n)
    M = np.zeros([n,n])
    for node1 in G.keys():
        for node2 in G[node1]:
            # FIXME: some nodes not in the Graphs.keys, may incur some errors
            try:
                M[node2index[node2],node2index[node1]] = 1/len(G[node1])
            except:
                continue
    return M, node2index, index2node



In [9]:
alpha = 0.85
root = '176626373'
num_iter = 2
num_candidates = 10

In [10]:
G = {'A' : 'a',
     'A' : 'c',
     'B' : 'a',
     'B': 'b', 
     'B': 'c', 
     'B': 'd',
     'C': 'c' , 
     'C': 'd' ,
     'a' : 'A' ,
     'a': 'B' ,
     'b' : 'B' ,
     'c' : 'A' ,
     'c': 'B', 
     'c': 'C',
     'd' : 'B' ,
     'd':'C'}

In [13]:
G0 = {'A' : {'a', 'c'},
     'B' : {'a', 'b', 'c', 'd'},
     'C' : {'c' : 1, 'd' : 1},
     'a' : {'A' : 1, 'B' : 1},
     'b' : {'B' : 1},
     'c' : {'A' : 1, 'B' : 1, 'C':1},
     'd' : {'B' : 1, 'C' : 1}}

In [15]:
G0['A']

{'a': 1, 'c': 1}

In [11]:
print(G)

{'A': 'c', 'B': 'd', 'C': 'd', 'a': 'B', 'b': 'B', 'c': 'C', 'd': 'C'}


In [57]:
with open('simple.csv', mode='r') as infile:
    reader = csv.reader(infile)
    with open('simple_new.csv', mode='w') as outfile:
        writer = csv.writer(outfile)
        mydict = {rows[0]:rows[1] for rows in reader}

In [63]:
print(mydict)

{'176626373': '28500', '976644587': '28500', '1507898991': '28500', '273560006': '13100876', '174465990': '109653', '530240181': '109653', '71506844': '109810', '375217769': '109810', '158712983': '140244', '344927741': '140244', '362769815': '140244', '841524089': '140244', '289208499': '140244', '465094171': '140244', '467344870': '140244', '292615367': '140244', '279807026': '140244', '944439269': '140244', '507967': '7292059', '37617743': '214661', '243547761': '10885504', '2036289': '428872', '121726011': '8150024', '183781266': '429341', '249397861': '429341', '18094352': '429341', '407008273': '429341', '601069052': '429341', '1808805418': '429341', '107339568': '454084', '381919841': '8150024', '113006619': '454084', '61022665': '507967', '924294794': '507967', '2321081471': '507967', '7292059': '507967', '214661': '507967', '30215691': '517835', '4330660': '11399933', '4652003': '4330660', '22141191': '597276', '57244649': '597276', '48844489': '597276', '782767233': '694812',

In [58]:
M1, node2index1, index2node1 = Generate_Transfer_Matrix(mydict)

In [46]:
# print(M1)

In [50]:
print(type(index2node1))

<class 'dict'>


In [48]:
# print(node2index1)

In [59]:
result1 = PageRank(M1, alpha, root)

In [60]:
print(result1)

[('176626373', 7.593750000000005e-05), ('976644587', 0.0), ('1507898991', 0.0), ('273560006', 0.0), ('174465990', 0.0), ('530240181', 0.0), ('71506844', 0.0), ('375217769', 0.0), ('158712983', 0.0), ('344927741', 0.0)]


In [70]:
root = 'B'
M, node2index, index2node = Generate_Transfer_Matrix(G)
# print transfer matrix
print(pd.DataFrame(M, index=G.keys(), columns=G.keys()))
result = PageRank(M, alpha, root)
# print results
print(result)

     A    B    C    a    b         c    d
A  0.0  0.0  0.0  0.5  0.0  0.333333  0.0
B  0.0  0.0  0.0  0.5  1.0  0.333333  0.5
C  0.0  0.0  0.0  0.0  0.0  0.333333  0.5
a  0.0  0.0  0.0  0.0  0.0  0.000000  0.0
b  0.0  0.0  0.0  0.0  0.0  0.000000  0.0
c  1.0  0.0  0.0  0.0  0.0  0.000000  0.0
d  0.0  1.0  1.0  0.0  0.0  0.000000  0.0
[('d', 0.49997700067317274), ('B', 0.2500114996634136), ('C', 0.2500114996634136), ('A', 0.0), ('a', 0.0), ('b', 0.0), ('c', 0.0)]


In [12]:
root = 'A'
M, node2index, index2node = Generate_Transfer_Matrix(G)
# print transfer matrix
print(pd.DataFrame(M, index=G.keys(), columns=G.keys()))
result = PageRank(M, alpha, root)
# print results
print(result)

     A    B    C    a    b    c    d
A  0.0  0.0  0.0  0.0  0.0  0.0  0.0
B  0.0  0.0  0.0  1.0  1.0  0.0  0.0
C  0.0  0.0  0.0  0.0  0.0  1.0  1.0
a  0.0  0.0  0.0  0.0  0.0  0.0  0.0
b  0.0  0.0  0.0  0.0  0.0  0.0  0.0
c  1.0  0.0  0.0  0.0  0.0  0.0  0.0
d  0.0  1.0  1.0  0.0  0.0  0.0  0.0
[('C', 0.5000229993268273), ('d', 0.49997700067317286), ('c', 1.352200062145973e-21), ('A', 8.52226929923933e-24), ('B', 0.0), ('a', 0.0), ('b', 0.0)]
