In [1]:
import numpy as np
import pandas as pd

In [2]:
%%time
#Function definitions

def modify_ad_mat(a):
    #make diagonals 0
    np.fill_diagonal(a, 0)
    print("After diagonals: \n", a) 

    # sum of colm and multiple each value
    sum_a = sum(a)
    print("Colm Sum: \n", sum_a)

    for i in range(len(a)):
            for j in range(len(a[i])):
                if(a[i,j] != 0):
                    a[i,j] = a[i,j]/sum_a[j]
    return a


def get_dangling_nodes(a):
    # dangling nodes
    sum_a = sum(a)
    d= np.logical_not(sum_a)
    d = d.astype(int)
    return d

def get_article_vec(articles, sum_art):
    #Article Vector
    print("\nSum article: \n", sum_art)
    Atot = (articles/sum_art).transpose().reshape((-1, 1))
    return Atot

def get_init_vec(Atot):
    #initial start vector
    size = len(Atot)
    pi0 = np.full((1,size), 1/size).reshape((-1, 1))
    return pi0

# no need to use this
# def get_h0(H0, d, Atot):
#     for i in range(len(H0)):
#         for j in range(len(H0[i])):
#             if(d[j] == 1):
#                 H0[i,j] = Atot[i]
#     return H0

#Formula : π(k+1) = α H π(k) + [α d.π(k) + (1 − α)]a
def influenec_vec(max_itr, H, pi_, alpha, d):
    epsilon =  0.00001
    iterations = 0
    for i in range(1, max_itr):
        pi_new  = (alpha*np.dot(H,pi_)) + (((alpha*np.dot(d,pi_)) + (1-alpha))*Atot) 
        if (np.linalg.norm(pi_new-pi_) < epsilon):
            pi_= pi_new
            iterations = i
            break
        pi_ = pi_new

    print("\nIterations: \n", iterations)
    return pi_

def eigenfactor(H, pi_):
    dot = np.dot(H, pi_)
    EF = 100*(dot/sum(dot))
    return EF

Wall time: 0 ns


In [3]:
%%time
# WITH TOY MATRIX

#Create Adjacency Matrix
a = np.zeros((6, 6))

with open('links.txt/toy.txt') as f:
    lines=f.readlines()
    for line in lines:
        myarray = np.fromstring(line, dtype=int, sep=',')
        a[myarray[0],myarray[1]] = myarray[2]
               
print(a)
print(a.shape)

# Modifying the Adjacency Matrix
H = modify_ad_mat(a)
print("\nModified adjacency mat: \n", a)

# Dangling nodes
d = get_dangling_nodes(H)
print("\nDangling mat: \n", d)

#Article Vector
articles = np.array([3,2,5,1,2,1])
Atot = get_article_vec(articles, sum(articles))
print("\nArticle Vector: \n", Atot)

#Initial vector
pi0 = get_init_vec(Atot)
print("\nInitial vector: \n", pi0)

#Calculating the influence vector
alpha =  0.85
max_itr = 100
pi_final = influenec_vec(max_itr, H, pi0, alpha, d)
print("\nFinal influence vector: \n", pi_final)
    
# Calculating Eigenfactor
EF = eigenfactor(H, pi_final)
print("\nEigenfactor: \n", EF)


[[1. 0. 2. 0. 4. 3.]
 [3. 0. 1. 1. 0. 0.]
 [2. 0. 4. 0. 1. 0.]
 [0. 0. 1. 0. 0. 1.]
 [8. 0. 3. 0. 5. 2.]
 [0. 0. 0. 0. 0. 0.]]
(6, 6)
After diagonals: 
 [[0. 0. 2. 0. 4. 3.]
 [3. 0. 1. 1. 0. 0.]
 [2. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 1.]
 [8. 0. 3. 0. 0. 2.]
 [0. 0. 0. 0. 0. 0.]]
Colm Sum: 
 [13.  0.  7.  1.  5.  6.]

Modified adjacency mat: 
 [[0.         0.         0.28571429 0.         0.8        0.5       ]
 [0.23076923 0.         0.14285714 1.         0.         0.        ]
 [0.15384615 0.         0.         0.         0.2        0.        ]
 [0.         0.         0.14285714 0.         0.         0.16666667]
 [0.61538462 0.         0.42857143 0.         0.         0.33333333]
 [0.         0.         0.         0.         0.         0.        ]]

Dangling mat: 
 [0 1 0 0 0 0]

Sum article: 
 14

Article Vector: 
 [[0.21428571]
 [0.14285714]
 [0.35714286]
 [0.07142857]
 [0.14285714]
 [0.07142857]]

Initial vector: 
 [[0.16666667]
 [0.16666667]
 [0.16666667]
 [0.16666667]
 [0.1666666

In [7]:
%%time
# WITH MAIN DATA

#Create Adjacency Matrix
a = np.zeros((10748, 10748)) #size given in file

with open('links.txt/links.txt') as f:
    lines=f.readlines()
    for line in lines:
        myarray = np.fromstring(line, dtype=int, sep=',')
        a[myarray[0],myarray[1]] = myarray[2]

print(a)
print(a.shape)

# Modifying the Adjacency Matrix
H = modify_ad_mat(a)
print("\nModified adjacency mat: \n", a)

# Dangling nodes
d = get_dangling_nodes(H)
print("\nDangling mat: \n", d)

#Article Vector
articles = np.full((1,10748), 1)
sum_art = np.array(articles, dtype=np.uint64).sum()
Atot = get_article_vec(articles, sum_art)
print("\nArticle Vector: \n", Atot)

#Initial vector
pi0 = get_init_vec(Atot)
print("\nInitial vector: \n", pi0)

#Calculating the influence vector
alpha =  0.85
max_itr = 100
pi_final = influenec_vec(max_itr, H, pi0, alpha, d)
print("\nFinal influence vector: \n", pi_final)
    
# Calculating Eigenfactor
EF = eigenfactor(H, pi_final)
print("\nEigenfactor: \n", EF)

[[  34.    0.    0. ...    0.    0.    0.]
 [   0.   21.    0. ...    0.    0.    0.]
 [   0.    0. 1594. ...    0.    0.    0.]
 ...
 [   0.    0.    0. ...   20.    0.    0.]
 [   0.    0.    0. ...    0.    0.    0.]
 [   0.    0.    0. ...    0.    0.   40.]]
(10748, 10748)
After diagonals: 
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
Colm Sum: 
 [ 388.   80. 4200. ...  422.   13.  437.]

Modified adjacency mat: 
 [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

Dangling mat: 
 [0 0 0 ... 0 0 0]

Sum article: 
 10748

Article Vector: 
 [[9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]
 ...
 [9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]]

Initial vector: 
 [[9.30405657e-05]
 [9.30405657e-05]
 [9.30405657e-05]
 ...
 [9.30405657e-05]
 [9.30405657e-05]
 [9.3

In [12]:
# Getting top 20 for report
np.set_printoptions(suppress=True) #suppressing e power

EF_with_journal = np.c_[EF,0:len(EF)] #adding journal number as another column in array
EF_sorted_desc = EF_with_journal[EF_with_journal[:,0].argsort()[::-1]] #sorting new array by EF values in desc
print(EF_sorted_desc[0:20]) #print top 20

[[   1.10905233 8930.        ]
 [   0.24683124  725.        ]
 [   0.24326813  239.        ]
 [   0.23463002 6523.        ]
 [   0.22609582 6569.        ]
 [   0.22470328 6697.        ]
 [   0.21674271 6667.        ]
 [   0.20650839 4408.        ]
 [   0.20099694 1994.        ]
 [   0.18516008 2992.        ]
 [   0.18240841 5966.        ]
 [   0.18061044 6179.        ]
 [   0.17490486 1922.        ]
 [   0.16999618 7580.        ]
 [   0.16989526  900.        ]
 [   0.16760067 1559.        ]
 [   0.16351126 1383.        ]
 [   0.1504432  1223.        ]
 [   0.14916735  422.        ]
 [   0.14858248 5002.        ]]


### Report for toy data

#### Time: 30 ms
#### Iterations: 17


### Report for main data

#### Time: 1min 13s
#### Iterations: 21