## PageRank Python Implementation

In [1]:
#Loading required packages
import numpy as np
from scipy.sparse import csc_matrix

In [4]:
def PageRankAlgo(G, d = .85, errorlim = .00001, tutor=False):
    
    #G: is the link matrix representing transitions
    #Each element is a binary value representing a transition from web page i to j.
    #d: damping factor
    #errorlim : if the sum of PageRanks between iterations is less than this, the algorithm converges
    
    n = G.shape[0]
    A = csc_matrix(G,dtype=np.float)
    if tutor:
        print ("Representation of sparse matrix as compressed column matrix A:", A)
    rowsums = np.array(A.sum(1))[:,0]
    if tutor:
        print ("Row sums for A:",rowsums)
    ri, ci = A.nonzero()
    A.data /= rowsums[ri]
    if tutor:
        print ("Final Hyperlink matrix A:", A)
    
    # boolean array of dangling nodes
    dnodes = rowsums==0
    if tutor:
        print ("Dangling nodes:", dnodes)

    # PageRank computation
    ro, r = np.zeros(n), np.ones(n)
    a = 0
    while np.sum(np.abs(r-ro)) > errorlim:
        ro = r.copy()
        for i in range(n):
            # backlinks for each page
            Ai = np.array(A[:,i].todense())[:,0]
            # default values for dangling nodes
            v = dnodes / float(n)
            # rank source
            E = np.ones(n) / float(n)

            r[i] = ro.dot( Ai*d + v*d + E*(1-d) )
        
        a = a + 1
        if tutor:
            print ("Iteration ",a,": PageRank un-normalized values: ",r)

    # Normalized PageRank values
    return r/float(sum(r))

In [5]:
#Run the PageRank algorithm for an example web link matrix

if __name__=='__main__':
    G = np.array([[0,0,1,0,0,0,0],
                  [0,1,1,0,0,1,0],
                  [1,0,1,1,0,0,0],
                  [0,0,0,1,1,0,0],
                  [0,0,1,0,0,0,1],
                  [1,0,0,0,0,1,1],
                  [0,0,0,1,1,0,1]])
    prvec = PageRankAlgo(G,d=.85,tutor=True)
    print("PageRank vector:",prvec)
    print("Pages Ranked as follows:", np.argsort(-prvec).argsort()+1)

Representation of sparse matrix as compressed column matrix A:   (2, 0)	1.0
  (5, 0)	1.0
  (1, 1)	1.0
  (0, 2)	1.0
  (1, 2)	1.0
  (2, 2)	1.0
  (4, 2)	1.0
  (2, 3)	1.0
  (3, 3)	1.0
  (6, 3)	1.0
  (3, 4)	1.0
  (6, 4)	1.0
  (1, 5)	1.0
  (5, 5)	1.0
  (4, 6)	1.0
  (5, 6)	1.0
  (6, 6)	1.0
Row sums for A: [ 1.  3.  3.  2.  2.  3.  3.]
Final Hyperlink matrix A:   (2, 0)	1.0
  (5, 0)	0.333333333333
  (1, 1)	0.333333333333
  (0, 2)	0.333333333333
  (1, 2)	0.333333333333
  (2, 2)	0.333333333333
  (4, 2)	0.333333333333
  (2, 3)	0.5
  (3, 3)	0.5
  (6, 3)	0.5
  (3, 4)	0.5
  (6, 4)	0.333333333333
  (1, 5)	0.333333333333
  (5, 5)	0.333333333333
  (4, 6)	0.333333333333
  (5, 6)	0.333333333333
  (6, 6)	0.333333333333
Dangling nodes: [False False False False False False False]
Iteration  1 : PageRank un-normalized values:  [ 1.28333333  0.43333333  1.28333333  1.425       0.85833333  0.71666667
  1.        ]
Iteration  2 : PageRank un-normalized values:  [ 1.44388889  0.27277778  1.24319444  1.72604167  

Iteration  92 : PageRank un-normalized values:  [ 0.61920017  0.09649501  0.59668663  0.84193119  0.53403089  0.13511627
  0.36431514]
Iteration  93 : PageRank un-normalized values:  [ 0.61377605  0.09564972  0.59145974  0.83455598  0.52935285  0.13393266
  0.36112379]
Iteration  94 : PageRank un-normalized values:  [ 0.60839945  0.09481184  0.58627862  0.82724538  0.52471579  0.13275943
  0.35796039]
Iteration  95 : PageRank un-normalized values:  [ 0.60306995  0.0939813   0.5811429   0.81999882  0.52011935  0.13159648
  0.35482471]
Iteration  96 : PageRank un-normalized values:  [ 0.59778714  0.09315804  0.57605216  0.81281573  0.51556317  0.13044371
  0.35171649]
Iteration  97 : PageRank un-normalized values:  [ 0.5925506   0.09234199  0.57100602  0.80569557  0.5110469   0.12930104
  0.34863549]
Iteration  98 : PageRank un-normalized values:  [ 0.58735993  0.09153308  0.56600408  0.79863778  0.50657019  0.12816838
  0.34558149]
Iteration  99 : PageRank un-normalized values:  [ 0.582

Iteration  172 : PageRank un-normalized values:  [ 0.30629583  0.04773257  0.2951592   0.41647278  0.26416568  0.06683711
  0.18021347]
Iteration  173 : PageRank un-normalized values:  [ 0.30361272  0.04731444  0.29257365  0.41282453  0.26185163  0.06625162
  0.17863482]
Iteration  174 : PageRank un-normalized values:  [ 0.30095311  0.04689997  0.29001074  0.40920824  0.25955784  0.06567127
  0.17707   ]
Iteration  175 : PageRank un-normalized values:  [ 0.2983168   0.04648914  0.28747028  0.40562363  0.25728415  0.065096
  0.17551889]
Iteration  176 : PageRank un-normalized values:  [ 0.29570358  0.0460819   0.28495208  0.40207042  0.25503037  0.06452576
  0.17398137]
Iteration  177 : PageRank un-normalized values:  [ 0.29311326  0.04567823  0.28245594  0.39854833  0.25279634  0.06396053
  0.17245732]
Iteration  178 : PageRank un-normalized values:  [ 0.29054562  0.04527809  0.27998166  0.3950571   0.25058187  0.06340024
  0.17094661]
Iteration  179 : PageRank un-normalized values:  [

Iteration  248 : PageRank un-normalized values:  [ 0.15694071  0.02445735  0.15123449  0.21339348  0.13535395  0.03424618
  0.09233828]
Iteration  249 : PageRank un-normalized values:  [ 0.15556593  0.02424311  0.1499097   0.21152418  0.13416826  0.03394619
  0.0915294 ]
Iteration  250 : PageRank un-normalized values:  [ 0.15420319  0.02403074  0.14859651  0.20967125  0.13299297  0.03364883
  0.09072762]
Iteration  251 : PageRank un-normalized values:  [ 0.15285239  0.02382023  0.14729482  0.20783456  0.13182796  0.03335407
  0.08993286]
Iteration  252 : PageRank un-normalized values:  [ 0.15151342  0.02361157  0.14600454  0.20601395  0.13067317  0.03306189
  0.08914506]
Iteration  253 : PageRank un-normalized values:  [ 0.15018618  0.02340474  0.14472556  0.2042093   0.12952849  0.03277227
  0.08836416]
Iteration  254 : PageRank un-normalized values:  [ 0.14887057  0.02319971  0.14345778  0.20242045  0.12839384  0.03248519
  0.0875901 ]
Iteration  255 : PageRank un-normalized values: 

Iteration  334 : PageRank un-normalized values:  [ 0.07364086  0.01147606  0.07096335  0.10013004  0.06351177  0.01606924
  0.04332764]
Iteration  335 : PageRank un-normalized values:  [ 0.07299578  0.01137553  0.07034172  0.09925292  0.06295541  0.01592848
  0.0429481 ]
Iteration  336 : PageRank un-normalized values:  [ 0.07235635  0.01127588  0.06972554  0.09838348  0.06240393  0.01578895
  0.04257188]
Iteration  337 : PageRank un-normalized values:  [ 0.07172251  0.0111771   0.06911475  0.09752165  0.06185728  0.01565064
  0.04219895]
Iteration  338 : PageRank un-normalized values:  [ 0.07109423  0.01107919  0.06850931  0.09666737  0.06131542  0.01551354
  0.04182929]
Iteration  339 : PageRank un-normalized values:  [ 0.07047146  0.01098214  0.06790918  0.09582058  0.0607783   0.01537764
  0.04146287]
Iteration  340 : PageRank un-normalized values:  [ 0.06985414  0.01088594  0.06731431  0.0949812   0.06024589  0.01524294
  0.04109967]
Iteration  341 : PageRank un-normalized values: 

Iteration  396 : PageRank un-normalized values:  [ 0.0426785   0.00665094  0.04112675  0.05803028  0.03680819  0.00931292
  0.0251105 ]
Iteration  397 : PageRank un-normalized values:  [ 0.04230464  0.00659268  0.04076648  0.05752194  0.03648575  0.00923134
  0.02489053]
Iteration  398 : PageRank un-normalized values:  [ 0.04193406  0.00653493  0.04040938  0.05701806  0.03616614  0.00915047
  0.02467249]
Iteration  399 : PageRank un-normalized values:  [ 0.04156672  0.00647768  0.04005539  0.05651859  0.03584933  0.00907031
  0.02445637]
Iteration  400 : PageRank un-normalized values:  [ 0.0412026   0.00642094  0.03970451  0.05602349  0.0355353   0.00899086
  0.02424213]
Iteration  401 : PageRank un-normalized values:  [ 0.04084167  0.00636469  0.03935671  0.05553273  0.03522401  0.0089121
  0.02402977]
Iteration  402 : PageRank un-normalized values:  [ 0.0404839   0.00630894  0.03901195  0.05504627  0.03491546  0.00883403
  0.02381928]
Iteration  403 : PageRank un-normalized values:  

Iteration  490 : PageRank un-normalized values:  [ 0.0186648   0.00290869  0.01798617  0.02537868  0.01609751  0.00407286
  0.0109817 ]
Iteration  491 : PageRank un-normalized values:  [ 0.0185013   0.00288321  0.01782861  0.02515636  0.0159565   0.00403719
  0.0108855 ]
Iteration  492 : PageRank un-normalized values:  [ 0.01833923  0.00285795  0.01767244  0.02493599  0.01581672  0.00400182
  0.01079015]
Iteration  493 : PageRank un-normalized values:  [ 0.01817858  0.00283292  0.01751763  0.02471756  0.01567817  0.00396677
  0.01069563]
Iteration  494 : PageRank un-normalized values:  [ 0.01801934  0.0028081   0.01736418  0.02450104  0.01554083  0.00393202
  0.01060193]
Iteration  495 : PageRank un-normalized values:  [ 0.01786149  0.0027835   0.01721207  0.02428641  0.0154047   0.00389757
  0.01050906]
Iteration  496 : PageRank un-normalized values:  [ 0.01770503  0.00275912  0.01706129  0.02407366  0.01526975  0.00386343
  0.010417  ]
Iteration  497 : PageRank un-normalized values: 

Iteration  577 : PageRank un-normalized values:  [ 0.00868132  0.00135288  0.00836567  0.01180405  0.00748722  0.00189436
  0.00510777]
Iteration  578 : PageRank un-normalized values:  [ 0.00860527  0.00134103  0.00829239  0.01170065  0.00742164  0.00187776
  0.00506303]
Iteration  579 : PageRank un-normalized values:  [ 0.00852989  0.00132928  0.00821975  0.01159815  0.00735663  0.00186132
  0.00501868]
Iteration  580 : PageRank un-normalized values:  [ 0.00845517  0.00131764  0.00814775  0.01149655  0.00729218  0.00184501
  0.00497472]
Iteration  581 : PageRank un-normalized values:  [ 0.0083811   0.0013061   0.00807637  0.01139585  0.0072283   0.00182885
  0.00493114]
Iteration  582 : PageRank un-normalized values:  [ 0.00830768  0.00129465  0.00800562  0.01129602  0.00716498  0.00181283
  0.00488794]
Iteration  583 : PageRank un-normalized values:  [ 0.00823491  0.00128331  0.0079355   0.01119707  0.00710222  0.00179695
  0.00484512]
Iteration  584 : PageRank un-normalized values: 

Iteration  648 : PageRank un-normalized values:  [ 0.00464821  0.00072437  0.00447921  0.00632021  0.00400886  0.00101429
  0.00273484]
Iteration  649 : PageRank un-normalized values:  [ 0.00460749  0.00071802  0.00443997  0.00626484  0.00397374  0.00100541
  0.00271088]
Iteration  650 : PageRank un-normalized values:  [ 0.00456713  0.00071173  0.00440107  0.00620996  0.00393893  0.0009966
  0.00268714]
Iteration  651 : PageRank un-normalized values:  [ 0.00452712  0.0007055   0.00436252  0.00615556  0.00390443  0.00098787
  0.0026636 ]
Iteration  652 : PageRank un-normalized values:  [ 0.00448747  0.00069932  0.00432431  0.00610164  0.00387023  0.00097921
  0.00264026]
Iteration  653 : PageRank un-normalized values:  [ 0.00444816  0.00069319  0.00428643  0.00604819  0.00383633  0.00097064
  0.00261714]
Iteration  654 : PageRank un-normalized values:  [ 0.00440919  0.00068712  0.00424888  0.00599521  0.00380272  0.00096213
  0.00259421]
Iteration  655 : PageRank un-normalized values:  

Iteration  734 : PageRank un-normalized values:  [ 0.00218107  0.00033989  0.00210177  0.00296561  0.00188107  0.00047593
  0.00128326]
Iteration  735 : PageRank un-normalized values:  [ 0.00216196  0.00033692  0.00208335  0.00293963  0.00186459  0.00047176
  0.00127202]
Iteration  736 : PageRank un-normalized values:  [ 0.00214302  0.00033396  0.0020651   0.00291388  0.00184826  0.00046763
  0.00126088]
Iteration  737 : PageRank un-normalized values:  [ 0.00212425  0.00033104  0.00204701  0.00288836  0.00183207  0.00046353
  0.00124983]
Iteration  738 : PageRank un-normalized values:  [ 0.00210564  0.00032814  0.00202908  0.00286306  0.00181602  0.00045947
  0.00123888]
Iteration  739 : PageRank un-normalized values:  [ 0.0020872   0.00032526  0.00201131  0.00283798  0.00180011  0.00045545
  0.00122803]
Iteration  740 : PageRank un-normalized values:  [ 0.00206891  0.00032242  0.00199369  0.00281312  0.00178434  0.00045146
  0.00121727]
Iteration  741 : PageRank un-normalized values: 

Iteration  809 : PageRank un-normalized values:  [ 0.00112742  0.00017569  0.00108643  0.00153296  0.00097234  0.00024601
  0.00066333]
Iteration  810 : PageRank un-normalized values:  [ 0.00111754  0.00017416  0.00107691  0.00151953  0.00096383  0.00024386
  0.00065752]
Iteration  811 : PageRank un-normalized values:  [ 0.00110775  0.00017263  0.00106747  0.00150622  0.00095538  0.00024172
  0.00065176]
Iteration  812 : PageRank un-normalized values:  [ 0.00109805  0.00017112  0.00105812  0.00149302  0.00094701  0.00023961
  0.00064605]
Iteration  813 : PageRank un-normalized values:  [ 0.00108843  0.00016962  0.00104885  0.00147995  0.00093872  0.00023751
  0.00064039]
Iteration  814 : PageRank un-normalized values:  [ 0.00107889  0.00016813  0.00103967  0.00146698  0.0009305   0.00023543
  0.00063478]
Iteration  815 : PageRank un-normalized values:  [ 0.00106944  0.00016666  0.00103056  0.00145413  0.00092234  0.00023336
  0.00062922]
Iteration  816 : PageRank un-normalized values: 

Iteration  884 : PageRank un-normalized values:  [  5.82774124e-04   9.08184395e-05   5.61585008e-04   7.92402425e-04
   5.02615153e-04   1.27167704e-04   3.42883369e-04]
Iteration  885 : PageRank un-normalized values:  [  5.77669097e-04   9.00228817e-05   5.56665596e-04   7.85461081e-04
   4.98212309e-04   1.26053731e-04   3.39879754e-04]
Iteration  886 : PageRank un-normalized values:  [  5.72608790e-04   8.92342928e-05   5.51789277e-04   7.78580543e-04
   4.93848033e-04   1.24949517e-04   3.36902451e-04]
Iteration  887 : PageRank un-normalized values:  [  5.67592810e-04   8.84526118e-05   5.46955673e-04   7.71760277e-04
   4.89521988e-04   1.23854975e-04   3.33951229e-04]
Iteration  888 : PageRank un-normalized values:  [  5.62620770e-04   8.76777783e-05   5.42164412e-04   7.64999756e-04
   4.85233838e-04   1.22770021e-04   3.31025859e-04]
Iteration  889 : PageRank un-normalized values:  [  5.57692284e-04   8.69097322e-05   5.37415121e-04   7.58298457e-04
   4.80983252e-04   1.21694

Iteration  971 : PageRank un-normalized values:  [  2.71058110e-04   4.22411937e-05   2.61202694e-04   3.68559782e-04
   2.33774816e-04   5.91478518e-05   1.59480859e-04]
Iteration  972 : PageRank un-normalized values:  [  2.68683676e-04   4.18711662e-05   2.58914592e-04   3.65331246e-04
   2.31726979e-04   5.86297242e-05   1.58083827e-04]
Iteration  973 : PageRank un-normalized values:  [  2.66330042e-04   4.15043802e-05   2.56646534e-04   3.62130991e-04
   2.29697080e-04   5.81161354e-05   1.56699033e-04]
Iteration  974 : PageRank un-normalized values:  [  2.63997025e-04   4.11408071e-05   2.54398343e-04   3.58958770e-04
   2.27684963e-04   5.76070455e-05   1.55326370e-04]
Iteration  975 : PageRank un-normalized values:  [  2.61684445e-04   4.07804189e-05   2.52169846e-04   3.55814337e-04
   2.25690472e-04   5.71024152e-05   1.53965731e-04]
Iteration  976 : PageRank un-normalized values:  [  2.59392122e-04   4.04231877e-05   2.49960870e-04   3.52697449e-04
   2.23713453e-04   5.66022