In [2]:
import csv
import numpy as np

In [3]:
# the input data is contained in the dictionary pages_dict
# the key is the index of the src page. for each key there is a further dictionary, 
# containing the degree of the src page and the list of its destinations
#{
#    src1: {"degree":degree1, "destinations": [destinations1]},
#    src2: {"degree":degree2, "destinations": [destinations2]},
#    ...
#    
#}
pages_dict={}

#the pages_dict dictionary is initialised with the contents of the "example_index" file. 
#the "degree" and destinations fields are still empty. they will be filled reading the "example_arcs" file.
with open('example_index') as index_file:                                                                                          
    index_list = csv.reader(index_file, delimiter='\t')
    for index in index_list:
        pages_dict[int(index[1])]={"url":index[0],"degree":0,"destinations":[]}

In [4]:
# the pages_dict dictionary is updated with the destination information. 
# reading the lines of the "example_arcs" file, for any destination the "degree" field is incremented, 
# and the destination is added to the "destination" list of the relevant element
with open('example_arcs') as arcs_file:                                                                                          
    arclist = csv.reader(arcs_file, delimiter='\t')
    for arc in arclist:
        pages_dict[int(arc[0])]["degree"]+=1
        pages_dict[int(arc[0])]["destinations"].append(int(arc[1]))

In [5]:
pages_dict

{0: {'url': '1000notes.com', 'degree': 0, 'destinations': []},
 1: {'url': '100500.tv', 'degree': 0, 'destinations': []},
 2: {'url': 'abebooks.com', 'degree': 0, 'destinations': []},
 3: {'url': 'abebooks.de', 'degree': 0, 'destinations': []},
 4: {'url': 'amazon-presse.de', 'degree': 0, 'destinations': []},
 5: {'url': 'amazon.ca', 'degree': 0, 'destinations': []},
 6: {'url': 'amazon.cn', 'degree': 0, 'destinations': []},
 7: {'url': 'amazon.co.jp',
  'degree': 10,
  'destinations': [5, 6, 8, 9, 10, 12, 26, 57, 70, 82]},
 8: {'url': 'amazon.co.uk', 'degree': 0, 'destinations': []},
 9: {'url': 'amazon.com',
  'degree': 25,
  'destinations': [2,
   5,
   6,
   7,
   8,
   10,
   11,
   12,
   13,
   17,
   21,
   27,
   35,
   38,
   41,
   42,
   52,
   70,
   82,
   83,
   84,
   92,
   97,
   103,
   104]},
 10: {'url': 'amazon.de',
  'degree': 16,
  'destinations': [3, 4, 5, 6, 7, 8, 9, 12, 18, 26, 28, 53, 56, 63, 70, 82]},
 11: {'url': 'amazon.es', 'degree': 0, 'destinations': [

In [7]:
# N contains the number of pages
N=len(pages_dict)
print(f"{N} indexes have been read.")

# beta contains the beta factor
beta=0.9
print(f"beta is {beta}")

106 indexes have been read.
beta is 0.9


In [8]:
#the pages_dict structure is printed
print("The following dictionary has been created\n\n")
print("pages_dict={")
for src in pages_dict:
    print()
    print("\t",src,":\t{")
    print("\t\t'url': '",pages_dict[src]["url"],"',")
    print("\t\t'degree':",pages_dict[src]["degree"],",")
    print("\t\t'destinations'",pages_dict[src]["destinations"])
    print("\t\t}")
    print()
print("}")

The following dictionary has been created


pages_dict={

	 0 :	{
		'url': ' 1000notes.com ',
		'degree': 0 ,
		'destinations' []
		}


	 1 :	{
		'url': ' 100500.tv ',
		'degree': 0 ,
		'destinations' []
		}


	 2 :	{
		'url': ' abebooks.com ',
		'degree': 0 ,
		'destinations' []
		}


	 3 :	{
		'url': ' abebooks.de ',
		'degree': 0 ,
		'destinations' []
		}


	 4 :	{
		'url': ' amazon-presse.de ',
		'degree': 0 ,
		'destinations' []
		}


	 5 :	{
		'url': ' amazon.ca ',
		'degree': 0 ,
		'destinations' []
		}


	 6 :	{
		'url': ' amazon.cn ',
		'degree': 0 ,
		'destinations' []
		}


	 7 :	{
		'url': ' amazon.co.jp ',
		'degree': 10 ,
		'destinations' [5, 6, 8, 9, 10, 12, 26, 57, 70, 82]
		}


	 8 :	{
		'url': ' amazon.co.uk ',
		'degree': 0 ,
		'destinations' []
		}


	 9 :	{
		'url': ' amazon.com ',
		'degree': 25 ,
		'destinations' [2, 5, 6, 7, 8, 10, 11, 12, 13, 17, 21, 27, 35, 38, 41, 42, 52, 70, 82, 83, 84, 92, 97, 103, 104]
		}


	 10 :	{
		'url': ' amazon.de ',
		'degree': 16 

In [36]:
#r_old is initialised
r_old=np.ones(N)*1/N

# the r vector is calculated by iterations. the iterations are stopped when the maximum relative increment is lower than one part in 10000
for i in range(1000):
    #r_new is initialised for each iteration to include the (1-beta)/N factor
    r_new=np.ones(N)*(1-beta)/N
    #all the src pages are iterated 
    for src in pages_dict:
        #for each src page is taken the degree
        n=pages_dict[src]["degree"]
        
        if n != 0:
            #for each src page is taken the list of the destinations and it is iterated
            for destination in pages_dict[src]["destinations"]:
                #for each destination, the corresponding value of the vector r_new is updated
                r_new[destination]+=beta*r_old[src]/n
    

    if max(abs(r_new-r_old))/r_new.mean() < 0.000001:
        print("Threshold reached after",i,"iterations")
        break
    
    #the iteration is finished and r_old is assigned with the final r_new value for the next iteration
    r_old=r_new
    


Threshold reached after 18 iterations


In [37]:
print("Vector R=",r_old)

Vector R= [0.00125959 0.00108378 0.00098292 0.00100397 0.00100397 0.00113741
 0.00113741 0.00104349 0.00113741 0.00109788 0.00107683 0.00098292
 0.00113741 0.00098292 0.00108378 0.0009434  0.0016726  0.00098292
 0.00100397 0.00112014 0.0009434  0.00098292 0.00146983 0.00146983
 0.0009434  0.01345322 0.00109788 0.00098292 0.00100397 0.00279267
 0.00108378 0.0009434  0.00382399 0.00125959 0.00108378 0.00098292
 0.00125959 0.0009434  0.00183198 0.00108378 0.0009434  0.00098292
 0.00098292 0.00161021 0.00196276 0.00153222 0.00112014 0.00146983
 0.00245926 0.00146983 0.00108378 0.00108378 0.00150935 0.00100397
 0.0009434  0.00125959 0.00100397 0.00103731 0.00108378 0.0009434
 0.00146983 0.00146983 0.00195152 0.00100397 0.0009434  0.0009434
 0.00108378 0.00125959 0.00108378 0.0009434  0.00113741 0.00146983
 0.00146983 0.00146983 0.00160482 0.00125959 0.0009434  0.0009434
 0.00234829 0.00156099 0.00156099 0.00146983 0.00113741 0.00098292
 0.00098292 0.00108378 0.00143737 0.00108378 0.00245926

In [38]:
#the pages_dict data structure is update adding the calculated rank to each src page
for i in range(len(r_new)):
    pages_dict[i]["rank"]=r_new[i]

In [39]:
#the final pages_dict structure is printed
print("The pages dictionary has been updated with the calculated rank\n\n")
print("pages_dict={")
for src in pages_dict:
    print()
    print("\t",src,":\t{")
    print("\t\t'url': '",pages_dict[src]["url"],"',")
    print("\t\t'degree':",pages_dict[src]["degree"],",")
    print("\t\t'destinations'",pages_dict[src]["destinations"],",")
    print("\t\t'rank'",pages_dict[src]["rank"])
    print("\t\t}")
    print()
print("}")

The pages dictionary has been updated with the calculated rank


pages_dict={

	 0 :	{
		'url': ' 1000notes.com ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.0012595870815619305
		}


	 1 :	{
		'url': ' 100500.tv ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.0010837751371631657
		}


	 2 :	{
		'url': ' abebooks.com ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.000982919993781054
		}


	 3 :	{
		'url': ' abebooks.de ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.0010039681539167722
		}


	 4 :	{
		'url': ' amazon-presse.de ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.0010039681539167722
		}


	 5 :	{
		'url': ' amazon.ca ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.001137406194198178
		}


	 6 :	{
		'url': ' amazon.cn ',
		'degree': 0 ,
		'destinations' [] ,
		'rank' 0.001137406194198178
		}


	 7 :	{
		'url': ' amazon.co.jp ',
		'degree': 10 ,
		'destinations' [5, 6, 8, 9, 10, 12, 26, 57, 70, 82] ,
		'rank' 0.001043491921282732
		}


	 8 :	{
		'url'

In [40]:
s = 0
for i in range(106):
    s += pages_dict[i]['rank']

In [41]:
s

0.1515222914450789