In [None]:
import pandas as pd
import numpy as np
import pickle
import math
import matplotlib.pyplot as plt
import networkx as nx

In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Directed graph
DiGraph = nx.read_edgelist('/content/drive/MyDrive/IR/Assignment3/Wiki-Vote.txt', comments='#', delimiter='\t', create_using=nx.DiGraph)

In [None]:
# list of child nodes of every node 
edgelist = pickle.load(open("/content/drive/MyDrive/IR/Assignment3/edgelist",'rb'))

# number of in and out degree of every node
in_out_degree = pickle.load(open("/content/drive/MyDrive/IR/Assignment3/in_out_degree",'rb'))

In [None]:
def pageRanking(max_itr):
  # initialize node rank with 1/(Number of Nodes)
  r = dict.fromkeys(DiGraph,1.0/len(DiGraph))

  # update value of rank
  for i in range(1,max_itr):
    rprevious = r
    r = dict.fromkeys(DiGraph,0)
    for key in r.keys():
      k = int(key)
      if in_out_degree[k][0]!=0:
        rank = 0
        if k in edgelist.keys():
          for n in edgelist[k]:
            if in_out_degree[n][1]!=0:
              rank += 0.8*rprevious[str(n)]/in_out_degree[n][1]
      r[key] = rank
    
    b = (1-sum(r.values()))/len(DiGraph)
    for key in r.keys():
      r[key] = r[key] + b
  return r
  

In [None]:
r = pageRanking(100)

In [None]:
sorted(r.items(), key=lambda x: x[1],reverse = True)

[('2565', 0.009438010519173635),
 ('1166', 0.0054741331852336626),
 ('1549', 0.005257993008557954),
 ('1374', 0.0044513297508182335),
 ('1151', 0.004064182817336744),
 ('5524', 0.003981891848595786),
 ('5802', 0.0038489062118703997),
 ('2972', 0.0036585632725494163),
 ('1608', 0.0033514143824402405),
 ('2658', 0.003183308039213882),
 ('6', 0.0031053690065793176),
 ('5189', 0.0030765166009585993),
 ('2485', 0.0030485066459949514),
 ('3453', 0.0030448100594621295),
 ('722', 0.002975499135183713),
 ('1305', 0.0029303930116693754),
 ('789', 0.0028883136248367435),
 ('2871', 0.002824183595185392),
 ('4310', 0.0027483425155386183),
 ('3352', 0.0027478998710408573),
 ('3447', 0.00272295423345969),
 ('5079', 0.0026065279264447907),
 ('2651', 0.0025635647599803264),
 ('737', 0.002558753867938459),
 ('813', 0.0024919059607141905),
 ('5800', 0.0024321508597427116),
 ('826', 0.0024264417276790035),
 ('2256', 0.0023982542865449534),
 ('311', 0.0023679604622576867),
 ('3787', 0.0023629426069918876),

In [None]:
# Edges in graph
wiki_edges = pickle.load(open("/content/drive/MyDrive/IR/Assignment3/wiki_edges",'rb'))

In [None]:
# List of parent nodes of each node
in_degree = {}
for i in range(len(wiki_edges)):
  edge = wiki_edges[i]
  
  if edge[1] in in_degree.keys():
    in_degree[edge[1]].append(edge[0])
  else:
    in_degree[edge[1]] = []
    in_degree[edge[1]].append(edge[0])

In [None]:
def auth_hub_score(max_itr):

  # initialize authority and hub score for every node as value 1
  auth_score = dict.fromkeys(DiGraph,1)
  hub_score = dict.fromkeys(DiGraph,1)

  for _ in range(1,max_itr):
    for key in DiGraph.nodes:
      old_auth = auth_score
      old_hub = hub_score

      # update Authority score
      if int(key) in in_degree.keys():
        auth_score[key] = sum(old_hub[str(k)] for k in in_degree[int(key)] if int(key) in in_degree.keys())
      else:
        auth_score[key] = 0

      # update Hubs score
      if int(key) in edgelist.keys():
        hub_score[key] = sum(old_auth[str(k)] for k in edgelist[int(key)])
      else:
        hub_score[key] = 0
      
      # Normalize Hubs Score and Authority Score
      auth_sum = sum([auth_score[key] for key in auth_score.keys()])
      hub_sum = sum([hub_score[key] for key in hub_score.keys()])
      for node in DiGraph.nodes:
        auth_score[node] /= auth_sum
        hub_score[node] /= hub_sum
  return auth_score,hub_score

In [None]:
auth_score, hub_score = auth_hub_score(100)

In [None]:
# pickle.dump(auth_score,open("/content/drive/MyDrive/IR/Assignment3/auth_score",'wb'))
# pickle.dump(hub_score,open("/content/drive/MyDrive/IR/Assignment3/hub_score",'wb'))

In [None]:
sorted(auth_score.items(), key= lambda x:x[1],reverse = True)

[('6950', 0.12283410394734937),
 ('6661', 0.10653852166080613),
 ('5880', 0.09243736483739717),
 ('6721', 0.041648455079773036),
 ('7765', 0.03669042375810286),
 ('5756', 0.029872942575773703),
 ('5514', 0.0291715896085008),
 ('5388', 0.027747062827709345),
 ('5778', 0.02774002181176072),
 ('8090', 0.02666576854702607),
 ('7952', 0.024849915364120328),
 ('5368', 0.024613239388955283),
 ('6088', 0.023962386903405093),
 ('4779', 0.021143293988495942),
 ('8169', 0.017644181968514114),
 ('4625', 0.015374994667565015),
 ('8024', 0.015111571374520627),
 ('7257', 0.015072272495179896),
 ('5260', 0.014292341550572438),
 ('4778', 0.012706782527714146),
 ('8296', 0.011371677235715744),
 ('8025', 0.011300819459283854),
 ('5406', 0.010243322284270488),
 ('7088', 0.009651261644437486),
 ('7520', 0.009601689847088698),
 ('4809', 0.009282499863184817),
 ('4216', 0.008653977178675904),
 ('4815', 0.008586889948399058),
 ('5044', 0.00828906870393977),
 ('2552', 0.00807722091128122),
 ('5726', 0.00799349

In [None]:
sorted(hub_score.items(), key= lambda x:x[1],reverse = True)

[('6483', 0.06875071619575998),
 ('6660', 0.05973482460250689),
 ('6132', 0.04952365829624222),
 ('7818', 0.04647976616379795),
 ('7682', 0.035965613515672325),
 ('5692', 0.03492567567071872),
 ('5203', 0.03013335775093676),
 ('6916', 0.02898995439540561),
 ('4967', 0.02496098362783597),
 ('6460', 0.02021832376744933),
 ('7614', 0.019925534470038253),
 ('5085', 0.019217575385328263),
 ('2346', 0.018901559661122773),
 ('7900', 0.017073254875737163),
 ('7132', 0.016084623094404626),
 ('7086', 0.0160285104782263),
 ('5531', 0.015855920904449993),
 ('7743', 0.015425659904582133),
 ('7964', 0.014944338443040824),
 ('7057', 0.014302661665654927),
 ('7727', 0.01380134149355243),
 ('6620', 0.01338275945615738),
 ('5190', 0.011146723932348957),
 ('5755', 0.010987855062174793),
 ('6615', 0.010034651901237077),
 ('5961', 0.009322797804678803),
 ('7055', 0.00837819284725244),
 ('5468', 0.00836455142133614),
 ('5320', 0.008048433043828957),
 ('7513', 0.007644754648815748),
 ('7515', 0.0075716522809