## Q-10 Page Rank
Implement Page Rank Algorithm. (Use python or beautiful soup for implementation).

In [1]:
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6, nstart=None, weight='weight', dangling=None):
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(nstart.values())) # Normalized nstart vector
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N) # Assign uniform personalization vector if not given
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary must have a value for every node. Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        dangling_weights = p# Use personalization vector if dangling vector not specified
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary must have a value for every node. Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v/s) for k, v in dangling.items())

    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is doing a left multiply x^T=xlast^T*W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]

        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N*tol:
            return x
    raise NetworkXError('Pagerank: power iteration failed to converge in %d iterations.' % max_iter)

In [2]:
import networkx as nx
G = nx.barabasi_albert_graph(60, 41)
pr = nx.pagerank(G, 0.4)

In [3]:
print(pr)

{0: 0.02807896795872708, 1: 0.012754580183859235, 2: 0.012964321412127123, 3: 0.01297511160813396, 4: 0.012763419568358173, 5: 0.012371135321815344, 6: 0.011756847642128, 7: 0.012764033235410836, 8: 0.012965443414866606, 9: 0.012567588458424837, 10: 0.013779091000588972, 11: 0.013365008661775582, 12: 0.012378081178793052, 13: 0.012988597345350315, 14: 0.012372186276776987, 15: 0.013575949161974354, 16: 0.012580026877320488, 17: 0.013358904084663144, 18: 0.012963492875053911, 19: 0.013579710705306938, 20: 0.013390851053252158, 21: 0.012755361294500861, 22: 0.01258194319975621, 23: 0.013584013470495513, 24: 0.012558450924657618, 25: 0.012952360974702074, 26: 0.012966561430823316, 27: 0.012566807468649523, 28: 0.013153472858075211, 29: 0.012756321669434577, 30: 0.012975977500632237, 31: 0.013370592970508207, 32: 0.012980115260670612, 33: 0.012757760996106893, 34: 0.013166707723419243, 35: 0.01317294433438502, 36: 0.013567331935927754, 37: 0.012979867082150128, 38: 0.013779091000588972, 39

# ANOTHER ONE

In [20]:
import networkx as nx
from bs4 import BeautifulSoup
import requests

In [21]:
class PageRank:
    def __init__(self, damping_factor=0.85, max_iter=100, tol=1.0e-6):
        self.damping_factor = damping_factor
        self.max_iter = max_iter
        self.tol = tol
        self.graph = nx.DiGraph()

    def add_edge(self, from_node, to_node):
        self.graph.add_edge(from_node, to_node)

    def compute_pagerank(self):
        pagerank = nx.pagerank(self.graph, alpha=self.damping_factor, max_iter=self.max_iter, tol=self.tol)
        return pagerank

    def scrape_links(self, url):
        try:
            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'html.parser')
            links = set()
            for link in soup.find_all('a', href=True):
                links.add(link['href'])
            return links
        except Exception as e:
            print(f"Error scraping {url}: {e}")
            return set()

    def build_graph_from_urls(self, seed_url, depth=1):
        urls_to_visit = [seed_url]
        visited_urls = set()

        for _ in range(depth):
            new_urls = []
            for url in urls_to_visit:
                if url not in visited_urls:
                    visited_urls.add(url)
                    links = self.scrape_links(url)
                    for link in links:
                        self.add_edge(url, link)  # Create edges in the graph
                        new_urls.append(link)
            urls_to_visit = new_urls

In [22]:
pagerank = PageRank()
# Example: Build graph from URLs (seeds)
# https://en.wikipedia.org/wiki/Web_scraping
# https://medium.com/@arti.singh280/list/the-quantum-world-6126d55e1882
seed_url = "https://docs.quantum.ibm.com/api/qiskit/release-notes/0.44#misc-deprecations"  # Change this URL as needed
pagerank.build_graph_from_urls(seed_url, depth=2)

# Compute PageRank
ranks = pagerank.compute_pagerank()

# Print the PageRank scores
print("PageRank Scores:")
for url, score in ranks.items():
    print(f"{url}: {score:.4f}")

Error scraping /api/qiskit/0.45/qiskit.circuit.IfElseOp#condition: Invalid URL '/api/qiskit/0.45/qiskit.circuit.IfElseOp#condition': No scheme supplied. Perhaps you meant https:///api/qiskit/0.45/qiskit.circuit.IfElseOp#condition?
Error scraping /api/qiskit/0.45/circuit_classical#qiskit.circuit.classical.expr.logic_or: Invalid URL '/api/qiskit/0.45/circuit_classical#qiskit.circuit.classical.expr.logic_or': No scheme supplied. Perhaps you meant https:///api/qiskit/0.45/circuit_classical#qiskit.circuit.classical.expr.logic_or?
Error scraping /api/qiskit/0.45/circuit_classical#qiskit.circuit.classical.expr.logic_not: Invalid URL '/api/qiskit/0.45/circuit_classical#qiskit.circuit.classical.expr.logic_not': No scheme supplied. Perhaps you meant https:///api/qiskit/0.45/circuit_classical#qiskit.circuit.classical.expr.logic_not?
Error scraping /api/qiskit/0.45/qiskit.dagcircuit.DAGCircuit#substitute_node: Invalid URL '/api/qiskit/0.45/qiskit.dagcircuit.DAGCircuit#substitute_node': No scheme s