<a href="https://colab.research.google.com/github/Mariyaben/Topic-Sensitive-Page-Rank-Algorithm/blob/main/TSPR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pyspark




In [2]:
from pyspark.sql import SparkSession
from pyspark import SparkContext, SparkConf


In [3]:
# Create a Spark session
spark = SparkSession.builder.appName("TopicSensitivePageRank").getOrCreate()

# Get the SparkContext
sc = spark.sparkContext


In [4]:
data = [
    ("A", 1.0, ["B", "C"]),
    ("B", 1.0, ["C", "A"]),
    ("C", 1.0, ["A"]),
    ("D", 1.0, ["C"]),
]


In [5]:
rdd = sc.parallelize(data)


In [6]:
DAMPING_FACTOR = 0.85
TOPIC_RELEVANT_PAGES = {"A", "C"}  # Example topic-relevant pages

def compute_contributions(page, rank, links):
    """Distributes rank to outgoing links."""
    num_links = len(links)
    if num_links == 0:
        return []

    contribution = rank / num_links
    return [(link, contribution) for link in links]

def topic_sensitive_pagerank(rdd, iterations=10):
    """Runs Topic-Sensitive PageRank using PySpark."""

    ranks = rdd.map(lambda x: (x[0], x[1]))  # (Page, Rank)
    links = rdd.map(lambda x: (x[0], x[2]))  # (Page, OutgoingLinks)

    for _ in range(iterations):
        contributions = links.join(ranks).flatMap(
            lambda x: compute_contributions(x[0], x[1][1], x[1][0])
        )

        new_ranks = contributions.reduceByKey(lambda x, y: x + y)

        # Apply topic-sensitive teleportation
        ranks = new_ranks.map(lambda x: (x[0],
            (1 - DAMPING_FACTOR) * (1.0 if x[0] in TOPIC_RELEVANT_PAGES else 0.15) +
            DAMPING_FACTOR * x[1])
        )

    return ranks.collect()

# Run Topic-Sensitive PageRank
final_ranks = topic_sensitive_pagerank(rdd)


In [7]:
for page, rank in final_ranks:
    print(f"Page: {page}, Rank: {rank:.4f}")


Page: C, Rank: 0.8676
Page: B, Rank: 0.5131
Page: A, Rank: 1.1335
