In [1]:
# Checked and working.
import xml.etree.ElementTree as ET
import networkx as nx
def parse_xml(xml_text):
    """Parse an XML document held in a string and return its root element."""
    return ET.fromstring(xml_text)
def generate_web_graph(xml_root):
    """Build a directed link graph from a parsed XML tree.

    Every ``<page>`` element found anywhere under ``xml_root`` becomes a node
    named by the text of its direct ``<id>`` child. Every ``<link>`` element
    nested inside that page becomes an edge from the page to the node named
    by the link's text (the target node is created if it does not exist yet).

    Identifiers are stripped of surrounding whitespace so that pretty-printed
    XML such as ``<link> 2 </link>`` refers to the same node as ``<id>2</id>``.
    Pages with a missing or empty ``<id>`` and links with no text are skipped,
    because they cannot be turned into a meaningful node.

    Args:
        xml_root: Root element of the document, as returned by ``parse_xml``.

    Returns:
        A ``networkx.DiGraph`` whose nodes are page-id strings.
    """
    G = nx.DiGraph()
    for page in xml_root.findall('.//page'):
        # findtext() yields None when <id> is absent and '' when it is empty;
        # normalise both to '' so a single check covers every unusable case.
        page_id = (page.findtext('id') or '').strip()
        if not page_id:
            continue
        # Register the page even if it has no outgoing links.
        G.add_node(page_id)

        for link in page.findall('.//link'):
            target_page_id = (link.text or '').strip()
            # An empty <link/> has no target; passing None to add_edge would
            # be rejected by networkx, so skip it.
            if target_page_id:
                G.add_edge(page_id, target_page_id)
    return G
def compute_topic_specific_pagerank(graph, topic_nodes, alpha=0.85, max_iter=100, tol=1e-6):
    """Compute PageRank with the teleport distribution restricted to a topic.

    Each graph node that appears in ``topic_nodes`` gets personalization
    weight 1.0 and every other node gets 0.0; the resulting mapping is handed
    to ``networkx.pagerank``.

    Args:
        graph: The networkx graph to rank.
        topic_nodes: Iterable of hashable node identifiers that make up the
            topic. Entries that are not nodes of ``graph`` have no effect.
        alpha: Forwarded to ``networkx.pagerank`` as ``alpha``.
        max_iter: Forwarded to ``networkx.pagerank`` as ``max_iter``.
        tol: Forwarded to ``networkx.pagerank`` as ``tol``.

    Returns:
        Whatever ``networkx.pagerank`` returns (a node -> score mapping).
    """
    # Materialise the topic once: membership tests become O(1) instead of a
    # scan per graph node, and a one-shot iterator is not exhausted by the
    # first few lookups.
    topic_set = set(topic_nodes)
    # NOTE(review): if no topic node is present in the graph, every weight is
    # zero; networkx is expected to reject an all-zero personalization vector
    # (ZeroDivisionError in current releases) - confirm for the pinned version.
    personalization = {node: 1.0 if node in topic_set else 0.0 for node in graph.nodes}
    return nx.pagerank(graph, alpha=alpha, personalization=personalization, max_iter=max_iter, tol=tol)
if __name__ == "__main__":
    # Demo document: three pages, each of which links to the other two.
    example_xml = '''
    <webpage>
    <page>
    <id>1</id>
    <link>2</link>
    <link>3</link>
    </page>
    <page>
    <id>2</id>
    <link>1</link>
    <link>3</link>
    </page>
    <page>
    <id>3</id>
    <link>1</link>
    <link>2</link>
    </page>
    </webpage>
    '''
    # Turn the text into an element tree, then into a directed link graph.
    xml_root = parse_xml(example_xml)
    web_graph = generate_web_graph(xml_root)
    # Rank the pages with pages "1" and "2" as the topic set.
    topic_specific_pagerank = compute_topic_specific_pagerank(
        web_graph,
        topic_nodes=['1', '2'],
    )
    # Report every node, highest score first.
    print("Topic Specific Pagerank:")
    ranked = list(topic_specific_pagerank.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    for node, score in ranked:
        print(f"Node : {node} - PageRank : {score:.4f}")


Topic Specific Pagerank:
Node : 1 - PageRank : 0.3509
Node : 2 - PageRank : 0.3509
Node : 3 - PageRank : 0.2982
