In [24]:
import requests
import time
import json
import os

# Path of the JSON cache for crawl results: save_data() writes it and
# load_existing_data() reads it back, so roots already present are not re-fetched.
DATA_FILE = "IIT_citation_data.json"

# Function to fetch citation data for a given paper ID with retries
def fetch_citations(paper_id, max_retries=3):
    """Fetch title, authors and citation entries for one paper, with retries.

    Args:
        paper_id: Semantic Scholar paper identifier interpolated into the URL.
        max_retries: Maximum number of HTTP attempts before giving up.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise ``None`` (non-retryable
        status code, or every attempt failed / was rate limited).
    """
    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}?fields=title,authors,citations.paperId,citations.title,citations.authors"

    for attempt in range(max_retries):
        # Sleeping after the last attempt only delays the caller: nothing follows it.
        is_last_attempt = attempt == max_retries - 1
        try:
            response = requests.get(url, timeout=10)

            if response.status_code == 200:
                return response.json()
            elif response.status_code == 429:  # Rate limit exceeded
                if is_last_attempt:
                    print("Rate limit hit. No retries left.")
                else:
                    wait_time = 2 ** attempt  # Exponential backoff: 1, 2, 4, ...
                    print(f"Rate limit hit. Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
            else:
                # Any other status is treated as permanent for this paper.
                print(f"Error {response.status_code} for {paper_id}: {response.text}")
                return None

        # ValueError covers a malformed JSON body on requests versions whose
        # Response.json() raises a plain ValueError instead of a RequestException.
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Request failed (Attempt {attempt+1}/{max_retries}): {e}")
            if not is_last_attempt:
                time.sleep(2)  # Small delay before retrying

    print(f"Failed to fetch data for {paper_id} after {max_retries} attempts.")
    return None

# Load existing data if available
def load_existing_data():
    """Return the cached crawl results stored in DATA_FILE.

    Returns an empty dict when the file does not exist or when its content
    is not valid JSON (a notice is printed in the latter case).
    """
    if not os.path.exists(DATA_FILE):
        return {}

    with open(DATA_FILE, "r") as handle:
        try:
            stored = json.load(handle)
        except json.JSONDecodeError:
            # Unreadable cache: report it and behave as if nothing was saved.
            print("Error loading existing data. Starting fresh.")
            stored = {}
    return stored

# Save data to a file
def save_data(data):
    """Write ``data`` to DATA_FILE as indented JSON without risking the old file.

    The payload is serialized to a sibling temporary file first and only then
    moved over DATA_FILE. If serialization fails or is interrupted, the
    previously saved file is left untouched and the temporary file is removed.

    Args:
        data: JSON-serializable object to persist.

    Raises:
        TypeError / ValueError: propagated from ``json.dump`` for unserializable data.
        OSError: propagated from the file operations.
    """
    tmp_path = f"{DATA_FILE}.tmp"
    try:
        with open(tmp_path, "w") as file:
            json.dump(data, file, indent=4)
        # Opening DATA_FILE with "w" would truncate it before the dump finishes;
        # swapping in the finished temp file avoids that window.
        os.replace(tmp_path, DATA_FILE)
    finally:
        # Only present if the dump or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

# Root paper IDs
root_paper_ids = [
    "c3935b75b1f747864a07cc2a99b0b8fdcb34c26a",  # Paper 1
    "0b0feefb1d22078ea6e7ebc01801f7d179ed970c"   # Paper 2
]

# Crawl tuning knobs
TIME_LIMIT_SECONDS = 180   # Per-root crawl budget (3 minutes)
REQUEST_DELAY_SECONDS = 5  # Pause between consecutive API calls


def _author_names(entries):
    """Return author names from an API author list, tolerating null lists, entries and names."""
    return [(entry or {}).get("name") or "Unknown Author" for entry in entries or []]


# Load previously saved data
citation_data = load_existing_data()

# Fetch citation data for each paper if not already saved
for root_paper_id in root_paper_ids:
    if root_paper_id in citation_data:
        print(f"Data for {root_paper_id} already exists. Skipping fetch.")
        continue

    citation_dict = {}    # paper ID -> title
    author_dict = {}      # paper ID -> list of author names
    citation_edges = []   # [ID from the "citations" list, ID of the fetched paper]
    id_arr = [root_paper_id]  # Queue for BFS-like processing

    start_time = time.time()

    while id_arr:
        elapsed_time = time.time() - start_time
        if elapsed_time > TIME_LIMIT_SECONDS:
            print(f"Time limit exceeded for {root_paper_id}. Stopping collection.")
            break

        paper_id = id_arr.pop(0)
        data = fetch_citations(paper_id)

        if data:
            # `or` fallbacks also cover keys that are present but null, which
            # would otherwise break the join below and lose the unsaved crawl.
            paper_title = data.get("title") or "Unknown Paper"
            authors = _author_names(data.get("authors"))

            citation_dict[paper_id] = paper_title
            author_dict[paper_id] = authors  # Store authors

            print(f"\nCiting Papers for: {paper_title} (ID: {paper_id})")
            print(f"Authors: {', '.join(authors)}")

            for citation in data.get("citations") or []:
                citing_paper_id = citation.get("paperId")
                citing_paper_title = citation.get("title", "Unknown Title")
                citing_authors = _author_names(citation.get("authors"))

                # Entries without an ID or with an empty/null title are skipped.
                if citing_paper_id and citing_paper_title:
                    if citing_paper_id not in citation_dict:
                        citation_dict[citing_paper_id] = citing_paper_title
                        author_dict[citing_paper_id] = citing_authors
                        id_arr.append(citing_paper_id)

                    citation_edges.append([citing_paper_id, paper_id])

                    print(f"{citing_paper_title} (ID: {citing_paper_id})")
                    print(f"Authors: {', '.join(citing_authors)}")

        # Throttle only when another request will follow.
        if id_arr:
            time.sleep(REQUEST_DELAY_SECONDS)

    citation_data[root_paper_id] = {
        "dict": citation_dict,
        "authors": author_dict,
        "edges": citation_edges
    }

    # Persist after each root so a later failure keeps the finished roots.
    save_data(citation_data)

print("\nFinished fetching citations for all papers.")

Rate limit hit. Retrying in 1 seconds...

Citing Papers for: Processor modeling for hardware software codesign (ID: c3935b75b1f747864a07cc2a99b0b8fdcb34c26a)
Authors: V. Rajesh, R. Moona
ISADL: An Instruction Set Architecture Description Language for VLIW (ID: 818de5cf1c0c60e166ac3a8d948fa59c82d21806)
Authors: Xin Xiao, Zhong Liu
Processor Modeling and Design Tools (ID: 3a7d0340c5a8b54d726d3396df25d9585efbeaf3)
Authors: A. Chattopadhyay, N. Dutt, R. Leupers, P. Mishra
A constraint-based WCET computation framework (ID: d994b7a7e8687eead59d1a9bcdd0649dd3f2bc78)
Authors: Hajer Herbegue, M. Filali, H. Cassé
Hardware architecture specification and constraint-based WCET computation (ID: 6f7105f41b189410a6f1e175fc2e9f0fe94dbb95)
Authors: Hajer Herbegue, H. Cassé, M. Filali, Christine Rochange
Using the CASM language for simulator synthesis and model verification (ID: 6e85b5b82b0cee627feb560b997c4d8791b8e0f4)
Authors: Roland Lezuo, A. Krall
Harmless, a hardware architecture description languag

In [27]:
import json
import networkx as nx
from pyvis.network import Network

# File path for stored citation data (the JSON file read by load_data below).
# NOTE(review): same value as the DATA_FILE in the fetch cell; presumably
# re-declared so this cell can run on its own — keep the two in sync.
DATA_FILE = "IIT_citation_data.json"

# Load citation data from JSON file
def load_data():
    """Load the stored citation data from DATA_FILE.

    Returns:
        The decoded JSON content, or an empty dict when the file is missing
        or is not valid JSON (a message naming the file is printed).
    """
    try:
        with open(DATA_FILE, "r") as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # Name the file actually being read, not a hard-coded file name.
        print(f"Error loading data. Ensure {DATA_FILE} exists and is correctly formatted.")
        return {}

# Function to draw the citation graph using PyVis with author details
def draw_graph_pyvis(root_paper_id, citation_edges, citation_dict, author_dict, paper_num):
    """Render one citation network as an interactive PyVis page and mirror it in networkx.

    Args:
        root_paper_id: ID of the node drawn large, dark red and with physics disabled.
        citation_edges: Iterable of 2-item pairs; each edge is drawn from the
            second item of the pair to the first.
        citation_dict: Mapping of paper ID to title; one node is created per entry.
        author_dict: Mapping of paper ID to a list of author names; papers
            missing from it are shown with "Unknown Author".
        paper_num: Number used in the output file name.

    Returns:
        A ``networkx.DiGraph`` with the same nodes and edges as the rendered view.
    """
    net = Network(notebook=True, directed=True, height="800px", width="100%")

    # Strong repulsion and long springs keep the nodes spread apart.
    net.barnes_hut(gravity=-5000, central_gravity=0.2, spring_length=500, spring_strength=0.1)

    graph = nx.DiGraph()

    for paper_id, title in citation_dict.items():
        author_str = ", ".join(author_dict.get(paper_id, ["Unknown Author"]))
        tooltip_text = f"Title: {title}, Authors: {author_str}"  # Shown on hover

        if paper_id == root_paper_id:
            node_style = dict(
                label=f"{title}",
                color="darkred",
                size=50,
                font={"size": 30, "bold": True},
                physics=False,  # Fix the root node in place
            )
        else:
            node_style = dict(
                label=title,
                color="pink",
                size=15,
                font={"size": 12},
            )

        net.add_node(paper_id, title=tooltip_text, **node_style)
        graph.add_node(paper_id)

    for edge_head, edge_tail in citation_edges:
        net.add_edge(edge_tail, edge_head)
        graph.add_edge(edge_tail, edge_head)

    # Write the HTML page and display it
    file_name = f"IIT_citation_graph_{paper_num}.html"
    net.show(file_name)
    print(f"Graph saved as {file_name}")

    return graph  # networkx graph for analysis

# Read the cached crawl results
citation_data = load_data()

# Build one PyVis page and one networkx graph per stored root paper
graphs = {}  # root paper ID -> networkx graph
for idx, root_paper_id in enumerate(citation_data):
    record = citation_data[root_paper_id]
    citation_edges, citation_dict = record["edges"], record["dict"]
    author_dict = record.get("authors", {})  # Records without an "authors" key fall back to {}
    graphs[root_paper_id] = draw_graph_pyvis(
        root_paper_id, citation_edges, citation_dict, author_dict, idx + 1
    )

print("\nGraphs generated and saved.")

IIT_citation_graph_1.html
Graph saved as IIT_citation_graph_1.html
IIT_citation_graph_2.html
Graph saved as IIT_citation_graph_2.html

Graphs generated and saved.


In [26]:
# Function to compute network measures
def compute_measures(G, paper_num):
    """Print diameter, centrality, HITS, clustering and component statistics for a graph.

    Args:
        G: networkx graph to analyse (the diameter section uses strongly
            connected components, so a directed graph is expected there).
        paper_num: Number shown in the report heading.

    Returns:
        None. All results are printed. An empty graph prints a notice and returns
        early; a measure whose power iteration does not converge is reported and
        skipped, and the remaining measures are still computed.
    """
    print(f"\n Network Measures for Paper {paper_num}")

    # The connectivity checks below raise on a graph with no nodes.
    if G.number_of_nodes() == 0:
        print("Graph is empty. No measures to compute.")
        return

    # Compute Diameter
    if nx.is_strongly_connected(G):
        diameter = nx.diameter(G)
        print(f"Graph Diameter: {diameter}")
    else:
        print("Graph is not strongly connected. Diameter cannot be computed.")
        # Without directed cycles every strongly connected component is a
        # single node, in which case the value printed here is 0.
        sccs = list(nx.strongly_connected_components(G))
        largest_scc = max(sccs, key=len)
        G_scc = G.subgraph(largest_scc)
        diameter_scc = nx.diameter(G_scc)
        print(f"Diameter of the Largest Strongly Connected Component: {diameter_scc}")

    # Centrality Measures
    _print_top_nodes("Degree Centrality", nx.degree_centrality(G))
    _print_top_nodes("Closeness Centrality", nx.closeness_centrality(G))
    _print_top_nodes("Betweenness Centrality", nx.betweenness_centrality(G))

    eigenvector_centrality = _run_power_iteration(
        "Eigenvector Centrality", lambda: nx.eigenvector_centrality(G, max_iter=1000)
    )
    if eigenvector_centrality is not None:
        _print_top_nodes("Eigenvector Centrality", eigenvector_centrality)

    pagerank = _run_power_iteration("PageRank", lambda: nx.pagerank(G))
    if pagerank is not None:
        _print_top_nodes("PageRank", pagerank)

    # HITS Algorithm
    hits_scores = _run_power_iteration("HITS", lambda: nx.hits(G))
    if hits_scores is not None:
        hubs, authorities = hits_scores
        _print_top_nodes("Hub Score", hubs)
        _print_top_nodes("Authority Score", authorities)

    # Clustering Coefficients
    global_clustering = nx.average_clustering(G)
    local_clustering = nx.clustering(G)

    print(f"\nGlobal Clustering Coefficient: {global_clustering}")
    _print_top_nodes("Local Clustering Coefficient", local_clustering)

    # Connected Components
    if nx.is_directed(G):
        weakly_connected_components = list(nx.weakly_connected_components(G))
        print(f"\nNumber of Weakly Connected Components: {len(weakly_connected_components)}")
    else:
        connected_components = list(nx.connected_components(G))
        print(f"\nNumber of Connected Components: {len(connected_components)}")


def _print_top_nodes(label, scores):
    """Print the five highest-scoring (node, score) pairs under a 'Top 5 Nodes by <label>' heading."""
    print(f"\nTop 5 Nodes by {label}:")
    print(sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5])


def _run_power_iteration(label, compute):
    """Call ``compute()``; on a power-iteration convergence failure print a notice and return None."""
    try:
        return compute()
    except nx.PowerIterationFailedConvergence:
        print(f"\n{label} did not converge. Skipping.")
        return None

# Compute network measures for each graph.
# Iterate over `graphs` itself rather than `root_paper_ids`: the graphs are keyed
# and numbered by the entries of the stored JSON file, so this avoids a KeyError
# (or mismatched paper numbers) when that file and the root ID list differ, and
# removes the dependency on the fetch cell having been run in this kernel.
for idx, root_paper_id in enumerate(graphs):
    compute_measures(graphs[root_paper_id], idx + 1)


 Network Measures for Paper 1
Graph is not strongly connected. Diameter cannot be computed.
Diameter of the Largest Strongly Connected Component: 0

Top 5 Nodes by Degree Centrality:
[('9e4f70e357319cdaab1af7ba8251e98dc9ed321b', 0.2948328267477204), ('bdec376a708b925d235aefcdd748f5c8dca8bc88', 0.22188449848024316), ('c3935b75b1f747864a07cc2a99b0b8fdcb34c26a', 0.18541033434650456), ('92e247b6c18ef5d3c539c281fcf33b27388bb854', 0.0790273556231003), ('36906392fb7c4ea483c5abdfcbd6d76663e01bb6', 0.060790273556231005)]

Top 5 Nodes by Closeness Centrality:
[('6d5986ef3e16b1fb5efe8da4a0b56c51187334d1', 0.014963759644610709), ('84664b3e75b9e661c08307067b34d9679d14daed', 0.01094224924012158), ('8d68b7f29acdcc39e5fc08a57ce75e47693cde0d', 0.01094224924012158), ('b13589b7555c8113367147a3547b61f89bfe8d18', 0.010855405992184108), ('4f6322139aaf30f473e5ac5cfc99eaf8711a5118', 0.009498480243161094)]

Top 5 Nodes by Betweenness Centrality:
[('9e4f70e357319cdaab1af7ba8251e98dc9ed321b', 0.0029144117429016