In [24]:
import requests
import time
import json
import os

# File path to store citation data
DATA_FILE = "NIT_citation_data.json"

# Function to fetch citation data for a given paper ID with retries
def fetch_citations(paper_id, max_retries=3):
    """Fetch title, authors and citation entries for one paper from Semantic Scholar.

    Sends a GET request to the Graph API paper endpoint, asking for the paper's
    ``title`` and ``authors`` plus the ``paperId``, ``title`` and ``authors`` of
    every entry in ``citations``.

    Retry policy:
      * HTTP 429: wait ``2 ** attempt`` seconds, then try again.
      * ``requests`` exception (timeout, connection error, ...): wait 2 seconds,
        then try again.
      * Any other non-200 status: print the error and give up immediately.
    No wait happens after the final attempt, because nothing would be retried.

    Args:
        paper_id: Paper identifier interpolated into the request URL.
        max_retries: Maximum number of requests to send.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None``.
    """
    url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}?fields=title,authors,citations.paperId,citations.title,citations.authors"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            response = requests.get(url, timeout=10)  # Set timeout to avoid hanging requests

            if response.status_code == 200:
                return response.json()

            if response.status_code != 429:
                # Not a rate limit: retrying the same request is not attempted.
                print(f"Error {response.status_code} for {paper_id}: {response.text}")
                return None

            # Rate limit exceeded.
            if is_last_attempt:
                # Do not announce (or wait for) a retry that will never happen.
                print("Rate limit hit.")
            else:
                wait_time = 2 ** attempt  # Exponential backoff
                print(f"Rate limit hit. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)

        except requests.exceptions.RequestException as e:
            print(f"Request failed (Attempt {attempt+1}/{max_retries}): {e}")
            if not is_last_attempt:
                time.sleep(2)  # Small delay before retrying

    print(f"Failed to fetch data for {paper_id} after {max_retries} attempts.")
    return None

# Load existing data if available
def load_existing_data():
    """Return the cached citation data stored in ``DATA_FILE``.

    Returns:
        The parsed JSON content of ``DATA_FILE``. An empty dict is returned when
        the file does not exist, or when it exists but is not valid JSON (a
        message is printed in the latter case).
    """
    # No cache on disk yet: nothing to resume from.
    if not os.path.exists(DATA_FILE):
        return {}

    with open(DATA_FILE, "r") as handle:
        try:
            cached = json.load(handle)
        except json.JSONDecodeError:
            print("Error loading existing data. Starting fresh.")
            cached = {}
    return cached

# Save data to a file
def save_data(data):
    """Write ``data`` to ``DATA_FILE`` as indented JSON without risking the old file.

    The JSON is first written to a temporary file next to ``DATA_FILE`` and then
    moved into place with ``os.replace``. If serialization fails or the kernel
    is interrupted mid-write, the previously saved file is left untouched
    instead of being truncated.

    Args:
        data: JSON-serializable object to persist.

    Raises:
        TypeError: If ``data`` contains values ``json.dump`` cannot serialize.
        OSError: If the file cannot be written or replaced.
    """
    tmp_path = f"{DATA_FILE}.tmp"
    try:
        with open(tmp_path, "w") as file:
            json.dump(data, file, indent=4)
        # Swap the finished file into place in a single step.
        os.replace(tmp_path, DATA_FILE)
    finally:
        # Only present if something failed before the replace; do not leave it behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

# Root paper IDs
root_paper_ids = [
    "559bcef033955ecb32aca43649b44fceb31c59aa",  # Paper 1
    "49af9b46020639c7b9f0a59c77b2e1b3bd53157c"   # Paper 2
]

# Per-root crawl budget and pause between consecutive API requests (seconds).
CRAWL_TIME_LIMIT_SECONDS = 180  # 3 minutes
REQUEST_PAUSE_SECONDS = 5


def _author_names(author_records):
    """Return author names from API author records, tolerating JSON nulls.

    ``dict.get(key, default)`` only applies the default when the key is absent,
    so a record with ``"name": null`` (or a null author list) would otherwise
    produce ``None`` and break ``', '.join(...)`` / iteration further down.
    """
    return [
        (record or {}).get("name") or "Unknown Author"
        for record in (author_records or [])
    ]


# Load previously saved data
citation_data = load_existing_data()

# Fetch citation data for each paper if not already saved
for root_paper_id in root_paper_ids:
    if root_paper_id in citation_data:
        print(f"Data for {root_paper_id} already exists. Skipping fetch.")
        continue

    citation_dict = {}    # paper ID -> title
    author_dict = {}      # paper ID -> list of author names
    citation_edges = []   # [paper ID from the "citations" list, ID of the fetched paper]
    id_arr = [root_paper_id]  # Queue for BFS-like processing

    # Set a time limit for this root paper
    start_time = time.time()

    while id_arr:
        elapsed_time = time.time() - start_time
        if elapsed_time > CRAWL_TIME_LIMIT_SECONDS:
            # NOTE: the partial result collected so far is still saved below, and
            # later runs skip this root because its key is already in the cache.
            print(f"Time limit exceeded for {root_paper_id}. Stopping collection.")
            break

        paper_id = id_arr.pop(0)
        data = fetch_citations(paper_id)

        if data:
            # `or` (not a .get default) so an explicit null also falls back.
            paper_title = data.get("title") or "Unknown Paper"
            authors = _author_names(data.get("authors"))

            citation_dict[paper_id] = paper_title
            author_dict[paper_id] = authors  # Store authors

            print(f"\nCiting Papers for: {paper_title} (ID: {paper_id})")
            print(f"Authors: {', '.join(authors)}")

            # Despite the `cited_` prefix, each entry is printed above as a paper
            # citing `paper_id`; names are kept because they live in the shared
            # notebook namespace.
            for citation in data.get("citations") or []:
                cited_paper_id = citation.get("paperId")
                cited_paper_title = citation.get("title", "Unknown Title")
                cited_authors = _author_names(citation.get("authors"))

                # Entries without an ID or with an empty/null title are skipped.
                if cited_paper_id and cited_paper_title:
                    if cited_paper_id not in citation_dict:
                        # First time this paper is seen: record it and queue it.
                        citation_dict[cited_paper_id] = cited_paper_title
                        author_dict[cited_paper_id] = cited_authors
                        id_arr.append(cited_paper_id)

                    citation_edges.append([cited_paper_id, paper_id])

                    print(f"{cited_paper_title} (ID: {cited_paper_id})")
                    print(f"Authors: {', '.join(cited_authors)}")

        # Pause after every request (also spaces out the first request of the next root).
        time.sleep(REQUEST_PAUSE_SECONDS)

    citation_data[root_paper_id] = {
        "dict": citation_dict,
        "authors": author_dict,
        "edges": citation_edges
    }

    # Persist after each root so a later failure does not lose finished crawls.
    save_data(citation_data)

print("\nFinished fetching citations for all papers.")


Citing Papers for: Applicability of Blockchain smart contracts in securing Internet and IoT: A systematic literature review (ID: 559bcef033955ecb32aca43649b44fceb31c59aa)
Authors: Auqib Hamid Lone, R. N. Mir
Improving IoT Management with Blockchain: Smart Home Access Control (ID: 09d8a7cbb0d25f651c6d05f52eeb9dec430b98d8)
Authors: Andrej Gono, Ivo Pisařovic, Martin Zejda, Jaromír Landa, David Prochazka
Distributed Databases for Computer Applications: Study and Selection of Distributed Ledger Technologies (ID: f8df957983453554ac630db47d512e2f7ad47225)
Authors: Carlo Kleber da Silva Rodrigues
Leveraging the usage of blockchain toward trust-dominated manufacturing systems (ID: fa22cc67208cefb1ac3d92e7a67f1e25a202c7b1)
Authors: Philip Samaha, Fadi El Kalach, R. Harik
Exploring IoT and Blockchain: A Comprehensive Survey on Security, Integration Strategies, Applications and Future Research Directions (ID: faf3b2b9c5ecd071f29a1ccd25bd294eaf7f140c)
Authors: Muath A. Obaidat, Majdi Rawashdeh, M

In [2]:
import json
import networkx as nx
from pyvis.network import Network

# File path for stored citation data
DATA_FILE = "NIT_citation_data.json"

# Load citation data from JSON file
def load_data():
    """Load the stored citation data from ``DATA_FILE``.

    Returns:
        The parsed JSON content of ``DATA_FILE``, or an empty dict when the file
        is missing or is not valid JSON. In that case a message naming the file
        that was actually tried is printed.
    """
    try:
        with open(DATA_FILE, "r") as file:
            return json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        # Report the real path rather than a hard-coded (and different) file name.
        print(f"Error loading data. Ensure {DATA_FILE} exists and is correctly formatted.")
        return {}

# Function to draw the citation graph using PyVis with author details
def draw_graph_pyvis(root_paper_id, citation_edges, citation_dict, author_dict, paper_num):
    """Render one citation graph with PyVis and return the matching networkx graph.

    Args:
        root_paper_id: ID of the node drawn as the large, dark red, pinned root.
        citation_edges: Iterable of two-item edges. For an edge ``[a, b]`` the
            arrow is drawn from ``b`` to ``a``.
        citation_dict: Mapping of paper ID to title; one node is added per entry.
        author_dict: Mapping of paper ID to a list of author names, shown in the
            hover tooltip (``"Unknown Author"`` when an ID is missing).
        paper_num: Number used in the output file name
            ``NIT_citation_graph_{paper_num}.html``.

    Returns:
        A ``networkx.DiGraph`` holding the same nodes and edges as the drawing.
    """
    net = Network(notebook=True, directed=True, height="800px", width="100%")

    # Strong repulsion and long springs keep the labels readable.
    net.barnes_hut(gravity=-5000, central_gravity=0.2, spring_length=500, spring_strength=0.1)

    graph = nx.DiGraph()

    # One node per known paper, with title and authors in the hover tooltip.
    for node_id, node_title in citation_dict.items():
        names = ", ".join(author_dict.get(node_id, ["Unknown Author"]))
        hover = f"Title: {node_title}, Authors: {names}"

        if node_id == root_paper_id:
            # The root is emphasised and excluded from physics so it stays in place.
            style = {
                "label": f"{node_title}",
                "color": "darkred",
                "size": 50,
                "font": {"size": 30, "bold": True},
                "physics": False,
            }
        else:
            style = {
                "label": node_title,
                "color": "pink",
                "size": 15,
                "font": {"size": 12},
            }

        net.add_node(node_id, title=hover, **style)
        graph.add_node(node_id)

    # Each stored edge [a, b] becomes an arrow b -> a in both graphs.
    for arrow_head, arrow_tail in citation_edges:
        net.add_edge(arrow_tail, arrow_head)
        graph.add_edge(arrow_tail, arrow_head)

    # Write the HTML file and display it.
    file_name = f"NIT_citation_graph_{paper_num}.html"
    net.show(file_name)
    print(f"Graph saved as {file_name}")

    return graph  # networkx graph for later analysis

# Load stored citation data (an empty dict if the file is missing or invalid,
# in which case the loop below does nothing)
citation_data = load_data()

# Generate and visualize graphs: one HTML file and one networkx graph per root paper.
# Files are numbered 1, 2, ... in the key order of the loaded data.
graphs = {}  # Store networkx graphs, keyed by root paper ID
for idx, root_paper_id in enumerate(citation_data.keys()):
    citation_edges = citation_data[root_paper_id]["edges"]
    citation_dict = citation_data[root_paper_id]["dict"]
    author_dict = citation_data[root_paper_id].get("authors", {})  # Get authors if available
    graphs[root_paper_id] = draw_graph_pyvis(root_paper_id, citation_edges, citation_dict, author_dict, idx + 1)

print("\nGraphs generated and saved.")

NIT_citation_graph_1.html
Graph saved as NIT_citation_graph_1.html
NIT_citation_graph_2.html
Graph saved as NIT_citation_graph_2.html

Graphs generated and saved.


In [3]:
# Function to compute network measures
def compute_measures(G, paper_num):
    """Print network measures for one citation graph.

    Prints, in order: the diameter (of the whole graph if strongly connected,
    otherwise of its largest strongly connected component), the top five nodes
    by degree, closeness, betweenness and eigenvector centrality and by
    PageRank, the top five HITS hubs and authorities, the average and top five
    local clustering coefficients, and the number of (weakly) connected
    components. Node IDs are shown as paper titles.

    Args:
        G: networkx graph to analyse.
        paper_num: 1-based position of the graph's root paper among the keys of
            the module-level ``citation_data``; used for the heading and to find
            the ID-to-title mapping.

    Returns:
        None. All results are printed.
    """
    print(f"\n Network Measures for Paper {paper_num}")

    # A graph without nodes arises when nothing could be fetched for a root
    # paper. Connectivity is undefined for it and the calls below would raise,
    # so report it and stop.
    if len(G) == 0:
        print("Graph has no nodes. Skipping network measures.")
        return

    # Get the citation_dict for the current root paper
    root_paper_id = list(citation_data.keys())[paper_num-1]
    citation_dict = citation_data[root_paper_id]["dict"]

    # Function to convert a (paper_id, score) tuple to (paper_title, score)
    def id_to_title(item):
        paper_id, score = item
        title = citation_dict.get(paper_id, f"Unknown Title (ID: {paper_id})")
        return (title, score)

    # Print the five highest-scoring nodes of a {node: score} mapping.
    def print_top_five(heading, scores):
        print(f"\nTop 5 Nodes by {heading}:")
        top = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5]
        print([id_to_title(item) for item in top])

    # Compute Diameter
    if nx.is_strongly_connected(G):
        diameter = nx.diameter(G)
        print(f"Graph Diameter: {diameter}")
    else:
        print("Graph is not strongly connected. Diameter cannot be computed.")
        # NOTE(review): when every strongly connected component is a single node
        # (no citation cycles) this reports 0, as in the recorded output; confirm
        # that this is the intended measure.
        sccs = list(nx.strongly_connected_components(G))
        largest_scc = max(sccs, key=len)
        G_scc = G.subgraph(largest_scc)
        diameter_scc = nx.diameter(G_scc)
        print(f"Diameter of the Largest Strongly Connected Component: {diameter_scc}")

    # Centrality Measures (all computed before anything is printed)
    degree_centrality = nx.degree_centrality(G)
    closeness_centrality = nx.closeness_centrality(G)
    betweenness_centrality = nx.betweenness_centrality(G)
    eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)
    pagerank = nx.pagerank(G)

    print_top_five("Degree Centrality", degree_centrality)
    print_top_five("Closeness Centrality", closeness_centrality)
    print_top_five("Betweenness Centrality", betweenness_centrality)
    print_top_five("Eigenvector Centrality", eigenvector_centrality)
    print_top_five("PageRank", pagerank)

    # HITS Algorithm
    hubs, authorities = nx.hits(G)
    print_top_five("Hub Score", hubs)
    print_top_five("Authority Score", authorities)

    # Clustering Coefficients
    global_clustering = nx.average_clustering(G)
    local_clustering = nx.clustering(G)

    print(f"\nGlobal Clustering Coefficient: {global_clustering}")
    print_top_five("Local Clustering Coefficient", local_clustering)

    # Connected Components
    if nx.is_directed(G):
        weakly_connected_components = list(nx.weakly_connected_components(G))
        print(f"\nNumber of Weakly Connected Components: {len(weakly_connected_components)}")
    else:
        connected_components = list(nx.connected_components(G))
        print(f"\nNumber of Connected Components: {len(connected_components)}")

# Compute network measures for each graph.
# idx + 1 is the 1-based paper number; compute_measures uses it to look the
# root paper up again by its position among citation_data's keys.
for idx, root_paper_id in enumerate(citation_data):
    compute_measures(graphs[root_paper_id], idx + 1)


 Network Measures for Paper 1
Graph is not strongly connected. Diameter cannot be computed.
Diameter of the Largest Strongly Connected Component: 0

Top 5 Nodes by Degree Centrality:
[('Resource management in pervasive Internet of Things: A survey', 0.5877862595419847), ('BlockFaaS: Blockchain-enabled Serverless Computing Framework for AI-driven IoT Healthcare Applications', 0.08396946564885496), ('The internet of medical things in healthcare management: a review', 0.06870229007633588), ('Optimization of Maritime Communication Workflow Execution with a Task-Oriented Scheduling Framework in Cloud Computing', 0.05343511450381679), ('Linear Interval Approximation of Sensor Characteristics with Inflection Points', 0.04580152671755725)]

Top 5 Nodes by Closeness Centrality:
[('An efficient medical data encryption scheme using selective shuffling and inter-intra pixel diffusion IoT-enabled secure E-healthcare framework', 0.010178117048346055), ('A risk assessment method for power internet o