In [1]:
# Importing the necessary libraries
import pandas as pd
import networkx as nx
from itertools import combinations
import json
from networkx.readwrite import json_graph


  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
# Read the Scopus export into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer="data_scopus.csv")

In [5]:
# Keep only the records where the year, the affiliation string and the
# author-ID list are all present. `data` itself is left untouched, so this
# cell can be re-run safely.
data_cleaned = data.dropna(
    axis=0,
    how='any',
    subset=['Year', 'Authors with affiliations', 'Author(s) ID'],
)

In [27]:
# Start from an empty undirected graph; author nodes and co-authorship edges
# are added to it afterwards. Re-running this cell resets the graph.
G = nx.Graph()

In [28]:
# Build the co-authorship graph: one node per author ID and one edge for every
# pair of authors that appear on the same publication.
# Only the ID column is needed, so iterate over it directly instead of paying
# for the per-row Series that iterrows() constructs.
for raw_author_ids in data_cleaned['Author(s) ID']:
    # Strip each token BEFORE testing it for emptiness: a whitespace-only
    # token (e.g. from a trailing "; ") is truthy until stripped and would
    # otherwise be added as an empty-string node linked to every co-author.
    stripped_ids = (token.strip() for token in raw_author_ids.split(';'))

    # dict.fromkeys removes repeated IDs while keeping their order, so an ID
    # listed twice on one publication cannot produce a self-loop.
    author_ids = list(dict.fromkeys(token for token in stripped_ids if token))

    # Adding a node that already exists is a no-op, so no membership check is
    # required; this also keeps single-author publications as isolated nodes.
    G.add_nodes_from(author_ids)

    # combinations() yields nothing when there are fewer than two authors.
    G.add_edges_from(combinations(author_ids, 2))

In [29]:
# Size of the graph: how many authors (nodes) and how many distinct
# co-authorship links (edges) it contains.
number_nodes, number_edges = G.number_of_nodes(), G.number_of_edges()

In [30]:
# Report the graph size, one figure per line.
print(
    f"Number of nodes (authors): {number_nodes}",
    f"Number of edges (co-authorships): {number_edges}",
    sep="\n",
)

Number of nodes (authors): 1552
Number of edges (co-authorships): 3049


In [31]:
# Convert the NetworkX graph into node-link form: a plain, JSON-serialisable
# dict holding a list of node records and a list of link records, which is the
# layout D3.js force-directed examples consume.
# NOTE(review): recent NetworkX releases warn that the default key for the
# edge list is changing from "links" to "edges" — confirm which key the D3
# front-end reads before upgrading NetworkX.
data_for_d3 = json_graph.node_link_data(G)

In [32]:
# Serialise the node-link dictionary and write it next to the notebook; the
# file is overwritten on every run.
with open('network_graph.json', mode='w') as output_file:
    output_file.write(json.dumps(data_for_d3))