# In [3]:
import pandas as pd
from networkx.readwrite import json_graph
import networkx as nx
import json

# Load Author Network Data
df = pd.read_csv("/content/data_scopus.csv")
# Fill gaps per column type: numeric columns get 0, text columns get "".
# A blanket fillna(0) would put an integer into the author/ID columns, and the
# `.split(...)` calls made on those cells further down would then fail.
missing_value_fill = {
    column: 0 if pd.api.types.is_numeric_dtype(df[column]) else ""
    for column in df.columns
}
df = df.fillna(value=missing_value_fill)
G = nx.Graph()

def get_author_country(authors_with_affiliations) -> str:
    """Return the country of the first author listed in an affiliations cell.

    The cell is expected to hold ';'-separated author entries, each a
    ','-separated list whose last item is taken as the country.

    Args:
        authors_with_affiliations: Raw cell value. Anything that is not a
            string (for example the 0 placeholder produced by ``fillna(0)``,
            ``None`` or NaN) is treated as "no affiliation".

    Returns:
        The stripped text after the last comma of the first entry, or an
        empty string when the cell is empty or not a string.
    """
    if not isinstance(authors_with_affiliations, str):
        return ''
    first_affiliation = authors_with_affiliations.split(';')[0].strip()
    return first_affiliation.split(',')[-1].strip()

# Build the co-authorship graph in a single pass over the publications:
# every author ID on a paper becomes a node carrying that paper's metadata,
# and every pair of IDs on the same paper becomes an edge.
def _split_author_ids(raw_ids):
    """Return the non-empty, whitespace-stripped IDs of an 'Author(s) ID' cell.

    Non-string cells (such as a numeric placeholder for a missing value)
    yield an empty list, so rows without author IDs are skipped instead of
    raising on ``.split``.
    """
    if not isinstance(raw_ids, str):
        return []
    stripped_parts = (part.strip() for part in raw_ids.split(';'))
    return [author_id for author_id in stripped_parts if author_id]


for _, row in df.iterrows():
    author_ids = _split_author_ids(row['Author(s) ID'])
    if not author_ids:
        continue

    raw_authors = row['Authors']
    # Commas in the author list are swapped for semicolons; a non-string
    # placeholder is stored unchanged rather than crashing on `.split`.
    if isinstance(raw_authors, str):
        authors_label = ';'.join(raw_authors.split(','))
    else:
        authors_label = raw_authors

    publication = {
        'Authors': authors_label,
        'Title': row['Title'],
        'Year': row['Year'],
        'Citations': row['Cited by'],
        'Publisher': row['Publisher'],
        'Author with affiliations': row['Authors with affiliations'],
    }

    # Nodes are keyed by author ID alone, so every ID on the paper gets the
    # metadata regardless of how many names the 'Authors' cell splits into.
    # add_node() updates attributes of an existing node, so an author who
    # appears on several papers ends up with the last processed paper's data.
    for author_id in author_ids:
        G.add_node(author_id, **{'id': author_id, **publication})

    # Connect each unordered pair of co-authors on this paper.
    for position, first_id in enumerate(author_ids):
        for second_id in author_ids[position + 1:]:
            G.add_edge(first_id, second_id)

# Group authors into clusters: each connected component of the graph
# (authors linked directly or through a chain of co-authors) is one cluster.
clusters = list(nx.connected_components(G))

# Label every node with the index of the component it belongs to
class_mapping = {}
for cluster_index, members in enumerate(clusters):
    class_mapping.update(dict.fromkeys(members, cluster_index))
nx.set_node_attributes(G, values=class_mapping, name='class')

# Export the graph as a {'nodes': [...], 'links': [...]} JSON document
node_records = [
    {'id': node_id, **attributes}
    for node_id, attributes in G.nodes(data=True)
]
link_records = [{'source': u, 'target': v} for u, v in G.edges()]
coauthorship_data = {'nodes': node_records, 'links': link_records}

# ensure_ascii=False keeps non-ASCII characters readable in the UTF-8 file
with open('coauthorship_data.json', 'w', encoding='utf-8') as json_file:
    json.dump(coauthorship_data, json_file, ensure_ascii=False)


# In [4]:
# Serialize the whole graph with networkx's node-link helper and write the
# result to disk as JSON
publication_network_data = json_graph.node_link_data(G)

with open('publication_network.json', mode='w') as network_file:
    json.dump(publication_network_data, fp=network_file)