In [None]:
# Import essential libraries.
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from community import community_louvain
import IPython.display as ipd

In [None]:
# Load the author table and attach a "Lastname Initials" display name to each row.
lep_author_data = pd.read_csv(
    '/content/drive/MyDrive/Data Science MSc/authors.leptospirosis.csv'
).assign(Fullname=lambda d: d['AuthorLastname'] + ' ' + d['AuthorInitials'])

# Rows with AuthorN == 1 are treated as first authors; every other row is a co-author.
is_first_author = lep_author_data['AuthorN'].eq(1)

# First authors, reduced to paper ID + name.
authors = lep_author_data.loc[is_first_author]
authors_a = authors.loc[:, ['PMID', 'Fullname']]

# Co-authors, reduced to paper ID + name (name column relabelled 'Coauthors').
coauthors = lep_author_data.loc[~is_first_author]
coauthors_a = coauthors.loc[:, ['PMID', 'Fullname']].rename(columns={'Fullname': 'Coauthors'})

# Pair each first author with each co-author of the same paper.
# how='outer' keeps papers that have only one side, leaving NaN in the other column.
authors_merged = (
    authors_a
    .merge(coauthors_a, on='PMID', how='outer')
    .rename(columns={'Fullname': 'Primary_Author', 'Coauthors': 'Secondary_Author'})
)

# Show the primary / secondary author pairs.
ipd.display(authors_merged)

In [None]:
# How many of the most frequent first authors to keep for the network.
TOP_N = 15

# Count papers per first author; value_counts() returns them most-frequent first.
primary_author = authors_a['Fullname'].value_counts().reset_index()
primary_author.columns = ['Fullname', 'Count']

# Creating dataframe of secondary authors and their value counts.
#secondary_author = coauthors_a['Coauthors'].value_counts().reset_index()
#secondary_author.columns = ['Fullname', 'Count']

# The TOP_N most frequent first authors and just their names.
prim_topN = primary_author.head(TOP_N)
prim_topN_names = prim_topN['Fullname']

# Keep only the author pairs whose primary author is one of the TOP_N names.
# NOTE: the name `f` is kept because the graph cell below reads it.
f = authors_merged[authors_merged['Primary_Author'].isin(prim_topN_names)]

ipd.display(f)
# Creating dataframe of total publication counts.
#author_count_merge = pd.merge(primary_author, secondary_author, on = 'Fullname', how = 'outer', suffixes = (' Author', ' Co-Author')).fillna(0)
#author_count_merge['Count Sum'] = author_count_merge['Count Author'] + author_count_merge['Count Co-Author']

# Ordering count sum descending.
#count_desc = author_count_merge.sort_values(by = ['Count Sum'], ascending = False, ignore_index = True)

# Creating dataframe of the top 10 publication contributors' names.
####count_top10 = count_desc.iloc[:10,:]
####names_top10 = count_top10['Fullname']

# Creating dataframes of top 10 contributors when they've appeared as author.
#filtered_names_primary = authors_merged[authors_merged['Primary_Author'].isin(names_top10)]

# Creating dataframe of top 10 contributors when they've appeared as co-author.
#filtered_names_secondary = authors_merged[authors_merged['Secondary_Author'].isin(names_top10)]
##pmids_secondary = filtered_names_secondary['PMID'].unique()
#other_authors_secondary = authors_merged[authors_merged['PMID'].isin(pmids_secondary)]

#total = pd.concat([filtered_names_primary, other_authors_secondary])

#ipd.display(filtered_names_primary)
##ipd.display(filtered_names_primary)
#ipd.display(other_authors_secondary)
#ipd.display(total)

In [None]:
# Seed shared by the two stochastic steps below (Louvain partitioning and the
# spring layout) so the figure is identical on every Restart & Run All.
SEED = 42

# The outer merge that built the author pairs leaves NaN in Primary_Author or
# Secondary_Author for papers missing one side. Drop those rows so they do not
# become spurious `nan` nodes in the graph.
edges = f.dropna(subset=['Primary_Author', 'Secondary_Author'])

# Undirected co-authorship graph: one edge per (primary, secondary) author pair.
G = nx.from_pandas_edgelist(edges, 'Primary_Author', 'Secondary_Author', create_using = nx.Graph)

# Louvain community detection; maps each node to an integer community id.
partition = community_louvain.best_partition(G, random_state=SEED)

plt.figure(figsize=(12, 10))

pos = nx.spring_layout(G, seed=SEED)
# Colour each node by its community id, in G.nodes() order as nx.draw expects.
node_color = [partition[node] for node in G.nodes()]
# No `arrows=True`: G is undirected, so there are no arrowheads to draw.
nx.draw(G, pos, with_labels=True, node_color=node_color,
        font_size = 5, width = 2)


plt.show()