In [18]:
# import relevant library's
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [9]:
# relevant filepaths
fp_art=("articles.leptospirosis.csv")
fp_aut=("authors.leptospirosis.csv")
fp_year=("paper_counts.csv")
# create suitable data frames
df_art=pd.read_csv(fp_art)
df_aut=pd.read_csv(fp_aut)
df_year=pd.read_csv(fp_year)



In [10]:
# making a clean data frame without irrelevant columns and with a column for full name
df_aut["Full"]=df_aut["AuthorForename"]+' '+df_aut["AuthorLastname"]
df_clear=df_aut.drop(['AuthorInitials', 'AuthorAffiliation','AuthorForename','AuthorLastname'], axis=1)
# making a list of every author
AUTH=list(dict.fromkeys(df_clear['Full'].tolist()))
# making a table that counts for duplicates
count=df_clear.pivot_table(index=['Full'],aggfunc='size')
print(count)


Full
A A Adesiyun          6
A A Alfieri           1
A A B B Athukorala    1
A A Castro            2
A A Chowdhury         1
                     ..
Łukasz Bocian         1
Şaban Gönül           1
Željka Anzulović      1
Žiga Kalamar          1
Оlena Chervinska      1
Length: 14107, dtype: int64


In [11]:
# make a unstacked DF
ind=df_clear.set_index(["PMID","AuthorN"])
df_unstack=ind.unstack()
print(df_unstack)


                         Full                           \
AuthorN                    1                        2    
PMID                                                     
10548299              E Daher              D M Zanetta   
10569777            D A Haake                M K Mazel   
10585813          P C Marotto           C M Nascimento   
10586903         P Cumberland              C O Everard   
10596270        A Steger-Lieb                 B Gerber   
...                       ...                      ...   
38058661  Wesley P du Plessis              Sa'ad Lahri   
38074946          Sylvie Zida  Henri Gautier Ouédraogo   
38081475    Delphine Bonhomme       Ignacio Santecchia   
38087323       Noraini Philip          Kamruddin Ahmed   
38094659        Guan-Sheng Li              Hai-Qin Guo   

                                                            \
AuthorN                         3                       4    
PMID                                                         
1

In [7]:
# return a list of collaborator for a specific person
colab_search = np.column_stack([df_unstack[col].str.contains(r"A A Adesiyun", na=False) for col in df_unstack])
df_colab_search=df_unstack.loc[colab_search.any(axis=1)]
stack=df_colab_search.stack()
COLAB=list(dict.fromkeys(stack['Full'].tolist()))
COLAB.remove('A A Adesiyun')
# return a data frame for all authors
lst=[]
for i in AUTH:
    colab_search = np.column_stack([df_unstack[col].str.contains(str(i), na=False) for col in df_unstack])
    df_colab_search = df_unstack.loc[colab_search.any(axis=1)]
    stack = df_colab_search.stack()
    COLAB = list(dict.fromkeys(stack['Full'].tolist()))
    if i in COLAB:
        COLAB.remove(i)
    lst.append({'Author': i, 'collaborators':COLAB})
df_COLAB=pd.DataFrame(lst,columns=['Author','collaborators'])
print(df_COLAB)

df_COLAB.to_csv("df_COLAB.csv")

                 Author                                      collaborators
0               E Daher      [D M Zanetta, M B Cavalcante, R C Abdulkader]
1           D M Zanetta          [E Daher, M B Cavalcante, R C Abdulkader]
2        M B Cavalcante             [E Daher, D M Zanetta, R C Abdulkader]
3        R C Abdulkader             [E Daher, D M Zanetta, M B Cavalcante]
4             D A Haake  [M K Mazel, A M McCoy, F Milward, G Chao, J Ma...
...                 ...                                                ...
14103  Dinanibè Kambiré  [Sylvie Zida, Henri Gautier Ouédraogo, Tegwind...
14104      Seni Kouanda  [Sylvie Zida, Henri Gautier Ouédraogo, Tegwind...
14105      Pedro Escoll  [Delphine Bonhomme, Ignacio Santecchia, Stylia...
14106     Guan-Sheng Li                                      [Hai-Qin Guo]
14107       Hai-Qin Guo                                    [Guan-Sheng Li]

[14108 rows x 2 columns]


In [14]:
! pip install python-louvain

Collecting python-louvain
  Downloading python-louvain-0.16.tar.gz (204 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m204.6/204.6 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: python-louvain
  Building wheel for python-louvain (setup.py) ... [?25ldone
[?25h  Created wheel for python-louvain: filename=python_louvain-0.16-py3-none-any.whl size=9389 sha256=a0ff21abccb61ef025dc882a6747f4505f4f2dc48b7290dfd2f747db724f298f
  Stored in directory: /Users/rakesh/Library/Caches/pip/wheels/11/c1/e7/f62a211c636275e2da798bf0c307a3ae79aeddaf2524a03ce4
Successfully built python-louvain
Installing collected packages: python-louvain
Successfully installed python-louvain-0.16


In [15]:
import community

In [21]:
# Create a directed graph from the DataFrame
G = nx.DiGraph()

for _, row in df_COLAB.iterrows():
    author = row['Author']
    collaborators = row['collaborators']

    for collaborator in collaborators:
        G.add_edge(author, collaborator)
        
partition=community.best_partition(G.to_undirected())



In [28]:
# Visualize the graph
pos = nx.spring_layout(G)  # You can try different layout algorithms
plt.figure(figsize=(240, 200))
cmap=plt.cm.get_cmap('viridis',max(partition.values())+1)
nx.draw(G, pos, with_labels=True, font_weight='bold', node_color=list(partition.values()),cmap=cmap, node_size=800)

# Show the plot
plt.savefig('network.png')