In [None]:
#Preprocessing

import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# ---------------------------------------------------------------------------
# Preprocessing: load the inputs and build `full_dataset`, the GeoDataFrame
# every figure cell copies from.  Columns produced: NAME, geometry, SOV_A3,
# Publications (per corresponding-author country) and frequency
# (collaboration weight per country).
# ---------------------------------------------------------------------------

# Publication counts per corresponding-author country (pinned to a commit).
country_of_author = pd.read_excel('https://raw.githubusercontent.com/BGBH/Country/0ebfb893f4612dd099ea903d6e8044f018cef84b/Corresponding%20author%20countries.xlsx')

shapefile_path = 'https://raw.githubusercontent.com/BGBH/Country/e18130f0b9798ae34d5fa0189e9915f584bbc53c/ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp'

# Country polygons.
world = gpd.read_file(shapefile_path)

# Keep only the columns the figures need.  `.copy()` makes the subset an
# independent frame so the upper-casing below is not a write to a slice
# (avoids SettingWithCopyWarning).
world = world[['NAME', 'geometry', 'SOV_A3']].copy()
world['NAME'] = world['NAME'].str.upper()

# Show author-country rows whose name has no match in the shapefile.
print(country_of_author[~country_of_author['Country'].isin(world['NAME'])])
country_of_author.columns = ['NAME', 'Publications']

# Pairwise collaboration table with columns From / To / Frequency.
country_of_colab = pd.read_excel("https://raw.githubusercontent.com/BGBH/Country/26ce91e517ca9aa6b0f53738b16b22063e343ee3/Country%20collboration.xlsx")

# Show collaboration endpoints whose name has no match in the shapefile.
print(country_of_colab['From'][~country_of_colab['From'].isin(world['NAME'])].unique())
print('-'*25)
print(country_of_colab['To'][~country_of_colab['To'].isin(world['NAME'])].unique())

# Preprocess for the density plot: one 'frequency' value per country.
# NOTE(review): the 'From' side contributes the number of ROWS a country
# appears in, while the 'To' side contributes the SUM of 'Frequency'.  This
# mirrors the existing behaviour; confirm the mixed units are intended.
from_counts = (
    country_of_colab['From']
    .value_counts()
    .rename_axis('NAME')
    .reset_index(name='frequency')
)
# Select and rename by column name instead of relying on column position.
to_totals = country_of_colab[['To', 'Frequency']].rename(
    columns={'To': 'NAME', 'Frequency': 'frequency'}
)
country_of_colab = (
    pd.concat([to_totals, from_counts], axis=0)
    .groupby('NAME', as_index=False)['frequency']
    .sum()
)

# Create the full dataset for plotting.  Outer joins keep countries that are
# missing from the shapefile; those rows carry no geometry, so the figure
# cells must tolerate a missing geometry.
full_dataset = pd.merge(world, country_of_author, on='NAME', how='outer')
full_dataset = pd.merge(full_dataset, country_of_colab, on='NAME', how='outer')

# Make sure the result is a GeoDataFrame with 'geometry' as the active geometry.
full_dataset = gpd.GeoDataFrame(full_dataset).set_geometry('geometry')

In [None]:
#Figure 1 
# Import library
from geopandas.geodataframe import GeoDataFrame
from matplotlib.cm import ScalarMappable
from pandas.core.frame import DataFrame
from matplotlib import cm

# Build the plotting frame.  Colours use the natural log of the publication
# count, stored in its own column; 'Publications' keeps the raw counts so the
# per-country annotations show real numbers rather than truncated logarithms
# (previously a country with 2 publications was labelled "0" and a country
# with 1 publication got no label at all).
partial_dataset: GeoDataFrame = full_dataset.copy()
partial_dataset['LogPublications'] = np.log(partial_dataset['Publications'])

fontsize = 50

# Start plotting
fig, ax = plt.subplots(figsize=(60, 40))
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9.
cmap = plt.get_cmap('Blues')
norm = plt.Normalize(vmin=0, vmax=partial_dataset['LogPublications'].max())
sm = ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])  # required by older Matplotlib before a colorbar can be drawn
cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.04)

partial_dataset.plot(column='LogPublications', cmap=cmap, norm=norm, edgecolor='black', ax=ax)

# Axis cosmetics, applied to `ax` explicitly rather than the pyplot "current axes".
ax.tick_params(axis='both', labelsize=fontsize/2)
ax.set_xlabel('Longitude', fontsize=fontsize)
ax.set_ylabel('Latitude', fontsize=fontsize)
ax.set_title('Number of Publications', fontsize=fontsize)
cbar.set_label('Number of Publications (natural log)', fontsize=fontsize)
cbar.ax.tick_params(labelsize=fontsize/2)
fig.subplots_adjust(left=0.1, right=0.9, top=0.95, bottom=0.05)

# Write the publication count at each country's centroid.
for _, row in partial_dataset.iterrows():
    geom = row['geometry']
    # Rows that came only from the Excel sheets (outer merge) have no geometry.
    if geom is None or geom.is_empty:
        continue
    # NaN compares False, so countries without a count are skipped as well.
    if not (row['Publications'] > 0):
        continue
    centroid = geom.centroid
    ax.text(centroid.x, centroid.y, f"{(int(row['Publications']))}", fontsize=int(fontsize/2), ha='center', color='black', alpha=0.8)


In [None]:
# Figure 2 - Density plot

from geopandas.geodataframe import GeoDataFrame
from matplotlib.cm import ScalarMappable
from pandas.core.frame import DataFrame

density = 'frequency'
fontsize = 50

# Take a fresh copy so this cell does not depend on whichever figure cell last
# bound `partial_dataset` (only geometry, NAME and `density` are used here).
partial_dataset = full_dataset.copy()

# Draw the base map in white.
fig, ax = plt.subplots(figsize=(60, 40))
partial_dataset.plot(color='white', edgecolor='black', ax=ax)

ax.tick_params(axis='both', labelsize=fontsize/2)
ax.set_xlabel('Longitude', fontsize=fontsize)
ax.set_ylabel('Latitude', fontsize=fontsize)
# NOTE(review): the bubbles show the collaboration 'frequency', yet the title
# says "Number of Publications" -- confirm the intended wording.
ax.set_title('Number of Publications', fontsize=fontsize)
fig.subplots_adjust(left=0.1, right=0.9, top=0.95, bottom=0.05)

# One translucent bubble per country, with area proportional to `density`.
for _, row in partial_dataset.iterrows():
    geom = row['geometry']
    # Rows that came only from the Excel sheets (outer merge) have no geometry.
    if geom is None or geom.is_empty:
        continue
    # NaN compares False, so countries without a value are skipped as well.
    if not (row[density] > 0):
        continue
    centre = geom.centroid
    ax.scatter(centre.x, centre.y, s=row[density]*100, alpha=0.5, label=row['NAME'], color='red')


In [None]:
# Figure 3

from geopandas.geodataframe import GeoDataFrame
from matplotlib.cm import ScalarMappable
from pandas.core.frame import DataFrame

density = 'frequency'
color_in_the_map = 'Publications'
fontsize = 50
partial_dataset: GeoDataFrame = full_dataset.copy()

# Choropleth of `color_in_the_map` with a matching horizontal colour bar.
fig, ax = plt.subplots(figsize=(60, 40))
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9.
cmap = plt.get_cmap('YlOrBr')
norm = plt.Normalize(vmin=0, vmax=partial_dataset[color_in_the_map].max())
# Use the ScalarMappable imported in this cell rather than `cm`, which is only
# imported by an earlier cell.
sm = ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])  # required by older Matplotlib before a colorbar can be drawn
cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', fraction=0.05, pad=0.04)

partial_dataset.plot(column=color_in_the_map, norm=norm, edgecolor='black', ax=ax, cmap=cmap)

ax.tick_params(axis='both', labelsize=fontsize/2)
ax.set_xlabel('Longitude', fontsize=fontsize)
ax.set_ylabel('Latitude', fontsize=fontsize)
fig.subplots_adjust(left=0.1, right=0.9, top=0.95, bottom=0.05)

# Overlay one translucent bubble per country, with area proportional to `density`.
for _, row in partial_dataset.iterrows():
    geom = row['geometry']
    # Rows that came only from the Excel sheets (outer merge) have no geometry.
    if geom is None or geom.is_empty:
        continue
    # NaN compares False, so countries without a value are skipped as well.
    if not (row[density] > 0):
        continue
    centre = geom.centroid
    ax.scatter(centre.x, centre.y, s=row[density]*100, alpha=0.5, label=row['NAME'], color='blue')

cbar.set_label('Number of Publications', fontsize=fontsize)
cbar.ax.tick_params(labelsize=fontsize/2)


In [None]:
# Figure 4 - Bubble plot

# Only the columns this figure uses decide which rows are kept; a bare
# dropna() would also discard every country that has publications but no
# collaboration 'frequency'.
partial_dataset = full_dataset.dropna(subset=['geometry', 'SOV_A3', 'Publications']).copy()
# log(0) is -inf and cannot be drawn, so keep strictly positive counts only.
partial_dataset = partial_dataset[partial_dataset['Publications'] > 0].copy()
partial_dataset['Bubble'] = np.log(partial_dataset['Publications'])

centroids = partial_dataset['geometry'].centroid
partial_dataset['Lons'] = centroids.x
partial_dataset['Lats'] = centroids.y
partial_dataset = partial_dataset.sort_values(by='Bubble', ascending=False).reset_index(drop=True)

# Take the coordinates AFTER sorting so they stay aligned with 'Bubble' and
# 'SOV_A3'.  Previously the lists were built before the sort, which paired
# each bubble and label with another country's position.
lons = partial_dataset['Lons'].tolist()
lats = partial_dataset['Lats'].tolist()

top = 10
top_countries = partial_dataset.iloc[:top]
other_countries = partial_dataset.iloc[top:]

# Plot the figure
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(111, projection='3d')

# Remaining countries: small pink markers.
sc = ax.scatter(
    other_countries['Lons'].to_numpy(),
    other_countries['Lats'].to_numpy(),
    other_countries['Bubble'].to_numpy(),
    s=other_countries['Bubble'].to_numpy()*10,
    color='pink',
    alpha=1,
)
# Top `top` countries: larger purple markers, labelled with their SOV_A3 code.
sc2 = ax.scatter(
    top_countries['Lons'].to_numpy(),
    top_countries['Lats'].to_numpy(),
    top_countries['Bubble'].to_numpy(),
    s=top_countries['Bubble'].to_numpy()*50,
    color='purple',
    alpha=1,
)
for lon, lat, bubble, code in zip(top_countries['Lons'], top_countries['Lats'], top_countries['Bubble'], top_countries['SOV_A3']):
    ax.text(lon, lat, bubble, code, fontsize=(15 if len(code) < 7 else 10), color='black')

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
# The z axis carries log(Publications), not a project count.
ax.set_zlabel('log(Publications)')
ax.set_title('Publications Count per Country')

In [None]:
from matplotlib.colors import Normalize
import matplotlib.pyplot as plt
from matplotlib.patches import FancyArrowPatch
import pandas as pd
import seaborn as sns

# Reload the raw collaboration pairs (From / To / Frequency); the
# preprocessing cell replaced its own copy with per-country totals.
country = pd.read_excel("https://raw.githubusercontent.com/BGBH/Country/26ce91e517ca9aa6b0f53738b16b22063e343ee3/Country%20collboration.xlsx")
from_ = country['From']
to = country['To']

# Show endpoints whose name has no match in the shapefile.
print(from_[~from_.isin(world['NAME'])].unique())
print(to[~to.isin(world['NAME'])].unique())

# Centroid coordinates per country.  The vectorised .x/.y accessors give NaN
# for rows without geometry instead of failing on `None.y`.
partial_dataset = full_dataset.copy()
centroids = partial_dataset['geometry'].centroid
partial_dataset['Lons'] = centroids.x
partial_dataset['Lats'] = centroids.y
partial_dataset = partial_dataset.reset_index().set_index('NAME')

# Name -> (Lats, Lons) lookup.  Duplicate names keep their first row so each
# lookup returns exactly one coordinate pair; unknown names become NaN.
centroid_lookup = partial_dataset[['Lats', 'Lons']]
centroid_lookup = centroid_lookup[~centroid_lookup.index.duplicated(keep='first')]
from_xy = centroid_lookup.reindex(from_)
to_xy = centroid_lookup.reindex(to)
coords = pd.DataFrame({
    'lats_from': from_xy['Lats'].to_numpy(),
    'lons_from': from_xy['Lons'].to_numpy(),
    'lats_to': to_xy['Lats'].to_numpy(),
    'lons_to': to_xy['Lons'].to_numpy(),
    'counts': country['Frequency'].to_numpy(),
})

# Strongest links first; links with an unresolved endpoint cannot be drawn.
top_links = 10
to_plot = (
    coords
    .dropna(subset=['lats_from', 'lons_from', 'lats_to', 'lons_to'])
    .sort_values(by='counts', ascending=False)
    .reset_index(drop=True)
)
print(to_plot.head(top_links))

fontsize = 50  # same label size as the other figure cells
fig, ax = plt.subplots(figsize=(60, 40))
partial_dataset.plot(color='white', edgecolor='black', ax=ax)

# plt.get_cmap replaces cm.get_cmap, which was removed in Matplotlib 3.9.
cmap = plt.get_cmap('viridis')
norm = Normalize(vmin=0, vmax=top_links)
# After reset_index(drop=True) the index label is the link's rank (0 = strongest).
# Rank drives both the colour and the line width (thicker = stronger).
for rank, row in to_plot.head(top_links).iterrows():
    arrow = FancyArrowPatch(
        (row['lons_from'], row['lats_from']),
        (row['lons_to'], row['lats_to']),
        mutation_scale=80,
        color=cmap(norm(rank)),
        linewidth=20 - rank*1.5,
        arrowstyle='->',
        connectionstyle='arc3,rad=0.1',
    )
    ax.add_patch(arrow)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax, orientation='horizontal', pad=0.03)
cbar.set_label('Arrow Index', fontsize=fontsize)
