In [1]:
#Author Alex J. Yang, alexjieyang@outlook.com
import numpy as np
import pandas as pd
import itertools
import scipy.stats
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from tqdm import tqdm
from scipy.stats import bootstrap

In [2]:
# Global matplotlib configuration for every figure saved by this notebook.
font = dict(size=15, family='Helvetica')   # default text size / typeface
plt.switch_backend('agg')                  # non-interactive backend
plt.rcParams['pdf.use14corefonts'] = True  # rely on the 14 core PDF fonts in saved PDFs
plt.rc('font', **font)

In [3]:
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas/seaborn warnings that may point at real problems.
import warnings
warnings.simplefilter("ignore")

In [4]:
def bootstrapped(data,n_resamples=20):
    """Return the distance from the sample mean to the upper 95% bootstrap bound.

    A percentile bootstrap of the mean is run with a fixed ``random_state`` of 1,
    so repeated calls on the same input give the same value.

    Parameters
    ----------
    data : sample passed to ``scipy.stats.bootstrap`` (wrapped in a 1-tuple).
    n_resamples : number of bootstrap resamples (default 20).

    Returns
    -------
    Upper confidence bound minus the mean of the sample.
    """
    # scipy's bootstrap takes a sequence of samples, hence the 1-tuple.
    samples = (data,)
    result = bootstrap(
        samples,
        np.mean,
        confidence_level=0.95,
        n_resamples=n_resamples,
        random_state=1,
        method='percentile',
    )
    upper_bound = result.confidence_interval.high
    return upper_bound - np.mean(samples)

In [5]:
# Prefix prepended (by plain string concatenation) to every figure filename passed
# to plt.savefig below; a non-empty directory therefore needs its trailing separator.
savepath = ''

# Load data

In [6]:
# Keep rows whose Year lies in 1978-2020 (inclusive). The explicit .copy() makes
# the filtered frame independent, so the column added below is not written to a
# slice of the unfiltered frame (the pattern behind SettingWithCopyWarning).
data = data[(data['Year'] >= 1978) & (data['Year'] <= 2020)].copy()

# International = 1 where the SC and TC values differ, 0 otherwise.
# NOTE(review): SC/TC presumably are source/target country codes - confirm.
data['International'] = 0
data.loc[data['SC'] != data['TC'], 'International'] = 1

In [7]:
# Keep rows whose Year lies in 1978-2020 (inclusive). The explicit .copy() makes
# the filtered frame independent, so the column added below is not written to a
# slice of the unfiltered frame (the pattern behind SettingWithCopyWarning).
data2 = data2[(data2['Year'] >= 1978) & (data2['Year'] <= 2020)].copy()

# International = 1 where the SC and TC values differ, 0 otherwise.
# NOTE(review): SC/TC presumably are source/target country codes - confirm.
data2['International'] = 0
data2.loc[data2['SC'] != data2['TC'], 'International'] = 1

In [17]:
# Keep rows whose Year lies in 1978-2020 (inclusive). The explicit .copy() makes
# the filtered frame independent, so the column added below is not written to a
# slice of the unfiltered frame (the pattern behind SettingWithCopyWarning).
data3 = data3[(data3['Year'] >= 1978) & (data3['Year'] <= 2020)].copy()

# International = 1 where the SC and TC values differ, 0 otherwise.
# NOTE(review): SC/TC presumably are source/target country codes - confirm.
data3['International'] = 0
data3.loc[data3['SC'] != data3['TC'], 'International'] = 1

In [18]:
# Keep rows whose Year lies in 1978-2020 (inclusive). The explicit .copy() makes
# the filtered frame independent, so the column added below is not written to a
# slice of the unfiltered frame (the pattern behind SettingWithCopyWarning).
data4 = data4[(data4['Year'] >= 1978) & (data4['Year'] <= 2020)].copy()

# International = 1 where the SC and TC values differ, 0 otherwise.
# NOTE(review): SC/TC presumably are source/target country codes - confirm.
data4['International'] = 0
data4.loc[data4['SC'] != data4['TC'], 'International'] = 1

# Plotting helper functions

In [32]:
def draw_line(ax,d,column,color,label):
    """Plot the per-year mean of ``column`` as a line on ``ax``.

    ``d`` is grouped by its 'Year' column; the yearly means are drawn with
    line width 2 in the given ``color`` and tagged with ``label``.
    """
    yearly_mean = d.groupby('Year')[column].mean()
    table = yearly_mean.reset_index()
    ax.plot(table['Year'], table[column], color=color, label=label, lw=2)
    
def draw_line2(ax,d,color,label,ls='-'):
    """Plot the per-year count of non-null 'Date' entries of ``d`` on ``ax``.

    The line is drawn with width 2, the given ``color``/``label`` and line
    style ``ls`` (solid by default).
    """
    per_year = d.groupby('Year')['Date'].count()
    table = per_year.reset_index()
    ax.plot(table['Year'], table['Date'], color=color, label=label, lw=2, ls=ls)

def draw_line3(ax,d,d2,color,label,ls='-'):
    """Plot, per year, the count of ``d2`` rows divided by the count of ``d`` rows.

    Counts are the number of non-null 'Date' entries per 'Year'. The two count
    tables are outer-joined on 'Year' and missing counts are replaced by 0, so
    a year absent from ``d2`` plots as 0.
    """
    denominators = d.groupby('Year')['Date'].count().reset_index()
    numerators = d2.groupby('Year')['Date'].count().reset_index()
    joined = denominators.merge(
        numerators, on=['Year'], how='outer', suffixes=('_all', '_part')
    )
    joined = joined.fillna(0)
    share = joined['Date_part'] / joined['Date_all']
    ax.plot(joined['Year'], share, color=color, label=label, lw=2, ls=ls)
    
def set_fig(ax,xlabel,ylabel,iflog = True,legend = True):
    """Apply the notebook's shared axis styling to ``ax``.

    Every setting is applied through ``ax`` itself rather than through
    pyplot's current axes, so the axes that is passed in is the one that gets
    styled even when a different axes is current.

    Parameters
    ----------
    ax : axes object to style.
    xlabel, ylabel : axis label text; pass ``0`` to leave that label untouched.
    iflog : when true, put the y-axis on a log scale.
    legend : when true, draw a frameless legend at location code 2.
    """
    ax.set_xlim(1977, 2021)
    # Tick-label size is set through tick_params so it also applies to ticks
    # that are created after this call (e.g. when callers change the ticks).
    ax.tick_params(axis='both', labelsize=15)
    if legend:
        ax.legend(frameon=False, fontsize=15, loc=2)
    if iflog:
        ax.set_yscale('log')
    # 0 is the "do not touch this label" sentinel used by the callers.
    if xlabel != 0:
        ax.set_xlabel(xlabel, fontsize=15)
    if ylabel != 0:
        ax.set_ylabel(ylabel, fontsize=15)
    # Thick left/bottom spines, no right/top spines.
    for side in ('bottom', 'left'):
        ax.spines[side].set_linewidth(2)
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)

# Distance

In [12]:
# International moves only (International == 1), one subset per dataset.
international_subsets = [
    frame[frame['International'] == 1]
    for frame in (data, data2, data3, data4)
]

# Name the columns of the distance table; the first three are the merge keys.
distance.columns = [
    'SC', 'TC', 'Year', 'Geography',
    'GDP distance', 'RD distance', 'Population distance',
    'Researcher distance', 'Science distance', 'Collaboration distance',
    'Knowledge distance', 'Governance distance',
    'Language distance', 'Culture distance',
    'Invention distance', 'Tech distance',
]

# Left-join the distance measures onto each subset; rows without a matching
# (SC, TC, Year) entry are kept, with missing distance values.
data_, data2_, data3_, data4_ = [
    subset.merge(distance, on=['SC', 'TC', 'Year'], how='left')
    for subset in international_subsets
]

In [13]:
# Current seaborn color palette; the figures below use colors[1] for the
# scientist series and colors[2] for the inventor series.
colors = sns.color_palette()

In [44]:
# Figure "Proportion": yearly ratio of international rows to all rows,
# one line per dataset (solid = mobility, dashed = migration).
_fig, ax = plt.subplots(1, 1, figsize=(2, 1))

proportion_specs = (
    # (all rows, international rows, color, label, line style)
    (data2, data2_, colors[1], 'Scientist mobility', '-'),
    (data, data_, colors[1], 'Scientist migration', '--'),
    (data4, data4_, colors[2], 'Inventor mobility', '-'),
    (data3, data3_, colors[2], 'Inventor migration', '--'),
)
for all_rows, intl_rows, line_color, line_label, line_style in proportion_specs:
    draw_line3(ax, all_rows, intl_rows, line_color, line_label, ls=line_style)

set_fig(ax, 0, 'Proportion', iflog=False, legend=False)
plt.xticks([1980, 2000, 2020])
plt.savefig(savepath+"Proportion.pdf", bbox_inches='tight')

In [55]:
# Figure "number and distance2", three panels:
#   1. yearly number of international rows (log scale),
#   2. yearly mean of the 'Distance' column over all rows,
#   3. yearly mean of the merged 'Geography' column over international rows.
#
# One line-style convention is used in every panel, taken from the labelled
# draw_line2 / draw_line3 calls: solid = mobility (data2, data4),
# dashed = migration (data, data3); colors[1] = scientists, colors[2] = inventors.
# Without a shared convention the same dataset would switch between solid and
# dashed from one panel to the next, and the figure has no legend to recover it.
line_specs = [
    # (all rows, international rows, color, line style, label)
    (data2, data2_, colors[1], '-', 'Scientist mobility'),
    (data, data_, colors[1], '--', 'Scientist migration'),
    (data4, data4_, colors[2], '-', 'Inventor mobility'),
    (data3, data3_, colors[2], '--', 'Inventor migration'),
]
# Shared seaborn arguments. The fixed seed makes the bootstrapped confidence
# band identical from run to run (n_boot is kept small to keep the cell fast).
band_kw = dict(x='Year', n_boot=10, seed=1)

plt.subplots(1,3,figsize=(13,2.1))
plt.subplots_adjust(hspace=0,wspace=0.28)

# Panel 1: number of international rows per year.
ax=plt.subplot(1,3, 1)
for all_rows, intl_rows, line_color, line_style, line_label in line_specs:
    draw_line2(ax, intl_rows, line_color, line_label, ls=line_style)
set_fig(ax,'Year','#International mobility',iflog = True,legend = False)
plt.xticks([1980,2000,2020])
plt.yticks([1,10,100,1000,10000,100000,1000000])

# Panel 2: mean 'Distance' per year over all rows.
ax=plt.subplot(1,3, 2)
for all_rows, intl_rows, line_color, line_style, line_label in line_specs:
    sns.lineplot(data=all_rows, y='Distance', color=line_color,
                 linestyle=line_style, ax=ax, **band_kw)
plt.ticklabel_format(axis="y", style="sci", scilimits=(-2,2))
set_fig(ax,'Year','Geography distance',iflog = False,legend = False)
plt.xticks([1980,2000,2020])

# Panel 3: mean 'Geography' per year over international rows.
ax=plt.subplot(1,3, 3)
for all_rows, intl_rows, line_color, line_style, line_label in line_specs:
    sns.lineplot(data=intl_rows, y='Geography', color=line_color,
                 linestyle=line_style, ax=ax, **band_kw)
plt.ticklabel_format(axis="y", style="sci", scilimits=(-2,2))
set_fig(ax,'Year','Geography distance',iflog = False,legend = False)
plt.xticks([1980,2000,2020])

plt.savefig(savepath+"number and distance2.pdf",bbox_inches='tight')

In [1]:
# Figure "all distance2": yearly mean of each distance measure, one panel per
# measure on a 2x4 grid (the eighth cell is left blank).
#
# The distance measures are merged only into the international subsets
# (data_, data2_, data3_, data4_), so those frames are the ones plotted here;
# the unmerged frames are not given these columns anywhere in this notebook.
# Line styles follow the labelled draw_line2 / draw_line3 calls:
# solid = mobility (data2_, data4_), dashed = migration (data_, data3_).
distance_line_specs = [
    # (international rows with distances, color, line style)
    (data2_, colors[1], '-'),
    (data_, colors[1], '--'),
    (data4_, colors[2], '-'),
    (data3_, colors[2], '--'),
]

plt.subplots(2,4,figsize=(15,4.2))
plt.subplots_adjust(hspace=0.1)

for i,col in enumerate(['Culture distance', 'Language distance', 'GDP distance', 'RD distance', 'Governance distance', 'Science distance', 'Invention distance']):
    ax=plt.subplot(2,4, i+1)
    for frame, line_color, line_style in distance_line_specs:
        # seed fixes the bootstrap so the confidence band is reproducible.
        sns.lineplot(data=frame, x='Year', y=col, n_boot=10, seed=1,
                     color=line_color, linestyle=line_style, ax=ax)
    set_fig(ax,'Year',0,iflog = False,legend = False)
    # Only the first panel of each row carries the shared y-axis label.
    if i==0 or i==4:
        plt.ylabel('Avg. distance', fontsize=15)
    else:
        plt.ylabel('')
    # NOTE(review): this changes the global default font size from the first
    # iteration onward and stays in effect after this cell; kept in place so
    # rendered text sizes do not change.
    plt.rc('font', size=12)
    plt.ticklabel_format(axis="y", style="sci", scilimits=(-2,2))
    # Panels 5-7 show year tick labels; panels 1-4 keep the ticks but blank
    # the labels and drop the x-axis label.
    if i>=4:
        plt.xticks([1980,2000,2020])
    else:
        plt.xticks([1980,2000,2020],['','',''])
        plt.xlabel('')
    plt.title(col , fontsize=15)

# Eighth grid cell: hide every spine and tick so it renders empty.
ax = plt.subplot(2,4,8)
for side in ('bottom', 'left', 'right', 'top'):
    ax.spines[side].set_visible(False)
plt.yticks([])
plt.xticks([])
plt.savefig(savepath+"all distance2.pdf",bbox_inches='tight')