In [1]:
import pandas as pd
import numpy as np
import os
import math
import re
import random
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib.colors import LinearSegmentedColormap, Normalize
import igraph as ig
from collections import Counter
import scipy.stats
import seaborn as sns
from matplotlib.font_manager import FontProperties  # 导入FontProperties
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
from matplotlib.colors import LinearSegmentedColormap, Normalize
import warnings
warnings.filterwarnings("ignore")

# data import

In [2]:
class DI_network:
    """Helpers for reading tagged plain-text bibliographic records.

    A record is a list of lines. A line that opens a field starts with a
    two-character tag (``TI``, ``TC``, ``DI``, ``AU``, ``PY``, ``CR``, ``NR``)
    followed by the field value; records are separated by blank lines.
    NOTE(review): this looks like the Web of Science plain-text export
    layout -- confirm against the data files.
    """

    def open_file(self , path):
        """Return the lines of every regular file in ``path`` as one list.

        The first two lines of each file are skipped (presumably a file
        header -- TODO confirm). Files are read in sorted name order because
        ``os.listdir`` gives no ordering guarantee and the record order
        determines the node order of everything built from this data.
        Sub-directories (e.g. ``.ipynb_checkpoints``) are ignored instead of
        raising when opened.
        """
        data = []
        for file in sorted(os.listdir(path)):
            full_path = os.path.join(path , file)
            if not os.path.isfile(full_path):
                continue
            # NOTE(review): 'unicode_escape' also interprets backslash
            # sequences found in the text; kept as in the original.
            with open(full_path, 'r', encoding='unicode_escape') as f:
                data.extend(f.readlines()[2:])
        return data

    def get_data(self , txt):
        """Split a flat list of lines into records.

        A record is closed by a line that is exactly ``'\\n'``; that blank
        line is kept as a trailing ``''`` entry of the record. Lines after
        the last blank line are discarded.
        """
        p_lst = []
        t_p_lst = []
        for t in txt:
            # Drop only the line terminator; a final line without one keeps
            # all of its characters.
            t_p_lst.append(t[:-1] if t.endswith('\n') else t)
            if t == '\n':
                p_lst.append(t_p_lst)
                t_p_lst = []
        return p_lst

    def get_reference(self , p):
        """Return the reference lines of record ``p``.

        Collection starts at the first line tagged ``CR`` and stops at the
        first line tagged ``NR``; the first three characters (tag or
        continuation indent) are removed from every collected line.
        """
        reference_lst = []
        collecting = False
        for pi in p:
            tag = pi[:2]
            if tag == 'NR':
                # Nothing is collected at or after the NR line.
                break
            if tag == 'CR':
                collecting = True
            if collecting:
                reference_lst.append(pi[3:])
        return reference_lst

    def _first_field(self , p , tag):
        """Return the stripped value of the first line tagged ``tag``, or ``np.nan``."""
        for pi in p:
            if pi[:2] == tag:
                return pi[2:].strip()
        return np.nan

    def get_ti(self , p):
        """Return the first ``TI`` line of the record, or ``np.nan`` if absent.

        Only the tagged line itself is returned; continuation lines are not
        appended.
        """
        return self._first_field(p , 'TI')

    def get_tc(self , p):
        """Return the ``TC`` field as an ``int``, or ``np.nan`` if absent."""
        value = self._first_field(p , 'TC')
        if isinstance(value , str):
            return int(value)
        return np.nan

    def get_doi(self , p):
        """Return the ``DI`` field, or ``np.nan`` if absent."""
        return self._first_field(p , 'DI')

    def get_au(self , p):
        """Return the first ``AU`` line of the record, or ``np.nan`` if absent."""
        return self._first_field(p , 'AU')

    def get_year(self , p):
        """Return the ``PY`` field as a string, or ``np.nan`` if absent."""
        return self._first_field(p , 'PY')

In [3]:
# Parser instance reused by the cells below.
di = DI_network()
# Directory whose files are read by di.open_file.
# NOTE(review): '.../JOI' looks like a placeholder -- point it at the real data folder before running.
path = '.../JOI'

In [4]:
# Read every export file and split the lines into per-record line lists.
citing_data = di.open_file(path)
p_data = di.get_data(citing_data)

# Spot-check the field extractors on the first record.
first_record = p_data[0]
reference = di.get_reference(first_record)
ti = di.get_ti(first_record)
tc = di.get_tc(first_record)
doi = di.get_doi(first_record)
au = di.get_au(first_record)
year = di.get_year(first_record)
# get_au / get_year return np.nan when the field is missing; str() keeps the
# label buildable instead of raising TypeError on float + str.
meta_inf = str(au) + ' ' + str(year)

# construct a network

In [5]:
meta_inf_lst = []           # "<AU> <PY>" label of each record
doi_lst = []                # DI field of each record (np.nan when absent)
ref_doi_lst = []            # per record: DOIs found in its reference lines
ti_lst = []                 # TI field of each record
tc_lst = []                 # TC field of each record

# Same pattern text as before, compiled once instead of on every search.
ref_doi_pattern = re.compile('DOI 10.+')

for p in p_data:
    reference = di.get_reference(p)
    doi = di.get_doi(p)
    au = di.get_au(p)
    year = di.get_year(p)
    ti = di.get_ti(p)
    tc = di.get_tc(p)
    # A record without AU or PY yields np.nan from the getter; str() avoids
    # the TypeError that plain concatenation raised and labels it 'nan'.
    meta_inf = str(au) + ' ' + str(year)

    meta_inf_lst.append(meta_inf)
    doi_lst.append(doi)
    ti_lst.append(ti)
    tc_lst.append(tc)

    ref_doi_lst_small = []
    for line in reference:
        found = ref_doi_pattern.search(line)
        if found is not None:
            # Drop the leading 'DOI ' (4 characters) and keep the rest of the match.
            ref_doi_lst_small.append(found.group()[4:])

    ref_doi_lst.append(ref_doi_lst_small)

In [6]:
M = len(p_data)
# adj_matrix[i, j] == 1 when the DOI of record i appears among the reference
# DOIs of record j.
adj_matrix = np.zeros((M,M))

# Map each DOI string to the row(s) carrying it, so every reference is
# resolved with one lookup instead of scanning each reference list once per
# record. Records without a DOI (np.nan) never matched a reference string in
# the scan either, so they are left out.
doi_to_rows = {}
for i, C_doi in enumerate(doi_lst):
    if isinstance(C_doi, str):
        doi_to_rows.setdefault(C_doi, []).append(i)

for j, cited_dois in enumerate(ref_doi_lst):
    for cited_doi in cited_dois:
        for i in doi_to_rows.get(cited_doi, ()):
            adj_matrix[i, j] = 1

In [9]:
# Labelled view of the adjacency matrix: rows and columns both carry the
# per-record "<AU> <PY>" labels.
df = pd.DataFrame(
    data=adj_matrix,
    index=meta_inf_lst,
    columns=meta_inf_lst,
)

In [7]:
# Build the igraph graph from the 0/1 adjacency matrix.
# NOTE(review): confirm that the installed igraph version honours the
# `directed` keyword here. The export cell further down writes in-degree and
# out-degree as separate columns, which only differ on a directed graph --
# TODO confirm which kind of graph is intended.
g = ig.Graph.Adjacency(adj_matrix,directed = False)

In [8]:
# Display the number of vertices in g.
g.vcount()

1257

In [9]:
# Display the number of edges in g.
g.ecount()

4515

# calculate DI

In [30]:
# One LDIO value per record: nz / (nx + ny + nz), or np.nan when the
# denominator is zero.
LDIO_lst = []

for i in range(M):
    out_degree = adj_matrix[i, :].sum()   # sum of row i

    # "posterior": columns set in row i; "prior": rows set in column i.
    posterior_nodes = [j for j in range(M) if adj_matrix[i , j] == 1]
    prior_nodes = [j for j in range(M) if adj_matrix[j , i] == 1]
    prior_lookup = set(prior_nodes)

    # ny: posterior nodes that are linked from at least one prior node.
    ny = sum(
        1
        for m in posterior_nodes
        if any(adj_matrix[k , m] == 1 for k in prior_nodes)
    )

    # nx: for every prior node, its row sum minus one, minus its links to
    # posterior nodes that are not themselves prior nodes.
    nx = 0
    for k in prior_nodes:
        local_x = sum(
            1
            for m in posterior_nodes
            if adj_matrix[k , m] == 1 and m not in prior_lookup
        )
        nx += adj_matrix[k , :].sum() - 1 - local_x

    # nz: whatever part of row i's sum was not counted in ny.
    nz = out_degree - ny

    total = nx + ny + nz
    LDIO = np.nan if total == 0 else (nz)/total

    LDIO_lst.append(LDIO)

# 2stepDIO

In [31]:
# One two-step value per record: nz / (nx + ny + nz) computed on neighbour
# sets widened by one extra step, or np.nan when the denominator is zero.
GDIO_lst = []

for i in range(M):
    # Initial values: the direct neighbours of i.
    direct_posterior = [j for j in range(M) if adj_matrix[i , j] == 1]
    direct_prior = [j for j in range(M) if adj_matrix[j , i] == 1]

    # Widen the posterior side by one step, skipping direct prior nodes.
    # Using a set removes duplicates.
    posterior_set = set(direct_posterior)
    for j in direct_posterior:
        for k in range(M):
            if adj_matrix[j , k] == 1 and k not in direct_prior:
                posterior_set.add(k)
    posterior_nodes_lst = list(posterior_set)

    # Widen the prior side by one step, skipping anything already posterior.
    # NOTE(review): the guard tests the posterior list, not the prior list --
    # kept exactly as in the original; confirm whether that is intended.
    prior_set = set(direct_prior)
    if len(posterior_nodes_lst) > 0:
        for j in direct_prior:
            for k in range(M):
                if adj_matrix[k , j] == 1 and k not in posterior_set:
                    prior_set.add(k)
    prior_nodes_lst = list(prior_set)

    # ny: posterior nodes linked from at least one prior node.
    ny = sum(
        1
        for m in posterior_nodes_lst
        if any(adj_matrix[k , m] == 1 for k in prior_nodes_lst)
    )

    # nx: for every prior node, its row sum minus one, minus its links to
    # posterior nodes that are not also prior nodes (the restriction).
    nx = 0
    for k in prior_nodes_lst:
        local_x = sum(
            1
            for m in posterior_nodes_lst
            if adj_matrix[k , m] == 1 and m not in prior_set
        )
        nx += (adj_matrix[k , :].sum() - 1 - local_x)

    # nz: posterior nodes not counted in ny.
    nz = len(posterior_nodes_lst) - ny

    total = nx + ny + nz
    GDIO = np.nan if total == 0 else (nz)/total

    GDIO_lst.append(GDIO)

# H-index

In [18]:
def Hindex(indexList):
    """Return the h-index of a collection of non-negative counts.

    The h-index is the largest ``h`` such that at least ``h`` of the values
    are greater than or equal to ``h``. Repeated values each count on their
    own, so ``[3, 3, 3]`` gives 3.

    Parameters
    ----------
    indexList : iterable of numbers
        The counts to rank (e.g. the degrees of a node's neighbours).

    Returns
    -------
    int
        The h-index; 0 for an empty input.
    """
    ranked = sorted(indexList, reverse = True)
    h = 0
    for rank, value in enumerate(ranked, start = 1):
        if value >= rank:
            h = rank
        else:
            # Values are in descending order, so no later rank can qualify.
            break
    return h

In [29]:
# h-index of every node, computed over the degrees of its neighbours.
h_lst = []
for i in range(M):
    # Use vertex i itself; a fixed vertex index here would give every node
    # the same value.
    neighbor_degrees = [neighbor.degree() for neighbor in g.vs[i].neighbors()]
    h_lst.append(Hindex(neighbor_degrees))

# save

In [34]:
# Column name -> per-record values, listed in the order the columns appear
# in the exported table.
export_columns = {
    'meta_inf': meta_inf_lst,
    'ti': ti_lst,
    'DIO': LDIO_lst,
    '2stepDIO': GDIO_lst,
    'degree': g.degree(),
    'coreness': g.coreness(),
    'h-index': h_lst,
    'pagerank': g.pagerank(),
    'in-degree': g.indegree(),
    'out-degree': g.outdegree(),
}
name_data = list(export_columns.keys())
out_data = list(export_columns.values())
DIOdata = pd.DataFrame(export_columns)

In [35]:
# Write the results table to 'JOIdata.xlsx' (relative path), leaving out the
# DataFrame index column.
DIOdata.to_excel('JOIdata.xlsx',index=False)