In [1]:
import pandas as pd
import numpy as np
import igraph as ig

# data import

In [2]:
# Load the citation table. The file is a zipped CSV; pandas infers the
# compression from the ".zip" extension.
# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this drive layout exists.
path1 = r'E:\netData\APS\0 citation and paper data\aps-dataset-citations-2010.zip'
f1 = pd.read_csv(path1)

In [3]:
# Load the per-paper property table (zipped CSV). Its 'id' column is matched
# against the DOIs of the citation table further down, and its row positions
# become the vertex ids of the citation graph.
# NOTE(review): absolute, machine-specific path.
path2 = r'E:\netData\APS\1 Paper\nodepro2010.zip'
f2 = pd.read_csv(path2)

In [4]:
# Keep only the two DOI columns. The explicit copy makes f1 an independent
# frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
f1 = f1[['citing_doi', 'cited_doi']].copy()

In [5]:
# Translate DOIs into integer ids: the id of a paper is its row position in f2.
# The lookup table is built once and reused for both columns.
doi_to_id = dict(zip(f2['id'], f2.index))
f1 = f1.assign(
    citing_id=f1['citing_doi'].map(doi_to_id),
    cited_id=f1['cited_doi'].map(doi_to_id),
)

# A DOI that is absent from f2 maps to NaN, which would silently turn the id
# columns into floats; fail early with a clear message instead.
unmapped = f1[['citing_id', 'cited_id']].isna().any(axis=1)
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} citation rows reference a DOI that is missing from f2['id']"
    )

# Edge list in (source, target) = (citing, cited) order, as integer vertex ids.
f11 = f1[['citing_id', 'cited_id']].astype('int64')

In [6]:
# Build the directed citation graph; edges point from the citing paper to the
# cited paper. use_vids=True is passed explicitly so that the two columns are
# always interpreted as 0-based vertex ids (= row positions in f2) and do not
# depend on the default of the installed python-igraph version. With vertex
# *names* instead, vertex order would follow first appearance in the edge list
# and per-vertex results could not be written back to f2 row by row.
g = ig.Graph.DataFrame(f11, directed=True, use_vids=True)

# The graph only contains ids up to the largest one that occurs in an edge.
# Pad with isolated vertices so that vertex i always corresponds to row i of f2.
if g.vcount() < len(f2):
    g.add_vertices(len(f2) - g.vcount())

In [7]:
# Number of vertices (papers) in the citation graph.
g.vcount()

463348

In [8]:
# Number of edges (citations) in the citation graph.
g.ecount()

4745622

In [9]:
# Reciprocity of the directed graph as reported by igraph (share of citation
# links that are mutual).
g.reciprocity()

0.001745678056000779

# calculate D and CD

In [10]:
def calculate_CD(x, graph=None):
    """Compute the disruption measures D and CD of one paper.

    Edges point from the citing paper to the cited paper, so the successors
    of ``x`` are its references and its predecessors are the papers citing it.

    With

    * n_i = papers that cite ``x`` but none of its references,
    * n_j = papers that cite ``x`` and at least one of its references,
    * n_k = papers that cite at least one reference of ``x`` but not ``x``,

    the measures are ``D = n_i / (n_i + n_j + n_k)`` and
    ``CD = (n_i - n_j) / (n_i + n_j + n_k)``.

    Parameters
    ----------
    x : int
        Vertex index of the focal paper.
    graph : optional
        Graph to query. Only ``successors(v)`` and ``predecessors(v)``, both
        returning vertex indices, are used. Defaults to the notebook-level
        graph ``g``.

    Returns
    -------
    tuple
        ``(D, CD)``. ``(0, 0)`` when ``x`` is never cited and ``(1, 1)`` when
        ``x`` is cited but has no references.
    """
    if graph is None:
        graph = g

    citers = set(graph.predecessors(x))
    if not citers:
        return 0, 0

    references = graph.successors(x)
    if not references:
        return 1, 1

    # Everything that cites at least one reference of x. Sets replace the
    # repeated np.append / np.unique calls, which copied the whole array on
    # every iteration.
    # NOTE(review): x cites its own references, so x itself is part of this set
    # and is therefore counted in n_k. Kept as is so that previously saved
    # results stay reproducible - confirm whether this is intended.
    reference_citers = set()
    for reference in set(references):
        reference_citers.update(graph.predecessors(reference))

    n_j = len(citers & reference_citers)
    n_i = len(citers) - n_j
    n_k = len(reference_citers) - n_j
    total = n_i + n_j + n_k  # > 0 because citers is non-empty

    return n_i / total, (n_i - n_j) / total

# two-step disruption index (2stepD)

In [12]:
def calculate_2D(x, graph=None):
    """Compute the two-step disruption measure of one paper.

    Works like the D measure, but the comparison set is built from the
    references of the references of ``x`` (second generation) instead of its
    direct references. Edges point from the citing paper to the cited paper.

    With

    * n_i = papers that cite ``x`` but no second-generation reference,
    * n_j = papers that cite ``x`` and at least one second-generation reference,
    * n_k = papers that cite a second-generation reference but not ``x``,

    the result is ``n_i / (n_i + n_j + n_k)``.

    Parameters
    ----------
    x : int
        Vertex index of the focal paper.
    graph : optional
        Graph to query. Only ``successors(v)`` and ``predecessors(v)``, both
        returning vertex indices, are used. Defaults to the notebook-level
        graph ``g``.

    Returns
    -------
    float
        The two-step measure; ``1.0`` when ``x`` has no references or none of
        its references has references. Always a scalar, so the values can be
        stored in a numeric column.
    """
    if graph is None:
        graph = g

    references = graph.successors(x)
    if not references:
        return 1.0

    # Second generation: everything cited by a reference of x.
    second_generation = set()
    for reference in set(references):
        second_generation.update(graph.successors(reference))

    if not second_generation:
        return 1.0

    # Everything that cites at least one second-generation reference. This set
    # is never empty: each second-generation paper is cited by the reference
    # it was reached through, so the denominator below is positive.
    second_generation_citers = set()
    for paper in second_generation:
        second_generation_citers.update(graph.predecessors(paper))

    citers = set(graph.predecessors(x))
    n_j = len(citers & second_generation_citers)
    n_i = len(citers) - n_j
    n_k = len(second_generation_citers) - n_j

    return n_i / (n_i + n_j + n_k)

# compute D, CD and 2stepD for every paper and save

In [39]:
# Number of vertices in the graph; used as the upper bound of the loop that
# computes the indices for every vertex.
M = g.vcount()

In [40]:
# Result accumulators, one entry per vertex: D, two-step D and CD.
Dlst, D2lst, CDlst = [], [], []

In [41]:
# Start from empty lists so that re-running this cell does not append a second
# copy of the results to lists left over from a previous run.
Dlst, D2lst, CDlst = [], [], []

# Vertex i of the graph corresponds to row i of f2, so the lists are filled in
# vertex order.
for i in range(M):
    d_value, cd_value = calculate_CD(i)
    Dlst.append(d_value)
    CDlst.append(cd_value)
    D2lst.append(calculate_2D(i))

In [42]:
# Collect the three result lists in one frame, one column per measure.
name_data = ['D', '2stepD', 'CD']
out_data = [Dlst, D2lst, CDlst]
Ddata = pd.DataFrame({label: values for label, values in zip(name_data, out_data)})

In [43]:
# Summary statistics of the freshly computed D, 2stepD and CD columns.
Ddata.describe()

Unnamed: 0,D,2stepD,CD
count,463348.0,463348.0,463348.0
mean,0.048016,0.032958,0.023165
std,0.172696,0.162038,0.178672
min,0.0,0.0,-1.0
25%,0.0,0.0,-0.013228
50%,0.001054,0.0,-0.002196
75%,0.012784,0.0048,0.0
max,1.0,1.0,1.0


In [3]:
# Summary statistics of the node-property table.
# NOTE(review): the execution count of this cell is out of sequence and the
# recorded output already lists the CD, 2stepD, D and degree columns that are
# only assigned in the cells below - it looks like it was produced from an
# already enriched file; confirm and re-run in order.
f2.describe()

Unnamed: 0,year,closeness,betweenness,authority,hub,eigenvector,indegree,h,coreness,pagerank,DI,CD,2stepD,D,degree
count,463348.0,450761.0,463348.0,463348.0,463348.0,463348.0,463348.0,463348.0,463348.0,463348.0,374384.0,463348.0,463348.0,463348.0,463348.0
mean,1989.869744,0.196733,750843.7,7.363174e-05,0.007119738,0.000244,10.242025,3.328967,10.693731,2.158205e-06,-0.004513,0.023165,0.032958,0.048016,20.484051
std,17.557689,0.037913,13284700.0,0.00254828,0.04523288,0.007292,29.829037,4.142201,5.259505,1.142383e-05,0.083491,0.178672,0.162038,0.172696,32.529572
min,1893.0,0.072611,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.297752e-07,-1.0,-1.0,0.0,0.0,0.0
25%,1982.0,0.184324,1584.435,1.566782e-09,6.653263e-07,0.0,1.0,1.0,7.0,4.736622e-07,-0.017986,-0.013228,0.0,0.0,8.0
50%,1995.0,0.19608,59894.11,1.333347e-07,1.833919e-05,0.0,4.0,2.0,11.0,6.484363e-07,-0.004914,-0.002196,0.0,0.001054,15.0
75%,2003.0,0.207086,322636.9,2.036991e-06,0.0002864381,0.0,11.0,5.0,15.0,1.24475e-06,0.0,0.0,0.0048,0.012784,25.0
max,2009.0,1.0,4188175000.0,1.0,1.0,1.0,5464.0,106.0,34.0,0.002556489,0.995918,1.0,1.0,1.0,5474.0


In [45]:
# Attach the computed measures to the node table; entry i of each list belongs
# to row i of f2.
for column, values in (('CD', CDlst), ('2stepD', D2lst), ('D', Dlst)):
    f2[column] = values

In [49]:
# Degree of every vertex as returned by igraph, in vertex order; stored
# row-aligned with f2.
f2['degree'] = g.degree()

In [50]:
# Write the enriched node table without the row index. pandas infers zip
# compression from the ".zip" extension.
# NOTE(review): absolute, machine-specific path; the target folder
# ("3 properties") differs from the folder the table was loaded from
# ("1 Paper") - confirm this is intended.
f2.to_csv(r'E:\netData\APS\3 properties\nodepro2010.zip',index=False)