In [1]:
import pandas as pd
import numpy as np
import os
import math
import re
from collections import Counter
import random

# 数据的导入与处理

In [2]:
class DI_network:

    def open_file(self , path):
        file_lst = os.listdir(path)
        data = []
        for file in file_lst:
            with open(os.path.join(path , file), 'r', encoding='unicode_escape') as f:
                data.extend(f.readlines()[2:])
        return data
    
    def open_file_2(self , path):
        file_lst = os.listdir(path)
        file_lst_all = []
        data = []
        for file in file_lst:
            new_path = os.path.join(path , file)
            for i in os.listdir(new_path):
                file_lst_all.append(os.path.join(new_path , i))
            
        for file in file_lst_all:
            with open(file, 'r', encoding='utf-8') as f:
                data.extend(f.readlines()[2:])
        return data
 
    def get_data(self , txt):
        p_lst = []                        #p_lst是存储全部文献内容的列表（每篇文献为列表中的一个值）
        t_p_lst = []                      #t_p_lst是为了提取出p_lst的临时列表
        for t in txt:
            t_p_lst.append(t[:-1])
            if t =='\n':
                p_lst.append(t_p_lst)
                t_p_lst = []
        return p_lst
    
    def get_reference(self , p):             #此函数可以提取出该文献内容中参考文献的列表
        reference_lst = []
        on0 = False
        on1 = True
        for pi in p:
            if pi[:2] == 'CR':
                on0 = True
            if pi[:2] == 'NR':
                on1 = False
            if on0 == True and on1 == True:
                reference_lst.append(pi[3:])
        return reference_lst
    
    def get_ti(self , p):             #此函数可以提取出该文献内容中标题
        for pi in p:
            if pi[:2] == 'TI':
                p_ti = pi[2:].strip()
                return p_ti
        return 0
    
    def get_tc(self , p):             #此函数可以提取出该文献被引量
        for pi in p:
            if pi[:2] == 'TC':
                p_tc = int(pi[2:])
                return p_tc
        return 0
    
    def get_doi(self , p):             
        for pi in p:
            if pi[:2] == 'DI':
                p_di = pi[2:].strip()
                return p_di
        return 0
    
    def get_au(self , p):             
        for pi in p:
            if pi[:2] == 'AU':
                p_au = pi[2:].strip()
                return p_au
        return 0
    
    def get_year(self , p):             
        for pi in p:
            if pi[:2] == 'PY':
                p_year = pi[2:].strip()
                return p_year
        return 0

In [3]:
di = DI_network()
path = r'E:\目前研究项目\网络构建\1900-2020纯文本图情数据'

In [4]:
citing_data = di.open_file_2(path)
p_data = di.get_data(citing_data)

In [5]:
len(p_data)

134003

# 邻接矩阵的构建

In [6]:
meta_inf_lst = []           #FP的META
doi_lst = []                #FP的DOI
ref_doi_lst = []            #被引文献的DOI列表
ti_lst = []
tc_lst = []

for p in p_data:
    doi = di.get_doi(p)
    if doi != 0:
        reference = di.get_reference(p)
        au = di.get_au(p)
        year = di.get_year(p)
        ti = di.get_ti(p)
        tc = di.get_tc(p)
        meta_inf = str(au) + ' ' + str(year)

        meta_inf_lst.append(meta_inf)
        doi_lst.append(doi)
        ti_lst.append(ti)
        tc_lst.append(tc)

        ref_doi_lst_small = []
        for line in reference:
            if re.search( 'DOI 10.+', line)!= None:
                ref_doi_lst_small.append(re.search( 'DOI 10.+', line).group()[4:])

        ref_doi_lst.append(ref_doi_lst_small)

In [7]:
len(meta_inf_lst)

78556

In [8]:
M = len(meta_inf_lst)
adj_matrix = np.zeros((M,M),dtype='bool')
for i in range(M):
    C_doi = doi_lst[i]
    for j in range(M):
        if C_doi in ref_doi_lst[j]:
            adj_matrix[i, j] = 1

In [9]:
#np.savetxt("adj_matrix.txt", adj_matrix)

# DataFrame

In [10]:
df = pd.DataFrame(dict(zip(['meta_inf', 'doi', 'ti', 'tc'] , [meta_inf_lst, doi_lst, ti_lst, tc_lst])))

In [11]:
df = df.sort_values('tc',ascending= False)

In [None]:
df

# 计算2阶DI

In [39]:
def calculate_2di(focal_doi , adj_matrix):
    focal_loc = doi_lst.index(focal_doi)
    cited_doc_lst = []
    citing_ref_lst = []
    citing_cited_lst = []
    NR = 0
    NG = 0
    NE = 0
    
    cited1 = adj_matrix[focal_loc,:]
    citing1 = adj_matrix[:,focal_loc]
    for i in range(len(cited1)):
        if cited1[i] == 1:
            cited_doc_lst.append(i)
            cited2 = adj_matrix[i,:]
            for j in range(len(cited2)):
                if cited1[j] == 1:
                    cited_doc_lst.append(j)
                    
    for i in range(len(citing1)):
        if citing1[i] == 1:
            citing_ref_lst.append(i)
            citing2 = adj_matrix[:,i]
            for j in range(len(citing2)):
                if citing2[j] == 1:
                    citing_ref_lst.append(j)
    
    cited_doc_lst = np.unique(cited_doc_lst)
    citing_ref_lst = np.unique(citing_ref_lst)
    for i in citing_ref_lst:
        citing_cited_local = adj_matrix[i,:]
        for j in range(len(citing_cited_local)):
            if citing_cited_local[j] == 1:
                citing_cited_lst.append(j)

    citing_cited_lst = np.unique(citing_cited_lst)
    for i in citing_cited_lst:
        if i not in cited_doc_lst:
            NE += 1
                
    for i in cited_doc_lst:
        if i in citing_cited_lst:
            NR += 1
        else:
            NG += 1
    
    return NR,NG,NE

In [40]:
focal_doi = df['doi'].values[10]
NR,NG,NE = calculate_2di(focal_doi , adj_matrix)
print(NR,NG,NE)

266 234 1810


# SCIENTOMETRICS的数据

In [42]:
path_scientometrics = r'E:\目前研究项目\网络构建\sampledata\scientometrics_data'
citing_data_scientometrics = di.open_file(path_scientometrics)
p_data_scientometrics = di.get_data(citing_data_scientometrics)

meta_inf_lst_scientometrics = []           #FP的META
doi_lst_scientometrics = []                #FP的DOI
ti_lst_scientometrics = []
tc_lst_scientometrics = []

for p in p_data_scientometrics:
    doi_scientometrics = di.get_doi(p)
    if doi_scientometrics != 0:
        reference_scientometrics = di.get_reference(p)
        au_scientometrics = di.get_au(p)
        year_scientometrics = di.get_year(p)
        ti_scientometrics = di.get_ti(p)
        tc_scientometrics = di.get_tc(p)
        meta_inf_scientometrics = str(au_scientometrics) + ' ' + str(year_scientometrics)

        meta_inf_lst_scientometrics.append(meta_inf_scientometrics)
        doi_lst_scientometrics.append(doi_scientometrics)
        ti_lst_scientometrics.append(ti_scientometrics)
        tc_lst_scientometrics.append(tc_scientometrics)

df2 = pd.DataFrame(dict(zip(['meta_inf', 'doi', 'ti', 'tc'] , [meta_inf_lst_scientometrics, doi_lst_scientometrics, ti_lst_scientometrics, tc_lst_scientometrics])))

# 计算被引前100的论文的DI

In [54]:
sample_doi_lst = df2['doi'].values

In [55]:
NGlst = []
NRlst = []
NElst = []
di2_lst = []

for i in range(len(sample_doi_lst)):
    focal_doi = sample_doi_lst[i]
    try:
        doi_lst.index(focal_doi)
    except:
        NGlst.append(np.nan)
        NRlst.append(np.nan)
        NElst.append(np.nan)
        di2_lst.append(np.nan)
        print(np.nan)
        continue
    
    NR,NG,NE = calculate_2di(focal_doi , adj_matrix)
    DI2 = (NG+1)/(NR+NG+NE+5)
    di2_lst.append(DI2)
    NGlst.append(NG)
    NRlst.append(NR)
    NElst.append(NE)

    print(NG,NR,NE,DI2)

0 0 241 0.0040650406504065045
2 4 1682 0.0017720023626698169
0 9 3381 0.0002945508100147275
1 2 2371 0.0008406893652795292
1 2 2675 0.0007454342154304882
5 1 748 0.007905138339920948
1 2 2165 0.0009203865623561896
1 0 280 0.006993006993006993
1 0 667 0.0029717682020802376
nan
3 12 1262 0.0031201248049922
1 0 364 0.005405405405405406
0 3 2603 0.00038299502106472615
0 2 3437 0.00029036004645760743
3 7 2768 0.001437297879985627
0 6 1857 0.0005353319057815846
0 3 236 0.004098360655737705
nan
nan
0 4 2458 0.00040535062829347385
5 25 3551 0.0016731734523145567
1 3 2733 0.0007293946024799417
1 10 4082 0.0004880429477794046
6 8 1081 0.006363636363636364
1 2 790 0.002506265664160401
3 3 1798 0.002211166390270868
0 6 4699 0.00021231422505307856
0 5 1638 0.0006067961165048543
1 4 796 0.0024813895781637717
0 4 2496 0.0003992015968063872
0 1 910 0.001091703056768559
nan
nan
1 2 2102 0.0009478672985781991
0 0 1876 0.000531632110579479
6 0 767 0.008997429305912597
3 8 1397 0.0028308563340410475
10 4 

10 4 94 0.09734513274336283
0 0 0 0.2
0 1 44 0.02
32 13 279 0.10030395136778116
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
0 0 1385 0.0007194244604316547
0 0 0 0.2
nan
0 0 0 0.2
0 0 4598 0.00021724961981316532
0 0 0 0.2
0 0 2403 0.0004152823920265781
0 0 5771 0.00017313019390581717
0 0 2087 0.0004780114722753346
0 1 1300 0.0007656967840735069
0 0 115 0.008333333333333333
0 0 2203 0.0004528985507246377
0 0 3 0.125
0 0 2782 0.0003588087549336204
0 0 1501 0.0006640106241699867
nan
0 2 4129 0.00024177949709864604
0 1 275 0.0035587188612099642
nan
0 0 6924 0.0001443209698369173
0 2 2164 0.00046061722708429296
0 0 2191 0.0004553734061930783
0 0 12 0.058823529411764705
0 1 2335 0.00042716787697565144
0 0 968 0.0010277492291880781
0 6 1205 0.0008223684210526315
0 1 3772 0.00026469031233456857
0 0 0 0.2
0 1 563 0.0017574692442882249
0 1 3774 0.00026455026455026457
0 0 483 0.0020491803278688526
1 5 2475 0.000804505229283990

2 0 436 0.006772009029345372
0 0 477 0.002074688796680498
nan
3 4 1696 0.00234192037470726
0 3 4245 0.00023512814483893723
0 2 4117 0.00024248302618816683
1 1 219 0.008849557522123894
7 0 595 0.013179571663920923
2 7 1704 0.0017462165308498253
1 1 818 0.0024242424242424242
3 1 1563 0.002544529262086514
1 3 2352 0.0008470986869970351
3 16 2953 0.0013436345314074571
3 0 389 0.010075566750629723
4 4 2956 0.001684068710003368
1 13 3852 0.0005166623611469904
6 6 3454 0.002016709881878421
1 0 44 0.04
1 0 22 0.07142857142857142
2 5 1637 0.0018192844147968466
6 0 3 0.5
0 0 73 0.01282051282051282
1 0 256 0.007633587786259542
2 17 3378 0.0008818342151675485
0 14 3632 0.0002738975623116954
1 0 0 0.3333333333333333
3 5 1372 0.002888086642599278
4 4 1546 0.003207184092366902
5 19 1423 0.004132231404958678
1 0 0 0.3333333333333333
11 27 2677 0.004411764705882353
15 4 1147 0.013663535439795047
2 5 2178 0.0013698630136986301
13 8 1309 0.010486891385767791
4 5 1336 0.003703703703703704
0 0 69 0.0135135

2 0 6 0.23076923076923078
nan
29 0 5 0.7692307692307693
4 0 38 0.10638297872340426
nan
2 6 748 0.003942181340341655
2 5 21 0.09090909090909091
13 0 0 0.7777777777777778
1 0 0 0.3333333333333333
0 2 35 0.023809523809523808
nan
5 2 95 0.056074766355140186
6 5 549 0.012389380530973451
nan
90 26 200 0.2834890965732087
3 3 1028 0.0038498556304138597
5 3 803 0.007352941176470588
6 5 434 0.015555555555555555
2 0 528 0.005607476635514018
7 0 0 0.6666666666666666
0 3 566 0.0017421602787456446
nan
nan
nan
nan
0 3 247 0.00392156862745098
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
0 1 1600 0.0006226650062266501
0 0 2507 0.0003980891719745223
0 0 2278 0.0004380201489268506
0 0 3915 0.00025510204081632655
0 0 1623 0.0006142506142506142
nan
1 0 1186 0.0016778523489932886
0 1 6713 0.0001488316713796696
0 2 2469 0.0004038772213247173
1 1 2852 0.0006995452955578874
0 0 3087 0.0003234152652005175
0 3 2419 0.00041203131437989287
0 0 2992 0.0

1 2 5290 0.00037750094375235937
0 0 2076 0.0004805382027871216
0 4 3247 0.0003071253071253071
0 6 4745 0.00021026072329688813
1 7 2633 0.0007558578987150416
nan
2 2 1946 0.001534526854219949
4 2 358 0.013550135501355014
0 9 2024 0.0004906771344455348
2 4 507 0.005791505791505791
2 0 811 0.003667481662591687
0 0 2021 0.0004935834155972359
1 1 851 0.002331002331002331
2 4 2101 0.0014204545454545455
1 6 2284 0.0008710801393728223
6 0 539 0.012727272727272728
0 3 2308 0.0004317789291882556
0 1 2307 0.00043233895373973193
2 0 2985 0.001002673796791444
5 0 24 0.17647058823529413
0 1 3073 0.0003247807729782397
2 0 0 0.42857142857142855
2 0 98 0.02857142857142857
1 6 2332 0.0008532423208191126
11 2 410 0.028037383177570093
2 1 223 0.012987012987012988
1 5 2953 0.0006747638326585695
3 14 1388 0.0028368794326241137
7 11 1978 0.00399800099950025
0 0 613 0.0016181229773462784
9 2 73 0.11235955056179775
2 6 1594 0.001866832607342875
1 11 1569 0.0012610340479192938
0 10 1393 0.0007102272727272727
3 

28 1 270 0.09539473684210527
7 2 831 0.009467455621301775
15 13 1363 0.011461318051575931
2 7 1641 0.0018126888217522659
3 12 998 0.003929273084479371
0 5 475 0.002061855670103093
nan
1 9 268 0.007067137809187279
1 1 104 0.018018018018018018
3 0 0 0.5
3 0 0 0.5
7 14 1338 0.005865102639296188
nan
7 0 8 0.4
5 1 24 0.17142857142857143
1 0 1131 0.001759014951627089
5 3 1658 0.003590664272890485
nan
6 7 1196 0.005766062602965404
3 0 0 0.5
13 21 193 0.0603448275862069
8 5 659 0.013293943870014771
12 6 679 0.018518518518518517
76 23 918 0.07534246575342465
nan
nan
11 11 687 0.01680672268907563
5 0 0 0.6
0 0 39 0.022727272727272728
8 12 1021 0.008604206500956023
15 0 45 0.24615384615384617
3 0 0 0.5
0 0 0 0.2
2 0 1239 0.002407704654895666
3 1 19 0.14285714285714285
0 0 563 0.0017605633802816902
9 5 649 0.014970059880239521
1 0 0 0.3333333333333333
0 0 2 0.14285714285714285
0 0 132 0.0072992700729927005
0 0 0 0.2
nan
nan
7 5 438 0.017582417582417582
0 0 23 0.03571428571428571
nan
nan
0 2 17 0.0

0 2 1654 0.0006020469596628537
1 4 2942 0.0006775067750677507
0 13 4526 0.00022007042253521127
nan
1 0 2016 0.0009891196834817012
0 1 2012 0.0004955401387512388
2 0 180 0.016042780748663103
1 2 4558 0.0004380201489268506
1 5 2814 0.0007079646017699115
0 1 1375 0.000724112961622013
1 1 1213 0.001639344262295082
0 1 828 0.001199040767386091
1 0 654 0.0030303030303030303
0 2 171 0.0056179775280898875
5 0 111 0.049586776859504134
2 1 3845 0.0007786140669608098
0 0 194 0.005025125628140704
9 4 1385 0.007127583749109052
2 6 2566 0.0011632415664986429
0 1 3238 0.00030826140567200987
1 2 1778 0.0011198208286674132
1 4 2825 0.0007054673721340388
1 3 1747 0.0011389521640091116
0 2 3807 0.00026219192448872575
1 0 0 0.3333333333333333
2 0 0 0.42857142857142855
0 0 1292 0.0007710100231303007
0 1 4869 0.00020512820512820512
1 1 1414 0.0014074595355383533
nan
1 2 2893 0.0006894174422612892
3 11 1195 0.0032948929159802307
nan
1 0 1571 0.0012682308180088776
0 9 3395 0.0002933411557641537
0 7 2542 0.000

3 7 438 0.008830022075055188
0 4 2423 0.00041118421052631577
1 0 1209 0.0016460905349794238
9 5 983 0.00998003992015968
0 0 0 0.2
4 2 82 0.053763440860215055
1 0 1680 0.0011862396204033216
7 7 965 0.008130081300813009
2 0 1048 0.002843601895734597
8 13 1924 0.004615384615384616
4 9 2790 0.0017806267806267807
4 10 1335 0.003692762186115214
0 0 0 0.2
0 1 1951 0.000510986203372509
0 8 1131 0.0008741258741258741
0 0 0 0.2
5 3 140 0.0392156862745098
2 32 2998 0.0009878169245966415
18 10 618 0.029185867895545316
3 0 3 0.36363636363636365
24 30 1513 0.015903307888040712
15 22 935 0.016376663254861822
2 9 1073 0.0027548209366391185
1 0 0 0.3333333333333333
0 2 2052 0.00048567265662943174
2 0 25 0.09375
1 0 1406 0.00141643059490085
2 40 1079 0.0026642984014209592
nan
10 37 1629 0.006543723973825104
2 0 0 0.42857142857142855
nan
2 5 64 0.039473684210526314
9 11 555 0.017241379310344827
3 4 456 0.008547008547008548
nan
1 3 708 0.002789400278940028
nan
nan
6 0 0 0.6363636363636364
2 2 828 0.003584

11 12 368 0.030303030303030304
0 0 0 0.2
nan
0 0 0 0.2
6 0 6 0.4117647058823529
6 10 209 0.030434782608695653
0 1 37 0.023255813953488372
35 17 676 0.04911323328785812
nan
2 5 470 0.006224066390041493
1 3 312 0.006230529595015576
nan
47 24 216 0.1643835616438356
0 4 819 0.0012077294685990338
1 0 36 0.047619047619047616
1 0 0 0.3333333333333333
2 0 0 0.42857142857142855
0 0 0 0.2
13 3 592 0.022838499184339316
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
0 0 259 0.003787878787878788
nan
0 0 1180 0.0008438818565400844
0 0 2430 0.0004106776180698152
0 0 373 0.0026455026455026454
0 0 2667 0.00037425149700598805
0 0 0 0.2
0 0 7148 0.0001398014818957081
0 0 2255 0.0004424778761061947
nan
0 0 883 0.0011261261261261261
0 0 0 0.2
0 0 3319 0.00030084235860409147
nan
nan
nan
0 0 312 0.0031545741324921135
0 0 3605 0.0002770083102493075
0 0 3590 0.0002781641168289291
0 4 2763 0.00036075036075036075
1 4 3461 0.0005762028233938346
1 0 4218 0.0004734848484848485
1

0 0 855 0.0011627906976744186
0 2 2982 0.00033456005352960856
1 0 24 0.06666666666666667
0 1 2226 0.00044802867383512545
4 38 3498 0.0014104372355430183
2 3 4401 0.0006801178871004307
4 40 2929 0.0016789791806581598
4 0 91 0.05
2 2 2797 0.0010691375623663579
nan
nan
4 8 1748 0.0028328611898017
2 3 750 0.003947368421052632
nan
1 6 1448 0.0013698630136986301
1 0 0 0.3333333333333333
22 4 777 0.028465346534653466
12 0 0 0.7647058823529411
8 1 967 0.009174311926605505
2 0 1569 0.0019035532994923859
4 8 2512 0.0019770660340055358
18 2 402 0.04449648711943794
0 2 1934 0.0005151983513652757
8 4 946 0.009345794392523364
7 5 841 0.009324009324009324
1 5 661 0.002976190476190476
2 8 520 0.005607476635514018
2 6 2384 0.0012515644555694619
6 12 948 0.007209062821833162
0 5 2001 0.0004972650422675286
3 1 865 0.004576659038901602
11 0 0 0.75
9 4 1859 0.005327650506126798
0 8 1691 0.0005868544600938967
6 0 1395 0.004978662873399715
3 2 278 0.013888888888888888
1 12 3068 0.0006480881399870382
0 6 428 

0 0 2405 0.0004149377593360996
0 0 3631 0.000275027502750275
0 0 4095 0.00024390243902439024
0 0 4451 0.0002244165170556553
0 0 6089 0.00016409583196586806
0 0 3891 0.0002566735112936345
0 0 2073 0.00048123195380173246
0 0 2718 0.0003672420124862284
0 0 1826 0.0005461496450027307
nan
0 0 55 0.016666666666666666
0 0 2574 0.00038774718883288094
0 0 2912 0.00034281796366129587
0 3 5060 0.0001973164956590371
0 2 3709 0.00026910656620021526
nan
0 0 84 0.011235955056179775
nan
0 1 1725 0.0005777007510109763
0 0 1882 0.0005299417064122947
nan
0 0 1757 0.0005675368898978433
0 0 0 0.2
0 1 2291 0.00043535045711797995
0 2 2651 0.0003762227238525207
0 0 542 0.0018281535648994515
0 0 67 0.013888888888888888
0 4 3167 0.00031486146095717883
1 6 2603 0.0007648183556405354
1 2 3354 0.000594883997620464
0 0 1449 0.000687757909215956
0 1 4261 0.00023435669088352472
0 2 2760 0.0003614022406938923
nan
0 1 2909 0.00034305317324185246
0 0 831 0.0011961722488038277
0 0 3014 0.0003312355084465055
nan
0 2 3211 

8 0 0 0.6923076923076923
4 24 4011 0.0012363996043521265
0 4 1457 0.0006821282401091405
5 0 1034 0.005747126436781609
11 18 1010 0.011494252873563218
5 24 3112 0.0019071837253655435
11 10 1145 0.010247651579846286
2 0 888 0.0033519553072625698
3 5 1259 0.0031446540880503146
0 0 0 0.2
2 4 18 0.10344827586206896
5 3 373 0.015544041450777202
3 22 3953 0.0010042681395932714
nan
6 4 601 0.011363636363636364
2 0 191 0.015151515151515152
5 2 637 0.009244992295839754
nan
4 0 6 0.3333333333333333
0 3 1882 0.0005291005291005291
14 4 345 0.04076086956521739
4 18 2047 0.0024108003857280617
1 5 2596 0.0007671653241273494
9 0 664 0.014749262536873156
4 6 922 0.005336179295624333
14 6 442 0.032119914346895075
0 1 135 0.0070921985815602835
12 24 1746 0.007274762171236709
1 0 2076 0.0009606147934678194
1 2 899 0.002205071664829107
24 27 1797 0.013491635186184566
1 2 1397 0.0014234875444839859
0 1 1232 0.0008077544426494346
2 27 1454 0.0020161290322580645
1 0 721 0.002751031636863824
3 12 1568 0.0025188

0 3 617 0.0016
0 4 1031 0.0009615384615384616
6 0 0 0.6363636363636364
nan
3 0 0 0.5
40 0 0 0.9111111111111111
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
0 0 5032 0.0001985308715505261
0 0 661 0.0015015015015015015
nan
0 0 1822 0.0005473453749315818
0 0 3277 0.0003046922608165753
0 0 1659 0.0006009615384615385
0 0 3885 0.0002570694087403599
0 0 50 0.01818181818181818
0 0 5650 0.00017683465959328028
0 0 1906 0.0005232862375719519
0 0 5078 0.0001967342120794806
0 0 29 0.029411764705882353
0 0 2265 0.0004405286343612335
0 0 1589 0.0006273525721455458
nan
0 1 3922 0.0002545824847250509
0 0 4101 0.00024354603019970775
0 0 4585 0.00021786492374727668
nan
0 0 4163 0.0002399232245681382
0 0 5388 0.00018542555164101615
0 1 4335 0.000230361667818475
0 0 18 0.043478260869565216
0 0 3338 0.0002991325157044571
0 1 4260 0.00023441162681669012
nan
1 4 2375 0.0008385744234800838
nan
0 0 574 0.0017271157167530224
0 4 6342 0.0001574555188159345
0 5 5955 0.00016764459

0 1 1921 0.0005189413596263622
2 0 878 0.003389830508474576
6 0 1754 0.00396600566572238
nan
0 0 164 0.005917159763313609
5 34 4009 0.0014803849000740192
1 1 2260 0.000882223202470225
7 0 38 0.16
1 2 3275 0.0006091989034419738
0 4 1628 0.0006108735491753207
14 0 442 0.03253796095444685
3 0 381 0.010282776349614395
7 3 1543 0.005134788189987163
2 4 1672 0.0017825311942959
0 2 2434 0.00040966816878328555
3 2 667 0.005908419497784343
1 3 4645 0.0004297378599054577
2 1 4168 0.0007183908045977011
2 4 2670 0.0011189854531891085
7 26 2379 0.0033098882912701694
4 1 309 0.01567398119122257
4 8 2000 0.002478929102627665
8 3 1297 0.006854531607006854
1 7 591 0.0033112582781456954
2 0 0 0.42857142857142855
6 28 2176 0.003160270880361174
7 0 0 0.6666666666666666
nan
0 4 2324 0.0004286326618088298
10 11 1181 0.009113504556752278
0 8 2416 0.0004116920543433512
3 5 2889 0.0013783597518952446
2 13 3334 0.0008944543828264759
0 14 3080 0.00032268473701193933
3 1 287 0.013513513513513514
3 5 1148 0.003445

21 12 323 0.060941828254847646
nan
4 1 3 0.38461538461538464
21 0 122 0.14864864864864866
2 0 0 0.42857142857142855
27 35 1459 0.01834862385321101
4 0 0 0.5555555555555556
10 1 653 0.016442451420029897
nan
nan
0 0 105 0.00909090909090909
2 3 200 0.014285714285714285
1 0 52 0.034482758620689655
4 3 412 0.01179245283018868
1 0 765 0.0025940337224383916
nan
15 0 0 0.8
1 0 29 0.05714285714285714
18 20 121 0.11585365853658537
6 0 0 0.6363636363636364
2 1 39 0.06382978723404255
nan
0 3 16 0.041666666666666664
nan
0 0 11 0.0625
3 0 0 0.5
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
0 0 730 0.0013605442176870747
0 0 3690 0.00027063599458728013
0 0 2557 0.00039032006245121
0 0 5948 0.00016798252981689904
0 1 4183 0.00023872045834328001
0 0 1887 0.0005285412262156448
0 0 2995 0.0003333333333333333
0 1 1434 0.0006944444444444445
0 0 126 0.007633587786259542
nan
0 0 1217 0.0008183306055646482
0 1 4902 0.00020374898125509371
1 0 1755 0.001135718341851221
1 0 1984 0.00

2 1 561 0.005272407732864675
0 6 3226 0.00030892801977139327
1 1 2247 0.0008873114463176575
nan
0 1 2134 0.00046728971962616824
1 2 2758 0.0007230657989877079
0 0 1760 0.0005665722379603399
1 0 1252 0.001589825119236884
1 1 2942 0.0006781959986436081
0 1 757 0.001310615989515072
2 4 2998 0.0009970089730807576
0 2 1754 0.0005678591709256105
0 0 729 0.0013623978201634877
0 0 867 0.0011467889908256881
0 2 1823 0.000546448087431694
0 0 743 0.001336898395721925
0 6 3584 0.0002781641168289291
0 3 1137 0.0008733624454148472
0 0 1959 0.0005091649694501018
1 0 828 0.002398081534772182
3 18 2071 0.0019074868860276585
1 0 3396 0.0005878894767783657
0 0 1921 0.0005192107995846313
2 11 5741 0.0005209237714881056
0 5 1454 0.0006830601092896175
2 4 1100 0.0027002700270027003
0 1 1302 0.0007645259938837921
0 1 2232 0.00044682752457551384
2 2 4003 0.0007477567298105683
1 5 1748 0.0011370096645821489
0 1 1517 0.0006565988181221273
0 1 369 0.0026666666666666666
nan
4 3 549 0.008912655971479501
2 3 1569 0

5 27 1134 0.005123825789923143
2 10 706 0.004149377593360996
10 0 0 0.7333333333333333
7 9 600 0.01288244766505636
11 3 421 0.02727272727272727
2 0 0 0.42857142857142855
nan
3 0 588 0.006711409395973154
nan
0 0 0 0.2
33 18 795 0.0399529964747356
26 3 160 0.13917525773195877
44 46 1140 0.03643724696356275
3 0 97 0.0380952380952381
2 0 456 0.0064794816414686825
nan
7 2 492 0.015810276679841896
1 0 0 0.3333333333333333
2 2 842 0.0035252643948296123
18 0 0 0.8260869565217391
1 0 0 0.3333333333333333
0 0 16 0.047619047619047616
4 2 1053 0.004699248120300752
1 0 0 0.3333333333333333
nan
12 6 498 0.02495201535508637
5 4 614 0.009554140127388535
nan
14 7 158 0.08152173913043478
0 0 0 0.2
1 4 556 0.0035335689045936395
0 1 202 0.004807692307692308
1 1 327 0.005988023952095809
4 0 334 0.014577259475218658
0 0 0 0.2
3 4 502 0.007782101167315175
1 1 871 0.002277904328018223
nan
0 0 499 0.001984126984126984
4 0 108 0.042735042735042736
nan
9 0 0 0.7142857142857143
0 6 1012 0.0009775171065493646
5 0 

0 8 4098 0.00024324981756263683
nan
2 0 6 0.23076923076923078
0 0 3947 0.00025303643724696357
0 0 1 0.16666666666666666
2 2 882 0.003367003367003367
2 13 2812 0.001059322033898305
4 6 2752 0.0018070112034694614
9 2 1022 0.009633911368015413
1 0 913 0.002176278563656148
0 0 2455 0.0004065040650406504
0 1 650 0.001524390243902439
2 8 2255 0.0013215859030837004
0 0 263 0.0037313432835820895
0 1 1438 0.0006925207756232687
0 10 3075 0.00032362459546925567
0 0 12 0.058823529411764705
1 0 3428 0.0005824111822947001
0 0 1469 0.0006784260515603799
0 1 1497 0.0006653359946773121
1 1 692 0.002861230329041488
0 5 3234 0.00030826140567200987
0 3 3237 0.0003081664098613251
0 2 4599 0.00021710811984368216
3 8 3119 0.0012759170653907496
0 0 3642 0.00027419797093501506
0 0 1202 0.0008285004142502071
nan
2 0 35 0.07142857142857142
nan
0 18 3855 0.00025786487880350697
0 4 3559 0.0002802690582959641
1 12 1452 0.0013605442176870747
2 1 786 0.003778337531486146
0 2 1027 0.0009671179883945841
2 0 0 0.4285714

1 0 0 0.3333333333333333
5 12 634 0.009146341463414634
2 4 5936 0.0005044560282495376
1 0 0 0.3333333333333333
1 5 1649 0.0012048192771084338
19 13 2859 0.006906077348066298
0 1 576 0.001718213058419244
1 6 1508 0.0013157894736842105
6 12 2539 0.00273224043715847
1 1 208 0.009302325581395349
2 1 915 0.0032502708559046588
13 26 1383 0.009810791871058164
1 0 161 0.011976047904191617
4 5 1500 0.0033025099075297227
0 3 2161 0.0004610419548178884
0 0 0 0.2
3 20 1700 0.0023148148148148147
7 22 1242 0.006269592476489028
5 0 81 0.06593406593406594
4 0 306 0.015873015873015872
0 5 738 0.001336898395721925
4 0 0 0.5555555555555556
3 3 750 0.005256241787122208
nan
0 1 1242 0.0008012820512820513
0 0 420 0.002352941176470588
4 12 809 0.006024096385542169
5 5 230 0.024489795918367346
nan
13 14 1760 0.0078125
17 21 1569 0.011166253101736972
2 1 1096 0.002717391304347826
50 79 1122 0.04060509554140127
1 1 1234 0.0016116035455278
3 0 44 0.07692307692307693
8 21 488 0.017241379310344827
4 8 2464 0.00201

0 2 3151 0.00031665611146295124
0 8 3663 0.0002720348204570185
0 1 3534 0.0002824858757062147
0 3 2461 0.0004050222762251924
4 32 5834 0.000851063829787234
4 7 2656 0.0018712574850299401
5 14 2214 0.002680965147453083
4 3 1297 0.0038197097020626434
2 1 756 0.003926701570680628
nan
0 4 1225 0.0008103727714748784
2 2 234 0.012345679012345678
0 3 2960 0.00033692722371967657
2 2 2035 0.0014677103718199608
8 10 2988 0.0029890401859847225
0 0 3384 0.00029507229271171436
7 1 235 0.03225806451612903
0 0 988 0.0010070493454179255
3 6 2056 0.001932367149758454
4 8 3673 0.0013550135501355014
2 3 1392 0.0021398002853067048
0 0 0 0.2
0 1 911 0.0010905125408942203
2 7 1513 0.0019646365422396855
nan
nan
1 8 3059 0.0006508298080052066
0 0 551 0.0017985611510791368
0 0 318 0.0030959752321981426
1 0 5 0.18181818181818182
4 0 2265 0.0021987686895338612
3 2 1578 0.0025188916876574307
nan
1 1 1061 0.0018726591760299626
2 0 0 0.42857142857142855
0 10 1408 0.0007027406886858749
3 4 667 0.005891016200294551
5

0 0 1555 0.000641025641025641
0 0 2705 0.00036900369003690036
0 1 1412 0.0007052186177715092
0 0 3525 0.00028328611898016995
0 2 1675 0.0005945303210463733
0 0 1424 0.0006997900629811056
0 0 3086 0.00032351989647363315
0 1 3603 0.0002770850651149903
0 1 448 0.0022026431718061676
0 2 2703 0.00036900369003690036
1 0 684 0.002898550724637681
0 0 2024 0.0004928536224741252
1 2 3363 0.0005932957579353308
2 1 3436 0.0008710801393728223
0 2 2469 0.0004038772213247173
1 2 1410 0.0014104372355430183
0 1 3095 0.0003224766204450177
1 0 2950 0.0006765899864682003
0 0 0 0.2
0 1 1782 0.0005592841163310962
0 0 808 0.0012300123001230013
0 0 2034 0.0004904364884747426
0 8 2833 0.00035137034434293746
1 1 1641 0.0012135922330097086
1 1 3312 0.0006025911419102139
0 5 1991 0.0004997501249375312
6 16 1003 0.006796116504854369
0 0 2032 0.0004909180166912126
0 2 3300 0.0003023888720895071
0 0 1679 0.0005938242280285036
nan
0 5 693 0.001422475106685633
2 10 5570 0.0005369608018614641
1 0 1062 0.001872659176029

In [56]:
df2['NG'] = NGlst
df2['NR'] = NRlst
df2['NE'] = NElst
df2[r'GDI(k=2)'] = di2_lst

In [53]:
df2TOP150.to_excel('DIscientomertics top150.xlsx')

In [58]:
df2

Unnamed: 0,meta_inf,doi,ti,tc,NG,NR,NE,GDI(k=2)
0,"Yasukawa, S 2015",10.1007/s11192-014-1466-5,Comparison of examiners' forward citations in ...,2,0.0,0.0,241.0,0.004065
1,"Nabout, JC 2015",10.1007/s11192-014-1385-5,Publish (in a group) or perish (alone): the tr...,43,2.0,4.0,1682.0,0.001772
2,"Guerrero-Bote, VP 2014",10.1007/s11192-014-1243-5,Relationship between downloads and citations a...,30,0.0,9.0,3381.0,0.000295
3,"Zhai, LH 2014",10.1007/s11192-014-1394-4,International comparative study on nanofiltrat...,4,1.0,2.0,2371.0,0.000841
4,"Dyachenko, EL 2014",10.1007/s11192-014-1357-9,Internationalization of academic journals: Is ...,17,1.0,2.0,2675.0,0.000745
...,...,...,...,...,...,...,...,...
6915,"Yang, GC 2015",10.1007/s11192-015-1763-7,Using the comprehensive patent citation networ...,26,2.0,4.0,3253.0,0.000919
6916,"Wang, LX 2015",10.1007/s11192-015-1727-y,A bibliometric analysis of research on Central...,6,0.0,0.0,1318.0,0.000756
6917,"Bjork, BC 2015",10.1007/s11192-015-1556-z,Article processing charges in OA journals: rel...,55,7.0,15.0,796.0,0.009721
6918,"Cavacini, A 2015",10.1007/s11192-014-1506-1,What is the best database for computer science...,29,3.0,3.0,3748.0,0.001064


In [57]:
df2.to_excel('DIscientomertics.xlsx')

# DRAFTS