In [3]:
import json
import gzip
import pyodbc
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import text
import urllib
from urllib.request import urlopen
import traceback
import logging
from azure.storage.blob import BlobServiceClient
import time
import pandas as pd
import os

In [4]:
#Connect to database
def connect_to_azure():
   
    params = urllib.parse.quote_plus(r'Driver={ODBC Driver 17 for SQL Server};Server=#DB_URL#;Database=#DBNAME#;Uid=#DB_ID#;Pwd=#DB_KEY#;Encrypt=yes;TrustServerCertificate=no;Connection Timeout=30;')

    conn_str = 'mssql+pyodbc:///?odbc_connect={}'.format(params)
    engine_azure = create_engine(conn_str,echo=False)

    print('connection is ok')
    print(engine_azure.table_names())

    return engine_azure

In [5]:
engine_azure = connect_to_azure()

connection is ok
['all_pubs_linked', 'cord_publications', 'history', 'join_all_pubs_projs', 'join_oa_cord_doi', 'join_src_pubs_rels', 'join_trg_pubs_rels', 'oa_projects', 'oa_publications', 'oa_publications_three', 'oa_publications_three_copy', 'oa_publications_two', 'oa_pubs', 'oa_relationships', 'oa_relationships_copy', 'oa_relationships_copy_two', 'projects', 'projects_two']


## Step 1. Get OpenAIRE publications that are in CORD19 and related to funded projects

In [6]:
df_final = pd.read_sql_table('join_all_pubs_projs', engine_azure)
df_final.head()

Unnamed: 0,oaid,oa_title,oa_doi,oa_pmid,oa_arxiv,oa_pub_date,cord_uid,cord_title,cord_doi,cord_pmcid,...,proj_oaid,proj_code,proj_title,proj_startdate,proj_enddate,currency,amount,jurisdiction,longname,shortname
0,50|dedup_wf_001::9001ec12701e4ce5a66adbd25a377437,GROMACS: High performance molecular simulation...,10.1080/07391102.2020.1824816,empty,empty,empty,h62ii7ir,Repurposing of the approved small molecule dru...,10.1080/07391102.2020.1824816,PMC7576931,...,40|corda_______::b21f6b1618151028fe7e3b4a2f2f894f,258980,Million-core Molecular Simulation,2011-05-01,2017-04-30,empty,0.0,EU;,European Commission;,EC;
1,50|dedup_wf_001::7667e9e8013e45658fb3eae0be158d10,Imaging and spectroscopy of domains of the cel...,10.1039/d0an00696c,empty,empty,empty,fhr40sr3,Imaging and spectroscopy of domains of the cel...,10.1039/d0an00696c,,...,40|corda__h2020::0875aa41d7225523933cecf91d83e5ab,665778,SUPPORTING MOBILITY IN THE ERA THROUGH AN INTE...,2015-09-01,2021-05-31,EUR,5841000.0,EU;,European Commission;,EC;
2,50|dedup_wf_001::e1d688bcaa8d8ff6760a1031106e130f,Subsumption Demodulation in First-Order Theore...,10.1007/978-3-030-51074-9_17,,empty,empty,pjnxv3zg,Subsumption Demodulation in First-Order Theore...,10.1007/978-3-030-51074-9_17,PMC7324223,...,40|fwf_________::91dbeb8c3d808e456bbe8df2403038ee,W 1255,Vollantrag zu Logical Methods in Computer Science,2014-03-01,2022-02-28,EUR,6385130.0,AT;,Austrian Science Fund (FWF);,FWF;
3,50|dedup_wf_001::7521e47071fa7dc2223634cf64d07fe3,Induction of Cell-Cell Fusion by Ebola Virus G...,10.1371/journal.ppat.1005373,26730950,empty,empty,7ycgd0h7,Induction of Cell-Cell Fusion by Ebola Virus G...,10.1371/journal.ppat.1005373,PMC4711667,...,40|nih_________::01565c59f11b660d408e6da3ca911200,5R01AI053668-14,Entry mechanisms used by a model retrovirus,2003-01-01,2018-04-30,empty,0.0,US;,National Institutes of Health;,NIH;
4,50|dedup_wf_001::2c17496023455b7cad411efe9b5e1cdc,Interferon-induced transmembrane protein 3 blo...,10.1371/journal.ppat.1007532,30640957,empty,empty,15wxk8lt,Interferon-induced transmembrane protein 3 blo...,10.1371/journal.ppat.1007532,PMC6347298,...,40|nih_________::01565c59f11b660d408e6da3ca911200,5R01AI053668-14,Entry mechanisms used by a model retrovirus,2003-01-01,2018-04-30,empty,0.0,US;,National Institutes of Health;,NIH;


In [7]:
#Count projects per funder
proj_by_funders = df_final.groupby('longname')['proj_oaid'].nunique()
proj_by_funders.head()

longname
Academy of Finland;                        46
Australian Research Council (ARC);         44
Austrian Science Fund (FWF);               34
Canadian Institutes of Health Research;     1
Croatian Science Foundation (CSF);          3
Name: proj_oaid, dtype: int64

In [8]:
#Count CORD publications by funders
cov_pubs_by_funders = df_final.groupby("longname")['oaid'].count()
cov_pubs_by_funders.head()

longname
Academy of Finland;                         85
Australian Research Council (ARC);          50
Austrian Science Fund (FWF);                53
Canadian Institutes of Health Research;    788
Croatian Science Foundation (CSF);           7
Name: oaid, dtype: int64

In [9]:
#Calculate the funded amount
pd.options.display.float_format = '{:.2f}'.format
amount_by_funders = df_final.groupby("longname", sort = True)['amount'].sum()

In [10]:
#Aggregate data by funders
new_df = pd.DataFrame()
new_df['funder'] = proj_by_funders.index
new_df['COVID projects'] = proj_by_funders.values
new_df['COVID publications'] = cov_pubs_by_funders.values
new_df['project to pub ratio'] = new_df['COVID publications'] / new_df['COVID projects']
new_df['amount'] = amount_by_funders.values
new_df['amount to pub ratio'] = new_df['amount'] / new_df['COVID publications']
new_df

Unnamed: 0,funder,COVID projects,COVID publications,project to pub ratio,amount,amount to pub ratio
0,Academy of Finland;,46,85,1.85,37316502.0,439017.67
1,Australian Research Council (ARC);,44,50,1.14,0.0,0.0
2,Austrian Science Fund (FWF);,34,53,1.56,94655566.0,1785954.08
3,Canadian Institutes of Health Research;,1,788,788.0,0.0,0.0
4,Croatian Science Foundation (CSF);,3,7,2.33,0.0,0.0
5,European Commission;,657,1814,2.76,6050751891.8,3335585.39
6,"Fundação para a Ciência e a Tecnologia, I.P.;",48,83,1.73,0.0,0.0
7,"Ministry of Education, Science and Technologic...",20,31,1.55,0.0,0.0
8,"Ministry of Science, Education and Sports of t...",4,9,2.25,0.0,0.0
9,National Health and Medical Research Council (...,192,398,2.07,0.0,0.0


In [11]:
#Drop duplicates
df_final = df_final.drop_duplicates(subset=['oaid'])

## Step 2. Use Semantic Scholar API to obtain citations/influential citations

In [20]:
#Given url, access semanticscholar api and get data
#returns dictionary of records
def get_sem_data(url):
    result = {}
    
    try:
        # store the response of URL
        response = urlopen(url)
        if response.status == 403:
            alert('Rate limited. Waiting to retry…')
            wait(response.retry-after)
            retry(url)
        
        # storing the JSON response 
        # from url in data
        data_json = json.loads(response.read())
        
        result['citations'] = str(data_json['numCitedBy'])
        result['influential citations'] = str(data_json['influentialCitationCount'])
        
        return result
    except Exception as e:
        if e.code == 404:
            pass
        else:
            logging.error(traceback.format_exc())

In [46]:
#Construct url and get semantic scholar data, saving it to the local folder
def get_semantic_scholar_citations():
    base_url = 'https://api.semanticscholar.org/v1/paper/'
    sem_list = []
    counter = 0
    start = 0
    for index, row in df_final.iterrows():
        if counter >= start:
            end_url = base_url + row['oa_doi']
            try:
                res_obj = get_sem_data(end_url)

                res_obj['oaid'] = row['oa_doi']

                sem_list.append(res_obj)
            except:
                print('problems with link')
            
            #We need to wait after each 100 requests due to API limitations
            if counter % 100 == 0:
                df_sem = pd.DataFrame(sem_list)
                filename = 'semantic_ + ' + str(start) + '_' + str(counter) + '.csv'
                df_sem.to_csv(filename,index=False)
                time.sleep(302)
            df_sem = pd.DataFrame(sem_list)
            filename = 'semantic_ + ' + str(start) + '_' + str(counter) + '.csv'
            df_sem.to_csv(filename,index=False)
        counter += 1

2001
https://api.semanticscholar.org/v1/paper/10.1007/s00249-020-01434-z
2002
https://api.semanticscholar.org/v1/paper/10.3390/vaccines8020186
2003
https://api.semanticscholar.org/v1/paper/10.1038/s41467-020-17543-4
2004
https://api.semanticscholar.org/v1/paper/10.1016/j.chest.2020.05.530
2005
https://api.semanticscholar.org/v1/paper/10.1111/pcmr.12776
2006
https://api.semanticscholar.org/v1/paper/10.1038/s41598-018-20002-2
2007
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-53288-8_6
2008
https://api.semanticscholar.org/v1/paper/10.1038/gene.2014.56
2009
https://api.semanticscholar.org/v1/paper/10.15172/pneu.2015.6/636
2010
https://api.semanticscholar.org/v1/paper/10.1016/j.addr.2014.11.004
2011
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1002155
2012
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1001175
2013
https://api.semanticscholar.org/v1/paper/10.1186/s13643-019-0991-y
2014
https://api.semanticscholar.org/v1/paper/10.1186/s13643-017-

2116
https://api.semanticscholar.org/v1/paper/10.1101/2020.01.30.927889
2117
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0028507
2118
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0229467
2119
https://api.semanticscholar.org/v1/paper/10.1186/1757-4749-5-22
2120
https://api.semanticscholar.org/v1/paper/10.1016/j.chom.2016.01.011
2121
https://api.semanticscholar.org/v1/paper/10.3390/biom7010029
2122
https://api.semanticscholar.org/v1/paper/10.1152/ajpgi.00116.2016
2123
https://api.semanticscholar.org/v1/paper/10.1038/s41536-020-0090-7
2124
https://api.semanticscholar.org/v1/paper/10.1016/j.celrep.2017.10.005
2125
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0076892
2126
https://api.semanticscholar.org/v1/paper/10.1186/s13054-014-0494-0
2127
https://api.semanticscholar.org/v1/paper/10.1186/1743-422X-6-86
2128
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.11.20061960
2129
https://api.semanticscholar.org/v1/paper/10.1371/journal.

2230
https://api.semanticscholar.org/v1/paper/10.1007/s40473-016-0076-3
2231
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.08.20031666
2232
https://api.semanticscholar.org/v1/paper/10.1016/j.jiph.2019.04.013
2233
https://api.semanticscholar.org/v1/paper/10.1080/21505594.2020.1780088
2234
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.10.20059121
2235
https://api.semanticscholar.org/v1/paper/10.1378/chest.107.6.1570
2236
https://api.semanticscholar.org/v1/paper/10.1186/s40635-020-0295-5
2237
https://api.semanticscholar.org/v1/paper/10.1093/aje/kww013
2238
https://api.semanticscholar.org/v1/paper/10.1016/j.epidem.2016.04.002
2239
https://api.semanticscholar.org/v1/paper/10.1371/currents.outbreaks.98d2f8f3382d84f390736cd5f5fe133c
2240
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0194527
2241
https://api.semanticscholar.org/v1/paper/10.1098/rsos.180041
2242
https://api.semanticscholar.org/v1/paper/10.1186/s12865-014-0055-y
2243
https://api.semanticscho

2345
https://api.semanticscholar.org/v1/paper/10.1038/ncomms10680
2346
https://api.semanticscholar.org/v1/paper/10.1016/j.chom.2020.04.009
2347
https://api.semanticscholar.org/v1/paper/10.1016/j.it.2019.10.001
2348
https://api.semanticscholar.org/v1/paper/10.1007/978-1-4939-6993-7_29
2349
https://api.semanticscholar.org/v1/paper/10.1016/s2468-2667(20)30164-x
2350
https://api.semanticscholar.org/v1/paper/10.1186/1471-2458-3-5
2351
https://api.semanticscholar.org/v1/paper/10.1016/j.anai.2017.12.023
2352
https://api.semanticscholar.org/v1/paper/10.1016/j.micinf.2011.09.007
2353
https://api.semanticscholar.org/v1/paper/10.1128/msystems.00741-20
2354
https://api.semanticscholar.org/v1/paper/10.1002/oby.21106
2355
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.08.031203
2356
https://api.semanticscholar.org/v1/paper/10.3390/v11010025
2357
https://api.semanticscholar.org/v1/paper/10.1093/molbev/msy048
2358
https://api.semanticscholar.org/v1/paper/10.1101/2020.03.24.992230
2359
https:

2460
https://api.semanticscholar.org/v1/paper/10.1128/msphere.00379-16
2461
https://api.semanticscholar.org/v1/paper/10.1038/nrmicro2614
2462
https://api.semanticscholar.org/v1/paper/10.1007/s00134-004-2503-2
2463
https://api.semanticscholar.org/v1/paper/10.1038/sj.gt.3303085
2464
https://api.semanticscholar.org/v1/paper/10.1038/s41541-019-0109-5
2465
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1003389
2466
https://api.semanticscholar.org/v1/paper/10.1038/nchem.2325
2467
https://api.semanticscholar.org/v1/paper/10.1101/2020.02.07.939124
2468
https://api.semanticscholar.org/v1/paper/10.2147/MDER.S46044
2469
https://api.semanticscholar.org/v1/paper/10.4103/2152-7806.171239
2470
https://api.semanticscholar.org/v1/paper/10.1111/j.1600-065x.2011.01062.x
2471
https://api.semanticscholar.org/v1/paper/10.1038/nri3719
2472
https://api.semanticscholar.org/v1/paper/10.1038/nm1240
2473
https://api.semanticscholar.org/v1/paper/10.1038/s41522-019-0084-7
2474
https://api.semanticsch

2576
https://api.semanticscholar.org/v1/paper/10.1371/journal.pbio.0060226
2577
https://api.semanticscholar.org/v1/paper/10.1093/nar/gkr893
2578
https://api.semanticscholar.org/v1/paper/10.1099/jgv.0.001098
2579
https://api.semanticscholar.org/v1/paper/10.1161/strokeaha.115.011674
2580
https://api.semanticscholar.org/v1/paper/10.1126/science.abc0473
2581
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.23.20076521
2582
https://api.semanticscholar.org/v1/paper/10.1101/661579
2583
https://api.semanticscholar.org/v1/paper/10.1093/ve/veaa007
2584
https://api.semanticscholar.org/v1/paper/10.1128/JVI.00370-19
2585
https://api.semanticscholar.org/v1/paper/10.1093/cid/ciy656
2586
https://api.semanticscholar.org/v1/paper/10.1016/j.epidem.2015.12.002
2587
https://api.semanticscholar.org/v1/paper/10.1016/j.arr.2010.05.005
2588
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.11.036855
2589
https://api.semanticscholar.org/v1/paper/10.1101/2020.06.24.20139634
2590
https://api.semant

2692
https://api.semanticscholar.org/v1/paper/10.1007/s10393-015-1061-0
2693
https://api.semanticscholar.org/v1/paper/10.1038/srep17965
2694
https://api.semanticscholar.org/v1/paper/10.1093/mp/sss092
2695
https://api.semanticscholar.org/v1/paper/10.1016/j.antiviral.2019.104598
2696
https://api.semanticscholar.org/v1/paper/10.2471/blt.12.109447
2697
https://api.semanticscholar.org/v1/paper/10.1111/imr.12363
2698
https://api.semanticscholar.org/v1/paper/10.1126/science.1233028
2699
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-59291-2_8
2700
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.28.20082966
going to sleep
2701
https://api.semanticscholar.org/v1/paper/10.1016/j.tmaid.2020.101832
2702
https://api.semanticscholar.org/v1/paper/10.1111/jeb.13595
2703
https://api.semanticscholar.org/v1/paper/10.1007/978-3-319-16345-1_1
2704
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1005274
2705
https://api.semanticscholar.org/v1/paper/10.1371/JOURNAL.PMED.005

2807
https://api.semanticscholar.org/v1/paper/10.1016/j.virusres.2012.06.004
2808
https://api.semanticscholar.org/v1/paper/10.3201/eid1102.040524
2809
https://api.semanticscholar.org/v1/paper/10.7717/peerj.9255
2810
https://api.semanticscholar.org/v1/paper/10.1093/nar/gkq885
2811
https://api.semanticscholar.org/v1/paper/10.1038/nature03712
2812
https://api.semanticscholar.org/v1/paper/10.1093/heapro/dau074
2813
https://api.semanticscholar.org/v1/paper/10.1186/s13012-014-0197-6
2814
https://api.semanticscholar.org/v1/paper/10.1186/s12885-019-5520-9
2815
https://api.semanticscholar.org/v1/paper/10.1136/bmjopen-2019-035632
2816
https://api.semanticscholar.org/v1/paper/10.1186/s12913-019-4642-8
2817
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0024806
2818
https://api.semanticscholar.org/v1/paper/10.1186/s13643-015-0116-1
2819
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0134707
2820
https://api.semanticscholar.org/v1/paper/10.1136/neurintsurg-2017-013724


2923
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.20.20073023
2924
https://api.semanticscholar.org/v1/paper/10.1128/jvi.02557-15
2925
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0156739
2926
https://api.semanticscholar.org/v1/paper/10.1007/BF03405472
2927
https://api.semanticscholar.org/v1/paper/10.1016/j.pharmthera.2019.03.003
2928
https://api.semanticscholar.org/v1/paper/10.1016/j.cell.2008.02.043
2929
https://api.semanticscholar.org/v1/paper/10.1378/chest.08-0888
2930
https://api.semanticscholar.org/v1/paper/10.3390/biomedicines6010001
2931
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jit839
2932
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jis333
2933
https://api.semanticscholar.org/v1/paper/10.1016/j.jmb.2005.09.074
2934
https://api.semanticscholar.org/v1/paper/10.3201/eid1108.040449
2935
https://api.semanticscholar.org/v1/paper/10.1186/1471-2121-12-25
2936
https://api.semanticscholar.org/v1/paper/10.1101/19012153
2937
https://ap

3038
https://api.semanticscholar.org/v1/paper/10.1186/1471-2466-5-11
3039
https://api.semanticscholar.org/v1/paper/10.1016/j.healthplace.2020.102418
3040
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0236419
3041
https://api.semanticscholar.org/v1/paper/10.1016/j.tins.2020.04.004
3042
https://api.semanticscholar.org/v1/paper/10.1136/bmjopen-2019-036030
3043
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.12.088542
3044
https://api.semanticscholar.org/v1/paper/10.1177/0148607117703956
3045
https://api.semanticscholar.org/v1/paper/10.1103/PhysRevX.2.031005
3046
https://api.semanticscholar.org/v1/paper/10.1136/bmjopen-2020-038621
3047
https://api.semanticscholar.org/v1/paper/10.1037/a0013105
3048
https://api.semanticscholar.org/v1/paper/10.1111/j.1524-475x.2012.00856.x
3049
https://api.semanticscholar.org/v1/paper/10.1093/cid/ciaa626
3050
https://api.semanticscholar.org/v1/paper/10.1038/s41467-020-17651-1
3051
https://api.semanticscholar.org/v1/paper/10.1152/jappl

3153
https://api.semanticscholar.org/v1/paper/10.1177/0844562120934237
3154
https://api.semanticscholar.org/v1/paper/10.1186/s13031-020-00289-7
3155
https://api.semanticscholar.org/v1/paper/10.1007/s10578-019-00912-6
3156
https://api.semanticscholar.org/v1/paper/10.1016/j.tips.2005.04.006
3157
https://api.semanticscholar.org/v1/paper/10.1086/520608
3158
https://api.semanticscholar.org/v1/paper/10.1016/j.jmr.2020.106802
3159
https://api.semanticscholar.org/v1/paper/10.1038/s41598-020-63974-w
3160
https://api.semanticscholar.org/v1/paper/10.1080/14760584.2019.1604231
3161
https://api.semanticscholar.org/v1/paper/10.1007/s00787-020-01634-0
3162
https://api.semanticscholar.org/v1/paper/10.1016/j.autrev.2020.102566
3163
https://api.semanticscholar.org/v1/paper/10.1093/phe/phaa026
3164
https://api.semanticscholar.org/v1/paper/10.1016/j.atmosenv.2015.03.039
3165
https://api.semanticscholar.org/v1/paper/10.1016/j.ceca.2020.102269
3166
https://api.semanticscholar.org/v1/paper/10.1002/eji.201948

3268
https://api.semanticscholar.org/v1/paper/10.1038/ncomms5323
3269
https://api.semanticscholar.org/v1/paper/10.1186/1471-2458-14-850
3270
https://api.semanticscholar.org/v1/paper/10.1038/srep46045
3271
https://api.semanticscholar.org/v1/paper/10.1073/pnas.1914087117
3272
https://api.semanticscholar.org/v1/paper/10.1016/j.virol.2010.06.019
3273
https://api.semanticscholar.org/v1/paper/10.1186/1743-422X-6-166
3274
https://api.semanticscholar.org/v1/paper/10.15252/emmm.201708078
3275
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1007944
3276
https://api.semanticscholar.org/v1/paper/10.1371/journal.pbio.2007044
3277
https://api.semanticscholar.org/v1/paper/10.1016/j.buildenv.2016.02.003
3278
https://api.semanticscholar.org/v1/paper/10.1128/mBio.01253-18
3279
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1001329
3280
https://api.semanticscholar.org/v1/paper/10.1016/j.vaccine.2009.09.024
3281
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.003

3383
https://api.semanticscholar.org/v1/paper/10.1111/jcmm.13173
3384
https://api.semanticscholar.org/v1/paper/10.1016/bs.apcsb.2017.06.004
3385
https://api.semanticscholar.org/v1/paper/10.1038/srep28672
3386
https://api.semanticscholar.org/v1/paper/10.1186/s12879-020-4933-z
3387
https://api.semanticscholar.org/v1/paper/10.7554/elife.47261
3388
https://api.semanticscholar.org/v1/paper/10.1017/ice.2020.286
3389
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.16.20067884
3390
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.15.20103341
3391
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.19.20071803
3392
https://api.semanticscholar.org/v1/paper/10.1038/gim.2016.109
3393
https://api.semanticscholar.org/v1/paper/10.3390/medicines5030081
3394
https://api.semanticscholar.org/v1/paper/10.1007/s00253-019-09873-1
3395
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jix584
3396
https://api.semanticscholar.org/v1/paper/10.1111/irv.12538
3397
https://api.semantics

3499
https://api.semanticscholar.org/v1/paper/10.1101/656686
3500
https://api.semanticscholar.org/v1/paper/10.1126/sciadv.aaz7809
going to sleep
3501
https://api.semanticscholar.org/v1/paper/10.1126/sciadv.abb6651
3502
https://api.semanticscholar.org/v1/paper/10.1093/nar/gkaa617
3503
https://api.semanticscholar.org/v1/paper/10.1103/PhysRevLett.124.212504
3504
https://api.semanticscholar.org/v1/paper/10.1002/ppul.23795
3505
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-45724-2_12
3506
https://api.semanticscholar.org/v1/paper/10.3390/ijms20081996
3507
https://api.semanticscholar.org/v1/paper/10.1016/j.bbagrm.2009.05.005
3508
https://api.semanticscholar.org/v1/paper/10.1101/2020.01.26.920132
3509
https://api.semanticscholar.org/v1/paper/10.1016/j.virol.2015.03.001
3510
https://api.semanticscholar.org/v1/paper/10.1155/2018/7089174
3511
https://api.semanticscholar.org/v1/paper/10.1016/j.abb.2013.07.018
3512
https://api.semanticscholar.org/v1/paper/10.1038/cr.2007.44
3513
https:

3614
https://api.semanticscholar.org/v1/paper/10.1126/science.aba9757
3615
https://api.semanticscholar.org/v1/paper/10.1186/1471-2334-14-509
3616
https://api.semanticscholar.org/v1/paper/10.1186/1743-422x-8-98
3617
https://api.semanticscholar.org/v1/paper/10.1016/j.atherosclerosis.2016.12.017
3618
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-53291-8_27
3619
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-53291-8_26
3620
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-45190-5_25
3621
https://api.semanticscholar.org/v1/paper/10.1007/s00418-013-1120-y
3622
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1002674
3623
https://api.semanticscholar.org/v1/paper/10.1002/emmm.201303236
3624
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1005684
3625
https://api.semanticscholar.org/v1/paper/10.1021/acs.est.0c01174
3626
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jiw333
3627
https://api.semanticscholar.org/v1/paper/10.110

3730
https://api.semanticscholar.org/v1/paper/10.1016/s0378-1135(02)00300-0
3731
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.11.089409
3732
https://api.semanticscholar.org/v1/paper/10.1530/REP-10-0282
3733
https://api.semanticscholar.org/v1/paper/10.1016/j.cmet.2020.07.016
3734
https://api.semanticscholar.org/v1/paper/10.1021/acs.analchem.0c02772
3735
https://api.semanticscholar.org/v1/paper/10.1101/2020.06.21.20136606
3736
https://api.semanticscholar.org/v1/paper/10.1146/annurev-animal-022114-111009
3737
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.27.052225
3738
https://api.semanticscholar.org/v1/paper/10.1016/j.fsi.2013.10.029
3739
https://api.semanticscholar.org/v1/paper/10.1128/jvi.01950-15
3740
https://api.semanticscholar.org/v1/paper/10.1007/s11548-015-1292-0
3741
https://api.semanticscholar.org/v1/paper/10.1007/s00253-011-3213-7
3742
https://api.semanticscholar.org/v1/paper/10.1002/phar.2402
3743
https://api.semanticscholar.org/v1/paper/10.1038/s41577-0

3845
https://api.semanticscholar.org/v1/paper/10.1128/msphere.00221-18
3846
https://api.semanticscholar.org/v1/paper/10.1038/nri.2016.100
3847
https://api.semanticscholar.org/v1/paper/10.1101/2020.03.20.20039966
3848
https://api.semanticscholar.org/v1/paper/10.1007/s00335-010-9273-7
3849
https://api.semanticscholar.org/v1/paper/10.1128/JVI.02261-13
3850
https://api.semanticscholar.org/v1/paper/10.3390/v3091699
3851
https://api.semanticscholar.org/v1/paper/10.1016/s0074-7696(06)52005-4
3852
https://api.semanticscholar.org/v1/paper/10.1136/bmjgh-2020-003607
3853
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0033389
3854
https://api.semanticscholar.org/v1/paper/10.1038/nrmicro.2016.182
3855
https://api.semanticscholar.org/v1/paper/10.1177/1553350615610650
3856
https://api.semanticscholar.org/v1/paper/10.3201/eid1811.120607
3857
https://api.semanticscholar.org/v1/paper/10.1097/MD.0000000000001104
3858
https://api.semanticscholar.org/v1/paper/10.1371/journal.pgen.1000612
385

3961
https://api.semanticscholar.org/v1/paper/10.1084/jem.20071164
3962
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jit348
3963
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1003133
3964
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.19.20071373
3965
https://api.semanticscholar.org/v1/paper/10.1016/S0140-6736(16)30468-8
3966
https://api.semanticscholar.org/v1/paper/10.1186/gm119
3967
https://api.semanticscholar.org/v1/paper/10.1371/journal.pcbi.1002454
3968
https://api.semanticscholar.org/v1/paper/10.1101/634832
3969
https://api.semanticscholar.org/v1/paper/10.1186/1471-2458-12-1110
3970
https://api.semanticscholar.org/v1/paper/10.1016/j.virol.2009.12.020
3971
https://api.semanticscholar.org/v1/paper/10.1016/j.meegid.2017.09.028
3972
https://api.semanticscholar.org/v1/paper/10.1186/cc13812
3973
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0018222
3974
https://api.semanticscholar.org/v1/paper/10.1371/journal.pbio.1000219
3975
https:

4077
https://api.semanticscholar.org/v1/paper/10.1111/j.1750-2659.2009.00119.x
4078
https://api.semanticscholar.org/v1/paper/10.1136/bmjopen-2018-025036
4079
https://api.semanticscholar.org/v1/paper/10.1007/BF03399438
4080
https://api.semanticscholar.org/v1/paper/10.1093/ve/vev001
4081
https://api.semanticscholar.org/v1/paper/10.1177/1010539520956447
4082
https://api.semanticscholar.org/v1/paper/10.1186/s13613-018-0385-7
4083
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0214207
4084
https://api.semanticscholar.org/v1/paper/10.1016/j.scitotenv.2019.04.266
4085
https://api.semanticscholar.org/v1/paper/10.1186/1447-056X-7-5
4086
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.22.20075077
4087
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.25.20079129
4088
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jir173
4089
https://api.semanticscholar.org/v1/paper/10.12688/wellcomeopenres.16225.2
4090
https://api.semanticscholar.org/v1/paper/10.1016/j.jin

4192
https://api.semanticscholar.org/v1/paper/10.1136/medethics-2020-106323
4193
https://api.semanticscholar.org/v1/paper/10.1111/bph.15101
4194
https://api.semanticscholar.org/v1/paper/10.3390/ncrna5010029
4195
https://api.semanticscholar.org/v1/paper/10.1128/jvi.01056-16
4196
https://api.semanticscholar.org/v1/paper/10.1002/eji.201746942
4197
https://api.semanticscholar.org/v1/paper/10.1093/ageing/afy180
4198
https://api.semanticscholar.org/v1/paper/10.1093/brain/awaa211
4199
https://api.semanticscholar.org/v1/paper/10.1093/ehjci/jeaa178
4200
https://api.semanticscholar.org/v1/paper/10.3389/fcimb.2019.00459
going to sleep
4201
https://api.semanticscholar.org/v1/paper/10.1183/13993003.01387-2020
4202
https://api.semanticscholar.org/v1/paper/10.1186/s12985-020-01400-3
4203
https://api.semanticscholar.org/v1/paper/10.3201/eid1601.081467
4204
https://api.semanticscholar.org/v1/paper/10.1002/jmv.25828
4205
https://api.semanticscholar.org/v1/paper/10.1002/bip.20706
4206
https://api.semanti

4308
https://api.semanticscholar.org/v1/paper/10.1016/j.semcdb.2020.08.006
4309
https://api.semanticscholar.org/v1/paper/10.1111/hex.12933
4310
https://api.semanticscholar.org/v1/paper/10.1002/cti2.1109
4311
https://api.semanticscholar.org/v1/paper/10.1186/1743-422X-3-68
4312
https://api.semanticscholar.org/v1/paper/10.1155/2012/236345
4313
https://api.semanticscholar.org/v1/paper/10.5435/jaaos-d-19-00074
4314
https://api.semanticscholar.org/v1/paper/10.1186/s13054-018-2281-9
4315
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.04.20052696
4316
https://api.semanticscholar.org/v1/paper/10.1080/10550887.2020.1791379
4317
https://api.semanticscholar.org/v1/paper/10.1128/JVI.00505-19
4318
https://api.semanticscholar.org/v1/paper/10.1099/vir.0.034983-0
4319
https://api.semanticscholar.org/v1/paper/10.1038/nsmb.1680
4320
https://api.semanticscholar.org/v1/paper/10.1186/s12951-017-0295-0
4321
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0157620
4322
https://api.seman

4424
https://api.semanticscholar.org/v1/paper/10.1186/1471-2105-12-219
4425
https://api.semanticscholar.org/v1/paper/10.1038/tp.2017.13
4426
https://api.semanticscholar.org/v1/paper/10.1101/2020.02.10.942136
4427
https://api.semanticscholar.org/v1/paper/10.1016/j.coviro.2011.06.008
4428
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0021042
4429
https://api.semanticscholar.org/v1/paper/10.1007/s00464-013-3066-5
4430
https://api.semanticscholar.org/v1/paper/10.1016/j.bej.2008.12.004
4431
https://api.semanticscholar.org/v1/paper/10.1111/brv.12632
4432
https://api.semanticscholar.org/v1/paper/10.1073/pnas.1101684108
4433
https://api.semanticscholar.org/v1/paper/10.1038/ncomms14617
4434
https://api.semanticscholar.org/v1/paper/10.1111/jvs.12151
4435
https://api.semanticscholar.org/v1/paper/10.2807/1560-7917.ES.2017.22.36.30609
4436
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0147041
4437
https://api.semanticscholar.org/v1/paper/10.1159/000440835
4438
https:

4539
https://api.semanticscholar.org/v1/paper/10.3201/eid2306.161417
4540
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0225576
4541
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.26.20075937
4542
https://api.semanticscholar.org/v1/paper/10.1101/2020.02.19.20025452
4543
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.22.20075762
4544
https://api.semanticscholar.org/v1/paper/10.1101/2020.03.16.20037176
4545
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.23.20075796
4546
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.03.20089920
4547
https://api.semanticscholar.org/v1/paper/10.1136/bmj.f288
4548
https://api.semanticscholar.org/v1/paper/10.1136/bmjopen-2011-000544
4549
https://api.semanticscholar.org/v1/paper/10.1101/790444
4550
https://api.semanticscholar.org/v1/paper/10.1098/rsif.2017.0115
4551
https://api.semanticscholar.org/v1/paper/10.1371/journal.pcbi.1006204
4552
https://api.semanticscholar.org/v1/paper/10.1101/247734
4553
http

4654
https://api.semanticscholar.org/v1/paper/10.1016/j.drudis.2014.06.018
4655
https://api.semanticscholar.org/v1/paper/10.3390/v11020197
4656
https://api.semanticscholar.org/v1/paper/10.1371/journal.ppat.1003232
4657
https://api.semanticscholar.org/v1/paper/10.1098/rspb.2020.1039
4658
https://api.semanticscholar.org/v1/paper/10.1103/PhysRevLett.124.211101
4659
https://api.semanticscholar.org/v1/paper/10.1016/j.healthplace.2020.102396
4660
https://api.semanticscholar.org/v1/paper/10.1016/j.virol.2011.11.016
4661
https://api.semanticscholar.org/v1/paper/10.1038/srep18610
4662
https://api.semanticscholar.org/v1/paper/10.1101/2020.02.09.20021261
4663
https://api.semanticscholar.org/v1/paper/10.1038/s41598-017-17749-5
4664
https://api.semanticscholar.org/v1/paper/10.3201/eid2407.172059
4665
https://api.semanticscholar.org/v1/paper/10.1073/pnas.1811115115
4666
https://api.semanticscholar.org/v1/paper/10.1038/nrd.2018.93
4667
https://api.semanticscholar.org/v1/paper/10.1016/j.vaccine.2008.1

4768
https://api.semanticscholar.org/v1/paper/10.1016/j.tmaid.2020.101806
4769
https://api.semanticscholar.org/v1/paper/10.1136/bmjgh-2020-002307
4770
https://api.semanticscholar.org/v1/paper/10.1038/s41467-020-17920-z
4771
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0219778
4772
https://api.semanticscholar.org/v1/paper/10.1016/j.annonc.2020.03.286
4773
https://api.semanticscholar.org/v1/paper/10.1021/acs.biochem.0c00447
4774
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0186854
4775
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0159709
4776
https://api.semanticscholar.org/v1/paper/10.1016/j.virusres.2014.05.023
4777
https://api.semanticscholar.org/v1/paper/10.3201/eid1309.070491
4778
https://api.semanticscholar.org/v1/paper/10.1016/j.virol.2007.09.045
4779
https://api.semanticscholar.org/v1/paper/10.14336/ad.2017.0613
4780
https://api.semanticscholar.org/v1/paper/10.1038/pr.2012.179
4781
https://api.semanticscholar.org/v1/paper/10.1086

4884
https://api.semanticscholar.org/v1/paper/10.3201/eid2012.140684
4885
https://api.semanticscholar.org/v1/paper/10.3389/fendo.2019.00767
4886
https://api.semanticscholar.org/v1/paper/10.3390/ijms19082154
4887
https://api.semanticscholar.org/v1/paper/10.3389/fimmu.2017.00446
4888
https://api.semanticscholar.org/v1/paper/10.1371/journal.pntd.0002933
4889
https://api.semanticscholar.org/v1/paper/10.1186/s13643-018-0820-8
4890
https://api.semanticscholar.org/v1/paper/10.1128/mbio.00862-13
4891
https://api.semanticscholar.org/v1/paper/10.1016/j.jcv.2013.04.011
4892
https://api.semanticscholar.org/v1/paper/10.1371/journal.pntd.0003143
4893
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.11.089375
4894
https://api.semanticscholar.org/v1/paper/10.1016/j.chaos.2018.02.010
4895
https://api.semanticscholar.org/v1/paper/10.1186/s13567-018-0575-1
4896
https://api.semanticscholar.org/v1/paper/10.1038/s41396-020-0685-4
4897
https://api.semanticscholar.org/v1/paper/10.1038/nsmb.2544
4898
h

5000
https://api.semanticscholar.org/v1/paper/10.2147/jpr.s143512
going to sleep
5001
https://api.semanticscholar.org/v1/paper/10.3390/v12010083
5002
https://api.semanticscholar.org/v1/paper/10.1038/s41372-019-0318-9
5003
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.16.20098657
5004
https://api.semanticscholar.org/v1/paper/10.1093/infdis/jir691
5005
https://api.semanticscholar.org/v1/paper/10.1126/science.abb6144
5006
https://api.semanticscholar.org/v1/paper/10.3892/ol.2016.5550
5007
https://api.semanticscholar.org/v1/paper/10.1371/journal.pntd.0006642
5008
https://api.semanticscholar.org/v1/paper/10.1002/wer.1229
5009
https://api.semanticscholar.org/v1/paper/10.1038/srep10840
5010
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.29.20085456
5011
https://api.semanticscholar.org/v1/paper/10.1002/cbic.202000238
5012
https://api.semanticscholar.org/v1/paper/10.1186/cc10116
5013
https://api.semanticscholar.org/v1/paper/10.1111/irv.12474
5014
https://api.semanticscholar.

5116
https://api.semanticscholar.org/v1/paper/10.11604/pamj.2013.16.102.1450
5117
https://api.semanticscholar.org/v1/paper/10.3390/v7122957
5118
https://api.semanticscholar.org/v1/paper/10.1007/s11908-011-0168-x
5119
https://api.semanticscholar.org/v1/paper/10.1007/s00277-020-04281-2
5120
https://api.semanticscholar.org/v1/paper/10.3201/eid2511.190157
5121
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0030802
5122
https://api.semanticscholar.org/v1/paper/10.1016/j.virusres.2007.02.015
5123
https://api.semanticscholar.org/v1/paper/10.1007/s11481-010-9231-x
5124
https://api.semanticscholar.org/v1/paper/10.1007/s10875-011-9627-2
5125
https://api.semanticscholar.org/v1/paper/10.1038/npp.2016.116
5126
https://api.semanticscholar.org/v1/paper/10.1111/irv.12440
5127
https://api.semanticscholar.org/v1/paper/10.1021/mp400631w
5128
https://api.semanticscholar.org/v1/paper/10.1098/rsos.171435
5129
https://api.semanticscholar.org/v1/paper/10.1371/journal.pone.0146599
5130
https://a

5231
https://api.semanticscholar.org/v1/paper/10.2196/14731
5232
https://api.semanticscholar.org/v1/paper/10.1101/2020.04.13.039321
5233
https://api.semanticscholar.org/v1/paper/10.1093/pubmed/fdaa104
5234
https://api.semanticscholar.org/v1/paper/10.1016/s2589-7500(20)30184-9
5235
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.04.20079301
5236
https://api.semanticscholar.org/v1/paper/10.1098/rsif.2010.0216
5237
https://api.semanticscholar.org/v1/paper/10.1098/rsif.2011.0309
5238
https://api.semanticscholar.org/v1/paper/10.1177/0020731420946590
5239
https://api.semanticscholar.org/v1/paper/10.1073/pnas.0908491107
5240
https://api.semanticscholar.org/v1/paper/10.1136/bmj.b2651
5241
https://api.semanticscholar.org/v1/paper/10.1099/vir.0.032094-0
5242
https://api.semanticscholar.org/v1/paper/10.1099/vir.0.028381-0
5243
https://api.semanticscholar.org/v1/paper/10.1186/s13643-019-0970-3
5244
https://api.semanticscholar.org/v1/paper/10.1186/s11556-019-0213-6
5245
https://api.semanti

5347
https://api.semanticscholar.org/v1/paper/10.1186/s12992-018-0415-0
5348
https://api.semanticscholar.org/v1/paper/10.1111/jth.15084
5349
https://api.semanticscholar.org/v1/paper/10.1093/ve/vez033
5350
https://api.semanticscholar.org/v1/paper/10.1080/08959420.2020.1765685
5351
https://api.semanticscholar.org/v1/paper/10.1007/s00251-020-01174-6
5352
https://api.semanticscholar.org/v1/paper/10.1111/dewb.12157
5353
https://api.semanticscholar.org/v1/paper/10.1109/access.2020.3001298
5354
https://api.semanticscholar.org/v1/paper/10.1186/s13100-015-0047-3
5355
https://api.semanticscholar.org/v1/paper/10.1101/2020.05.14.20101378
5356
https://api.semanticscholar.org/v1/paper/10.1111/1753-6405.13015
5357
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-49165-9_5
5358
https://api.semanticscholar.org/v1/paper/10.1007/978-3-030-49165-9_3
5359
https://api.semanticscholar.org/v1/paper/10.1186/2051-5960-2-3
5360
https://api.semanticscholar.org/v1/paper/10.1186/1471-2458-11-337
5361
http

"for item in obj['pid']:\n   end_url = get_end_url(item['scheme'], item['value'])\n   #skip other schemes for now\n   if not end_url:\n       continue\n   full_url = base_url + end_url\n   print(full_url)\n   get_sem_data(full_url)\n"

In [17]:
def get_citation_table():
    filenames = os.listdir()
    semantic_files = [filename for filename in filenames if filename.startswith('semantic_')]
    
    dfs = [pd.read_csv(filename) for filename in semantic_files]
    final_df = pd.concat(dfs, ignore_index=True)
    
    df_sum_unique = final_df.drop_duplicates()

    return df_sum_unique

In [18]:
#First, files from semantic_data have to be unpacked into the same folder as the notebook
citation_table = get_citation_table()
citation_table.head()

Unnamed: 0,citations,influential citations,oaid
0,6,0,10.1080/07391102.2020.1824816
1,0,0,10.1039/d0an00696c
2,4,0,10.1007/978-3-030-51074-9_17
3,30,2,10.1371/journal.ppat.1005373
4,34,7,10.1371/journal.ppat.1007532


## Step 3. Enrich final table with citations and influential citations 

In [20]:
df = pd.merge(df_final,citation_table,left_on='oa_doi', right_on='oaid',how='left')

In [21]:
#Export to excel
df.to_excel('aLL_rows_table.xlsx')

### Extract data for European Commission

In [22]:
ecdf = df.loc[df['shortname'] == 'EC;']
ecdf

Unnamed: 0,oaid_x,oa_title,oa_doi,oa_pmid,oa_arxiv,oa_pub_date,cord_uid,cord_title,cord_doi,cord_pmcid,...,proj_startdate,proj_enddate,currency,amount,jurisdiction,longname,shortname,citations,influential citations,oaid_y
0,50|dedup_wf_001::9001ec12701e4ce5a66adbd25a377437,GROMACS: High performance molecular simulation...,10.1080/07391102.2020.1824816,empty,empty,empty,h62ii7ir,Repurposing of the approved small molecule dru...,10.1080/07391102.2020.1824816,PMC7576931,...,2011-05-01,2017-04-30,empty,0.00,EU;,European Commission;,EC;,6.00,0.00,10.1080/07391102.2020.1824816
1,50|dedup_wf_001::7667e9e8013e45658fb3eae0be158d10,Imaging and spectroscopy of domains of the cel...,10.1039/d0an00696c,empty,empty,empty,fhr40sr3,Imaging and spectroscopy of domains of the cel...,10.1039/d0an00696c,,...,2015-09-01,2021-05-31,EUR,5841000.00,EU;,European Commission;,EC;,0.00,0.00,10.1039/d0an00696c
35,50|dedup_wf_001::ba6d49008ea5b759d7842bab41ef100c,Treatment of COVID-19 Pneumonia: the Case for ...,10.1007/s12015-020-10004-x,32696426,empty,empty,9j9ni3wg,Treatment of COVID-19 Pneumonia: the Case for ...,10.1007/s12015-020-10004-x,PMC7372209,...,2020-01-01,2024-12-31,EUR,6844620.00,EU;,European Commission;,EC;,2.00,0.00,10.1007/s12015-020-10004-x
36,50|dedup_wf_001::545751d3799352206b93e1a4f2b34b4a,Histo-blood group glycans in the context of pe...,10.1016/j.bbagen.2015.12.026,26748235,empty,empty,5p9j5xj0,Histo-blood group glycans in the context of pe...,10.1016/j.bbagen.2015.12.026,PMC7117023,...,2015-09-01,2019-08-31,EUR,3293890.00,EU;,European Commission;,EC;,18.00,1.00,10.1016/j.bbagen.2015.12.026
37,50|dedup_wf_001::4e9ab37f254220b9f1f1950283065f58,How to organize an online conference,10.1038/s41578-020-0194-0,empty,empty,empty,hg4edj36,How to organize an online conference,10.1038/s41578-020-0194-0,PMC7095294,...,2017-04-01,2022-09-30,EUR,1999890.00,EU;,European Commission;,EC;,20.00,0.00,10.1038/s41578-020-0194-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5410,50|dedup_wf_001::0aa743d2ff4a7f111c7381e39cd7579d,Characterization of a Novel RNA Virus Discover...,10.3390/v9080214,,empty,empty,1baso3q2,Characterization of a Novel RNA Virus Discover...,10.3390/v9080214,PMC5580471,...,2011-01-01,2015-12-31,empty,0.00,EU;,European Commission;,EC;,6.00,0.00,10.3390/v9080214
5411,50|dedup_wf_001::bf94c29c77af374502de61584b6f76ec,Predicting the effects of parasite co-infectio...,10.1098/rspb.2017.2610,29540516,empty,empty,vcrbzzhu,Predicting the effects of parasite co-infectio...,10.1098/rspb.2017.2610,PMC5879626,...,2015-05-22,2016-08-21,EUR,112673.00,EU;,European Commission;,EC;,14.00,0.00,10.1098/rspb.2017.2610
5423,50|dedup_wf_001::483a90ed604f97e529e467abba4ad91f,Harnessing publicly available genetic data to ...,10.1007/s00439-016-1647-9,26946290,empty,empty,wrwp7ogi,Harnessing publicly available genetic data to ...,10.1007/s00439-016-1647-9,PMC4835528,...,2013-10-01,2017-03-31,empty,0.00,EU;,European Commission;,EC;,10.00,2.00,10.1007/s00439-016-1647-9
5424,50|dedup_wf_001::967b21879fc0fb82d8c255296472ecba,Comparison of phenotypic and genotypic diagnos...,10.1016/j.jcv.2019.09.003,31521013,empty,empty,awr2tbj0,Comparison of phenotypic and genotypic diagnos...,10.1016/j.jcv.2019.09.003,PMC7106360,...,2013-09-01,2017-02-28,empty,0.00,EU;,European Commission;,EC;,1.00,0.00,10.1016/j.jcv.2019.09.003


### Find top projects by citations

In [23]:
ecdf.groupby(['proj_title'])['citations'].agg('sum').reset_index().sort_values(by=['citations'], ascending=False).to_excel('top_proj.xlsx')

### Find top projects by influential citations

In [24]:
ecdf.groupby(['proj_title'])['influential citations'].agg('sum').reset_index().sort_values(by=['influential citations'], ascending=False).to_excel('top_proj_inf.xlsx')

### Find top projects by publication count

In [25]:
ecdf.groupby(['proj_title'])['citations'].agg('count').reset_index().sort_values(by=['citations'], ascending=False).to_excel('top_proj_pub_count.xlsx')

## Step 4. Aggregate the final table by funders

In [77]:
df.loc[df['citations'] > 0]

Unnamed: 0,oaid_x,oa_title,oa_doi,oa_pmid,oa_arxiv,oa_pub_date,cord_uid,cord_title,cord_doi,cord_pmcid,...,proj_startdate,proj_enddate,currency,amount,jurisdiction,longname,shortname,citations,influential citations,oaid_y
101,50|dedup_wf_001::3c8cd854535bbf53a45363e6fae9afd7,Efficient Sensing of Infected Cells in Absence...,10.1371/journal.ppat.1003412,23785283,empty,empty,2mceonc1,Efficient Sensing of Infected Cells in Absence...,10.1371/journal.ppat.1003412,PMC3681750,...,2010-04-01,2014-03-31,empty,0.00,EU;,European Commission;,EC;,41.00,2.00,10.1371/journal.ppat.1003412
102,50|dedup_wf_001::35cb19f220f7d3974726f3ce2a52df5e,COVID-19 pandemic changes the food consumption...,10.1016/j.tifs.2020.08.017,empty,empty,empty,l5r1hl8x,COVID-19 pandemic changes the food consumption...,10.1016/j.tifs.2020.08.017,PMC7462788,...,2019-10-01,2023-09-30,EUR,10189600.00,EU;,European Commission;,EC;,11.00,0.00,10.1016/j.tifs.2020.08.017
103,50|dedup_wf_001::c7adc248fd2b293c1b5de76ddd918896,Social network-based distancing strategies to ...,10.1038/s41562-020-0898-6,17283616,empty,empty,qpzg8lam,Social network-based distancing strategies to ...,10.1038/s41562-020-0898-6,,...,2019-11-01,2024-10-31,EUR,2499810.00,EU;,European Commission;,EC;,174.00,10.00,10.1038/s41562-020-0898-6
104,50|dedup_wf_001::c0dc5cb4e9c24974eea633931eb9d797,Demographic science aids in understanding the ...,10.1101/2020.03.15.20036293,32400861,empty,empty,gv8wlo06,Demographic science aids in understanding the ...,10.1101/2020.03.15.20036293,,...,2019-11-01,2024-10-31,EUR,2499810.00,EU;,European Commission;,EC;,13.00,0.00,10.1101/2020.03.15.20036293
105,50|dedup_wf_001::1568062058ffd299a162b5d4ac08510a,"Forecasting spatial, socioeconomic and demogra...",10.1186/s12916-020-01646-2,32594909,empty,empty,5nnlyavp,"Forecasting spatial, socioeconomic and demogra...",10.1186/s12916-020-01646-2,PMC7321716,...,2019-11-01,2024-10-31,EUR,2499810.00,EU;,European Commission;,EC;,13.00,0.00,10.1186/s12916-020-01646-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5429,50|dedup_wf_001::b0fdbfba077427c97120a881845d4780,Mesenchymal Stem Cell Infusion Shows Promise f...,10.14336/ad.2020.0301,32257554,empty,empty,v73bsrwg,Mesenchymal Stem Cell Infusion Shows Promise f...,10.14336/ad.2020.0301,PMC7069463,...,2018-06-15,2023-05-31,empty,0.00,US;,National Institutes of Health;,NIH;,61.00,5.00,10.14336/ad.2020.0301
5430,50|dedup_wf_001::829344a03f04c2a1a9445658316d15f8,Myocardial T1 mapping and extracellular volume...,10.1186/1532-429X-15-92,24124732,empty,empty,ogv0yga9,Myocardial T1 mapping and extracellular volume...,10.1186/1532-429x-15-92,,...,empty,empty,empty,0.00,US;,National Institutes of Health;,NIH;,735.00,26.00,10.1186/1532-429X-15-92
5431,50|dedup_wf_001::de458fac816db7268d5ec90fca9e590b,Zika and Flavivirus Shell Disorder: Virulence ...,10.3390/biom9110710,31698857,empty,empty,nmt221tu,Zika and Flavivirus Shell Disorder: Virulence ...,10.3390/biom9110710,PMC6920988,...,2010-08-01,2021-07-31,empty,0.00,US;,National Science Foundation;,NSF;,13.00,0.00,10.3390/biom9110710
5432,50|dedup_wf_001::1cd6834b6cdd391dc04b4d2137e322bb,HIV Vaccine Mystery and Viral Shell Disorder,10.3390/biom9050178,31072073,empty,empty,q28h5zi8,HIV Vaccine Mystery and Viral Shell Disorder,10.3390/biom9050178,PMC6572542,...,2010-08-01,2021-07-31,empty,0.00,US;,National Science Foundation;,NSF;,19.00,2.00,10.3390/biom9050178


### Count projects per funder

In [62]:
proj_by_funders = df.groupby('longname')['proj_oaid'].nunique()
proj_by_funders.head()

longname
Academy of Finland;                        31
Australian Research Council (ARC);         38
Austrian Science Fund (FWF);               26
Canadian Institutes of Health Research;     1
Croatian Science Foundation (CSF);          3
Name: proj_oaid, dtype: int64

### Count CORD publications by funders

In [26]:
cov_pubs_by_funders = df.groupby("longname")['oaid_x'].count()
cov_pubs_by_funders.head()

longname
Academy of Finland;                         39
Australian Research Council (ARC);          42
Austrian Science Fund (FWF);                27
Canadian Institutes of Health Research;    472
Croatian Science Foundation (CSF);           4
Name: oaid_x, dtype: int64

In [27]:
pd.options.display.float_format = '{:.2f}'.format
amount_by_funders = df.groupby("longname", sort = True)['amount'].sum()

### Count citations and influential citations by funders

In [28]:
citations_by_funders = df.groupby("longname", sort = True)['citations'].sum()
citations_by_funders

longname
Academy of Finland;                                                                      952.00
Australian Research Council (ARC);                                                       502.00
Austrian Science Fund (FWF);                                                            1144.00
Canadian Institutes of Health Research;                                                24166.00
Croatian Science Foundation (CSF);                                                        67.00
European Commission;                                                                   39592.00
Fundação para a Ciência e a Tecnologia, I.P.;                                           2176.00
Ministry of Education, Science and Technological Development of Republic of Serbia;      244.00
Ministry of Science, Education and Sports of the Republic of Croatia (MSES);             108.00
National Health and Medical Research Council (NHMRC);                                   4103.00
National Institutes of Health; 

In [66]:
inf_citations_by_funders = df.groupby("longname", sort = True)['influential citations'].sum()
inf_citations_by_funders

longname
Academy of Finland;                                                                     45.00
Australian Research Council (ARC);                                                      22.00
Austrian Science Fund (FWF);                                                            48.00
Canadian Institutes of Health Research;                                                997.00
Croatian Science Foundation (CSF);                                                       3.00
European Commission;                                                                  1996.00
Fundação para a Ciência e a Tecnologia, I.P.;                                           89.00
Ministry of Education, Science and Technological Development of Republic of Serbia;     12.00
Ministry of Science, Education and Sports of the Republic of Croatia (MSES);             2.00
National Health and Medical Research Council (NHMRC);                                  164.00
National Institutes of Health;                     

### Construct the final dataframe for export to Excel

In [67]:
new_df = pd.DataFrame()
new_df['funder'] = proj_by_funders.index
new_df['COVID projects'] = proj_by_funders.values
new_df['COVID publications'] = cov_pubs_by_funders.values
new_df['project to pub ratio'] = new_df['COVID publications'] / new_df['COVID projects']
new_df['funding'] = amount_by_funders.values
new_df['funding to pub ratio'] = new_df['funding'] / new_df['COVID publications']
new_df['citations'] = citations_by_funders.values
new_df['influential citations'] = inf_citations_by_funders.values
new_df['citations to pub ratio'] = new_df['citations'] / new_df['COVID publications']
new_df['influential citations to pub ratio'] = new_df['influential citations'] / new_df['COVID publications']
new_df['funding to citations ratio'] = new_df['funding'] / new_df['citations']
new_df['funding to influential citations ratio'] = new_df['funding'] / new_df['influential citations']
new_df

Unnamed: 0,funder,COVID projects,COVID publications,project to pub ratio,funding,funding to pub ratio,citations,influential citations,citations to pub ratio,influential citations to pub ratio,funding to citations ratio,funding to influential citations ratio
0,Academy of Finland;,31,39,1.26,17766222.0,455544.15,735.0,45.0,18.85,1.15,24171.73,394804.93
1,Australian Research Council (ARC);,38,42,1.11,0.0,0.0,475.0,22.0,11.31,0.52,0.0,0.0
2,Austrian Science Fund (FWF);,26,27,1.04,39467117.0,1461745.07,1140.0,48.0,42.22,1.78,34620.28,822231.6
3,Canadian Institutes of Health Research;,1,472,472.0,0.0,0.0,24166.0,997.0,51.2,2.11,0.0,0.0
4,Croatian Science Foundation (CSF);,3,4,1.33,0.0,0.0,67.0,3.0,16.75,0.75,0.0,0.0
5,European Commission;,543,894,1.65,2947962098.4,3297496.75,39019.0,1996.0,43.65,2.23,75551.96,1476934.92
6,"Fundação para a Ciência e a Tecnologia, I.P.;",28,30,1.07,0.0,0.0,2176.0,89.0,72.53,2.97,0.0,0.0
7,"Ministry of Education, Science and Technologic...",12,13,1.08,0.0,0.0,244.0,12.0,18.77,0.92,0.0,0.0
8,"Ministry of Science, Education and Sports of t...",3,4,1.33,0.0,0.0,108.0,2.0,27.0,0.5,0.0,0.0
9,National Health and Medical Research Council (...,120,143,1.19,0.0,0.0,4048.0,164.0,28.31,1.15,0.0,0.0


### Export to Excel

In [None]:
new_df.to_excel('final_table.xlsx')