# 1. Using OpenAlex to find taxonomists

## 1.2. Preprocessing OpenAlex article data into author data

In [2]:
import pandas as pd
import matplotlib.pyplot as plt # version 3.5.2
import pickle

## Functions

In [3]:
def get_dict_info(df_input):
    hostcols = ['host_id', 'issn_l', 'issn', 'host_display_name', 'publisher',
                'host_type', 'host_url', 'is_host_oa', 'host_version', 'host_license']
    
    #df = pd.DataFrame(columns = hostcols + ["is_oa", "oa_status", "oa_url"])
    new_rows = []
    
    for article in df_input.itertuples():
        # get host (journal) info
        # if there is a list within the dictionary, pandas will turn it into two rows
        if article.host_venue["issn"] != None and len(article.host_venue["issn"]) != 1:
            article.host_venue["issn"] = '\n'.join(article.host_venue["issn"])
            
        l_journal = list(article.host_venue.values())
        l_oa = list(article.open_access.values())
        # unite open access and journal info from this article and previous articles
        l_new = l_journal + l_oa
        
        new_rows.append(l_new)
    
    # unite data in dictionaries with accessible data
    new_df = pd.DataFrame(new_rows, columns=hostcols + ["is_oa", "oa_status", "oa_url"])
    return df_input.merge(new_df, left_index=True, right_index=True)

In [4]:
# get authorship information from raw dataframe WITH all other data

def get_authors(df_input):
    # create empty dataframe with all authorship attributes
    df = pd.DataFrame()
    authors_list = []
    
    for article in df_input.itertuples():
        authors = pd.DataFrame(article.authorships)
        
        if len(authors) != 0:
            # disassemble author info
            for author in authors.itertuples():
                new_info = [article.id]+[author.author_position]+list(author.author.values())+[author.raw_affiliation_string]
                
                # add institution info
                if len(author.institutions) != 0:
                    new_info += list(author.institutions[0].values()) 
                else:
                    # no institution, no info
                    new_info += [None, None, None, None, None]
                authors_list.append(new_info) 
    
    new_df = pd.DataFrame(authors_list, 
                          columns=["article_id", "author_position", "author_id", "author_display_name", "orcid",
                                   "raw_affiliation_string", 
                                   "inst_id", "inst_display_name", "ror", "inst_country_code", "inst_type"])
    df = pd.concat([df, new_df])
    
    return pd.merge(df, df_input, left_on="article_id", right_on="id")

In [6]:
def get_single_authors(df_input):
    # keep most recent publication per author
    keep = []
    
    for author in set(df_input["author_id"]): # go over each unique author
        # get all rows that match author
        publications = df_input[df_input["author_id"]==author]
        # get most recent one
        most_recent = publications["publication_date"].max()
        keep += publications[publications["publication_date"]==most_recent].values.tolist()
    
    keep_df = pd.DataFrame(keep,
                           columns = df_input.columns)
    
    return keep_df

## Results

In [5]:
journals = pd.read_pickle("./data/tax_nomen_phyl_journals.pkl")
journals

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2765496928,https://doi.org/10.1093/isd/ixx003,Anchored Hybrid Enrichment-Based Phylogenomics...,Anchored Hybrid Enrichment-Based Phylogenomics...,2017,2017-10-25,{'openalex': 'https://openalex.org/W2765496928...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"[{'id': 'https://openalex.org/V4210235066', 'd...","[https://openalex.org/W171439747, https://open...","[https://openalex.org/W2014192257, https://ope...",https://api.openalex.org/works/W2765496928/ngrams,"{'Abstract': [0], 'A': [1, 65], 'data': [2, 51...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 14}, {'year'...",2023-01-19T17:32:52.521048,2017-11-10,
1,https://openalex.org/W2904902663,https://doi.org/10.1093/isd/ixy018,Species Delineation Within the Euwallacea forn...,Species Delineation Within the Euwallacea forn...,2018,2018-11-01,{'openalex': 'https://openalex.org/W2904902663...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"[{'id': 'https://openalex.org/V4210235066', 'd...","[https://openalex.org/W1426752381, https://ope...","[https://openalex.org/W1976060978, https://ope...",https://api.openalex.org/works/W2904902663/ngrams,"{'Abstract': [0], 'The': [1, 56, 150], 'ambros...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 10}, {'year'...",2023-01-21T21:32:09.051077,2018-12-22,
2,https://openalex.org/W2996165219,https://doi.org/10.1093/isd/ixz024,Higher-Level Phylogeny and Reclassification of...,Higher-Level Phylogeny and Reclassification of...,2019,2019-11-01,{'openalex': 'https://openalex.org/W2996165219...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"[{'id': 'https://openalex.org/V4210235066', 'd...","[https://openalex.org/W1676602553, https://ope...","[https://openalex.org/W102747345, https://open...",https://api.openalex.org/works/W2996165219/ngrams,"{'Abstract': [0], 'Fireflies': [1], '(Lampyrid...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 14}, {'year'...",2023-01-21T18:56:45.111348,2019-12-26,
3,https://openalex.org/W2883729471,https://doi.org/10.1093/isd/ixy008,"Evolution, Diversification, and Biogeography o...","Evolution, Diversification, and Biogeography o...",2018,2018-07-01,{'openalex': 'https://openalex.org/W2883729471...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"[{'id': 'https://openalex.org/V4210235066', 'd...","[https://openalex.org/W14509275, https://opena...","[https://openalex.org/W1554287491, https://ope...",https://api.openalex.org/works/W2883729471/ngrams,"{'Abstract': [0], 'The': [1], 'grasshopper': [...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 9}, {'year':...",2023-01-18T17:23:13.811225,2018-08-03,
4,https://openalex.org/W3123906059,https://doi.org/10.1093/isd/ixz016,Understanding UCEs: A Comprehensive Primer on ...,Understanding UCEs: A Comprehensive Primer on ...,2019,2019-09-03,{'openalex': 'https://openalex.org/W3123906059...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,"[{'id': 'https://openalex.org/V4210235066', 'd...","[https://openalex.org/W1529325410, https://ope...","[https://openalex.org/W154122946, https://open...",https://api.openalex.org/works/W3123906059/ngrams,"{'Abstract': [0], 'Targeted': [1], 'enrichment...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2022, 'cited_by_count': 8}, {'year':...",2023-01-20T16:04:56.940388,2021-02-01,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58955,https://openalex.org/W4313322893,https://doi.org/10.1002/tax.12863,"One in, one out: Generic circumscription withi...","One in, one out: Generic circumscription withi...",2022,2022-12-31,{'openalex': 'https://openalex.org/W4313322893...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,"[{'id': 'https://openalex.org/V163860444', 'di...","[https://openalex.org/W1479986764, https://ope...","[https://openalex.org/W770724554, https://open...",https://api.openalex.org/works/W4313322893/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2023-01-22T04:51:34.054198,2023-01-06,
58956,https://openalex.org/W4317214345,https://doi.org/10.1002/tax.12864,Phylogeny based generic reclassification of <i...,Phylogeny based generic reclassification of <i...,2023,2023-01-18,{'openalex': 'https://openalex.org/W4317214345...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,"[{'id': 'https://openalex.org/V163860444', 'di...","[https://openalex.org/W135826177, https://open...","[https://openalex.org/W2042968073, https://ope...",https://api.openalex.org/works/W4317214345/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2023-01-24T21:46:44.072025,2023-01-18,
58957,https://openalex.org/W568755969,https://doi.org/10.12705/642.13,(038–039) Two proposals to deal with reprints ...,(038–039) Two proposals to deal with reprints ...,2015,2015-05-05,{'openalex': 'https://openalex.org/W568755969'...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,"[{'id': 'https://openalex.org/V163860444', 'di...",[],"[https://openalex.org/W1990804418, https://ope...",https://api.openalex.org/works/W568755969/ngrams,"{'Recent': [0], 'discussions': [1], 'about': [...",https://api.openalex.org/works?filter=cites:W5...,[],2023-01-16T15:15:41.319573,2016-06-24,
58958,https://openalex.org/W616988082,https://doi.org/10.12705/642.21,(2354) Proposal to conserve the name <i>Adelos...,(2354) Proposal to conserve the name <i>Adelos...,2015,2015-05-05,{'openalex': 'https://openalex.org/W616988082'...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"[{'id': 'https://openalex.org/V163860444', 'di...",[],"[https://openalex.org/W1923458328, https://ope...",https://api.openalex.org/works/W616988082/ngrams,,https://api.openalex.org/works?filter=cites:W6...,[],2023-01-23T08:57:56.702874,2016-06-24,


In [6]:
journals = get_dict_info(journals)
journals

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2765496928,https://doi.org/10.1093/isd/ixx003,Anchored Hybrid Enrichment-Based Phylogenomics...,Anchored Hybrid Enrichment-Based Phylogenomics...,2017,2017-10-25,{'openalex': 'https://openalex.org/W2765496928...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/isd/article-pdf/1/1/5...
1,https://openalex.org/W2904902663,https://doi.org/10.1093/isd/ixy018,Species Delineation Within the Euwallacea forn...,Species Delineation Within the Euwallacea forn...,2018,2018-11-01,{'openalex': 'https://openalex.org/W2904902663...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,Insect systematics and diversity,Oxford University Press,journal,https://academic.oup.com/isd/article-pdf/2/6/2...,True,publishedVersion,,True,bronze,https://academic.oup.com/isd/article-pdf/2/6/2...
2,https://openalex.org/W2996165219,https://doi.org/10.1093/isd/ixz024,Higher-Level Phylogeny and Reclassification of...,Higher-Level Phylogeny and Reclassification of...,2019,2019-11-01,{'openalex': 'https://openalex.org/W2996165219...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,Insect systematics and diversity,Oxford University Press,journal,,False,,,False,closed,
3,https://openalex.org/W2883729471,https://doi.org/10.1093/isd/ixy008,"Evolution, Diversification, and Biogeography o...","Evolution, Diversification, and Biogeography o...",2018,2018-07-01,{'openalex': 'https://openalex.org/W2883729471...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,Insect systematics and diversity,Oxford University Press,journal,https://academic.oup.com/isd/article-pdf/2/4/3...,True,publishedVersion,,True,bronze,https://academic.oup.com/isd/article-pdf/2/4/3...
4,https://openalex.org/W3123906059,https://doi.org/10.1093/isd/ixz016,Understanding UCEs: A Comprehensive Primer on ...,Understanding UCEs: A Comprehensive Primer on ...,2019,2019-09-03,{'openalex': 'https://openalex.org/W3123906059...,"{'id': 'https://openalex.org/V4210235066', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,green,https://www.preprints.org/manuscript/201905.03...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58955,https://openalex.org/W4313322893,https://doi.org/10.1002/tax.12863,"One in, one out: Generic circumscription withi...","One in, one out: Generic circumscription withi...",2022,2022-12-31,{'openalex': 'https://openalex.org/W4313322893...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,Taxon,Wiley,journal,,False,,,True,green,https://hal.inrae.fr/hal-03946235/document
58956,https://openalex.org/W4317214345,https://doi.org/10.1002/tax.12864,Phylogeny based generic reclassification of <i...,Phylogeny based generic reclassification of <i...,2023,2023-01-18,{'openalex': 'https://openalex.org/W4317214345...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,Taxon,Wiley,journal,https://doi.org/10.1002/tax.12864,True,publishedVersion,cc-by-nc,True,hybrid,https://doi.org/10.1002/tax.12864
58957,https://openalex.org/W568755969,https://doi.org/10.12705/642.13,(038–039) Two proposals to deal with reprints ...,(038–039) Two proposals to deal with reprints ...,2015,2015-05-05,{'openalex': 'https://openalex.org/W568755969'...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'green', 'oa_url'...",...,Taxon,Wiley,journal,,False,,,True,green,https://helda.helsinki.fi/bitstream/10138/1552...
58958,https://openalex.org/W616988082,https://doi.org/10.12705/642.21,(2354) Proposal to conserve the name <i>Adelos...,(2354) Proposal to conserve the name <i>Adelos...,2015,2015-05-05,{'openalex': 'https://openalex.org/W616988082'...,"{'id': 'https://openalex.org/V163860444', 'iss...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,Taxon,Wiley,journal,,False,,,False,closed,


In [15]:
# save data 
#journals.to_csv("./data/journals_full.csv")
journals.to_pickle("./data/articles_from_journals_full.pkl")

In [7]:
authors_journals = get_authors(journals)
authors_journals

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2765496928,first,https://openalex.org/A2106323028,Christopher H. Dietrich,https://orcid.org/0000-0003-4005-4305,"Illinois Natural History Survey, Prairie Resea...",https://openalex.org/I113171187,Illinois Natural History Survey,,US,...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/isd/article-pdf/1/1/5...
1,https://openalex.org/W2765496928,middle,https://openalex.org/A2164661299,Julie M. Allen,https://orcid.org/0000-0002-7625-3050,"Illinois Natural History Survey, Prairie Resea...",https://openalex.org/I113171187,Illinois Natural History Survey,,US,...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/isd/article-pdf/1/1/5...
2,https://openalex.org/W2765496928,middle,https://openalex.org/A2316184485,Alan R. Lemmon,https://orcid.org/0000-0003-4577-5064,"Department of Scientific Computing, Florida St...",https://openalex.org/I103163165,Florida State University,https://ror.org/05g3dte14,US,...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/isd/article-pdf/1/1/5...
3,https://openalex.org/W2765496928,middle,https://openalex.org/A2096297567,Emily Moriarty Lemmon,https://orcid.org/0000-0001-5911-6102,"Department of Biological Science, Florida Stat...",https://openalex.org/I103163165,Florida State University,https://ror.org/05g3dte14,US,...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/isd/article-pdf/1/1/5...
4,https://openalex.org/W2765496928,middle,https://openalex.org/A2063246427,Daniela M. Takiya,https://orcid.org/0000-0002-6233-3615,"Laboratório de Entomologia, Departamento de Zo...",https://openalex.org/I122140584,Federal University of Rio de Janeiro,https://ror.org/03490as77,BR,...,Insect systematics and diversity,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/isd/article-pdf/1/1/5...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183074,https://openalex.org/W4317214345,middle,https://openalex.org/A2023720555,Dietmar Quandt,https://orcid.org/0000-0003-4304-6028,"Nees Institute for Biodiversity of Plants, Uni...",https://openalex.org/I135140700,University of Bonn,https://ror.org/041nas322,DE,...,Taxon,Wiley,journal,https://doi.org/10.1002/tax.12864,True,publishedVersion,cc-by-nc,True,hybrid,https://doi.org/10.1002/tax.12864
183075,https://openalex.org/W4317214345,last,https://openalex.org/A1982208948,Maximilian Weigend,https://orcid.org/0000-0003-0813-6650,"Nees Institute for Biodiversity of Plants, Uni...",https://openalex.org/I135140700,University of Bonn,https://ror.org/041nas322,DE,...,Taxon,Wiley,journal,https://doi.org/10.1002/tax.12864,True,publishedVersion,cc-by-nc,True,hybrid,https://doi.org/10.1002/tax.12864
183076,https://openalex.org/W568755969,first,https://openalex.org/A1966595008,Alexander N. Sennikov,https://orcid.org/0000-0001-6664-7657,Finnish Museum of Natural History;,https://openalex.org/I4210139717,Finnish Museum of Natural History,https://ror.org/03tcx6c30,FI,...,Taxon,Wiley,journal,,False,,,True,green,https://helda.helsinki.fi/bitstream/10138/1552...
183077,https://openalex.org/W616988082,first,https://openalex.org/A667935466,Arne A. Anderberg,https://orcid.org/0000-0003-1822-5235,,,,,,...,Taxon,Wiley,journal,,False,,,False,closed,


In [8]:
# save data 
#authors_journals.to_csv("./data/authors_journals.csv")
authors_journals.to_pickle("./data/authors_journals.pkl")

In [9]:
single_authors = get_single_authors(authors_journals)
single_authors

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W3171440873,first,https://openalex.org/A2302825528,Gianni Allegro,https://orcid.org/0000-0002-5079-6671,"Strada Patro 11, I-14036 Moncalvo (AT), Italy ...",,"Strada Patro 11, I-14036 Moncalvo (AT), Italy ...",,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/60072/down...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/60072/down...
1,https://openalex.org/W2970477522,middle,https://openalex.org/A2902336126,Andrew F. Magee,https://orcid.org/0000-0002-7403-5455,"Department of Biology, University of Washingto...",https://openalex.org/I201448701,University of Washington,https://ror.org/00cvxb145,US,...,Systematic Biology,Oxford University Press,journal,,False,,,True,green,https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7...
2,https://openalex.org/W2970773727,middle,https://openalex.org/A2902336126,Andrew F. Magee,https://orcid.org/0000-0002-7403-5455,"Department of Biology, University of Washingto...",https://openalex.org/I201448701,University of Washington,https://ror.org/00cvxb145,US,...,Systematic Biology,Oxford University Press,journal,,False,,,True,bronze,https://academic.oup.com/sysbio/article-pdf/69...
3,https://openalex.org/W2553678995,last,https://openalex.org/A2706737904,Mehmet Faruk Gürbüz,,"Department of Biology, Faculty of Art and Scie...",https://openalex.org/I175124709,Süleyman Demirel University,https://ror.org/04fjtte88,TR,...,Zootaxa,Q15088586,journal,,False,,,False,closed,
4,https://openalex.org/W2294609271,middle,https://openalex.org/A2100425372,Frazer Sinclair,https://orcid.org/0000-0001-5017-3215,"Institute of Evolutionary Biology, University ...",https://openalex.org/I98677209,University of Edinburgh,https://ror.org/01nrxwf90,GB,...,Zootaxa,Q15088586,journal,,False,,,False,closed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75417,https://openalex.org/W3015845240,last,https://openalex.org/A235841599,María Ana Tovar-Hernández,https://orcid.org/0000-0002-5263-2830,"Universidad Autónoma de Nuevo León, Facultad d...",https://openalex.org/I169046204,Universidad Autónoma de Nuevo León,https://ror.org/01fh86n78,MX,...,Zootaxa,Q15088586,journal,,False,,,False,closed,
75418,https://openalex.org/W3173633418,middle,https://openalex.org/A1222944398,Fortunato Garza-Ocañas,https://orcid.org/0000-0003-3862-8875,"Universidad Autónoma de Nuevo León, Campus Lin...",https://openalex.org/I169046204,Universidad Autónoma de Nuevo León,https://ror.org/01fh86n78,MX,...,Phytotaxa,Q15088586,journal,,False,,,False,closed,
75419,https://openalex.org/W4308950459,middle,https://openalex.org/A4308950570,ZHEN WANG,https://orcid.org/0000-0002-2450-4508,,,,,,...,Phytotaxa,Q15088586,journal,,False,,,False,closed,
75420,https://openalex.org/W1919372707,last,https://openalex.org/A1851838551,Yair Ben-Dov,https://orcid.org/0000-0001-7685-6727,"Department of Entomology, Agricultural Researc...",https://openalex.org/I376102,Agricultural Research Organization,https://ror.org/05hbrxp80,IL,...,Zootaxa,Q15088586,journal,,False,,,False,closed,


In [10]:
# save data 
#single_authors.to_csv("./data/single_authors.csv")
single_authors.to_pickle("./data/single_authors.pkl")

In [11]:
keep = []

for journal in set(authors_journals["host_id"]): # go over each journal
    journal_df = authors_journals[authors_journals["host_id"]==journal]
    keep.append(get_single_authors(journal_df))

keep

[                            article_id author_position  \
 0     https://openalex.org/W2623899450           first   
 1     https://openalex.org/W3015213345           first   
 2     https://openalex.org/W4307887592          middle   
 3     https://openalex.org/W2963593978           first   
 4     https://openalex.org/W1908672312          middle   
 ...                                ...             ...   
 3151  https://openalex.org/W3090873003            last   
 3152  https://openalex.org/W4306655052          middle   
 3153  https://openalex.org/W1573408593          middle   
 3154  https://openalex.org/W4290831095           first   
 3155  https://openalex.org/W3208860665            last   
 
                              author_id         author_display_name  \
 0     https://openalex.org/A2622763940  Nattapon Nopporncharoenkul   
 1     https://openalex.org/A3015220692                   Yi‐Qin Xu   
 2     https://openalex.org/A4307888260               Lucia Varaldo   
 3    

In [13]:
singles_per_journal = pd.concat(keep)
singles_per_journal

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2623899450,first,https://openalex.org/A2622763940,Nattapon Nopporncharoenkul,https://orcid.org/0000-0002-9477-0625,"Department of Plant Science, Faculty of Scienc...",https://openalex.org/I25399158,Mahidol University,https://ror.org/01znkr924,TH,...,Journal of Systematics and Evolution,Wiley-Blackwell,journal,,False,,,False,closed,
1,https://openalex.org/W3015213345,first,https://openalex.org/A3015220692,Yi‐Qin Xu,,Ministry of Education Key Laboratory for Biodi...,https://openalex.org/I24943067,Fudan University,https://ror.org/013q1eq08,CN,...,Journal of Systematics and Evolution,Wiley-Blackwell,journal,,False,,,False,closed,
2,https://openalex.org/W4307887592,middle,https://openalex.org/A4307888260,Lucia Varaldo,,Department of DISTAVUniversity of GenoaCorso E...,,Department of DISTAVUniversity of GenoaCorso E...,,,...,Journal of Systematics and Evolution,Wiley-Blackwell,journal,,False,,,False,closed,
3,https://openalex.org/W2963593978,first,https://openalex.org/A2490906191,Shan-Shan Li,,State Key Laboratory of Systematic and Evoluti...,,State Key Laboratory of Systematic and Evoluti...,,,...,Journal of Systematics and Evolution,Wiley-Blackwell,journal,,False,,,False,closed,
4,https://openalex.org/W1908672312,middle,https://openalex.org/A2706113771,Jinhuo Jiang,,College of Life SciencesZhejiang UniversityHan...,https://openalex.org/I118810699,Hangzhou University,,CN,...,Journal of Systematics and Evolution,Wiley-Blackwell,journal,,False,,,False,closed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
716,https://openalex.org/W4297836291,middle,https://openalex.org/A4282626686,"Yodthong, Siriporn",,"Thaksin University, Phattalung, Thailand",https://openalex.org/I79246082,Thaksin University,https://ror.org/00t2prd39,TH,...,Zoosystematics and Evolution,Pensoft Publishers,journal,https://zse.pensoft.net/article/89591/download...,True,publishedVersion,cc-by,True,gold,https://zse.pensoft.net/article/89591/download...
717,https://openalex.org/W3033893622,first,https://openalex.org/A2126922470,Raffael Ernst,https://orcid.org/0000-0001-6347-1414,,,,,,...,Zoosystematics and Evolution,Pensoft Publishers,journal,https://zse.pensoft.net/article/51997/download...,True,publishedVersion,cc-by,True,gold,https://zse.pensoft.net/article/51997/download...
718,https://openalex.org/W2126688941,middle,https://openalex.org/A4272586102,None Seethaler,,"Charité Human Remains Project, Institute of Ve...",https://openalex.org/I7877124,Charité - University Medicine Berlin,https://ror.org/001w7jn25,DE,...,Zoosystematics and Evolution,Pensoft Publishers,journal,,False,,,False,closed,
719,https://openalex.org/W3184830308,middle,https://openalex.org/A2158309143,Renner Luiz Cerqueira Baptista,https://orcid.org/0000-0002-2372-5034,,,,,,...,Zoosystematics and Evolution,Pensoft Publishers,journal,https://zse.pensoft.net/article/67788/download...,True,publishedVersion,cc-by,True,gold,https://zse.pensoft.net/article/67788/download...


In [14]:
# save data 
singles_per_journal.to_pickle("./data/single_authors_per_journal.pkl")

## Hemiptera test

In [5]:
hemiptera = pd.read_pickle("./data/hemiptera_articles.pkl")
hemiptera

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2595220198,https://doi.org/10.1099/jgv.0.000738,ICTV Virus Taxonomy Profile: Geminiviridae,ICTV Virus Taxonomy Profile: Geminiviridae,2017,2017-02-01,{'openalex': 'https://openalex.org/W2595220198...,"{'id': 'https://openalex.org/V103035011', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,"[{'id': 'https://openalex.org/V103035011', 'di...","[https://openalex.org/W1971075914, https://ope...","[https://openalex.org/W1977159985, https://ope...",https://api.openalex.org/works/W2595220198/ngrams,"{'The': [0], 'geminiviruses': [1], 'are': [2, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 2}, {'year':...",2023-01-20T04:26:11.909156,2017-03-23,
1,https://openalex.org/W2145461927,https://doi.org/10.1146/annurev-ento-120811-15...,Biology and Management of Asian Citrus Psyllid...,Biology and Management of Asian Citrus Psyllid...,2013,2013-01-14,{'openalex': 'https://openalex.org/W2145461927...,"{'id': 'https://openalex.org/V92576693', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,"[{'id': 'https://openalex.org/V92576693', 'dis...","[https://openalex.org/W184443415, https://open...","[https://openalex.org/W1992690389, https://ope...",https://api.openalex.org/works/W2145461927/ngrams,"{'The': [0], 'Asian': [1], 'citrus': [2, 15, 3...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 4}, {'year':...",2023-01-31T07:23:16.629365,2016-06-24,
2,https://openalex.org/W2170438129,https://doi.org/10.1186/s13059-014-0521-0,Genomes of the rice pest brown planthopper and...,Genomes of the rice pest brown planthopper and...,2014,2014-12-03,{'openalex': 'https://openalex.org/W2170438129...,"{'id': 'https://openalex.org/V81160022', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,"[{'id': 'https://openalex.org/V81160022', 'dis...","[https://openalex.org/W22481580, https://opena...","[https://openalex.org/W35793803, https://opena...",https://api.openalex.org/works/W2170438129/ngrams,"{'The': [0], 'brown': [1, 86, 220], 'planthopp...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-25T20:49:25.106025,2016-06-24,
3,https://openalex.org/W2075215526,https://doi.org/10.1111/eea.12025,"Asian citrus psyllid,<i>Diaphorina citri</i>, ...","Asian citrus psyllid,<i>Diaphorina citri</i>, ...",2013,2013-02-01,{'openalex': 'https://openalex.org/W2075215526...,"{'id': 'https://openalex.org/V26596028', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,"[{'id': 'https://openalex.org/V26596028', 'dis...","[https://openalex.org/W201519924, https://open...","[https://openalex.org/W1578899359, https://ope...",https://api.openalex.org/works/W2075215526/ngrams,"{'The': [0, 113], 'Asian': [1, 45], 'citrus': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-29T06:19:54.956576,2016-06-24,
4,https://openalex.org/W2155621737,https://doi.org/10.1603/ipm14002,"Biology, Ecology, and Management of Brown Marm...","Biology, Ecology, and Management of Brown Marm...",2014,2014-09-01,{'openalex': 'https://openalex.org/W2155621737...,"{'id': 'https://openalex.org/V2734996914', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,"[{'id': 'https://openalex.org/V2734996914', 'd...","[https://openalex.org/W92308555, https://opena...","[https://openalex.org/W2015243873, https://ope...",https://api.openalex.org/works/W2155621737/ngrams,"{'Brown': [0], 'marmorated': [1], 'stink': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 2}, {'year':...",2023-01-19T21:31:30.491445,2016-06-24,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22299,https://openalex.org/W980551846,https://doi.org/10.13016/m2ww5k,The alarm-defense system of Cimex lectularius ...,The alarm-defense system of Cimex lectularius ...,2015,2015-01-01,{'openalex': 'https://openalex.org/W980551846'...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",dissertation,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,"[{'id': None, 'display_name': None, 'type': No...",[],[https://openalex.org/W2336112283],https://api.openalex.org/works/W980551846/ngrams,,https://api.openalex.org/works?filter=cites:W9...,[],2023-02-02T08:56:24.247250,2016-06-24,
22300,https://openalex.org/W985877709,,Field Efficacy Trial of Trapping Tea Green Lea...,Field Efficacy Trial of Trapping Tea Green Lea...,2013,2013-01-01,{'openalex': 'https://openalex.org/W985877709'...,"{'id': 'https://openalex.org/V2764604342', 'is...",journal-article,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,"[{'id': 'https://openalex.org/V2764604342', 'd...",[],"[https://openalex.org/W163712400, https://open...",https://api.openalex.org/works/W985877709/ngrams,,https://api.openalex.org/works?filter=cites:W9...,[],2023-01-28T15:34:58.194714,2016-06-24,
22301,https://openalex.org/W990240456,,Lądowe pluskwiaki różnoskrzydłe (Hemiptera: He...,Lądowe pluskwiaki różnoskrzydłe (Hemiptera: He...,2013,2013-01-01,{'openalex': 'https://openalex.org/W990240456'...,"{'id': 'https://openalex.org/V4306534744', 'is...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,"[{'id': 'https://openalex.org/V4306534744', 'd...",[https://openalex.org/W593886676],[],https://api.openalex.org/works/W990240456/ngrams,,https://api.openalex.org/works?filter=cites:W9...,[],2023-01-28T12:45:57.263162,2016-06-24,
22302,https://openalex.org/W996494048,,"New record of leafhoppers (Hemiptera, Cicadell...","New record of leafhoppers (Hemiptera, Cicadell...",2013,2013-01-01,{'openalex': 'https://openalex.org/W996494048'...,"{'id': 'https://openalex.org/V2764572205', 'is...",journal-article,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,"[{'id': 'https://openalex.org/V2764572205', 'd...",[],"[https://openalex.org/W40651051, https://opena...",https://api.openalex.org/works/W996494048/ngrams,,https://api.openalex.org/works?filter=cites:W9...,[],2023-01-30T18:39:13.779544,2016-06-24,


In [8]:
hemiptera = get_dict_info(hemiptera)

In [9]:
hemiptera

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,host_venue,type,open_access,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2595220198,https://doi.org/10.1099/jgv.0.000738,ICTV Virus Taxonomy Profile: Geminiviridae,ICTV Virus Taxonomy Profile: Geminiviridae,2017,2017-02-01,{'openalex': 'https://openalex.org/W2595220198...,"{'id': 'https://openalex.org/V103035011', 'iss...",journal-article,"{'is_oa': True, 'oa_status': 'hybrid', 'oa_url...",...,Journal of General Virology,Microbiology Society,journal,https://doi.org/10.1099/jgv.0.000738,True,publishedVersion,cc-by,True,hybrid,https://doi.org/10.1099/jgv.0.000738
1,https://openalex.org/W2145461927,https://doi.org/10.1146/annurev-ento-120811-15...,Biology and Management of Asian Citrus Psyllid...,Biology and Management of Asian Citrus Psyllid...,2013,2013-01-14,{'openalex': 'https://openalex.org/W2145461927...,"{'id': 'https://openalex.org/V92576693', 'issn...",journal-article,"{'is_oa': False, 'oa_status': 'closed', 'oa_ur...",...,Annual Review of Entomology,Annual Reviews,journal,,False,,,False,closed,
2,https://openalex.org/W2170438129,https://doi.org/10.1186/s13059-014-0521-0,Genomes of the rice pest brown planthopper and...,Genomes of the rice pest brown planthopper and...,2014,2014-12-03,{'openalex': 'https://openalex.org/W2170438129...,"{'id': 'https://openalex.org/V81160022', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,Genome Biology,BioMed Central,journal,https://doi.org/10.1186/s13059-014-0521-0,True,publishedVersion,cc-by,True,gold,https://doi.org/10.1186/s13059-014-0521-0
3,https://openalex.org/W2075215526,https://doi.org/10.1111/eea.12025,"Asian citrus psyllid,<i>Diaphorina citri</i>, ...","Asian citrus psyllid,<i>Diaphorina citri</i>, ...",2013,2013-02-01,{'openalex': 'https://openalex.org/W2075215526...,"{'id': 'https://openalex.org/V26596028', 'issn...",journal-article,"{'is_oa': True, 'oa_status': 'bronze', 'oa_url...",...,Entomologia Experimentalis Et Applicata,Wiley-Blackwell,journal,https://onlinelibrary.wiley.com/doi/pdfdirect/...,True,publishedVersion,,True,bronze,https://onlinelibrary.wiley.com/doi/pdfdirect/...
4,https://openalex.org/W2155621737,https://doi.org/10.1603/ipm14002,"Biology, Ecology, and Management of Brown Marm...","Biology, Ecology, and Management of Brown Marm...",2014,2014-09-01,{'openalex': 'https://openalex.org/W2155621737...,"{'id': 'https://openalex.org/V2734996914', 'is...",journal-article,"{'is_oa': True, 'oa_status': 'gold', 'oa_url':...",...,Journal of Integrated Pest Management,Oxford University Press,journal,https://academic.oup.com/jipm/article-pdf/5/3/...,True,publishedVersion,cc-by-nc-nd,True,gold,https://academic.oup.com/jipm/article-pdf/5/3/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22299,https://openalex.org/W980551846,https://doi.org/10.13016/m2ww5k,The alarm-defense system of Cimex lectularius ...,The alarm-defense system of Cimex lectularius ...,2015,2015-01-01,{'openalex': 'https://openalex.org/W980551846'...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",dissertation,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,,,,,False,,,False,,
22300,https://openalex.org/W985877709,,Field Efficacy Trial of Trapping Tea Green Lea...,Field Efficacy Trial of Trapping Tea Green Lea...,2013,2013-01-01,{'openalex': 'https://openalex.org/W985877709'...,"{'id': 'https://openalex.org/V2764604342', 'is...",journal-article,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,Plant diseases and pests,,journal,https://www.cabdirect.org/abstracts/2014323472...,False,,,False,,
22301,https://openalex.org/W990240456,,Lądowe pluskwiaki różnoskrzydłe (Hemiptera: He...,Lądowe pluskwiaki różnoskrzydłe (Hemiptera: He...,2013,2013-01-01,{'openalex': 'https://openalex.org/W990240456'...,"{'id': 'https://openalex.org/V4306534744', 'is...",,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,Wiadomości Entomologiczne,,journal,http://yadda.icm.edu.pl/yadda/element/bwmeta1....,False,,,False,,
22302,https://openalex.org/W996494048,,"New record of leafhoppers (Hemiptera, Cicadell...","New record of leafhoppers (Hemiptera, Cicadell...",2013,2013-01-01,{'openalex': 'https://openalex.org/W996494048'...,"{'id': 'https://openalex.org/V2764572205', 'is...",journal-article,"{'is_oa': False, 'oa_status': None, 'oa_url': ...",...,The Japanese Journal of Systematic Entomology,,journal,https://www.cabdirect.org/cabdirect/abstract/2...,False,,,False,,


In [20]:
authors_hemiptera = get_authors(hemiptera)
authors_hemiptera

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2595220198,first,https://openalex.org/A2092865295,Francisco Murilo Zerbini,https://orcid.org/0000-0001-8617-0200,"Departamento de Fitopatologia/BIOAGRO, Univers...",https://openalex.org/I146165071,Universidade Federal de Viçosa,https://ror.org/0409dgb37,BR,...,Journal of General Virology,Microbiology Society,journal,https://doi.org/10.1099/jgv.0.000738,True,publishedVersion,cc-by,True,hybrid,https://doi.org/10.1099/jgv.0.000738
1,https://openalex.org/W2595220198,middle,https://openalex.org/A2313427679,Rob W. Briddon,https://orcid.org/0000-0001-8738-0365,National Institute for Biotechnology and Genet...,https://openalex.org/I1308171952,National Institute for Biotechnology and Genet...,https://ror.org/01bh91531,PK,...,Journal of General Virology,Microbiology Society,journal,https://doi.org/10.1099/jgv.0.000738,True,publishedVersion,cc-by,True,hybrid,https://doi.org/10.1099/jgv.0.000738
2,https://openalex.org/W2595220198,middle,https://openalex.org/A2105612331,Ali M. Idris,,"School of Plant Sciences, University of Arizon...",https://openalex.org/I138006243,University of Arizona,https://ror.org/03m2x1q45,US,...,Journal of General Virology,Microbiology Society,journal,https://doi.org/10.1099/jgv.0.000738,True,publishedVersion,cc-by,True,hybrid,https://doi.org/10.1099/jgv.0.000738
3,https://openalex.org/W2595220198,middle,https://openalex.org/A2122992593,Darren J. Martin,https://orcid.org/0000-0002-8537-6765,"Computational Biology Group, Institute of Infe...",https://openalex.org/I157614274,University of Cape Town,https://ror.org/03p74gp79,ZA,...,Journal of General Virology,Microbiology Society,journal,https://doi.org/10.1099/jgv.0.000738,True,publishedVersion,cc-by,True,hybrid,https://doi.org/10.1099/jgv.0.000738
4,https://openalex.org/W2595220198,middle,https://openalex.org/A2302825744,Enrique Moriones,https://orcid.org/0000-0003-0295-7845,Instituto de Hortofruticultura Subtropical y M...,https://openalex.org/I4210137448,Instituto de Hortofruticultura Subtropical y M...,https://ror.org/04nrv3s86,ES,...,Journal of General Virology,Microbiology Society,journal,https://doi.org/10.1099/jgv.0.000738,True,publishedVersion,cc-by,True,hybrid,https://doi.org/10.1099/jgv.0.000738
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83424,https://openalex.org/W996494048,first,https://openalex.org/A2442939295,M. Hayashi,,,,,,,...,The Japanese Journal of Systematic Entomology,,journal,https://www.cabdirect.org/cabdirect/abstract/2...,False,,,False,,
83425,https://openalex.org/W996494048,last,https://openalex.org/A2163259635,K. Kogure,,,,,,,...,The Japanese Journal of Systematic Entomology,,journal,https://www.cabdirect.org/cabdirect/abstract/2...,False,,,False,,
83426,https://openalex.org/W998711502,first,https://openalex.org/A2891636283,Juliana Vilches,,,,,,,...,Munis Entomology and Zoology,,journal,https://ri.conicet.gov.ar/handle/11336/34763,False,,,False,,
83427,https://openalex.org/W998711502,middle,https://openalex.org/A2034276950,Estela M. Quirán,,,,,,,...,Munis Entomology and Zoology,,journal,https://ri.conicet.gov.ar/handle/11336/34763,False,,,False,,


In [21]:
singles_hemiptera = get_single_authors(authors_hemiptera)
singles_hemiptera

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2884631797,middle,https://openalex.org/A2160122099,Marcos Antonio Segatto Silva,,"Instituto de Ciências Biológicas e Naturais, U...",https://openalex.org/I4210106570,Federal University of Triângulo Mineiro,https://ror.org/01av3m334,BR,...,Journal of arthropod-borne diseases,Tehran University of Medical Sciences,journal,,False,,,False,,
1,https://openalex.org/W4301586508,middle,https://openalex.org/A4301790041,Gabriela Machaj,,"Jagiellonian University, Poland;",https://openalex.org/I126596746,Jagiellonian University,https://ror.org/03bqmcz70,PL,...,,,,https://doi.org/10.1101/2022.10.04.510826,False,,,True,green,https://www.biorxiv.org/content/biorxiv/early/...
2,https://openalex.org/W2914478987,middle,https://openalex.org/A2423172834,M. Larose,,Institut de recherche et de développement en a...,https://openalex.org/I4210136451,Institut de Recherche et de Développement en A...,https://ror.org/03snehy11,CA,...,Phytoprotection,Consortium Erudit,journal,,False,,,False,closed,
3,https://openalex.org/W4210831224,first,https://openalex.org/A2123484615,Antonio Escandiel de Souza,https://orcid.org/0000-0001-6531-3794,,,,,,...,,,,https://doi.org/10.11606/d.11.1999.tde-2022020...,True,publishedVersion,cc-by-nc-sa,True,gold,http://www.teses.usp.br/teses/disponiveis/11/1...
4,https://openalex.org/W4214706599,first,https://openalex.org/A4220463016,Joko Pilianto,,,,,,,...,Jurnal HPT (Jurnal Hama Penyakit Tumbuhan),Brawijaya University,journal,http://jurnalhpt.ub.ac.id/index.php/jhpt/artic...,True,publishedVersion,cc-by-nc,True,hybrid,http://jurnalhpt.ub.ac.id/index.php/jhpt/artic...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42529,https://openalex.org/W4210637711,middle,https://openalex.org/A4212670555,Jin-Ho Na,,,,,,,...,Nong'yag gwahag hoeji,The Korean Society of Pesticide Science,journal,,False,,,False,closed,
42530,https://openalex.org/W1886072261,middle,https://openalex.org/A2028986482,Joseph J. Demark,,Dow AgroSciences LLC Fayetteville AR USA,https://openalex.org/I119336515,Dow AgroSciences,,US,...,Pest Management Science,Wiley,journal,,False,,,False,closed,
42531,https://openalex.org/W233737765,last,https://openalex.org/A2251052474,Joseph P. Albano,,,,,,,...,Journal of citrus pathology,"eScholarship Publishing, University of California",journal,https://escholarship.org/content/qt3d77r55p/qt...,True,publishedVersion,cc-by,True,hybrid,https://escholarship.org/content/qt3d77r55p/qt...
42532,https://openalex.org/W4313051617,middle,https://openalex.org/A4314124415,Gailce Leo Justin C,,,,,,,...,Madras agricultural journal,Madras Agricultural Students Union,journal,http://masujournal.org/109/T2VXOdcP7thHmd3uK0T...,True,publishedVersion,cc-by,True,hybrid,http://masujournal.org/109/T2VXOdcP7thHmd3uK0T...


In [24]:
# count how many have orcids
singles_hemiptera["orcid"].count() 

14290

In [23]:
singles_hemiptera.columns

Index(['article_id', 'author_position', 'author_id', 'author_display_name',
       'orcid', 'raw_affiliation_string', 'inst_id', 'inst_display_name',
       'ror', 'inst_country_code', 'inst_type', 'id', 'doi', 'title',
       'display_name', 'publication_year', 'publication_date', 'ids',
       'host_venue', 'type', 'open_access', 'authorships', 'cited_by_count',
       'biblio', 'is_retracted', 'is_paratext', 'concepts', 'mesh',
       'alternate_host_venues', 'referenced_works', 'related_works',
       'ngrams_url', 'abstract_inverted_index', 'cited_by_api_url',
       'counts_by_year', 'updated_date', 'created_date',
       'is_authors_truncated', 'host_id', 'issn_l', 'issn',
       'host_display_name', 'publisher', 'host_type', 'host_url', 'is_host_oa',
       'host_version', 'host_license', 'is_oa', 'oa_status', 'oa_url'],
      dtype='object')

In [28]:
# count how many are european
european_codes = ["AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR", "HU", "IE", # EU
                  "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE", # EU
                  "IS", "LI", "NO", "CH", "AL", "ME", "MK", "RS", "TR", "AD", "BY", "BA", "MD", "MC", # pan-Europe
                  "RU", "SM", "UA", "GB", "VA"] # pan-Europe

n = 0
for country in singles_hemiptera["inst_country_code"]:
    if country in european_codes:
        n += 1

n

3329

In [29]:
# count both at the same time

n = 0
for row in singles_hemiptera.itertuples():
    if row.orcid:
        if row.inst_country_code in european_codes:
            n += 1

n

1535

## Insects

In [30]:
insect_articles = pd.read_pickle("./data/insect_articles_search.pkl")
insect_articles

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,host_venue,type,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,order
0,https://openalex.org/W2923200093,,Further notes on Australian Coleoptera with de...,Further notes on Australian Coleoptera with de...,279.08148,2016,2016-07-02,{'openalex': 'https://openalex.org/W2923200093...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",book,...,[],"[https://openalex.org/W572957188, https://open...",https://api.openalex.org/works/W2923200093/ngrams,,https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2020, 'cited_by_count': 2}, {'year':...",2023-01-27T21:49:10.994896,2019-04-01,,Coleoptera
1,https://openalex.org/W2895970952,https://doi.org/10.3161/00034541anz2018.68.3.010,The<i>Cis multidentatus</i>Species-Group (Cole...,The<i>Cis multidentatus</i>Species-Group (Cole...,176.39905,2018,2018-10-08,{'openalex': 'https://openalex.org/W2895970952...,"{'id': 'https://openalex.org/V24891482', 'issn...",journal-article,...,"[https://openalex.org/W108158877, https://open...","[https://openalex.org/W2003951640, https://ope...",https://api.openalex.org/works/W2895970952/ngrams,"{'Cis': [0, 5, 25, 101, 129, 146, 158, 184, 20...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-01-30T18:01:22.329570,2018-10-26,,Coleoptera
2,https://openalex.org/W2759820149,https://doi.org/10.1146/annurev-ento-020117-04...,How Many Species of Insects and Other Terrestr...,How Many Species of Insects and Other Terrestr...,150.51698,2018,2018-01-11,{'openalex': 'https://openalex.org/W2759820149...,"{'id': 'https://openalex.org/V92576693', 'issn...",journal-article,...,"[https://openalex.org/W306638812, https://open...","[https://openalex.org/W1981422546, https://ope...",https://api.openalex.org/works/W2759820149/ngrams,"{'In': [0], 'the': [1, 19, 30, 69, 120], 'last...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 9}, {'year':...",2023-01-25T20:04:19.074824,2017-10-06,,Coleoptera
3,https://openalex.org/W2516041128,https://doi.org/10.3897/zookeys.610.9361,Twelve new species and fifty-three new provinc...,Twelve new species and fifty-three new provinc...,141.47995,2016,2016-08-11,{'openalex': 'https://openalex.org/W2516041128...,"{'id': 'https://openalex.org/V199213172', 'iss...",journal-article,...,"[https://openalex.org/W1964889809, https://ope...","[https://openalex.org/W1992974445, https://ope...",https://api.openalex.org/works/W2516041128/ngrams,"{'One': [0], 'hundred': [1], 'twenty': [2], 's...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 10}, {'year'...",2023-01-20T00:12:05.803217,2016-09-16,,Coleoptera
4,https://openalex.org/W2911258791,https://doi.org/10.11646/zootaxa.4555.2.1,The Onthophagus fuscus (Coleoptera: Scarabaeid...,The Onthophagus fuscus (Coleoptera: Scarabaeid...,138.39027,2019,2019-02-12,{'openalex': 'https://openalex.org/W2911258791...,"{'id': 'https://openalex.org/V171471881', 'iss...",journal-article,...,[],"[https://openalex.org/W1600544125, https://ope...",https://api.openalex.org/works/W2911258791/ngrams,"{'The': [0, 78], 'Onthophagus': [1, 60, 69], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-01-19T15:35:34.554999,2019-02-21,,Coleoptera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,https://openalex.org/W4256060135,https://doi.org/10.1111/aen.12238,"Myrmecia: Volume 52, Part 3","Myrmecia: Volume 52, Part 3",1.00000,2016,2016-08-01,{'openalex': 'https://openalex.org/W4256060135...,"{'id': 'https://openalex.org/V4210180117', 'is...",journal-article,...,[],"[https://openalex.org/W224613156, https://open...",https://api.openalex.org/works/W4256060135/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2023-01-21T08:38:45.088276,2022-05-12,,Zoraptera
92,https://openalex.org/W3209163763,https://doi.org/10.5281/zenodo.256280,FIGURES 16–23 in Zorotypus weiweii (Zoraptera:...,FIGURES 16–23 in Zorotypus weiweii (Zoraptera:...,1.00000,2016,2016-12-31,{'openalex': 'https://openalex.org/W3209163763...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",,...,[],[],https://api.openalex.org/works/W3209163763/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2023-01-28T23:28:28.188859,2021-11-08,,Zoraptera
93,https://openalex.org/W2810088221,https://doi.org/10.1002/9781118945582.ch3,Insect Biodiversity in Indochina: A Window int...,Insect Biodiversity in Indochina: A Window int...,1.00000,2018,2018-05-23,{'openalex': 'https://openalex.org/W2810088221...,"{'id': 'https://openalex.org/V4306463504', 'is...",book-chapter,...,"[https://openalex.org/W759610088, https://open...","[https://openalex.org/W249872125, https://open...",https://api.openalex.org/works/W2810088221/ngrams,,https://api.openalex.org/works?filter=cites:W2...,[],2023-01-24T10:23:59.241962,2018-07-10,,Zoraptera
94,https://openalex.org/W4247469250,https://doi.org/10.1016/b978-0-12-809633-8.010...,Social Evolution in “Other” Insects and Arachnids,Social Evolution in “Other” Insects and Arachnids,1.00000,2019,2019-01-01,{'openalex': 'https://openalex.org/W4247469250...,"{'id': 'https://openalex.org/V4306463230', 'is...",book-chapter,...,[],"[https://openalex.org/W1989463223, https://ope...",https://api.openalex.org/works/W4247469250/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2023-01-31T15:07:39.761739,2022-05-12,,Zoraptera


In [31]:
insect_articles = get_dict_info(insect_articles)
insect_articles

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,host_venue,type,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2923200093,,Further notes on Australian Coleoptera with de...,Further notes on Australian Coleoptera with de...,279.08148,2016,2016-07-02,{'openalex': 'https://openalex.org/W2923200093...,"{'id': None, 'issn_l': None, 'issn': None, 'di...",book,...,,,,,False,,,False,,
0,https://openalex.org/W1971821649,https://doi.org/10.1590/0074-0276130434,"Rhodnius barretti, a new species of Triatomina...","Rhodnius barretti, a new species of Triatomina...",225.62585,2013,2013-01-01,{'openalex': 'https://openalex.org/W1971821649...,"{'id': 'https://openalex.org/V165991124', 'iss...",journal-article,...,,,,,False,,,False,,
0,https://openalex.org/W384240811,https://doi.org/10.11646/zootaxa.3955.3.4,<p><strong>The New World genus <em>Rhinoleucop...,<p><strong>The New World genus <em>Rhinoleucop...,221.30016,2015,2015-05-05,{'openalex': 'https://openalex.org/W384240811'...,"{'id': 'https://openalex.org/V171471881', 'iss...",journal-article,...,,,,,False,,,False,,
0,https://openalex.org/W2039380629,https://doi.org/10.1371/journal.pone.0122407,Phylogenetic Molecular Species Delimitations U...,Phylogenetic Molecular Species Delimitations U...,256.51770,2015,2015-04-08,{'openalex': 'https://openalex.org/W2039380629...,"{'id': 'https://openalex.org/V202381698', 'iss...",journal-article,...,,,,,False,,,False,,
0,https://openalex.org/W2092029632,https://doi.org/10.1007/s13127-012-0123-1,"Biogeography, ecology, acoustics and chromosom...","Biogeography, ecology, acoustics and chromosom...",148.68529,2013,2013-02-09,{'openalex': 'https://openalex.org/W2092029632...,"{'id': 'https://openalex.org/V190011727', 'iss...",journal-article,...,,,,,False,,,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16069,https://openalex.org/W4318756623,https://doi.org/10.25221/fee.470.2,"Two new species of Lucanus Scopoli, 1763 from ...","Two new species of Lucanus Scopoli, 1763 from ...",1.00000,2023,2023-02-02,{'openalex': 'https://openalex.org/W4318756623...,"{'id': 'https://openalex.org/V4210228065', 'is...",journal-article,...,Far Eastern entomologist,Federal Scientific Center of the East Asia Ter...,journal,,False,,,False,,
16070,https://openalex.org/W4318762625,https://doi.org/10.11646/zootaxa.5231.5.7,"A new tribe, genus and species of the tenebrio...","A new tribe, genus and species of the tenebrio...",1.00000,2023,2023-02-02,{'openalex': 'https://openalex.org/W4318762625...,"{'id': 'https://openalex.org/V171471881', 'iss...",journal-article,...,Zootaxa,Q15088586,journal,,False,,,False,,
16071,https://openalex.org/W4318826311,https://doi.org/10.11646/zootaxa.5231.5.3,"Catalog of the genus Cacia Newman (Coleoptera,...","Catalog of the genus Cacia Newman (Coleoptera,...",1.00000,2023,2023-02-02,{'openalex': 'https://openalex.org/W4318826311...,"{'id': 'https://openalex.org/V171471881', 'iss...",journal-article,...,Zootaxa,Q15088586,journal,,False,,,False,,
16072,https://openalex.org/W4318830028,https://doi.org/10.11646/zootaxa.5231.5.1,Omorgus (Omorgus) khandesh (Coleoptera: Scarab...,Omorgus (Omorgus) khandesh (Coleoptera: Scarab...,1.00000,2023,2023-02-02,{'openalex': 'https://openalex.org/W4318830028...,"{'id': 'https://openalex.org/V171471881', 'iss...",journal-article,...,Zootaxa,Q15088586,journal,,False,,,False,,


In [33]:
authors_insects = get_authors(insect_articles)
authors_insects

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2923200093,first,https://openalex.org/A2670866640,Thomas Blackburn,,,,,,,...,,,,,False,,,False,,
1,https://openalex.org/W1971821649,first,https://openalex.org/A10546243,Fernando Abad-Franch,https://orcid.org/0000-0002-7715-0328,"Fiocruz, Manaus, Brasil; Pontificia Universida...",https://openalex.org/I52325,Oswaldo Cruz Foundation,https://ror.org/04jhswv08,BR,...,,,,,False,,,False,,
2,https://openalex.org/W1971821649,middle,https://openalex.org/A2308254080,Márcio G. Pavan,https://orcid.org/0000-0002-5699-242X,Laboratorio de Epidemiologia e Sistematica Mol...,https://openalex.org/I4210091339,Laboratory of Molecular Genetics,https://ror.org/00fwhek95,PL,...,,,,,False,,,False,,
3,https://openalex.org/W1971821649,middle,https://openalex.org/A2579541549,Nicolás Jaramillo-O,,UNIVERSIDAD DE ANTIOQUIA MEDELLÍN - COLOMBIA,https://openalex.org/I35961687,University of Antioquia,https://ror.org/03bp5hc83,CO,...,,,,,False,,,False,,
4,https://openalex.org/W1971821649,middle,https://openalex.org/A2192147804,Francisco López Palomeque,https://orcid.org/0000-0002-9020-8061,", Pontificia Universidad Católica del Ecuador,...",https://openalex.org/I39398033,Pontificia Universidad Católica del Ecuador,https://ror.org/02qztda51,EC,...,,,,,False,,,False,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014741,https://openalex.org/W4318830028,first,https://openalex.org/A4318881374,WERNER P. STRÜMPHER,https://orcid.org/0000-0002-7047-2666,,,,,,...,Zootaxa,Q15088586,journal,,False,,,False,,
1014742,https://openalex.org/W4318830028,last,https://openalex.org/A640135111,Aparna Sureshchandra Kalawate,https://orcid.org/0000-0001-6595-6749,,,,,,...,Zootaxa,Q15088586,journal,,False,,,False,,
1014743,https://openalex.org/W4318220546,first,https://openalex.org/A4318235369,Luis FIGUEROA,,"Universidad Nacional Mayor de San Marcos, Per...",https://openalex.org/I192513696,National University of San Marcos,https://ror.org/006vs7897,PE,...,Acta Amazonica,National Institute of Amazonian Research,journal,,False,,,False,closed,
1014744,https://openalex.org/W4318220546,middle,https://openalex.org/A4318235370,Julián CLAVIJO-BUSTOS,,Instituto de Investigación de Recursos Biológi...,https://openalex.org/I2799870028,Alexander von Humboldt Biological Resources Re...,https://ror.org/026dk4f10,CO,...,Acta Amazonica,National Institute of Amazonian Research,journal,,False,,,False,closed,


In [34]:
singles_insects = get_single_authors(authors_insects)
singles_insects

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2912238117,first,https://openalex.org/A2042360716,N Kotlarewski,https://orcid.org/0000-0003-2873-9547,"Australian Research Council, Centre for Forest...",https://openalex.org/I1337719021,Australian Research Council,https://ror.org/05mmh0f86,AU,...,Fragmenta entomologica,PAGEPress (Italy),journal,https://doi.org/10.4081/fe.2018.279,True,publishedVersion,cc-by-nc,True,gold,https://doi.org/10.4081/fe.2018.279
1,https://openalex.org/W2912238117,middle,https://openalex.org/A2042360716,N Kotlarewski,https://orcid.org/0000-0003-2873-9547,"Australian Research Council, Centre for Forest...",https://openalex.org/I1337719021,Australian Research Council,https://ror.org/05mmh0f86,AU,...,Fragmenta entomologica,PAGEPress (Italy),journal,https://doi.org/10.4081/fe.2018.279,True,publishedVersion,cc-by-nc,True,gold,https://doi.org/10.4081/fe.2018.279
2,https://openalex.org/W4307367223,middle,https://openalex.org/A4297053647,WERONIKA ŁASKA,,"Faculty of Geography and Geology, Institute of...",https://openalex.org/I126596746,Jagiellonian University,https://ror.org/03bqmcz70,PL,...,The Holocene,SAGE Publishing,journal,,False,,,False,closed,
3,https://openalex.org/W2007917710,first,https://openalex.org/A2096908627,Carissa L. Free,,"School of Agriculture and Food Sciences, Unive...",https://openalex.org/I165143802,University of Queensland,https://ror.org/00rqy9422,AU,...,Foods,MDPI,journal,https://www.mdpi.com/2304-8158/11/19/3075/pdf?...,True,publishedVersion,cc-by,True,gold,https://www.mdpi.com/2304-8158/11/19/3075/pdf?...
4,https://openalex.org/W2007917710,first,https://openalex.org/A2096908627,Carissa L. Free,,"School of Agriculture and Food Sciences, Unive...",https://openalex.org/I165143802,University of Queensland,https://ror.org/00rqy9422,AU,...,Insects,MDPI,journal,https://www.mdpi.com/2075-4450/9/4/170/pdf,True,publishedVersion,cc-by,True,gold,https://www.mdpi.com/2075-4450/9/4/170/pdf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
493382,https://openalex.org/W3046526526,first,https://openalex.org/A2205366518,Michael Whitby,,"Bat Conservation International, 1012 14th St N...",https://openalex.org/I2799688433,Bat Conservation International,https://ror.org/04jp6nz39,US,...,Acta Entomologica Musei Nationalis Pragae,"Národní Muzeum (National Museum, Prague)",journal,https://biotaxa.org/AEMNP/article/download/227...,False,,,False,,
493383,https://openalex.org/W3046526526,first,https://openalex.org/A2205366518,Michael Whitby,,"Bat Conservation International, 1012 14th St N...",https://openalex.org/I2799688433,Bat Conservation International,https://ror.org/04jp6nz39,US,...,Zootaxa,Q15088586,journal,,False,,,False,closed,
493384,https://openalex.org/W3046526526,first,https://openalex.org/A2205366518,Michael Whitby,,"Bat Conservation International, 1012 14th St N...",https://openalex.org/I2799688433,Bat Conservation International,https://ror.org/04jp6nz39,US,...,"Agriculture, Ecosystems & Environment",Elsevier BV,journal,,False,,,False,closed,
493385,https://openalex.org/W2757474261,middle,https://openalex.org/A2141001598,Giuseppe Lo Re,https://orcid.org/0000-0002-7479-948X,"CNR-ISPAAM, Sassari, Italy",,"CNR-ISPAAM, Sassari, Italy",,,...,Scientific Reports,Nature Portfolio,journal,https://www.nature.com/articles/s41598-021-855...,True,publishedVersion,cc-by,True,gold,https://www.nature.com/articles/s41598-021-855...


In [7]:
eu_insects = pd.read_pickle("./data/insect_articles_EU.pkl")

In [8]:
paneu_insects = pd.read_pickle("./data/insect_articles_paneurope.pkl")

In [9]:
eu_authors = get_authors(eu_insects)
eu_authors

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,order
0,https://openalex.org/W2395607258,first,https://openalex.org/A2629692510,Matej Bocek,https://orcid.org/0000-0002-3398-6078,"Laboratory of Molecular Systematics, Departmen...",https://openalex.org/I70703428,"Palacký University, Olomouc",https://ror.org/04qxnmv42,CZ,...,"[https://openalex.org/W1588508053, https://ope...","[https://openalex.org/W1979480035, https://ope...",https://api.openalex.org/works/W2395607258/ngrams,"{'Species': [0], 'delimitation': [1, 195], 'wa...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-20T10:56:17.511997,2016-06-24,,Coleoptera
1,https://openalex.org/W2395607258,last,https://openalex.org/A2057892699,Ladislav Bocak,https://orcid.org/0000-0001-6382-8006,"Laboratory of Molecular Systematics, Departmen...",https://openalex.org/I70703428,"Palacký University, Olomouc",https://ror.org/04qxnmv42,CZ,...,"[https://openalex.org/W1588508053, https://ope...","[https://openalex.org/W1979480035, https://ope...",https://api.openalex.org/works/W2395607258/ngrams,"{'Species': [0], 'delimitation': [1, 195], 'wa...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-20T10:56:17.511997,2016-06-24,,Coleoptera
2,https://openalex.org/W2470267224,first,https://openalex.org/A2309831076,David Peris,https://orcid.org/0000-0001-9912-8802,Universitat de Barcelona,https://openalex.org/I71999127,University of Barcelona,https://ror.org/021018s57,ES,...,"[https://openalex.org/W1492713357, https://ope...","[https://openalex.org/W1581826312, https://ope...",https://api.openalex.org/works/W2470267224/ngrams,"{'Abstract': [0], 'A': [1], 'new': [2, 105], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-02T10:14:06.454186,2016-07-22,,Coleoptera
3,https://openalex.org/W2470267224,first,https://openalex.org/A2309831076,David Peris,https://orcid.org/0000-0001-9912-8802,Universitat de Barcelona,https://openalex.org/I71999127,University of Barcelona,https://ror.org/021018s57,ES,...,"[https://openalex.org/W1492713357, https://ope...","[https://openalex.org/W1581826312, https://ope...",https://api.openalex.org/works/W2470267224/ngrams,"{'Abstract': [0], 'A': [1], 'new': [2, 105], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-02T10:14:06.454186,2016-07-22,,Diptera
4,https://openalex.org/W2470267224,last,https://openalex.org/A2225365894,Jiří Háva,,Czech University of Life Sciences Kamýcká 1176,https://openalex.org/I205984670,Czech University of Life Sciences Prague,https://ror.org/0415vcw02,CZ,...,"[https://openalex.org/W1492713357, https://ope...","[https://openalex.org/W1581826312, https://ope...",https://api.openalex.org/works/W2470267224/ngrams,"{'Abstract': [0], 'A': [1], 'new': [2, 105], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-02T10:14:06.454186,2016-07-22,,Coleoptera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295508,https://openalex.org/W4220779987,first,https://openalex.org/A2111295547,Rongrong Shen,,"<idGroup xmlns=""http://www.wiley.com/namespace...",https://openalex.org/I52158045,China Agricultural University,https://ror.org/04v3ywz14,CN,...,"[https://openalex.org/W1524915722, https://ope...","[https://openalex.org/W272648321, https://open...",https://api.openalex.org/works/W4220779987/ngrams,"{'Inocelliidae': [0, 171, 248], 'is': [1, 31, ...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2023-01-31T06:51:37.351256,2022-04-03,,Raphidioptera
295509,https://openalex.org/W4220779987,middle,https://openalex.org/A708656764,Horst Aspöck,https://orcid.org/0000-0001-9407-3566,"<idGroup xmlns=""http://www.wiley.com/namespace...",https://openalex.org/I76134821,Medical University of Vienna,https://ror.org/05n3x4p02,AT,...,"[https://openalex.org/W1524915722, https://ope...","[https://openalex.org/W272648321, https://open...",https://api.openalex.org/works/W4220779987/ngrams,"{'Inocelliidae': [0, 171, 248], 'is': [1, 31, ...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2023-01-31T06:51:37.351256,2022-04-03,,Raphidioptera
295510,https://openalex.org/W4220779987,middle,https://openalex.org/A1381364099,Ulrike Aspöck,https://orcid.org/0000-0003-1912-2609,"<idGroup xmlns=""http://www.wiley.com/namespace...",,"<idGroup xmlns=""http://www.wiley.com/namespace...",,,...,"[https://openalex.org/W1524915722, https://ope...","[https://openalex.org/W272648321, https://open...",https://api.openalex.org/works/W4220779987/ngrams,"{'Inocelliidae': [0, 171, 248], 'is': [1, 31, ...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2023-01-31T06:51:37.351256,2022-04-03,,Raphidioptera
295511,https://openalex.org/W4220779987,middle,https://openalex.org/A2136092501,John Plant,,"<idGroup xmlns=""http://www.wiley.com/namespace...",,"<idGroup xmlns=""http://www.wiley.com/namespace...",,,...,"[https://openalex.org/W1524915722, https://ope...","[https://openalex.org/W272648321, https://open...",https://api.openalex.org/works/W4220779987/ngrams,"{'Inocelliidae': [0, 171, 248], 'is': [1, 31, ...",https://api.openalex.org/works?filter=cites:W4...,"[{'year': 2022, 'cited_by_count': 1}]",2023-01-31T06:51:37.351256,2022-04-03,,Raphidioptera


In [10]:
eu_single_authors = get_single_authors(eu_authors)
eu_single_authors

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,order
0,https://openalex.org/W2296586433,middle,https://openalex.org/A148011749,Thibaut De Meulemeester,,"Naturalis Biodiversity Center; Darwinweg 2, Po...",https://openalex.org/I1295562517,Naturalis Biodiversity Center,https://ror.org/0566bfb96,NL,...,"[https://openalex.org/W1113752191, https://ope...","[https://openalex.org/W1970080030, https://ope...",https://api.openalex.org/works/W2296586433/ngrams,"{'The': [0], 'current': [1], 'bumblebee': [2, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-20T06:34:31.661547,2016-06-24,,Hymenoptera
1,https://openalex.org/W1972905674,first,https://openalex.org/A1971633290,Adrien Merville,,"INSA-Lyon, UMR203 BF2I, INRA, Biologie Fonctio...",https://openalex.org/I4210129169,Biologie Fonctionnelle Insectes et Interactions,https://ror.org/03d1jma17,FR,...,"[https://openalex.org/W1482207190, https://ope...","[https://openalex.org/W1972905674, https://ope...",https://api.openalex.org/works/W1972905674/ngrams,"{'BackgroundWhereas': [0], 'the': [1, 6, 18, 3...",https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-28T22:21:41.190777,2016-06-24,,Coleoptera
2,https://openalex.org/W2899728726,middle,https://openalex.org/A2193365942,Hans-Hermann Thulke,https://orcid.org/0000-0002-7670-2231,,,,,,...,"[https://openalex.org/W1970237739, https://ope...","[https://openalex.org/W38938036, https://opena...",https://api.openalex.org/works/W2899728726/ngrams,"{'The': [0, 131], 'Panel': [1], 'on': [2, 70, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2023-01-30T00:15:39.975476,2018-11-16,,Coleoptera
3,https://openalex.org/W1989016437,middle,https://openalex.org/A1936048201,Cécile Clouet,,Centre de Biologie et de Gestion des Populatio...,https://openalex.org/I4210147278,Centre de Biologie et de Gestion des Populations,https://ror.org/05h7ddb14,FR,...,"[https://openalex.org/W1527063271, https://ope...","[https://openalex.org/W198468377, https://open...",https://api.openalex.org/works/W1989016437/ngrams,,https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-25T08:27:12.127090,2016-06-24,,Hemiptera
4,https://openalex.org/W2891309143,middle,https://openalex.org/A2075937296,Martin Grube,https://orcid.org/0000-0001-6940-5282,"Institute of Plant Sciences, Karl Franzens Uni...",https://openalex.org/I15766117,University of Graz,https://ror.org/01faaaf77,AT,...,"[https://openalex.org/W433978374, https://open...","[https://openalex.org/W1986682860, https://ope...",https://api.openalex.org/works/W2891309143/ngrams,"{'Leaf-inhabiting': [0], 'fungi': [1, 90, 127]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-22T22:43:06.575806,2018-09-27,,Lepidoptera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157843,https://openalex.org/W2968256771,middle,https://openalex.org/A2469802183,Nicolai V. Meyling,https://orcid.org/0000-0002-1147-8160,Department of Plant and Environmental Sciences...,https://openalex.org/I124055696,University of Copenhagen,https://ror.org/035b05819,DK,...,"[https://openalex.org/W106682486, https://open...","[https://openalex.org/W630885852, https://open...",https://api.openalex.org/works/W2968256771/ngrams,"{'The': [0, 91], 'effect': [1, 197], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-23T09:32:19.488017,2019-08-22,,Diptera
157844,https://openalex.org/W2968256771,middle,https://openalex.org/A2469802183,Nicolai V. Meyling,https://orcid.org/0000-0002-1147-8160,Department of Plant and Environmental Sciences...,https://openalex.org/I124055696,University of Copenhagen,https://ror.org/035b05819,DK,...,"[https://openalex.org/W106682486, https://open...","[https://openalex.org/W630885852, https://open...",https://api.openalex.org/works/W2968256771/ngrams,"{'The': [0, 91], 'effect': [1, 197], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-23T09:32:19.488017,2019-08-22,,Lepidoptera
157845,https://openalex.org/W2968256771,middle,https://openalex.org/A2469802183,Nicolai V. Meyling,https://orcid.org/0000-0002-1147-8160,Department of Plant and Environmental Sciences...,https://openalex.org/I124055696,University of Copenhagen,https://ror.org/035b05819,DK,...,"[https://openalex.org/W106682486, https://open...","[https://openalex.org/W630885852, https://open...",https://api.openalex.org/works/W2968256771/ngrams,"{'The': [0, 91], 'effect': [1, 197], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-23T09:32:19.488017,2019-08-22,,Neuroptera
157846,https://openalex.org/W2968256771,middle,https://openalex.org/A2469802183,Nicolai V. Meyling,https://orcid.org/0000-0002-1147-8160,Department of Plant and Environmental Sciences...,https://openalex.org/I124055696,University of Copenhagen,https://ror.org/035b05819,DK,...,"[https://openalex.org/W106682486, https://open...","[https://openalex.org/W630885852, https://open...",https://api.openalex.org/works/W2968256771/ngrams,"{'The': [0, 91], 'effect': [1, 197], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-23T09:32:19.488017,2019-08-22,,Hymenoptera


In [13]:
eu_single_authors.drop_duplicates(subset="author_id")

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,order
0,https://openalex.org/W2296586433,middle,https://openalex.org/A148011749,Thibaut De Meulemeester,,"Naturalis Biodiversity Center; Darwinweg 2, Po...",https://openalex.org/I1295562517,Naturalis Biodiversity Center,https://ror.org/0566bfb96,NL,...,"[https://openalex.org/W1113752191, https://ope...","[https://openalex.org/W1970080030, https://ope...",https://api.openalex.org/works/W2296586433/ngrams,"{'The': [0], 'current': [1], 'bumblebee': [2, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-20T06:34:31.661547,2016-06-24,,Hymenoptera
1,https://openalex.org/W1972905674,first,https://openalex.org/A1971633290,Adrien Merville,,"INSA-Lyon, UMR203 BF2I, INRA, Biologie Fonctio...",https://openalex.org/I4210129169,Biologie Fonctionnelle Insectes et Interactions,https://ror.org/03d1jma17,FR,...,"[https://openalex.org/W1482207190, https://ope...","[https://openalex.org/W1972905674, https://ope...",https://api.openalex.org/works/W1972905674/ngrams,"{'BackgroundWhereas': [0], 'the': [1, 6, 18, 3...",https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-28T22:21:41.190777,2016-06-24,,Coleoptera
2,https://openalex.org/W2899728726,middle,https://openalex.org/A2193365942,Hans-Hermann Thulke,https://orcid.org/0000-0002-7670-2231,,,,,,...,"[https://openalex.org/W1970237739, https://ope...","[https://openalex.org/W38938036, https://opena...",https://api.openalex.org/works/W2899728726/ngrams,"{'The': [0, 131], 'Panel': [1], 'on': [2, 70, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2023-01-30T00:15:39.975476,2018-11-16,,Coleoptera
3,https://openalex.org/W1989016437,middle,https://openalex.org/A1936048201,Cécile Clouet,,Centre de Biologie et de Gestion des Populatio...,https://openalex.org/I4210147278,Centre de Biologie et de Gestion des Populations,https://ror.org/05h7ddb14,FR,...,"[https://openalex.org/W1527063271, https://ope...","[https://openalex.org/W198468377, https://open...",https://api.openalex.org/works/W1989016437/ngrams,,https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-01-25T08:27:12.127090,2016-06-24,,Hemiptera
4,https://openalex.org/W2891309143,middle,https://openalex.org/A2075937296,Martin Grube,https://orcid.org/0000-0001-6940-5282,"Institute of Plant Sciences, Karl Franzens Uni...",https://openalex.org/I15766117,University of Graz,https://ror.org/01faaaf77,AT,...,"[https://openalex.org/W433978374, https://open...","[https://openalex.org/W1986682860, https://ope...",https://api.openalex.org/works/W2891309143/ngrams,"{'Leaf-inhabiting': [0], 'fungi': [1, 90, 127]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-22T22:43:06.575806,2018-09-27,,Lepidoptera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157790,https://openalex.org/W2560608333,last,https://openalex.org/A2124310331,Barbara Ekbom,,"Swedish Univ. of Agricultural Sciences, Uppsal...",https://openalex.org/I298625061,Swedish University of Agricultural Sciences,https://ror.org/02yy8x990,SE,...,"[https://openalex.org/W178446579, https://open...","[https://openalex.org/W1515979681, https://ope...",https://api.openalex.org/works/W2560608333/ngrams,"{'Integrating': [0], 'supporting': [1], 'and':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-31T14:48:41.928751,2016-12-16,,Coleoptera
157806,https://openalex.org/W2119401136,middle,https://openalex.org/A2058359948,Tiina Ylioja,,"Finnish Forest Research Institute, Vantaa (Fin...",https://openalex.org/I164042929,Finnish Forest Research Institute,,FI,...,"[https://openalex.org/W193449909, https://open...","[https://openalex.org/W1492841635, https://ope...",https://api.openalex.org/works/W2119401136/ngrams,"{'Among': [0], 'the': [1, 16, 28, 78, 94, 110,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2023-02-01T14:29:43.456491,2016-06-24,,Coleoptera
157807,https://openalex.org/W2804175287,last,https://openalex.org/A2144340538,Michael J. Stout,https://orcid.org/0000-0002-1262-1408,"Department of Entomology, Louisiana State Univ...",https://openalex.org/I134386540,Louisiana State University Agricultural Center,https://ror.org/01b8rza40,US,...,"[https://openalex.org/W279804777, https://open...","[https://openalex.org/W1995270920, https://ope...",https://api.openalex.org/works/W2804175287/ngrams,"{'Plants': [0], 'face': [1], 'numerous': [2], ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-30T20:41:00.354340,2018-06-01,,Coleoptera
157811,https://openalex.org/W2968256771,middle,https://openalex.org/A2469802183,Nicolai V. Meyling,https://orcid.org/0000-0002-1147-8160,Department of Plant and Environmental Sciences...,https://openalex.org/I124055696,University of Copenhagen,https://ror.org/035b05819,DK,...,"[https://openalex.org/W106682486, https://open...","[https://openalex.org/W630885852, https://open...",https://api.openalex.org/works/W2968256771/ngrams,"{'The': [0, 91], 'effect': [1, 197], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-01-23T09:32:19.488017,2019-08-22,,Coleoptera


In [14]:
eu_single_authors.drop_duplicates(subset="author_id")["orcid"].count() 

15881

In [None]:
paneu_authors = get_authors(paneu_insects)
paneu_authors

In [None]:
paneu_single_authors = get_single_authors(paneu_authors)
paneu_single_authors