# 1. Using OpenAlex to find taxonomists

## 1.2. Preprocessing OpenAlex article data into author data

Previously, we found a list of articles of taxonomic interest. Ultimately, we are interested in the authors, whom we assume are taxonomists or at least have relevant expertise about the taxon studied in the paper. We extract the information of the authors from the OpenAlex article data here. 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt # version 3.5.2
import pickle

## Functions

In [2]:
# information locked in dictionaries inside the dataframe: open access, host (journal)

def get_dict_info(df_input): # input: articles straight from openalex
    hostcols = ['host_id', 'issn_l', 'issn', 'host_display_name', 'publisher',
                'host_type', 'host_url', 'is_host_oa', 'host_version', 'host_license']
    
    #df = pd.DataFrame(columns = hostcols + ["is_oa", "oa_status", "oa_url"])
    new_rows = []
    
    for article in df_input.itertuples():
        # get host (journal) info
        # if there is a list within the dictionary, pandas will turn it into two rows
        if article.host_venue["issn"] != None and len(article.host_venue["issn"]) != 1:
            article.host_venue["issn"] = '\n'.join(article.host_venue["issn"])
            
        l_journal = list(article.host_venue.values())
        l_oa = list(article.open_access.values())
        # unite open access and journal info from this article and previous articles
        l_new = l_journal + l_oa
        
        new_rows.append(l_new)
    
    # unite data in dictionaries with accessible data
    new_df = pd.DataFrame(new_rows, columns=hostcols + ["is_oa", "oa_status", "oa_url"])
    return df_input.merge(new_df, left_index=True, right_index=True)

In [3]:
# get authorship information from raw dataframe WITH all other data

def get_authors(df_input): # input: articles after get_dict_info
    # create empty dataframe with all authorship attributes
    df = pd.DataFrame()
    authors_list = []
    
    for article in df_input.itertuples():
        authors = pd.DataFrame(article.authorships)
        
        if len(authors) != 0:
            # disassemble author info
            for author in authors.itertuples():
                new_info = [article.id]+[author.author_position]+list(author.author.values())+[author.raw_affiliation_string]
                
                # add institution info
                if len(author.institutions) != 0:
                    new_info += list(author.institutions[0].values()) 
                else:
                    # no institution, no info
                    new_info += [None, None, None, None, None]
                authors_list.append(new_info) 
    
    new_df = pd.DataFrame(authors_list, 
                          columns=["article_id", "author_position", "author_id", "author_display_name", "orcid",
                                   "raw_affiliation_string", 
                                   "inst_id", "inst_display_name", "ror", "inst_country_code", "inst_type"])
    df = pd.concat([df, new_df])
    
    return pd.merge(df, df_input, left_on="article_id", right_on="id")

In [48]:
# keep most recent publication per author

def get_single_authors(df_input): # input: authors with doubles
    keep = []
    
    for author in set(df_input["author_id"]):
        # get all rows that match author
        publications = df_input[df_input["author_id"]==author]
        # get most recent one
        most_recent = publications["publication_date"].max()
        keep += publications[publications["publication_date"]==most_recent].values.tolist()
    
    keep_df = pd.DataFrame(keep,
                           columns = df_input.columns)
    
    # drop duplicates because some articles may have been found twice through different queries 
    # and some authors have published multiple relevant articles on the same day
    return keep_df.drop_duplicates(subset=["author_id"])

In [5]:
# filter a list of authors for authors who are asscociated with at least one European institution

def get_european_authors(df_input, pan_europe=False): # input: authors
    keep = []
    
    eu_codes = ["AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR", "HU", "IE", # EU
                  "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE"] # EU
    paneu_codes = ["IS", "LI", "NO", "CH", "AL", "ME", "MK", "RS", "TR", "AD", "BY", "BA", "MD", "MC", # pan-Europe
                   "RU", "SM", "UA", "GB", "VA", "GE", "AM", "AZ"] # pan-Europe
    
    for author in df_input.itertuples():
        # check every affiliated institute
        if author.inst_country_code in eu_codes:
            keep.append(author)
        elif pan_europe and author.inst_country_code in paneu_codes:
            keep.append(author)
    
    return pd.DataFrame(keep)

## Results

In [49]:
eu_tax_articles = pd.read_pickle("./data/keyword_filtered_articles_EU27_with_taxonomy_concept_in_journal.pkl")
eu_tax_articles = get_dict_info(eu_tax_articles)
eu_tax_articles

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,primary_location,host_venue,type,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2598914299,https://doi.org/10.11646/bionomina.11.1.1,<p><strong>The nomenclatural status of <em>Hys...,<p><strong>The nomenclatural status of <em>Hys...,2017,2017-03-04,{'openalex': 'https://openalex.org/W2598914299...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,Bionomina,Q15088586,journal,,False,,,False,closed,
1,https://openalex.org/W2555892112,https://doi.org/10.11646/bionomina.10.1.1,"&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...","&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...",2016,2016-11-10,{'openalex': 'https://openalex.org/W2555892112...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,Bionomina,Q15088586,journal,https://www.mapress.com/j/bn/article/download/...,True,publishedVersion,,True,bronze,https://www.mapress.com/j/bn/article/download/...
2,https://openalex.org/W2597855587,https://doi.org/10.11646/bionomina.12.1.2,<strong>The need for reference specimens in zo...,<strong>The need for reference specimens in zo...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2597855587...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,Bionomina,Q15088586,journal,,False,,,False,closed,
3,https://openalex.org/W2611147945,https://doi.org/10.11646/bionomina.12.1.8,&lt;strong&gt;Diagnoses in zoological taxonomy...,&lt;strong&gt;Diagnoses in zoological taxonomy...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2611147945...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,Bionomina,Q15088586,journal,,False,,,False,closed,
4,https://openalex.org/W2996133267,https://doi.org/10.11646/bionomina.17.1.1,"<p align=""left""><strong>The Linz <em>Zoocode</...","<p align=""left""><strong>The Linz <em>Zoocode</...",2019,2019-12-17,{'openalex': 'https://openalex.org/W2996133267...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,Bionomina,Q15088586,journal,https://www.mapress.com/bn/article/download/bi...,True,publishedVersion,,True,bronze,https://www.mapress.com/bn/article/download/bi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12115,https://openalex.org/W3193573934,https://doi.org/10.31610/zsr/2021.30.2.169,First record of the genus Colopterus (Coleopte...,First record of the genus Colopterus (Coleopte...,2021,2021-08-14,{'openalex': 'https://openalex.org/W3193573934...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
12116,https://openalex.org/W3202031023,https://doi.org/10.31610/zsr/2021.30.2.190,New species and new records of cuckoo wasps (H...,New species and new records of cuckoo wasps (H...,2021,2021-10-05,{'openalex': 'https://openalex.org/W3202031023...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
12117,https://openalex.org/W4226104797,https://doi.org/10.31610/zsr/2022.31.1.27,A new species Chrysotus hubenovi and new data ...,A new species Chrysotus hubenovi and new data ...,2022,2022-04-04,{'openalex': 'https://openalex.org/W4226104797...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
12118,https://openalex.org/W4302015794,https://doi.org/10.31610/zsr/2022.31.2.182,New records of Bibionidae (Diptera) from Azerb...,New records of Bibionidae (Diptera) from Azerb...,2022,2022-10-04,{'openalex': 'https://openalex.org/W4302015794...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,


In [50]:
authors_eu_tax = get_authors(eu_tax_articles)
authors_eu_tax

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2598914299,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut de Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,FR,...,Bionomina,Q15088586,journal,,False,,,False,closed,
1,https://openalex.org/W2555892112,first,https://openalex.org/A164956482,Marc H.V. Van Regenmortel,https://orcid.org/0000-0002-8200-8465,UMR 7242 Biotechnologie et Signalisation Cellu...,https://openalex.org/I4210145673,Biotechnologie et Signalisation Cellulaire,https://ror.org/047fwb937,FR,...,Bionomina,Q15088586,journal,https://www.mapress.com/j/bn/article/download/...,True,publishedVersion,,True,bronze,https://www.mapress.com/j/bn/article/download/...
2,https://openalex.org/W2597855587,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut de Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,FR,...,Bionomina,Q15088586,journal,,False,,,False,closed,
3,https://openalex.org/W2611147945,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut de Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,FR,...,Bionomina,Q15088586,journal,,False,,,False,closed,
4,https://openalex.org/W2996133267,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut De Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,FR,...,Bionomina,Q15088586,journal,https://www.mapress.com/bn/article/download/bi...,True,publishedVersion,,True,bronze,https://www.mapress.com/bn/article/download/bi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52648,https://openalex.org/W4302015794,middle,https://openalex.org/A4302125241,"Manko, Peter",,"Department of Ecology, Faculty of Humanities a...",https://openalex.org/I173161963,University of Prešov,https://ror.org/02ndfsn03,SK,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
52649,https://openalex.org/W4302015794,last,https://openalex.org/A4302125242,"Oboňa, Jozef",,"Department of Ecology, Faculty of Humanities a...",https://openalex.org/I173161963,University of Prešov,https://ror.org/02ndfsn03,SK,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
52650,https://openalex.org/W4309431963,first,https://openalex.org/A4309432000,"Dell'Angelo, Bruno",,"Via Briscata 16, 16154, Genova, Italy",,"Via Briscata 16, 16154, Genova, Italy",,,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
52651,https://openalex.org/W4309431963,middle,https://openalex.org/A4309432001,"Sirenko, Boris I.",,"Zoological Institute, Russian Academy of Scien...",https://openalex.org/I2801680271,Zoological Institute,https://ror.org/05snbjh64,RU,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,


In [51]:
only_eu_authors = get_european_authors(authors_eu_tax)
only_eu_authors

Unnamed: 0,Index,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,0,https://openalex.org/W2598914299,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut de Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,...,Bionomina,Q15088586,journal,,False,,,False,closed,
1,1,https://openalex.org/W2555892112,first,https://openalex.org/A164956482,Marc H.V. Van Regenmortel,https://orcid.org/0000-0002-8200-8465,UMR 7242 Biotechnologie et Signalisation Cellu...,https://openalex.org/I4210145673,Biotechnologie et Signalisation Cellulaire,https://ror.org/047fwb937,...,Bionomina,Q15088586,journal,https://www.mapress.com/j/bn/article/download/...,True,publishedVersion,,True,bronze,https://www.mapress.com/j/bn/article/download/...
2,2,https://openalex.org/W2597855587,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut de Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,...,Bionomina,Q15088586,journal,,False,,,False,closed,
3,3,https://openalex.org/W2611147945,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut de Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,...,Bionomina,Q15088586,journal,,False,,,False,closed,
4,4,https://openalex.org/W2996133267,first,https://openalex.org/A2596859121,Alain Dubois,https://orcid.org/0000-0002-6463-3435,"Institut De Systématique, Évolution, Biodivers...",https://openalex.org/I4210103322,"Institut de Systématique, Évolution, Biodiversité",https://ror.org/01dadvw90,...,Bionomina,Q15088586,journal,https://www.mapress.com/bn/article/download/bi...,True,publishedVersion,,True,bronze,https://www.mapress.com/bn/article/download/bi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26075,52643,https://openalex.org/W4226104797,first,https://openalex.org/A2680309858,Mihail Kechev,,"Forest Research Institute, Bulgarian Academy o...",https://openalex.org/I24768866,Bulgarian Academy of Sciences,https://ror.org/01x8hew03,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
26076,52646,https://openalex.org/W4302015794,first,https://openalex.org/A4302125239,"Dvořák, Libor",,"Tři Sekery 21, CZ – 353 01 Mariánské Lázně, Cz...",https://openalex.org/I4210137292,Třinecké Železárny,https://ror.org/041c53527,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
26077,52648,https://openalex.org/W4302015794,middle,https://openalex.org/A4302125241,"Manko, Peter",,"Department of Ecology, Faculty of Humanities a...",https://openalex.org/I173161963,University of Prešov,https://ror.org/02ndfsn03,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
26078,52649,https://openalex.org/W4302015794,last,https://openalex.org/A4302125242,"Oboňa, Jozef",,"Department of Ecology, Faculty of Humanities a...",https://openalex.org/I173161963,University of Prešov,https://ror.org/02ndfsn03,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,


In [52]:
single_eu_authors = get_single_authors(only_eu_authors).reset_index(drop=True)
single_eu_authors

Unnamed: 0,Index,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,45521,https://openalex.org/W1198660990,middle,https://openalex.org/A1972576541,Bernard Laurin,,"Biogéosciences, UMR CNRS 6282, Université de B...",https://openalex.org/I4210158426,Biogéosciences,https://ror.org/04mzqjs78,...,Molecular Phylogenetics and Evolution,Elsevier BV,journal,,False,,,True,green,https://hal.archives-ouvertes.fr/hal-01205849/...
1,14863,https://openalex.org/W3135121471,last,https://openalex.org/A2223790226,Roberto Venanzoni,https://orcid.org/0000-0002-7768-0468,"Department of Chemistry, Biology and Biotechno...",https://openalex.org/I27483092,University of Perugia,https://ror.org/00x27da85,...,Phytotaxa,Q15088586,journal,,False,,,False,closed,
2,52330,https://openalex.org/W4200400740,middle,https://openalex.org/A1690698565,Benoit Gouillieux,https://orcid.org/0000-0002-5412-8582,"CNRS, Univ. Bordeaux, EPOC, UMR 5805, Station ...",https://openalex.org/I1294671590,French National Centre for Scientific Research,https://ror.org/02feahw73,...,European journal of taxonomy,Consortium of European Natural History Museums,journal,https://europeanjournaloftaxonomy.eu/index.php...,True,publishedVersion,cc-by,True,gold,https://europeanjournaloftaxonomy.eu/index.php...
3,40361,https://openalex.org/W2916627082,first,https://openalex.org/A1960157388,Filippo Milano,https://orcid.org/0000-0003-3573-2159,"Lab. di Ecologia – Ecosistemi terrestri, Dipar...",https://openalex.org/I55143463,University of Turin,https://ror.org/048tbm396,...,Zoosystema,Muséum national d'histoire naturelle,journal,,False,,,True,green,https://hal-mnhn.archives-ouvertes.fr/mnhn-027...
4,44992,https://openalex.org/W2592793920,middle,https://openalex.org/A2756326157,Arlete Ramos dos Santos,https://orcid.org/0000-0003-0217-3805,i3S - Instituto de Investigação e Inovação em ...,https://openalex.org/I4210158732,i3S - Health Research and Innovation Institute,https://ror.org/04wjk1035,...,Molecular Phylogenetics and Evolution,Elsevier BV,journal,,False,,,False,closed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9602,12745,https://openalex.org/W2074722678,middle,https://openalex.org/A2279582608,Giovanni Perrone,https://orcid.org/0000-0002-9336-8810,Institute of Sciences of Food Production Natio...,https://openalex.org/I4210131270,Institute of Sciences of Food Production,https://ror.org/03x7xkr71,...,Studies in Mycology,Elsevier BV,journal,https://doi.org/10.1016/j.simyco.2014.07.004,True,publishedVersion,cc-by-nc-nd,True,gold,https://doi.org/10.1016/j.simyco.2014.07.004
9603,34351,https://openalex.org/W3195655516,last,https://openalex.org/A2555322479,Bernard R. Landau,,,https://openalex.org/I1295562517,Naturalis Biodiversity Center,https://ror.org/0566bfb96,...,Zootaxa,Q15088586,journal,,False,,,False,closed,
9604,23180,https://openalex.org/W2789590705,middle,https://openalex.org/A2052733317,Urban Tillmann,https://orcid.org/0000-0002-8207-4382,Alfred-Wegener-Institut – Helmholtz-Zentrum fü...,https://openalex.org/I127251866,Alfred Wegener Institute for Polar and Marine ...,https://ror.org/032e6b942,...,Taxon,Wiley,journal,,False,,,True,green,https://epic.awi.de/id/eprint/46918/1/2018_Got...
9605,16791,https://openalex.org/W2185382798,middle,https://openalex.org/A2147050111,Agustín Caballero,,"C/ Andalucía 3, 4.º dcha. 26500 Calahorra, La ...",https://openalex.org/I918821925,Regional Government of Andalusia,https://ror.org/01jem9c82,...,Phytotaxa,Q15088586,journal,,False,,,False,closed,


In [53]:
only_eu_authors.to_pickle("./data/EU27_authors_with_all_taxonomic_articles.pkl")
single_eu_authors.to_pickle("./data/EU27_authors_taxonomic_articles_no_duplicates.pkl")

We now have a list of all European authors of taxonomic articles from taxonomic journals. 

## Insects (RLIT methodology)

In [54]:
insect_articles = pd.read_pickle("./data/RLIT_method_openalex_all_insect_articles.pkl")
insect_articles = get_dict_info(insect_articles)
insect_articles

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,primary_location,host_venue,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2075105050,https://doi.org/10.3897/zookeys.186.2947,New species and distributional records of Aleo...,New species and distributional records of Aleo...,186.240310,2012,2012-04-26,{'openalex': 'https://openalex.org/W2075105050...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
0,https://openalex.org/W1971821649,https://doi.org/10.1590/0074-0276130434,"Rhodnius barretti, a new species of Triatomina...","Rhodnius barretti, a new species of Triatomina...",281.430050,2013,2013-01-01,{'openalex': 'https://openalex.org/W1971821649...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S165991124', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
0,https://openalex.org/W2188404336,https://doi.org/10.11646/zootaxa.2740.1.1,New species of and taxonomic notes on Anastrep...,New species of and taxonomic notes on Anastrep...,231.417570,2011,2011-01-14,{'openalex': 'https://openalex.org/W2188404336...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S171471881', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
0,https://openalex.org/W2039380629,https://doi.org/10.1371/journal.pone.0122407,Phylogenetic Molecular Species Delimitations U...,Phylogenetic Molecular Species Delimitations U...,237.500000,2015,2015-04-08,{'openalex': 'https://openalex.org/W2039380629...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S202381698', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
0,https://openalex.org/W2092029632,https://doi.org/10.1007/s13127-012-0123-1,"Biogeography, ecology, acoustics and chromosom...","Biogeography, ecology, acoustics and chromosom...",174.616700,2013,2013-02-09,{'openalex': 'https://openalex.org/W2092029632...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S190011727', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11701,https://openalex.org/W2915290188,https://doi.org/10.1134/s0013873818080134,"A Review of the Leaf-Beetle Fauna (Coleoptera,...","A Review of the Leaf-Beetle Fauna (Coleoptera,...",41.702457,2018,2018-11-01,{'openalex': 'https://openalex.org/W2915290188...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S17264655', 'issn...",...,Entomological Review,Pleiades Publishing,journal,,False,,,False,closed,
11702,https://openalex.org/W2185029227,,A preliminary study on the species diversity o...,A preliminary study on the species diversity o...,33.008816,2012,2012-01-01,{'openalex': 'https://openalex.org/W2185029227...,"{'is_oa': None, 'landing_page_url': None, 'pdf...","{'id': None, 'issn_l': None, 'issn': None, 'di...",...,,,,,False,,,False,,
11703,https://openalex.org/W2155430078,https://doi.org/10.4038/tapro.v4i2.5058,Bolboceratine scarabs of genera Bolbohamatum K...,Bolboceratine scarabs of genera Bolbohamatum K...,20.918540,2012,2012-12-25,{'openalex': 'https://openalex.org/W2155430078...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764827249', 'is...",...,TAPROBANICA: The Journal of Asian Biodiversity,"Research Center for Climate Change, University...",journal,http://tapro.sljol.info/articles/10.4038/tapro...,True,publishedVersion,cc-by,True,hybrid,http://tapro.sljol.info/articles/10.4038/tapro...
11704,https://openalex.org/W2923711627,https://doi.org/10.33800/nc.v0i5.141,Corrigenda a los escarabajos (Coleoptera) en l...,Corrigenda a los escarabajos (Coleoptera) en l...,,2012,2012-07-01,{'openalex': 'https://openalex.org/W2923711627...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210185428', 'is...",...,Novitates Caribaea,"Museo Nacional de Historia Natural, Prof. Euge...",journal,http://novitatescaribaea.do/index.php/novitate...,True,publishedVersion,cc-by-nc,True,gold,http://novitatescaribaea.do/index.php/novitate...


In [55]:
authors_insects = get_authors(insect_articles)
authors_insects

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2075105050,first,https://openalex.org/A2634481662,Adam Brunke,https://orcid.org/0000-0003-1158-936X,Zoological Museum (Natural History Museum of D...,https://openalex.org/I4210110903,Natural History Museum,https://ror.org/0166x0j30,DK,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
1,https://openalex.org/W2075105050,middle,https://openalex.org/A1954442483,Jan Klimaszewski,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
2,https://openalex.org/W2075105050,middle,https://openalex.org/A2592732627,Julie-Anne Dorval,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
3,https://openalex.org/W2075105050,middle,https://openalex.org/A2021629399,Caroline Bourdon,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
4,https://openalex.org/W2075105050,middle,https://openalex.org/A1430890116,S. M. Paiero,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199667,https://openalex.org/W2155430078,first,https://openalex.org/A2112005922,Kailash Chandra,https://orcid.org/0000-0001-9076-5442,"Zoological Survey of India, New Alipore, Kolka...",https://openalex.org/I1338006420,Zoological Survey of India,https://ror.org/00h6p6a20,IN,...,TAPROBANICA: The Journal of Asian Biodiversity,"Research Center for Climate Change, University...",journal,http://tapro.sljol.info/articles/10.4038/tapro...,True,publishedVersion,cc-by,True,hybrid,http://tapro.sljol.info/articles/10.4038/tapro...
199668,https://openalex.org/W2155430078,last,https://openalex.org/A2128748613,Devanshu Gupta,https://orcid.org/0000-0001-9188-4689,"Zoological Survey of India, Jabalpur 482002, M...",https://openalex.org/I1338006420,Zoological Survey of India,https://ror.org/00h6p6a20,IN,...,TAPROBANICA: The Journal of Asian Biodiversity,"Research Center for Climate Change, University...",journal,http://tapro.sljol.info/articles/10.4038/tapro...,True,publishedVersion,cc-by,True,hybrid,http://tapro.sljol.info/articles/10.4038/tapro...
199669,https://openalex.org/W2923711627,first,https://openalex.org/A2798555197,Daniel E. Perez-Gelabert,,United States National Museum of Natural Histo...,https://openalex.org/I1341618623,National Museum of Natural History,https://ror.org/00cz47042,US,...,Novitates Caribaea,"Museo Nacional de Historia Natural, Prof. Euge...",journal,http://novitatescaribaea.do/index.php/novitate...,True,publishedVersion,cc-by-nc,True,gold,http://novitatescaribaea.do/index.php/novitate...
199670,https://openalex.org/W2785376982,first,https://openalex.org/A2531322189,Anderson Arenas-Clavijo,https://orcid.org/0000-0001-5639-5273,"Sección de Entomología,#R##N#Departamento de B...",https://openalex.org/I91732220,University of Valle,https://ror.org/00jb9vg53,CO,...,Biota Colombiana,Alexander von Humboldt Biological Resources Re...,journal,https://doi.org/10.21068/c2017.v18n02a19,True,publishedVersion,cc-by-nc-nd,True,gold,https://doi.org/10.21068/c2017.v18n02a19


In [56]:
singles_insects = get_single_authors(authors_insects)
singles_insects

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2991362080,first,https://openalex.org/A2313154617,Marc F. DiGirolomo,https://orcid.org/0000-0001-7596-3109,"(MFD) United States Forest Service, State and ...",https://openalex.org/I1313416372,US Forest Service,https://ror.org/03zmjc935,US,...,Proceedings of the Entomological Society of Wa...,BioOne (Entomological Society of Washington),journal,,False,,,False,closed,
1,https://openalex.org/W3007928204,first,https://openalex.org/A2524792898,Edgar Uriel Garduño-Montes de Oca,,"Departamento de Biología Evolutiva, Facultad d...",https://openalex.org/I8961855,National Autonomous University of Mexico,https://ror.org/01tmp8f25,MX,...,Journal of Parasitology,American Society of Parasitologists,journal,,False,,,False,closed,
2,https://openalex.org/W2978240330,middle,https://openalex.org/A2103720066,Violeta Llanes Hernández,,"Departamento de Ecología y Recursos Naturales,...",https://openalex.org/I8961855,National Autonomous University of Mexico,https://ror.org/01tmp8f25,MX,...,Acta zoológica mexicana,"Instituto de Ecologia, A.C.",journal,https://azm.ojs.inecol.mx/index.php/azm/articl...,True,publishedVersion,cc-by-nc-sa,True,gold,https://azm.ojs.inecol.mx/index.php/azm/articl...
3,https://openalex.org/W3015756137,middle,https://openalex.org/A3012815696,Norbert Kouakou Kouadio,,,,,,,...,Zoosystematica Rossica,Zoological Institute of the Russian Academy of...,journal,,False,,,False,closed,
28,https://openalex.org/W2156582146,middle,https://openalex.org/A2107906951,Jung-Won Hwang,https://orcid.org/0000-0002-0887-6889,"Animal, Plant & Fisheries Quarantine & Inspect...",https://openalex.org/I4210160954,Animal and Plant Quarantine Agency,https://ror.org/04sbe6g90,KR,...,African Invertebrates,Pensoft Publishers,journal,https://doi.org/10.5733/afin.056.0114,True,publishedVersion,cc-by,True,gold,https://doi.org/10.5733/afin.056.0114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91867,https://openalex.org/W4293109490,first,https://openalex.org/A2589635970,Fernando Rodríguez,https://orcid.org/0000-0002-7237-7443,,,,,,...,Кавказский энтомологический бюллетень,Southern Scientific Center of the Russian Acad...,journal,https://doi.org/10.23885/1814-3326-2012-8-1-21-23,True,publishedVersion,,True,bronze,https://doi.org/10.23885/1814-3326-2012-8-1-21-23
91868,https://openalex.org/W2511140662,last,https://openalex.org/A3030163424,Xuefeng Qin,,"Department of Plant Protection, Henan Institut...",https://openalex.org/I4210163247,Henan Institute of Science and Technology,https://ror.org/0578f1k82,CN,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/35532/down...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/35532/down...
91869,https://openalex.org/W3006371900,middle,https://openalex.org/A2720161091,Victor Phani,,"Division of Nematology, ICAR-Indian Agricultur...",https://openalex.org/I45509622,Indian Agricultural Research Institute,https://ror.org/01bzgdw81,IN,...,HAL (Le Centre pour la Communication Scientifi...,Le Centre pour la Communication Scientifique D...,repository,https://hal.archives-ouvertes.fr/hal-03608424/...,True,submittedVersion,,True,green,https://hal.archives-ouvertes.fr/hal-03608424/...
91870,https://openalex.org/W3116948575,middle,https://openalex.org/A2010641205,Christine Kerschbamer,,"Laimburg Research Centre, Laimburg 6, Pfatten ...",,"Laimburg Research Centre, Laimburg 6, Pfatten ...",,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/3539/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/3539/downl...


In [57]:
authors_insects.to_pickle("./data/all_authors_insect_articles_RLIT_method.pkl")
singles_insects.to_pickle("./data/authors_insect_articles_RLIT_method_no_duplicates.pkl")

In [58]:
eu_insects = pd.read_pickle("./data/RLIT_method_openalex_EU27_insect_articles.pkl")
eu_insects = get_dict_info(eu_insects)
eu_insects

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,primary_location,host_venue,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2075105050,https://doi.org/10.3897/zookeys.186.2947,New species and distributional records of Aleo...,New species and distributional records of Aleo...,186.24031,2012,2012-04-26,{'openalex': 'https://openalex.org/W2075105050...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
1,https://openalex.org/W2074050863,https://doi.org/10.3897/zookeys.250.3715,Introduction of the Exocelina ekari-group with...,Introduction of the Exocelina ekari-group with...,134.67500,2012,2012-12-13,{'openalex': 'https://openalex.org/W2074050863...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/3440/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/3440/downl...
2,https://openalex.org/W2124627356,https://doi.org/10.3161/000345411x622525,A New Species ofHenosepilachnaLi (Coleoptera: ...,A New Species ofHenosepilachnaLi (Coleoptera: ...,118.18758,2011,2011-12-01,{'openalex': 'https://openalex.org/W2124627356...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S24891482', 'issn...",...,Annales Zoologici,Polish Academy of Sciences,journal,,False,,,False,closed,
3,https://openalex.org/W2470267224,https://doi.org/10.1017/jpa.2016.51,New species from Late Cretaceous New Jersey am...,New species from Late Cretaceous New Jersey am...,113.15040,2016,2016-08-23,{'openalex': 'https://openalex.org/W2470267224...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S143791635', 'iss...",...,Journal of Paleontology,Paleontological Society,journal,,False,,,False,closed,
4,https://openalex.org/W2297951136,https://doi.org/10.3897/zookeys.572.6763,A contribution to the knowledge of the mountai...,A contribution to the knowledge of the mountai...,109.41350,2016,2016-03-15,{'openalex': 'https://openalex.org/W2297951136...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/6763/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/6763/downl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,https://openalex.org/W3000603933,https://doi.org/10.3390/insects11010051,Molecular Phylogeny and Infraordinal Classific...,Molecular Phylogeny and Infraordinal Classific...,115.29168,2020,2020-01-12,{'openalex': 'https://openalex.org/W3000603933...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S28057480', 'issn...",...,Insects,MDPI,journal,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...,True,publishedVersion,cc-by,True,gold,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...
8120,https://openalex.org/W2300829245,https://doi.org/10.1146/annurev-ento-010715-02...,Structure and Evolution of Insect Sperm: New I...,Structure and Evolution of Insect Sperm: New I...,341.63333,2016,2016-03-16,{'openalex': 'https://openalex.org/W2300829245...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S92576693', 'issn...",...,Annual Review of Entomology,Annual Reviews,journal,,False,,,False,closed,
8121,https://openalex.org/W2792039220,https://doi.org/10.7717/peerj.5126,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,118.58033,2018,2018-06-26,{'openalex': 'https://openalex.org/W2792039220...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S1983995261', 'is...",...,PeerJ,"PeerJ, Inc.",journal,https://doi.org/10.7717/peerj.5126,True,publishedVersion,cc-by,True,gold,https://doi.org/10.7717/peerj.5126
8122,https://openalex.org/W4243684135,https://doi.org/10.7287/peerj.preprints.26662,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,,2018,2018-03-12,{'openalex': 'https://openalex.org/W4243684135...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': None, 'issn_l': None, 'issn': None, 'di...",...,,,,https://doi.org/10.7287/peerj.preprints.26662,False,,,True,green,https://europepmc.org/articles/pmc6025149?pdf=...


In [59]:
eu_insect_authors = get_authors(eu_insects)
eu_insect_authors

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,https://openalex.org/W2075105050,first,https://openalex.org/A2634481662,Adam Brunke,https://orcid.org/0000-0003-1158-936X,Zoological Museum (Natural History Museum of D...,https://openalex.org/I4210110903,Natural History Museum,https://ror.org/0166x0j30,DK,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
1,https://openalex.org/W2075105050,middle,https://openalex.org/A1954442483,Jan Klimaszewski,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
2,https://openalex.org/W2075105050,middle,https://openalex.org/A2592732627,Julie-Anne Dorval,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
3,https://openalex.org/W2075105050,middle,https://openalex.org/A2021629399,Caroline Bourdon,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
4,https://openalex.org/W2075105050,middle,https://openalex.org/A1430890116,S. M. Paiero,,,,,,,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44683,https://openalex.org/W4243684135,last,https://openalex.org/A2168305379,Reed F. Johnson,https://orcid.org/0000-0002-4312-0550,"Department of Entomology, Ohio State Universit...",,"Department of Entomology, Ohio State Universit...",,,...,,,,https://doi.org/10.7287/peerj.preprints.26662,False,,,True,green,https://europepmc.org/articles/pmc6025149?pdf=...
44684,https://openalex.org/W4246017557,first,https://openalex.org/A2270236954,Rodney T. Richardson,https://orcid.org/0000-0002-4443-1705,"Department of Entomology, Ohio State Universit...",https://openalex.org/I52357470,The Ohio State University,https://ror.org/00rs6vg23,US,...,,,,https://doi.org/10.7287/peerj.preprints.26662v1,False,,,True,green,https://europepmc.org/articles/pmc6025149?pdf=...
44685,https://openalex.org/W4246017557,middle,https://openalex.org/A79282783,Johan Bengtsson-Palme,https://orcid.org/0000-0002-6528-3158,"Department of Infectious Diseases, Institute o...",https://openalex.org/I881427289,University of Gothenburg,https://ror.org/01tm6cn81,SE,...,,,,https://doi.org/10.7287/peerj.preprints.26662v1,False,,,True,green,https://europepmc.org/articles/pmc6025149?pdf=...
44686,https://openalex.org/W4246017557,middle,https://openalex.org/A2237159663,Mary M. Gardiner,https://orcid.org/0000-0003-1796-6146,"Department of Entomology, Ohio State Universit...",https://openalex.org/I52357470,The Ohio State University,https://ror.org/00rs6vg23,US,...,,,,https://doi.org/10.7287/peerj.preprints.26662v1,False,,,True,green,https://europepmc.org/articles/pmc6025149?pdf=...


In [63]:
only_eu_insect_authors = get_european_authors(eu_insect_authors, pan_europe=False)
only_eu_insect_authors

Unnamed: 0,Index,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,0,https://openalex.org/W2075105050,first,https://openalex.org/A2634481662,Adam Brunke,https://orcid.org/0000-0003-1158-936X,Zoological Museum (Natural History Museum of D...,https://openalex.org/I4210110903,Natural History Museum,https://ror.org/0166x0j30,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/2702/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/2702/downl...
1,6,https://openalex.org/W2074050863,first,https://openalex.org/A58389920,Helena Shaverdo,https://orcid.org/0000-0001-5034-7342,"Naturhistorisches Museum, Burgring 7, A-1010 V...",https://openalex.org/I1279955124,Natural History Museum Vienna,https://ror.org/01tv5y993,...,ZooKeys,Pensoft Publishers,journal,https://zookeys.pensoft.net/article/3440/downl...,True,publishedVersion,cc-by,True,gold,https://zookeys.pensoft.net/article/3440/downl...
2,10,https://openalex.org/W2124627356,first,https://openalex.org/A1616833297,Karol Szawaryn,https://orcid.org/0000-0002-9329-4268,"Museum and Institute of Zoology, Polish Academ...",https://openalex.org/I2802889272,Museum and Institute of Zoology,https://ror.org/00r9k8q20,...,Annales Zoologici,Polish Academy of Sciences,journal,,False,,,False,closed,
3,11,https://openalex.org/W2470267224,first,https://openalex.org/A2309831076,David Peris,https://orcid.org/0000-0001-9912-8802,Universitat de Barcelona,https://openalex.org/I71999127,University of Barcelona,https://ror.org/021018s57,...,Journal of Paleontology,Paleontological Society,journal,,False,,,False,closed,
4,12,https://openalex.org/W2470267224,last,https://openalex.org/A2225365894,Jiří Háva,,Czech University of Life Sciences Kamýcká 1176,https://openalex.org/I205984670,Czech University of Life Sciences Prague,https://ror.org/0415vcw02,...,Journal of Paleontology,Paleontological Society,journal,,False,,,False,closed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25255,44674,https://openalex.org/W3000603933,middle,https://openalex.org/A1210563886,Ivona Horká,https://orcid.org/0000-0002-8942-9481,Department of Biology and Ecology and Institut...,https://openalex.org/I187293425,University of Ostrava,https://ror.org/00pyqav47,...,Insects,MDPI,journal,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...,True,publishedVersion,cc-by,True,gold,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...
25256,44675,https://openalex.org/W3000603933,last,https://openalex.org/A1908576969,Robin Kundrata,https://orcid.org/0000-0001-9397-1030,"Department of Zoology, Faculty of Science, Pal...",https://openalex.org/I70703428,"Palacký University, Olomouc",https://ror.org/04qxnmv42,...,Insects,MDPI,journal,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...,True,publishedVersion,cc-by,True,gold,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...
25257,44677,https://openalex.org/W2792039220,middle,https://openalex.org/A79282783,Johan Bengtsson-Palme,https://orcid.org/0000-0002-6528-3158,"Department of Infectious Diseases, Institute o...",https://openalex.org/I881427289,University of Gothenburg,https://ror.org/01tm6cn81,...,PeerJ,"PeerJ, Inc.",journal,https://doi.org/10.7717/peerj.5126,True,publishedVersion,cc-by,True,gold,https://doi.org/10.7717/peerj.5126
25258,44681,https://openalex.org/W4243684135,middle,https://openalex.org/A79282783,Johan Bengtsson-Palme,https://orcid.org/0000-0002-6528-3158,"Department of Infectious Diseases, Institute o...",https://openalex.org/I881427289,University of Gothenburg,https://ror.org/01tm6cn81,...,,,,https://doi.org/10.7287/peerj.preprints.26662,False,,,True,green,https://europepmc.org/articles/pmc6025149?pdf=...


In [64]:
eu_single_insect_authors = get_single_authors(only_eu_insect_authors)
eu_single_insect_authors

Unnamed: 0,Index,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,...,host_display_name,publisher,host_type,host_url,is_host_oa,host_version,host_license,is_oa,oa_status,oa_url
0,42085,https://openalex.org/W2023336202,first,https://openalex.org/A2887022682,Ivan Juric,,Research Institute of Organic Agriculture (FiB...,https://openalex.org/I79093821,Research Institute of Organic Agriculture,https://ror.org/03jmahf97,...,Biocontrol,Springer Science+Business Media,journal,,False,,,True,green,http://doc.rero.ch/record/330962/files/10526_2...
1,40301,https://openalex.org/W1945992140,first,https://openalex.org/A2494006551,Pietro Lo Cascio,,"Nesos - Island Biodiversity Research, Lipari",https://openalex.org/I1295562517,Naturalis Biodiversity Center,https://ror.org/0566bfb96,...,Fragmenta entomologica,PAGEPress (Italy),journal,https://doi.org/10.4081/fe.2014.76,True,publishedVersion,cc-by-nc,True,gold,https://doi.org/10.4081/fe.2014.76
2,19622,https://openalex.org/W2338054385,middle,https://openalex.org/A2290164705,Andrea Carolina Wanumen,,"Departamento de Protección de Cultivos, Escuel...",https://openalex.org/I88060688,Technical University of Madrid,https://ror.org/03n6nwv02,...,Insects,MDPI,journal,https://www.mdpi.com/2075-4450/7/2/15/pdf,True,publishedVersion,cc-by,True,gold,https://www.mdpi.com/2075-4450/7/2/15/pdf?vers...
6,2532,https://openalex.org/W2563782945,middle,https://openalex.org/A2286190915,Julio Ferrer,,"2Departement of Zoology, Swedish Museum of Nat...",https://openalex.org/I2801711128,Swedish Museum of Natural History,https://ror.org/05k323c76,...,Annales Zoologici,Polish Academy of Sciences,journal,,False,,,False,closed,
7,28179,https://openalex.org/W2101470019,middle,https://openalex.org/A2558964123,Magdalena Witek,https://orcid.org/0000-0002-6172-8981,"Department of Animal and Human Biology, Univer...",https://openalex.org/I55143463,University of Turin,https://ror.org/048tbm396,...,Ecological Entomology,Royal Entomological Society,journal,,False,,,False,closed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12501,16711,https://openalex.org/W2120745015,middle,https://openalex.org/A1402554243,Örjan Östman,https://orcid.org/0000-0002-1930-0148,Evolutionary Biology Centre. Uppsala University,https://openalex.org/I123387679,Uppsala University,https://ror.org/048a87296,...,Bulletin of Entomological Research,Cambridge University Press,journal,,False,,,False,closed,
12510,17020,https://openalex.org/W2275215172,last,https://openalex.org/A2132235658,Andrzej Wolski,,"Department of Biosystematics, Opole University...",https://openalex.org/I170583851,Opole University,https://ror.org/04gbpnx96,...,Zootaxa,Q15088586,journal,,False,,,False,closed,
12511,5413,https://openalex.org/W2925408631,middle,https://openalex.org/A1543142765,Szymon Chowański,https://orcid.org/0000-0002-5667-1781,Department of Animal Physiology and Developmen...,https://openalex.org/I59411706,Adam Mickiewicz University in Poznań,https://ror.org/04g6bbq64,...,Frontiers in Physiology,Frontiers Media,journal,https://www.frontiersin.org/articles/10.3389/f...,True,publishedVersion,cc-by,True,gold,https://www.frontiersin.org/articles/10.3389/f...
12512,33360,https://openalex.org/W2918328222,last,https://openalex.org/A2784830561,S. V. Kovalev,,"Parkovaya str. 38, Dolgoprudnyi",https://openalex.org/I88459447,Novay,https://ror.org/00266dp40,...,Ukrainian Journal of Ecology,Alex Matsyura Publishing,journal,http://ojs.mdpu.org.ua/index.php/biol/article/...,False,,,False,,


In [66]:
only_eu_insect_authors.to_pickle("./data/EU27_all_authors_insect_articles_RLIT_method.pkl")
eu_single_insect_authors.to_pickle("./data/EU27_authors_insect_articles_RLIT_method_no_duplicates.pkl")