# 1. Using OpenAlex to find taxonomists

## 1.4. Preprocessing RLIT data into author data

Previously, we found a list of articles of taxonomic interest. Ultimately, we are interested in the authors, whom we assume are taxonomists or at least have relevant expertise about the taxon studied in the paper. We extract the information of the authors from the OpenAlex article data here. 

In [1]:
import pandas as pd
import pickle
import openalex

In [2]:
insect_articles = pd.read_pickle("./data/RLIT_method_openalex_all_insect_articles.pkl")
insect_articles = openalex.flatten_works(insect_articles)
insect_articles

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,language,primary_location,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,https://openalex.org/W2075105050,https://doi.org/10.3897/zookeys.186.2947,New species and distributional records of Aleo...,New species and distributional records of Aleo...,187.254730,2012,2012-04-26,{'openalex': 'https://openalex.org/W2075105050...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
0,https://openalex.org/W2615228164,https://doi.org/10.3897/zookeys.675.12024,A new species of Rhodnius from Brazil (Hemipte...,A new species of Rhodnius from Brazil (Hemipte...,284.476440,2017,2017-05-18,{'openalex': 'https://openalex.org/W2615228164...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
0,https://openalex.org/W2188404336,https://doi.org/10.11646/zootaxa.2740.1.1,New species of and taxonomic notes on Anastrep...,New species of and taxonomic notes on Anastrep...,230.836330,2011,2011-01-14,{'openalex': 'https://openalex.org/W2188404336...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
0,https://openalex.org/W2039380629,https://doi.org/10.1371/journal.pone.0122407,Phylogenetic Molecular Species Delimitations U...,Phylogenetic Molecular Species Delimitations U...,241.927760,2015,2015-04-08,{'openalex': 'https://openalex.org/W2039380629...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
0,https://openalex.org/W2092029632,https://doi.org/10.1007/s13127-012-0123-1,"Biogeography, ecology, acoustics and chromosom...","Biogeography, ecology, acoustics and chromosom...",174.887340,2013,2013-02-09,{'openalex': 'https://openalex.org/W2092029632...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11688,https://openalex.org/W3106964018,https://doi.org/10.31396/biodiv.jour.2020.11.3...,On the footsteps of Théodore Monod: biogeograp...,On the footsteps of Théodore Monod: biogeograp...,,2020,2020-09-30,{'openalex': 'https://openalex.org/W3106964018...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[2039-0408, 2039-0394]",https://openalex.org/P4310317657,"Edizioni Danaus, Palermo",[https://openalex.org/P4310317657],"[Edizioni Danaus, Palermo]",journal,True,gold,https://doi.org/10.31396/biodiv.jour.2020.11.3...,False
11689,https://openalex.org/W2915290188,https://doi.org/10.1134/s0013873818080134,"A Review of the Leaf-Beetle Fauna (Coleoptera,...","A Review of the Leaf-Beetle Fauna (Coleoptera,...",41.247375,2018,2018-11-01,{'openalex': 'https://openalex.org/W2915290188...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[1555-6689, 0013-8738]",https://openalex.org/P4310320267,Pleiades Publishing,[https://openalex.org/P4310320267],[Pleiades Publishing],journal,False,closed,,False
11690,https://openalex.org/W2155430078,https://doi.org/10.4038/tapro.v4i2.5058,Bolboceratine scarabs of genera Bolbohamatum K...,Bolboceratine scarabs of genera Bolbohamatum K...,21.038770,2012,2012-12-25,{'openalex': 'https://openalex.org/W2155430078...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,[1800-427X],,,[],[],journal,True,hybrid,https://doi.org/10.4038/tapro.v4i2.5058,False
11691,https://openalex.org/W2923711627,https://doi.org/10.33800/nc.v0i5.141,Corrigenda a los escarabajos (Coleoptera) en l...,Corrigenda a los escarabajos (Coleoptera) en l...,,2012,2012-07-01,{'openalex': 'https://openalex.org/W2923711627...,es,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[2071-9841, 2079-0139]",,,[],[],journal,True,gold,http://novitatescaribaea.do/index.php/novitate...,False


In [4]:
authors_insects = openalex.get_authors(insect_articles)
authors_insects

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,https://openalex.org/W2075105050,first,https://openalex.org/A4355311813,Adam Brunke,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
1,https://openalex.org/W2075105050,middle,https://openalex.org/A4353575558,Jan Klimaszewski,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
2,https://openalex.org/W2075105050,middle,https://openalex.org/A2592732627,Julie-Anne Dorval,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
3,https://openalex.org/W2075105050,middle,https://openalex.org/A2021629399,Caroline Bourdon,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
4,https://openalex.org/W2075105050,middle,https://openalex.org/A4334848636,S. M. Paiero,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://zookeys.pensoft.net/article/2702/downl...,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209929,https://openalex.org/W2155430078,first,https://openalex.org/A4357148897,Kailash Chandra,,"Zoological Survey of India, New Alipore, Kolka...",https://openalex.org/I1338006420,Zoological Survey of India,https://ror.org/00h6p6a20,IN,...,[1800-427X],,,[],[],journal,True,hybrid,https://doi.org/10.4038/tapro.v4i2.5058,False
209930,https://openalex.org/W2155430078,last,https://openalex.org/A2128748613,Devanshu Gupta,https://orcid.org/0000-0001-9188-4689,"Zoological Survey of India, Jabalpur 482002, M...",https://openalex.org/I1338006420,Zoological Survey of India,https://ror.org/00h6p6a20,IN,...,[1800-427X],,,[],[],journal,True,hybrid,https://doi.org/10.4038/tapro.v4i2.5058,False
209931,https://openalex.org/W2923711627,first,https://openalex.org/A4342256765,Daniel E. Perez-Gelabert,,United States National Museum of Natural Histo...,https://openalex.org/I1341618623,National Museum of Natural History,https://ror.org/00cz47042,US,...,"[2071-9841, 2079-0139]",,,[],[],journal,True,gold,http://novitatescaribaea.do/index.php/novitate...,False
209932,https://openalex.org/W2785376982,first,https://openalex.org/A4341999956,Anderson Arenas-Clavijo,,"Sección de Entomología,#R##N#Departamento de B...",https://openalex.org/I91732220,University of Valle,https://ror.org/00jb9vg53,CO,...,"[0124-5376, 2539-200X]",https://openalex.org/P4310318943,Alexander von Humboldt Biological Resources Re...,[https://openalex.org/P4310318943],[Alexander von Humboldt Biological Resources R...,journal,True,gold,https://doi.org/10.21068/c2017.v18n02a19,False


In [5]:
singles_insects = openalex.get_single_authors(authors_insects)
singles_insects

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,https://openalex.org/W2753618715,last,https://openalex.org/A2172427898,H. H. Neunzig,,"(HHN) Department of Entomology, North Carolina...",https://openalex.org/I137902535,North Carolina State University,https://ror.org/04tj63d06,US,...,,,,,,,False,closed,,False
1,https://openalex.org/W2805527414,first,https://openalex.org/A4334900143,Seyyed Javad Seyyed-Zadeh,,"Social Determinants of Health, Research Centre...",https://openalex.org/I38476204,Urmia University,https://ror.org/032fk0x53,IR,...,"[1175-5334, 1175-5326]",https://openalex.org/P4310321855,Q15088586,[https://openalex.org/P4310321855],[Q15088586],journal,False,closed,,False
2,https://openalex.org/W2946332939,middle,https://openalex.org/A2945382973,Francisco Ageu de Sousa Nobrega,,Universidade da Integração Internacional da Lu...,https://openalex.org/I3130695932,University for International Integration of th...,https://ror.org/02p928v94,BR,...,"[1175-5334, 1175-5326]",https://openalex.org/P4310321855,Q15088586,[https://openalex.org/P4310321855],[Q15088586],journal,False,closed,,False
3,https://openalex.org/W2760700099,middle,https://openalex.org/A4354669980,Gábor Csorba,,"Dept. of Zoology, Hungarian Natural History Mu...",https://openalex.org/I2799841867,Hungarian Natural History Museum,https://ror.org/04y1zat75,HU,...,"[0097-4463, 1943-6300]",,,[],[],journal,False,closed,,False
4,https://openalex.org/W2149098213,first,https://openalex.org/A2972734906,Alejandro Salinas-Castro,,"Col. Unidad del Bosque, Xalapa, México",,"Col. Unidad del Bosque, Xalapa, México",,,...,[2624-2834],https://openalex.org/P4310321855,Q15088586,[https://openalex.org/P4310321855],[Q15088586],journal,False,closed,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107591,https://openalex.org/W2013095442,first,https://openalex.org/A2911241944,Yanyun Duan,https://orcid.org/0000-0002-0520-4518,Chinese Academy of Agricultural Sciences,https://openalex.org/I4210138501,Chinese Academy of Agricultural Sciences,https://ror.org/0313jb750,CN,...,,,,[],[],journal,False,closed,,False
107592,https://openalex.org/W3197725411,middle,https://openalex.org/A3196655860,Дина Елисовецкая,,,,,,,...,"[1175-5334, 1175-5326]",https://openalex.org/P4310321855,Q15088586,[https://openalex.org/P4310321855],[Q15088586],journal,False,closed,,False
107593,https://openalex.org/W2746723884,first,https://openalex.org/A2278503789,Marika Tuiwawa,,,,,,,...,,,,,,,False,closed,,False
107594,https://openalex.org/W2105654530,last,https://openalex.org/A4353514826,Peter W. Mason,,"Agriculture and Agri-Food Canada,Research Cent...",https://openalex.org/I1331897569,Agriculture and Agri-Food Canada,https://ror.org/051dzs374,CA,...,"[1175-5334, 1175-5326]",https://openalex.org/P4310321855,Q15088586,[https://openalex.org/P4310321855],[Q15088586],journal,False,closed,,False


In [6]:
authors_insects.to_pickle("./data/all_authors_insect_articles_RLIT_method.pkl")
singles_insects.to_pickle("./data/authors_insect_articles_RLIT_method_no_duplicates.pkl")

In [8]:
eu_insect_articles = pd.read_pickle("./data/RLIT_method_openalex_EU27_insect_articles.pkl")
eu_insect_articles = openalex.flatten_works(eu_insect_articles)
eu_insect_articles

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,language,primary_location,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,https://openalex.org/W2074050863,https://doi.org/10.3897/zookeys.250.3715,Introduction of the Exocelina ekari-group with...,Introduction of the Exocelina ekari-group with...,134.51106,2012,2012-12-13,{'openalex': 'https://openalex.org/W2074050863...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://doi.org/10.3897/zookeys.250.3715,True
1,https://openalex.org/W2124627356,https://doi.org/10.3161/000345411x622525,A New Species ofHenosepilachnaLi (Coleoptera: ...,A New Species ofHenosepilachnaLi (Coleoptera: ...,123.24875,2011,2011-12-01,{'openalex': 'https://openalex.org/W2124627356...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[1734-1833, 0003-4541]",https://openalex.org/P4310317485,Polish Academy of Sciences,[https://openalex.org/P4310317485],[Polish Academy of Sciences],journal,False,closed,,False
2,https://openalex.org/W2470267224,https://doi.org/10.1017/jpa.2016.51,New species from Late Cretaceous New Jersey am...,New species from Late Cretaceous New Jersey am...,120.11381,2016,2016-05-01,{'openalex': 'https://openalex.org/W2470267224...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[1937-2337, 0022-3360]",https://openalex.org/P4310315979,Paleontological Society,[https://openalex.org/P4310315979],[Paleontological Society],journal,False,closed,,False
3,https://openalex.org/W2595316164,https://doi.org/10.3161/00034541anz2017.67.1.009,"Brochocoleus Zhiyuani, a New Species of Brocho...","Brochocoleus Zhiyuani, a New Species of Brocho...",105.57870,2017,2017-03-16,{'openalex': 'https://openalex.org/W2595316164...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[1734-1833, 0003-4541]",https://openalex.org/P4310317485,Polish Academy of Sciences,[https://openalex.org/P4310317485],[Polish Academy of Sciences],journal,False,closed,,False
4,https://openalex.org/W1981080163,https://doi.org/10.11646/zootaxa.3755.5.5,<strong>A review of Drilini (Coleoptera: Elate...,<strong>A review of Drilini (Coleoptera: Elate...,102.48853,2014,2014-01-24,{'openalex': 'https://openalex.org/W1981080163...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[1175-5334, 1175-5326]",https://openalex.org/P4310321855,Q15088586,[https://openalex.org/P4310321855],[Q15088586],journal,False,closed,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8155,https://openalex.org/W3000603933,https://doi.org/10.3390/insects11010051,Molecular Phylogeny and Infraordinal Classific...,Molecular Phylogeny and Infraordinal Classific...,119.16625,2020,2020-01-12,{'openalex': 'https://openalex.org/W3000603933...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,[2075-4450],https://openalex.org/P4310310987,MDPI,[https://openalex.org/P4310310987],[MDPI],journal,True,gold,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...,True
8156,https://openalex.org/W2300829245,https://doi.org/10.1146/annurev-ento-010715-02...,Structure and Evolution of Insect Sperm: New I...,Structure and Evolution of Insect Sperm: New I...,359.53284,2016,2016-03-16,{'openalex': 'https://openalex.org/W2300829245...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"[0066-4170, 1545-4487]",https://openalex.org/P4310320373,Annual Reviews,[https://openalex.org/P4310320373],[Annual Reviews],journal,False,closed,,False
8157,https://openalex.org/W2792039220,https://doi.org/10.7717/peerj.5126,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,129.17558,2018,2018-06-26,{'openalex': 'https://openalex.org/W2792039220...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,[2167-8359],https://openalex.org/P4310320104,"PeerJ, Inc.",[https://openalex.org/P4310320104],"[PeerJ, Inc.]",journal,True,gold,https://peerj.com/articles/5126.pdf,True
8158,https://openalex.org/W4243684135,https://doi.org/10.7287/peerj.preprints.26662,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,,2018,2018-03-12,{'openalex': 'https://openalex.org/W4243684135...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,,,,,,,True,green,https://doi.org/10.7287/peerj.preprints.26662,True


In [10]:
eu_insect_authors = openalex.get_authors(eu_insect_articles)
eu_insect_authors

Unnamed: 0,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,inst_country_code,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,https://openalex.org/W2074050863,first,https://openalex.org/A4357020418,Helena Shaverdo,,"Naturhistorisches Museum, Burgring 7, A-1010 V...",https://openalex.org/I1279955124,Natural History Museum Vienna,https://ror.org/01tv5y993,AT,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://doi.org/10.3897/zookeys.250.3715,True
1,https://openalex.org/W2074050863,middle,https://openalex.org/A4347934641,Suriani Surbakti,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://doi.org/10.3897/zookeys.250.3715,True
2,https://openalex.org/W2074050863,middle,https://openalex.org/A2070216457,Lars Hendrich,https://orcid.org/0000-0001-8366-0749,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://doi.org/10.3897/zookeys.250.3715,True
3,https://openalex.org/W2074050863,last,https://openalex.org/A4354063702,Michael Balke,,,,,,,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://doi.org/10.3897/zookeys.250.3715,True
4,https://openalex.org/W2124627356,first,https://openalex.org/A1616833297,Karol Szawaryn,https://orcid.org/0000-0002-9329-4268,"Museum and Institute of Zoology, Polish Academ...",https://openalex.org/I2802889272,Museum and Institute of Zoology,https://ror.org/00r9k8q20,PL,...,"[1734-1833, 0003-4541]",https://openalex.org/P4310317485,Polish Academy of Sciences,[https://openalex.org/P4310317485],[Polish Academy of Sciences],journal,False,closed,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50065,https://openalex.org/W4243684135,last,https://openalex.org/A4359872967,Reed F. Johnson,,"Department of Entomology, Ohio State Universit...",,"Department of Entomology, Ohio State Universit...",,,...,,,,,,,True,green,https://doi.org/10.7287/peerj.preprints.26662,True
50066,https://openalex.org/W4246017557,first,https://openalex.org/A4334947916,Rodney T. Richardson,,"Department of Entomology, Ohio State Universit...",https://openalex.org/I52357470,The Ohio State University,https://ror.org/00rs6vg23,US,...,,,,,,,True,green,https://doi.org/10.7287/peerj.preprints.26662v1,True
50067,https://openalex.org/W4246017557,middle,https://openalex.org/A79282783,Johan Bengtsson-Palme,https://orcid.org/0000-0002-6528-3158,"Department of Infectious Diseases, Institute o...",https://openalex.org/I881427289,University of Gothenburg,https://ror.org/01tm6cn81,SE,...,,,,,,,True,green,https://doi.org/10.7287/peerj.preprints.26662v1,True
50068,https://openalex.org/W4246017557,middle,https://openalex.org/A4337018164,Mary M. Gardiner,,"Department of Entomology, Ohio State Universit...",https://openalex.org/I52357470,The Ohio State University,https://ror.org/00rs6vg23,US,...,,,,,,,True,green,https://doi.org/10.7287/peerj.preprints.26662v1,True


In [11]:
only_eu_insect_authors = openalex.get_eu_authors(eu_insect_authors, pan_europe=False)
only_eu_insect_authors

Unnamed: 0,Index,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,0,https://openalex.org/W2074050863,first,https://openalex.org/A4357020418,Helena Shaverdo,,"Naturhistorisches Museum, Burgring 7, A-1010 V...",https://openalex.org/I1279955124,Natural History Museum Vienna,https://ror.org/01tv5y993,...,"[1313-2970, 1313-2989]",https://openalex.org/P4310321056,Pensoft Publishers,[https://openalex.org/P4310321056],[Pensoft Publishers],journal,True,gold,https://doi.org/10.3897/zookeys.250.3715,True
1,4,https://openalex.org/W2124627356,first,https://openalex.org/A1616833297,Karol Szawaryn,https://orcid.org/0000-0002-9329-4268,"Museum and Institute of Zoology, Polish Academ...",https://openalex.org/I2802889272,Museum and Institute of Zoology,https://ror.org/00r9k8q20,...,"[1734-1833, 0003-4541]",https://openalex.org/P4310317485,Polish Academy of Sciences,[https://openalex.org/P4310317485],[Polish Academy of Sciences],journal,False,closed,,False
2,5,https://openalex.org/W2470267224,first,https://openalex.org/A4357506019,David Peris,,Universitat de Barcelona,https://openalex.org/I71999127,University of Barcelona,https://ror.org/021018s57,...,"[1937-2337, 0022-3360]",https://openalex.org/P4310315979,Paleontological Society,[https://openalex.org/P4310315979],[Paleontological Society],journal,False,closed,,False
3,6,https://openalex.org/W2470267224,last,https://openalex.org/A4354758409,Jiří Háva,,Czech University of Life Sciences Kamýcká 1176,https://openalex.org/I205984670,Czech University of Life Sciences Prague,https://ror.org/0415vcw02,...,"[1937-2337, 0022-3360]",https://openalex.org/P4310315979,Paleontological Society,[https://openalex.org/P4310315979],[Paleontological Society],journal,False,closed,,False
4,8,https://openalex.org/W2595316164,middle,https://openalex.org/A4355414633,Jingjing Tan,,4State Key Laboratory of Palaeobiology and Str...,https://openalex.org/I4210141937,"Czech Academy of Sciences, Institute of Geology",https://ror.org/04wh80b80,...,"[1734-1833, 0003-4541]",https://openalex.org/P4310317485,Polish Academy of Sciences,[https://openalex.org/P4310317485],[Polish Academy of Sciences],journal,False,closed,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28712,50056,https://openalex.org/W3000603933,middle,https://openalex.org/A4355925457,Ivona Horká,,Department of Biology and Ecology & Institute ...,https://openalex.org/I187293425,University of Ostrava,https://ror.org/00pyqav47,...,[2075-4450],https://openalex.org/P4310310987,MDPI,[https://openalex.org/P4310310987],[MDPI],journal,True,gold,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...,True
28713,50057,https://openalex.org/W3000603933,last,https://openalex.org/A1908576969,Robin Kundrata,https://orcid.org/0000-0001-9397-1030,"Department of Zoology, Faculty of Science, Pal...",https://openalex.org/I70703428,"Palacký University, Olomouc",https://ror.org/04qxnmv42,...,[2075-4450],https://openalex.org/P4310310987,MDPI,[https://openalex.org/P4310310987],[MDPI],journal,True,gold,https://www.mdpi.com/2075-4450/11/1/51/pdf?ver...,True
28714,50059,https://openalex.org/W2792039220,middle,https://openalex.org/A79282783,Johan Bengtsson-Palme,https://orcid.org/0000-0002-6528-3158,"Department of Infectious Diseases, Institute o...",https://openalex.org/I881427289,University of Gothenburg,https://ror.org/01tm6cn81,...,[2167-8359],https://openalex.org/P4310320104,"PeerJ, Inc.",[https://openalex.org/P4310320104],"[PeerJ, Inc.]",journal,True,gold,https://peerj.com/articles/5126.pdf,True
28715,50063,https://openalex.org/W4243684135,middle,https://openalex.org/A79282783,Johan Bengtsson-Palme,https://orcid.org/0000-0002-6528-3158,"Department of Infectious Diseases, Institute o...",https://openalex.org/I881427289,University of Gothenburg,https://ror.org/01tm6cn81,...,,,,,,,True,green,https://doi.org/10.7287/peerj.preprints.26662,True


In [12]:
eu_single_insect_authors = openalex.get_single_authors(only_eu_insect_authors)
eu_single_insect_authors

Unnamed: 0,Index,article_id,author_position,author_id,author_display_name,orcid,raw_affiliation_string,inst_id,inst_display_name,ror,...,source_issn,source_host_organization,source_host_organization_name,source_host_organization_lineage,source_host_organization_lineage_names,source_type,is_oa,oa_status,oa_url,any_repository_has_fulltext
0,47181,https://openalex.org/W2028420837,first,https://openalex.org/A4356756587,Albena Lapeva-Gjonova,,"Department of Zoology and Anthropology, Facult...",https://openalex.org/I58918642,Sofia University,https://ror.org/02jv3k292,...,"[1687-7438, 0033-2615]",https://openalex.org/P4310315927,Cambridge Entomological Club,[https://openalex.org/P4310315927],[Cambridge Entomological Club],journal,True,gold,https://doi.org/10.1155/2012/391525,False
1,33044,https://openalex.org/W2990555863,middle,https://openalex.org/A2990811786,Dimitra Pangea,,"Department of Genetics, Development and Molecu...",https://openalex.org/I21370196,Aristotle University of Thessaloniki,https://ror.org/02j61yw88,...,[2075-4450],https://openalex.org/P4310310987,MDPI,[https://openalex.org/P4310310987],[MDPI],journal,True,gold,https://www.mdpi.com/2075-4450/10/12/429/pdf?v...,True
2,20412,https://openalex.org/W2760700099,middle,https://openalex.org/A4354669980,Gábor Csorba,,"Dept. of Zoology, Hungarian Natural History Mu...",https://openalex.org/I2799841867,Hungarian Natural History Museum,https://ror.org/04y1zat75,...,[1756-3305],https://openalex.org/P4310319900,Springer Science+Business Media,"[https://openalex.org/P4310319965, https://ope...","[Springer Nature, Springer Science+Business Me...",journal,True,gold,https://doi.org/10.1186/s13071-017-2376-1,True
3,41940,https://openalex.org/W2809102417,middle,https://openalex.org/A4351099800,Simone Cardoni,,Dipartimento di Scienze Ecologiche e Biologich...,https://openalex.org/I138938424,Tuscia University,https://ror.org/03svwq685,...,"[2475-0263, 2475-0255]",https://openalex.org/P4310320547,Taylor & Francis,[https://openalex.org/P4310320547],[Taylor & Francis],journal,True,gold,https://doi.org/10.1080/24750263.2018.1480732,True
4,21042,https://openalex.org/W2950874535,middle,https://openalex.org/A4360003498,Maria José Santos,,"Institut de Génomique Fonctionnelle de Lyon, U...",https://openalex.org/I4210125167,Institut de Génomique Fonctionnelle de Lyon,https://ror.org/038fcbc74,...,"[1474-7596, 1474-760X]",https://openalex.org/P4310320256,BioMed Central,"[https://openalex.org/P4310319965, https://ope...","[Springer Nature, BioMed Central]",journal,True,gold,https://genomebiology.biomedcentral.com/track/...,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16067,30547,https://openalex.org/W3042182083,middle,https://openalex.org/A4353463302,Alexandros Iosifidis,,Department of Engineering Aarhus University Ro...,https://openalex.org/I204337017,Aarhus University,https://ror.org/01aj84f44,...,[2041-210X],https://openalex.org/P4310320503,Wiley-Blackwell,"[https://openalex.org/P4310320503, https://ope...","[Wiley-Blackwell, Wiley]",journal,True,hybrid,https://onlinelibrary.wiley.com/doi/pdfdirect/...,True
16068,48089,https://openalex.org/W2799466714,middle,https://openalex.org/A4354863000,Pilar De la Rúa,,Departamento de Zoología y Antropología Física...,https://openalex.org/I80180929,University of Murcia,https://ror.org/03p3aeb86,...,"[0022-2011, 1096-0805]",https://openalex.org/P4310320990,Elsevier BV,[https://openalex.org/P4310320990],[Elsevier BV],journal,False,closed,,False
16069,39871,https://openalex.org/W2769827266,middle,https://openalex.org/A2089758524,Stefan Farkas,,Faculty of Agricultural and Environmental Scie...,https://openalex.org/I4210117141,University of Kapsovar,https://ror.org/02ff27430,...,[2348-0394],https://openalex.org/P4310320855,Sciencedomain International,[https://openalex.org/P4310320855],[Sciencedomain International],journal,False,closed,,False
16070,2634,https://openalex.org/W2081467341,middle,https://openalex.org/A4344797922,Kris M. Hart,,Dublin City University,https://openalex.org/I42934936,Dublin City University,https://ror.org/04a1a1e81,...,"[0032-0633, 1873-5088]",https://openalex.org/P4310320990,Elsevier BV,[https://openalex.org/P4310320990],[Elsevier BV],journal,False,closed,,False


In [13]:
only_eu_insect_authors.to_pickle("./data/EU27_all_authors_insect_articles_RLIT_method.pkl")
eu_single_insect_authors.to_pickle("./data/EU27_authors_insect_articles_RLIT_method_no_duplicates.pkl")