# 1. Using OpenAlex to find taxonomists

## 1.3. Filter journal articles for accuracy

Not all articles in taxonomic journals are strictly taxonomic (i.e., describing new species). Some are about methodology or further research into specific organisms. To remedy this, we filtered our list of articles by keywords similar to the WoS comparison search terms, adding "nov" and dropping the order name.

In [1]:
import numpy as np
import pandas as pd
import pickle
import openalex

In [4]:
# Global set: load the pickled article table, run it through the project's
# keyword filter, and show the filtered result as the cell output.
# NOTE(review): openalex.filter_keywords is a project helper not visible here;
# presumably it keeps only articles matching the taxonomic search terms
# described in the section intro -- confirm against the openalex module.
# read_pickle unpickles arbitrary objects, so only point it at files produced
# by this project.
global_articles_path = "./data/articles_global_with_taxonomy_concept_in_journal.pkl"
articles = pd.read_pickle(global_articles_path)
filtered_articles = openalex.filter_keywords(articles)
filtered_articles

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,...,grants,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2097461610,,A systematic monograph of the Recent Pentastom...,A systematic monograph of the Recent Pentastom...,2013,2013-01-01,{'openalex': 'https://openalex.org/W2097461610...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],[],"[https://openalex.org/W1451478857, https://ope...",https://api.openalex.org/works/W2097461610/ngrams,"{'We': [0], 'compile': [1], 'all': [2], 'publi...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2020, 'cited_by_count': 1}, {'year':...",2023-06-01T01:06:38.350874,2016-06-24,
1,https://openalex.org/W2098184878,,Mycale species (Porifera: Poecilosclerida) of ...,Mycale species (Porifera: Poecilosclerida) of ...,2014,2014-01-01,{'openalex': 'https://openalex.org/W2098184878...,en,"{'is_oa': False, 'landing_page_url': 'http://w...",journal-article,...,[],"[https://openalex.org/W5267670, https://openal...","[https://openalex.org/W1870150722, https://ope...",https://api.openalex.org/works/W2098184878/ngrams,"{'Based': [0], 'on': [1], 'collections': [2, 5...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2017, 'cited_by_count': 1}, {'year':...",2023-05-26T23:14:12.444095,2016-06-24,
2,https://openalex.org/W1841372523,,Dinotrema brevis spec. nov. (Hymenoptera: Brac...,Dinotrema brevis spec. nov. (Hymenoptera: Brac...,2014,2014-01-01,{'openalex': 'https://openalex.org/W1841372523...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W38046216, https://opena...","[https://openalex.org/W893905258, https://open...",https://api.openalex.org/works/W1841372523/ngrams,"{'A': [0, 17], 'new': [1], 'species': [2, 21],...",https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2019, 'cited_by_count': 1}]",2023-05-27T02:13:06.320098,2016-06-24,
3,https://openalex.org/W813610620,,Sigalphus anjae spec. nov. (Hymenoptera: Braco...,Sigalphus anjae spec. nov. (Hymenoptera: Braco...,2014,2014-01-01,{'openalex': 'https://openalex.org/W813610620'...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W1537125554, https://ope...",[],https://api.openalex.org/works/W813610620/ngrams,"{'A': [0, 20], 'new': [1], 'species': [2, 25],...",https://api.openalex.org/works?filter=cites:W8...,"[{'year': 2016, 'cited_by_count': 1}]",2023-05-26T07:36:47.829660,2016-06-24,
4,https://openalex.org/W2581956133,,"Land snails of the islet of Misali, off Pemba ...","Land snails of the islet of Misali, off Pemba ...",2013,2013-01-01,{'openalex': 'https://openalex.org/W2581956133...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],[],[https://openalex.org/W3159842438],https://api.openalex.org/works/W2581956133/ngrams,"{'A': [0], 'litter': [1], 'sample': [2], 'coll...",https://api.openalex.org/works?filter=cites:W2...,[],2023-05-27T01:55:15.761329,2017-02-03,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50603,https://openalex.org/W4316923273,https://doi.org/10.21684/0132-8077-2022-30-2-2...,REDESCRIPTION OF TYPHLODROMUS (ANTHOSEIUS) KUZ...,REDESCRIPTION OF TYPHLODROMUS (ANTHOSEIUS) KUZ...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4316923273...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",journal-article,...,[{'funder': 'https://openalex.org/F4320324099'...,"[https://openalex.org/W1964529878, https://ope...","[https://openalex.org/W2075568141, https://ope...",https://api.openalex.org/works/W4316923273/ngrams,"{'Typhlodromus': [0], '(Anthoseius)': [1], 'ku...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-29T11:27:36.010166,2023-01-18,
50604,https://openalex.org/W4316923294,https://doi.org/10.21684/0132-8077-2022-30-2-1...,THE MITE GENUS HISTIOGASTER (ACARI: ACARIDAE) ...,THE MITE GENUS HISTIOGASTER (ACARI: ACARIDAE) ...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4316923294...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",journal-article,...,[],"[https://openalex.org/W1550069160, https://ope...","[https://openalex.org/W1977168516, https://ope...",https://api.openalex.org/works/W4316923294/ngrams,"{'Two': [0], 'new': [1], 'species': [2, 61, 75...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-30T07:43:48.170462,2023-01-18,
50605,https://openalex.org/W4316923321,https://doi.org/10.21684/0132-8077-2022-30-2-2...,A NEW LARVAL SPECIES OF THE GENUS SMARIS (SMAR...,A NEW LARVAL SPECIES OF THE GENUS SMARIS (SMAR...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4316923321...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",journal-article,...,[{'funder': 'https://openalex.org/F4320321091'...,"[https://openalex.org/W800259873, https://open...","[https://openalex.org/W570431877, https://open...",https://api.openalex.org/works/W4316923321/ngrams,"{'Here,': [0], 'we': [1], 'describe': [2], 'a'...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-31T13:50:18.657480,2023-01-18,
50606,https://openalex.org/W4316923341,https://doi.org/10.21684/0132-8077-2022-30-2-2...,FIRST RECORD OF ADAMYSTIDAE (ACARI: PROSTIGMAT...,FIRST RECORD OF ADAMYSTIDAE (ACARI: PROSTIGMAT...,2022,2022-01-01,{'openalex': 'https://openalex.org/W4316923341...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",journal-article,...,[],"[https://openalex.org/W1534787709, https://ope...","[https://openalex.org/W876471368, https://open...",https://api.openalex.org/works/W4316923341/ngrams,"{'The': [0], 'family': [1], 'Adamystidae': [2]...",https://api.openalex.org/works?filter=cites:W4...,[],2023-06-01T12:05:38.865862,2023-01-18,


In [5]:
# Persist the keyword-filtered global article table for later use.
filtered_global_path = "./data/keyword_filtered_articles_global_with_taxonomy_concept_in_journal.pkl"
filtered_articles.to_pickle(filtered_global_path)

In [8]:
# EU27 subset: same load -> keyword filter -> display sequence as for the
# global set, applied to the EU27 article pickle.
# NOTE(review): openalex.filter_keywords is a project helper not visible here;
# presumably it keeps only articles matching the taxonomic search terms
# described in the section intro -- confirm against the openalex module.
# read_pickle unpickles arbitrary objects, so only point it at files produced
# by this project.
eu_articles_path = "./data/articles_EU27_with_taxonomy_concept_in_journal.pkl"
eu_articles = pd.read_pickle(eu_articles_path)
filtered_eu_articles = openalex.filter_keywords(eu_articles)
filtered_eu_articles

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,...,grants,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2282635459,https://doi.org/10.1071/is15033,Integrative systematic studies on tardigrades ...,Integrative systematic studies on tardigrades ...,2016,2016-08-31,{'openalex': 'https://openalex.org/W2282635459...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W133481079, https://open...","[https://openalex.org/W1816690434, https://ope...",https://api.openalex.org/works/W2282635459/ngrams,"{'Tardigrades': [0], 'represent': [1], 'one': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-05-29T00:22:56.109044,2016-06-24,
1,https://openalex.org/W1534942643,https://doi.org/10.1071/is13002,Morphological and molecular insights on Megalo...,Morphological and molecular insights on Megalo...,2013,2013-01-01,{'openalex': 'https://openalex.org/W1534942643...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W19893924, https://opena...","[https://openalex.org/W1534942643, https://ope...",https://api.openalex.org/works/W1534942643/ngrams,"{'The': [0, 73, 92], 'genus': [1], 'Megalothor...",https://api.openalex.org/works?filter=cites:W1...,"[{'year': 2023, 'cited_by_count': 4}, {'year':...",2023-06-10T05:55:09.259272,2016-06-24,
2,https://openalex.org/W2159262135,https://doi.org/10.1071/is13030,Two distinct evolutionary lineages of the Asta...,Two distinct evolutionary lineages of the Asta...,2014,2014-01-01,{'openalex': 'https://openalex.org/W2159262135...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W247704383, https://open...","[https://openalex.org/W1985896783, https://ope...",https://api.openalex.org/works/W2159262135/ngrams,"{'Narrow-clawed': [0], 'crayfish': [1, 13, 85]...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 2}, {'year':...",2023-06-07T02:46:48.359217,2016-06-24,
3,https://openalex.org/W2271608622,https://doi.org/10.1071/is15023,Molecular evidence for non-monophyly of the pi...,Molecular evidence for non-monophyly of the pi...,2016,2016-01-01,{'openalex': 'https://openalex.org/W2271608622...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W108157262, https://open...","[https://openalex.org/W2001794630, https://ope...",https://api.openalex.org/works/W2271608622/ngrams,"{'The': [0, 18, 285], 'crabs': [1, 305], 'of':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-06-09T01:19:20.151699,2016-06-24,
4,https://openalex.org/W2209115793,https://doi.org/10.1071/is14019,Mitochondrial DNA analyses reveal widespread t...,Mitochondrial DNA analyses reveal widespread t...,2015,2015-01-01,{'openalex': 'https://openalex.org/W2209115793...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W800964941, https://open...","[https://openalex.org/W1556637991, https://ope...",https://api.openalex.org/works/W2209115793/ngrams,"{'Antarctica': [0, 26, 233], 'contains': [1], ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 2}, {'year':...",2023-06-08T13:47:31.428930,2016-06-24,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11649,https://openalex.org/W4253285772,https://doi.org/10.21684/0132-8077-2019-27-2-1...,"MERISTACARUS BOCHKOVI (ACARI, ORIBATIDA, LOHMA...","MERISTACARUS BOCHKOVI (ACARI, ORIBATIDA, LOHMA...",2019,2019-12-30,{'openalex': 'https://openalex.org/W4253285772...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[{'funder': 'https://openalex.org/F4320323641'...,"[https://openalex.org/W2290344131, https://ope...","[https://openalex.org/W1160645582, https://ope...",https://api.openalex.org/works/W4253285772/ngrams,"{'A': [0], 'new': [1], 'oribatid': [2], 'mite'...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-24T14:50:19.997838,2022-05-12,
11650,https://openalex.org/W4253410054,https://doi.org/10.21684/0132-8077-2021-29-1-3-9,TAXONOMIC CONTRIBUTION TO THE KNOWLEDGE OF THE...,TAXONOMIC CONTRIBUTION TO THE KNOWLEDGE OF THE...,2021,2021-01-01,{'openalex': 'https://openalex.org/W4253410054...,en,"{'is_oa': False, 'landing_page_url': 'https://...",journal-article,...,[],"[https://openalex.org/W652923663, https://open...","[https://openalex.org/W2000844831, https://ope...",https://api.openalex.org/works/W4253410054/ngrams,"{'A': [0, 15], 'new': [1, 16], 'oribatid': [2]...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-29T20:38:15.338954,2022-05-12,
11651,https://openalex.org/W4254963337,https://doi.org/10.21684/0132-8077-2019-27-2-2...,A NEW SYRINGOPHILID MITE (ACARIFORMES: SYRINGO...,A NEW SYRINGOPHILID MITE (ACARIFORMES: SYRINGO...,2019,2019-12-30,{'openalex': 'https://openalex.org/W4254963337...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",journal-article,...,[],"[https://openalex.org/W562402537, https://open...","[https://openalex.org/W1567510525, https://ope...",https://api.openalex.org/works/W4254963337/ngrams,"{'A': [0], 'new': [1, 33], 'quill': [2], 'mite...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-29T02:07:22.218820,2022-05-12,
11652,https://openalex.org/W4255590455,https://doi.org/10.21684/0132-8077-2019-27-2-1...,"HYPOZETES ANDREII (ACARI, ORIBATIDA, TEGORIBAT...","HYPOZETES ANDREII (ACARI, ORIBATIDA, TEGORIBAT...",2019,2019-12-30,{'openalex': 'https://openalex.org/W4255590455...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",journal-article,...,[{'funder': 'https://openalex.org/F4320321079'...,"[https://openalex.org/W2760483133, https://ope...","[https://openalex.org/W2000844831, https://ope...",https://api.openalex.org/works/W4255590455/ngrams,"{'A': [0], 'new': [1, 44], 'oribatid': [2], 'm...",https://api.openalex.org/works?filter=cites:W4...,[],2023-05-29T14:33:08.088537,2022-05-12,


In [9]:
# Persist the keyword-filtered EU27 article table for later use.
filtered_eu_path = "./data/keyword_filtered_articles_EU27_with_taxonomy_concept_in_journal.pkl"
filtered_eu_articles.to_pickle(filtered_eu_path)