# 1. Using OpenAlex to find taxonomists

## 1.1. Retrieving articles from taxonomic journals

The OpenAlex API was used to retrieve all recent articles (2013-2022) published in taxonomic journals. A list of journals was obtained via a SPARQL query on Wikidata, with subjects such as "taxonomy", "phylogeny", "systematics" and "biological nomenclature" and another list of journals via OpenAlex by searching for sources with the concept "taxonomy" associated with it. The articles from these journals were retrieved with the OpenAlex API. This data contains information on the authors and associated institutions, publishers, and abstract, among other things.

In [2]:
import requests
import numpy as np
import pandas as pd
import pickle
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import time

## Functions

In [3]:
# RETRIEVE ALL RECENT ARTICLES WITH ONE FILTER
def request_pubs_openalex(category, openalex_id, other_id=False):
    # open persistent session to shorten processing time between requests
    s = requests.Session()
    # FIRST PAGE
    publications = s.get("https://api.openalex.org/works?cursor=*&per-page=200&filter=from_publication_date:2013-01-01,"
                         +category+openalex_id)
    
    # print display name associated with OpenAlex ID (if given)
    if not other_id:
        query_name = s.get("https://api.openalex.org/"+openalex_id).json()["display_name"]
        print("Number of publications associated with \""+query_name+
              "\": "+str(publications.json()["meta"]["count"]))
    
    next_pubs = publications.json()
    next_cursor = next_pubs["meta"]["next_cursor"]

    publications_results = next_pubs["results"]
    
    # RETRIEVE ALL PAGES
    while next_pubs["meta"]["next_cursor"] != None:
        # get next page
        next_pubs = s.get("https://api.openalex.org/works?per-page=200&filter=from_publication_date:2013-01-01,"
                                 +category+openalex_id+"&cursor="+next_cursor)

        next_pubs = next_pubs.json()
        next_cursor = next_pubs["meta"]["next_cursor"] # remember next cursor

        publications_results.extend(next_pubs["results"])
    
    publications_df = pd.DataFrame.from_dict(publications_results)
    return publications_df

In [4]:
# RETRIEVE ALL ARTICLES BETWEEN 2011 AND 2020 WITH A GIVEN FILTER (comparison with RLIT)
def search_2011_2020(query):
    s = requests.Session()
    
    publications = s.get("https://api.openalex.org/works?per-page=200&filter="
                         +query+",from_publication_date:2011-01-01,to_publication_date:2020-12-31&cursor=*")
    
    next_pubs = publications.json()
    next_cursor = next_pubs["meta"]["next_cursor"]

    publications_results = next_pubs["results"]

    while next_pubs["meta"]["next_cursor"] != None:
        # get next page
        next_pubs = s.get("https://api.openalex.org/works?per-page=200&filter="
                          +query+",from_publication_date:2011-01-01,to_publication_date:2020-12-31&cursor="
                          +next_cursor)
        next_pubs = next_pubs.json()
        next_cursor = next_pubs["meta"]["next_cursor"] # remember next cursor

        # add new results to total
        publications_results.extend(next_pubs["results"])

    publications_df = pd.DataFrame.from_dict(publications_results)
    return publications_df

In [5]:
# GET RESULTS OF SPARQLE QUERIES (code from WikiData's query service)
endpoint_url = "https://query.wikidata.org/sparql"

def get_sparql_results(query, endpoint_url=endpoint_url):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return pd.DataFrame.from_dict(results["results"]["bindings"])

In [6]:
# BUILD SPARQL QUERIES FOR MULTIPLE
def build_query_subjects(list):
    # first, it must be an instance of (P31) a scientific journal (Q5633421) or academic journal (Q737498)
    query = """SELECT DISTINCT ?item ?itemLabel ?issn ?issn_l WHERE {
    ?item wdt:P236 ?issn.
    ?item wdt:P7363 ?issn_l.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }
  {
    SELECT DISTINCT ?item WHERE {
      {
        ?item p:P31 ?statement0.
        ?statement0 (ps:P31/(wdt:P279*)) wd:Q5633421.
      }
      UNION
      {
        ?item p:P31 ?statement1.
        ?statement1 (ps:P31/(wdt:P279*)) wd:Q737498.
      }"""
    
    # add specific requirements (taxonomy, phylogeny,...) for field of work (P101) or main subject (P921)
    for i, subject in enumerate(list):
        i = 2 + i*2
        addition = """\n      {
        ?item p:P921 ?statement""" + str(i) + """.
        ?statement""" + str(i) + """ (ps:P921/(wdt:P279*)) wd:""" + subject + """.
      }
      UNION
      {
        ?item p:P101 ?statement""" + str(i+1) + """.
        ?statement""" + str(i+1) + """ (ps:P101/(wdt:P279*)) wd:""" + subject + """.
      }"""
        if i != 2:
            addition = "\n      UNION" + addition
        
        query += addition
    
    query += """\n    }
  }
}"""
    return query

In [9]:
# filter all articles: at least one of the institutions associated with one of the authors, must be EU
def filter_eu_articles(df_input):
    # two-letter country codes of all EU27 countries
    eu_codes = ["AT", "BE", "BG", "HR", "CY", "CZ", "DK", "EE", "FI", "FR", "DE", "GR", "HU", "IE", # EU
                      "IT", "LV", "LT", "LU", "MT", "NL", "PL", "PT", "RO", "SK", "SI", "ES", "SE"] # EU

    eu_articles = []

    for article in df_input.itertuples():
        # check every author
        for author in article.authorships:
            stop = False
            # check every affiliated institute
            for institute in author["institutions"]:
                if institute:
                    country = institute["country_code"]
                    # european?
                    if country in eu_codes:
                        eu_articles.append(list(article))
                        stop=True # each article should only be included once
                        break # stop going over institutes of this author

            if stop:
                break # stop going over authors of this article

    eu_articles = pd.DataFrame(eu_articles)
    eu_articles = eu_articles.iloc[:,1:]
    eu_articles.columns = df_input.columns
    
    return eu_articles

In [27]:
# QUERY GIVEN LIST OF ARTICLES FOR SPECIFIC WORDS AND CONCEPTS TO FILTER OUT IRRELEVANT ARTICLES
def query_articles(articles):
    queries1 = ["taxonomy", "taxonomic", "taxon", "checklist"] # one-word queries
    queries2 = ["new species", "novel species", "new genus", "new genera"] # two-word queries
    concepts = ["C58642233", "C71640776", "C2779356329"] # OpenAlex IDs of concepts
                                                         # taxonomy, taxon, checklist
    
    keep = []
    
    for article in articles.itertuples():
        cont = False
        # SEARCH TITLE
        if article.display_name != None:
            # single-word queries
            for query in queries1 + queries2 + ["nov.",]:
                if query in article.display_name.lower():
                    keep.append(article)
                    cont = True
                    break # stop querying
            if cont:
                continue # move on to next article

        # SEARCH ABSTRACT
        # get list of words from the abstract without distracting characters or uppercase
        if article.abstract_inverted_index != None:
            abstract_words = article.abstract_inverted_index.keys()
            abstract_words = [x.lower().strip(",;.?!\'()-]") for x in abstract_words]
        
            # search "nov." without stripping abstract of period
            if "nov." in article.abstract_inverted_index.keys():
                keep.append(article)
                continue
            if cont:
                continue # move on to next article
            
            # one-word queries
            for query in queries1:
                if query in abstract_words:
                    keep.append(article)
                    cont = True
                    break
            if cont:
                continue # move on to next article
                    
            # two-word queries
            for query in queries2:
                if query.split()[0] in abstract_words and query.split()[1] in abstract_words:
                    keep.append(article)
                    cont = True
                    break
            if cont:
                continue # move on to next article
            
        # SEARCH CONCEPTS BY ID
        for concept in concepts:
            # make list of concepts (by OpenAlex ID) associated with the article
            conc_ids = []
            for art_conc in article.concepts:
                conc_ids.append(art_conc["id"])

            if concept in conc_ids:
                keep.append(article)
                break
    
    return pd.DataFrame(keep).drop_duplicates(subset="id", ignore_index=True).iloc[:,1:]

## Results

### 1.1.1. Following the RLIT methodology

The European Red List of Insect Taxonomists is an important predecessor to our methodology (https://cloud.pensoft.net/s/mGpyQYUPQOMPs8C). They used Web of Science (WoS) to find articles related to specific insect orders, specifically by searching WoS with the following query (https://www.webofscience.com/wos/woscc/summary/5f6f7d2a-89dd-4709-bf49-494b6f2522bc-73383729/relevance/1):

    ALL=(Plecoptera  AND  (taxonom*  OR  “new  species”  OR  
    “novel species” OR “checklist” OR “new genus” OR “new genera”))

To see if OpenAlex can find the same or even more articles, we replicate their methodology here.

Some differences between the two methodologies exist: OpenAlex, for copyright reasons, only records the abstract in the form of an inverted index - listing every word in it and its place(s) in the abstract as a key-value pair, respectively. For this reason, word groups can't be searched exactly: "new species" is searched as "new" and "species", not necessarily adjacent. Furthermore, OpenAlex doesn't record author keywords nor does it feature WoS' "keywords plus" which are based on the references, but it does associate "concepts" with every article. We searched these concepts instead.

Title and abstract were searched concurrently and explicitly since the simpler "search" function of the OpenAlex API searches the full text as well as title and abstract.

In [11]:
# replicating WoS queries

# search every insect order listed in the RLIT
insect_orders = ["Coleoptera", "Hemiptera", "Diptera", "Lepidoptera", "Orthoptera", 
                 "Odonata", "Blattodea", "Ephemeroptera", "Psocodea", "Grylloblattodea", 
                 "Neuroptera", "Mecoptera", "Trichoptera", "Plecoptera", "Dermaptera", 
                 "Mantodea", "Siphonaptera", "Strepsiptera", "Embioptera", "Hymenoptera",
                 "Phasmida", "Raphidioptera", "Isoptera", "Megaloptera", "Thysanoptera",
                 "Zygentoma", "Mantophasmatodea", "Archaeognatha", "Zoraptera"]
insect_articles = pd.DataFrame()

for order in insect_orders:
    start = time.time()
    results = []
    
    # search each of the WoS search terms in abstract or title or concepts
    # the order must also be found in abstract or title (only some orders exist as concepts)
    # OpenAlex OR function in search not useable because it excludes results with both search terms
    
    # Plecoptera AND
    for query in ["title.search:"+order+",title.search:%22new species%22", # OR "new species"
                  "title.search:"+order+",abstract.search:new species",
                  "title.search:%22new species%22,abstract.search:"+order,
                  "abstract.search:"+order+" new species", 

                  "title.search:"+order+" AND %22novel species%22", # OR "novel species"
                  "title.search:"+order+",abstract.search:novel species",
                  "title.search:%22novel species%22,abstract.search:"+order,
                  "abstract.search:"+order+" novel species",

                  "title.search:"+order+" AND %22new genus%22", # OR "new genus"
                  "title.search:"+order+",abstract.search:new genus",
                  "title.search:%22new genus%22,abstract.search:"+order,
                  "abstract.search:"+order+" new genus",

                  "title.search:"+order+" AND %22new genera%22", # OR "new genera"
                  "title.search:"+order+",abstract.search:new genera",
                  "title.search:%22new genera%22,abstract.search:"+order,
                  "abstract.search:"+order+" new genera",

                  "title.search:"+order+" AND checklist", # OR "checklist"
                  "title.search:"+order+",abstract.search:checklist",
                  "title.search:checklist,abstract.search:"+order,
                  "abstract.search:"+order+" checklist",

                  "title.search:"+order+" AND taxonomy", # taxonom* (OpenAlex automatically stems)
                  "title.search:"+order+",abstract.search:taxonomy",
                  "title.search:taxonomy,abstract.search:"+order,
                  "abstract.search:"+order+" taxonomy",

                  # concepts
                  "title.search:"+order+",concepts.id:C58642233", # taxonomy
                  "abstract.search:"+order+",concepts.id:C58642233",

                  "title.search:"+order+",concepts.id:C71640776", # taxon
                  "abstract.search:"+order+",concepts.id:C71640776",

                  "title.search:"+order+",concepts.id:C2779356329", # checklist
                  "abstract.search:"+order+",concepts.id:C2779356329",
                 ]:
            articles = search_2011_2020(query)
            results.append(articles)
    
    # combine results and remove duplicates
    order_articles = pd.concat(results, ignore_index=True).drop_duplicates(subset="id", ignore_index=True)
    order_articles["order"] = order
    insect_articles = pd.concat([insect_articles, order_articles])
    
    end=time.time()
    print(order + " done in "+str(end-start)+" seconds")

insect_articles

Coleoptera done in 177.8367350101471 seconds
Hemiptera done in 96.45085620880127 seconds
Diptera done in 113.07567143440247 seconds
Lepidoptera done in 81.12866353988647 seconds
Orthoptera done in 40.846145153045654 seconds
Odonata done in 28.75708031654358 seconds
Blattodea done in 20.415952444076538 seconds
Ephemeroptera done in 28.3929705619812 seconds
Psocodea done in 17.617497205734253 seconds
Grylloblattodea done in 15.492295026779175 seconds
Neuroptera done in 23.86776900291443 seconds
Mecoptera done in 19.07567000389099 seconds
Trichoptera done in 34.25717878341675 seconds
Plecoptera done in 26.19587469100952 seconds
Dermaptera done in 17.666473150253296 seconds
Mantodea done in 20.767014503479004 seconds
Siphonaptera done in 19.379395246505737 seconds
Strepsiptera done in 17.385285139083862 seconds
Embioptera done in 16.830060720443726 seconds
Hymenoptera done in 106.68805193901062 seconds
Phasmida done in 15.820414781570435 seconds
Raphidioptera done in 16.1629421710968 secon

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,primary_location,host_venue,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,order
0,https://openalex.org/W2075105050,https://doi.org/10.3897/zookeys.186.2947,New species and distributional records of Aleo...,New species and distributional records of Aleo...,186.24031,2012,2012-04-26,{'openalex': 'https://openalex.org/W2075105050...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,"[https://openalex.org/W239374901, https://open...","[https://openalex.org/W1994200612, https://ope...",https://api.openalex.org/works/W2075105050/ngrams,"{'The': [0, 118, 155], 'Aleocharinae': [1, 112...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2023-02-26T14:04:17.683820,2016-06-24,,Coleoptera
1,https://openalex.org/W2129944008,https://doi.org/10.11646/zootaxa.2883.1.2,New species and new records of mites of the fa...,New species and new records of mites of the fa...,184.40216,2011,2011-05-19,{'openalex': 'https://openalex.org/W2129944008...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S171471881', 'iss...",...,"[https://openalex.org/W296414054, https://open...","[https://openalex.org/W2123082750, https://ope...",https://api.openalex.org/works/W2129944008/ngrams,"{'We': [0], 'report': [1], 'on': [2], 'a': [3,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-25T19:35:22.574749,2016-06-24,,Coleoptera
2,https://openalex.org/W2048741281,https://doi.org/10.1016/j.cretres.2011.10.010,"Prosolierius, a new mid-Cretaceous genus of So...","Prosolierius, a new mid-Cretaceous genus of So...",163.62822,2012,2012-04-01,{'openalex': 'https://openalex.org/W2048741281...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S48852984', 'issn...",...,"[https://openalex.org/W121843087, https://open...","[https://openalex.org/W2101287629, https://ope...",https://api.openalex.org/works/W2048741281/ngrams,"{'Investigation': [0], 'of': [1, 17, 27, 34, 3...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-03-01T15:19:24.106579,2016-06-24,,Coleoptera
3,https://openalex.org/W2516041128,https://doi.org/10.3897/zookeys.610.9361,Twelve new species and fifty-three new provinc...,Twelve new species and fifty-three new provinc...,158.00716,2016,2016-08-11,{'openalex': 'https://openalex.org/W2516041128...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,"[https://openalex.org/W1964889809, https://ope...","[https://openalex.org/W1992974445, https://ope...",https://api.openalex.org/works/W2516041128/ngrams,"{'One': [0], 'hundred': [1], 'twenty': [2], 's...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 10}, {'year'...",2023-02-26T08:04:05.003443,2016-09-16,,Coleoptera
4,https://openalex.org/W2134062392,https://doi.org/10.1603/an10136,A New Species of <i>Laricobius</i> (Coleoptera...,A New Species of <i>Laricobius</i> (Coleoptera...,148.71063,2011,2011-05-09,{'openalex': 'https://openalex.org/W2134062392...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S79992667', 'issn...",...,"[https://openalex.org/W1968197856, https://ope...","[https://openalex.org/W1974991915, https://ope...",https://api.openalex.org/works/W2134062392/ngrams,"{'Abstract': [0], 'Laricobius': [1, 108, 111, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2023-02-12T16:50:31.874307,2016-06-24,,Coleoptera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24,https://openalex.org/W2300829245,https://doi.org/10.1146/annurev-ento-010715-02...,Structure and Evolution of Insect Sperm: New I...,Structure and Evolution of Insect Sperm: New I...,341.63333,2016,2016-03-16,{'openalex': 'https://openalex.org/W2300829245...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S92576693', 'issn...",...,"[https://openalex.org/W607865467, https://open...","[https://openalex.org/W154122946, https://open...",https://api.openalex.org/works/W2300829245/ngrams,"{'This': [0], 'comprehensive': [1], 'review': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-02-27T02:07:49.818221,2016-06-24,,Zoraptera
25,https://openalex.org/W2540536377,https://doi.org/10.1038/srep36175,Molecular phylogeny of Polyneoptera (Insecta) ...,Molecular phylogeny of Polyneoptera (Insecta) ...,288.42947,2016,2016-10-26,{'openalex': 'https://openalex.org/W2540536377...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S196734849', 'iss...",...,"[https://openalex.org/W1557992090, https://ope...","[https://openalex.org/W1994597060, https://ope...",https://api.openalex.org/works/W2540536377/ngrams,"{'Abstract': [0], 'The': [1], 'Polyneoptera': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-02-12T20:51:02.779114,2016-11-04,,Zoraptera
26,https://openalex.org/W2792039220,https://doi.org/10.7717/peerj.5126,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,118.58033,2018,2018-06-26,{'openalex': 'https://openalex.org/W2792039220...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S1983995261', 'is...",...,"[https://openalex.org/W1571552535, https://ope...","[https://openalex.org/W1999884409, https://ope...",https://api.openalex.org/works/W2792039220/ngrams,"{'Metabarcoding': [0], 'is': [1, 179], 'a': [2...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-02-26T14:22:56.189321,2018-03-29,,Zoraptera
27,https://openalex.org/W4243684135,https://doi.org/10.7287/peerj.preprints.26662,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,,2018,2018-03-12,{'openalex': 'https://openalex.org/W4243684135...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': None, 'issn_l': None, 'issn': None, 'di...",...,[],"[https://openalex.org/W1070900, https://openal...",https://api.openalex.org/works/W4243684135/ngrams,"{'Metabarcoding': [0], 'is': [1, 176], 'a': [2...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-12T15:01:27.714270,2022-05-12,,Zoraptera


In [12]:
insect_articles.to_pickle("./data/RLIT_method_openalex_all_insect_articles.pkl")

In [13]:
insect_eu_articles = filter_eu_articles(insect_articles)
insect_eu_articles

Unnamed: 0,id,doi,title,display_name,relevance_score,publication_year,publication_date,ids,primary_location,host_venue,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,order
0,https://openalex.org/W2075105050,https://doi.org/10.3897/zookeys.186.2947,New species and distributional records of Aleo...,New species and distributional records of Aleo...,186.24031,2012,2012-04-26,{'openalex': 'https://openalex.org/W2075105050...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,"[https://openalex.org/W239374901, https://open...","[https://openalex.org/W1994200612, https://ope...",https://api.openalex.org/works/W2075105050/ngrams,"{'The': [0, 118, 155], 'Aleocharinae': [1, 112...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 1}, {'year':...",2023-02-26T14:04:17.683820,2016-06-24,,Coleoptera
1,https://openalex.org/W2074050863,https://doi.org/10.3897/zookeys.250.3715,Introduction of the Exocelina ekari-group with...,Introduction of the Exocelina ekari-group with...,134.67500,2012,2012-12-13,{'openalex': 'https://openalex.org/W2074050863...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,"[https://openalex.org/W283890424, https://open...","[https://openalex.org/W2074050863, https://ope...",https://api.openalex.org/works/W2074050863/ngrams,"{'The': [0, 23, 139], 'Exocelina': [1, 42, 46,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-02-26T10:06:37.573963,2016-06-24,,Coleoptera
2,https://openalex.org/W2124627356,https://doi.org/10.3161/000345411x622525,A New Species ofHenosepilachnaLi (Coleoptera: ...,A New Species ofHenosepilachnaLi (Coleoptera: ...,118.18758,2011,2011-12-01,{'openalex': 'https://openalex.org/W2124627356...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S24891482', 'issn...",...,"[https://openalex.org/W252341036, https://open...","[https://openalex.org/W1975229913, https://ope...",https://api.openalex.org/works/W2124627356/ngrams,"{'Abstract.': [0], 'Henosepilachna': [1, 14], ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 1}, {'year':...",2023-02-26T10:53:50.702070,2016-06-24,,Coleoptera
3,https://openalex.org/W2470267224,https://doi.org/10.1017/jpa.2016.51,New species from Late Cretaceous New Jersey am...,New species from Late Cretaceous New Jersey am...,113.15040,2016,2016-08-23,{'openalex': 'https://openalex.org/W2470267224...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S143791635', 'iss...",...,"[https://openalex.org/W1492713357, https://ope...","[https://openalex.org/W1581826312, https://ope...",https://api.openalex.org/works/W2470267224/ngrams,"{'Abstract': [0], 'A': [1], 'new': [2, 105], '...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-25T03:53:00.273936,2016-07-22,,Coleoptera
4,https://openalex.org/W2297951136,https://doi.org/10.3897/zookeys.572.6763,A contribution to the knowledge of the mountai...,A contribution to the knowledge of the mountai...,109.41350,2016,2016-03-15,{'openalex': 'https://openalex.org/W2297951136...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S199213172', 'iss...",...,"[https://openalex.org/W1539113718, https://ope...","[https://openalex.org/W1929083957, https://ope...",https://api.openalex.org/works/W2297951136/ngrams,"{'Recent': [0], 'intensive': [1], 'samplings':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 3}, {'year':...",2023-02-26T21:25:54.445025,2016-06-24,,Coleoptera
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,https://openalex.org/W3000603933,https://doi.org/10.3390/insects11010051,Molecular Phylogeny and Infraordinal Classific...,Molecular Phylogeny and Infraordinal Classific...,115.29168,2020,2020-01-12,{'openalex': 'https://openalex.org/W3000603933...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S28057480', 'issn...",...,"[https://openalex.org/W1480665129, https://ope...","[https://openalex.org/W1554287491, https://ope...",https://api.openalex.org/works/W3000603933/ngrams,"{'Zoraptera': [0, 31, 117], 'is': [1], 'a': [2...",https://api.openalex.org/works?filter=cites:W3...,"[{'year': 2023, 'cited_by_count': 2}, {'year':...",2023-02-19T17:11:22.476385,2020-01-23,,Zoraptera
8120,https://openalex.org/W2300829245,https://doi.org/10.1146/annurev-ento-010715-02...,Structure and Evolution of Insect Sperm: New I...,Structure and Evolution of Insect Sperm: New I...,341.63333,2016,2016-03-16,{'openalex': 'https://openalex.org/W2300829245...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S92576693', 'issn...",...,"[https://openalex.org/W607865467, https://open...","[https://openalex.org/W154122946, https://open...",https://api.openalex.org/works/W2300829245/ngrams,"{'This': [0], 'comprehensive': [1], 'review': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-02-27T02:07:49.818221,2016-06-24,,Zoraptera
8121,https://openalex.org/W2792039220,https://doi.org/10.7717/peerj.5126,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,118.58033,2018,2018-06-26,{'openalex': 'https://openalex.org/W2792039220...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S1983995261', 'is...",...,"[https://openalex.org/W1571552535, https://ope...","[https://openalex.org/W1999884409, https://ope...",https://api.openalex.org/works/W2792039220/ngrams,"{'Metabarcoding': [0], 'is': [1, 179], 'a': [2...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2023, 'cited_by_count': 1}, {'year':...",2023-02-26T14:22:56.189321,2018-03-29,,Zoraptera
8122,https://openalex.org/W4243684135,https://doi.org/10.7287/peerj.preprints.26662,A reference cytochrome c oxidase subunit I dat...,A reference cytochrome c oxidase subunit I dat...,,2018,2018-03-12,{'openalex': 'https://openalex.org/W4243684135...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': None, 'issn_l': None, 'issn': None, 'di...",...,[],"[https://openalex.org/W1070900, https://openal...",https://api.openalex.org/works/W4243684135/ngrams,"{'Metabarcoding': [0], 'is': [1, 176], 'a': [2...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-12T15:01:27.714270,2022-05-12,,Zoraptera


In [14]:
insect_eu_articles.to_pickle("./data/RLIT_method_openalex_EU27_insect_articles.pkl")

A Red List Score was calculated for every insect order by counting the number of articles found for the order and calculating how many articles there are per 100 species in the order (later in a spreadsheet):

    (N_pubs / N_species) x 100

In [15]:
# how many articles per order?
for order in insect_orders:
    print(order + ": " 
          + str(len(insect_eu_articles[insect_eu_articles["order"]==order].index)) 
          + " articles")

Coleoptera: 2499 articles
Hemiptera: 715 articles
Diptera: 1351 articles
Lepidoptera: 963 articles
Orthoptera: 287 articles
Odonata: 179 articles
Blattodea: 35 articles
Ephemeroptera: 116 articles
Psocodea: 18 articles
Grylloblattodea: 5 articles
Neuroptera: 94 articles
Mecoptera: 19 articles
Trichoptera: 186 articles
Plecoptera: 114 articles
Dermaptera: 21 articles
Mantodea: 43 articles
Siphonaptera: 30 articles
Strepsiptera: 14 articles
Embioptera: 6 articles
Hymenoptera: 1277 articles
Phasmida: 9 articles
Raphidioptera: 11 articles
Isoptera: 35 articles
Megaloptera: 13 articles
Thysanoptera: 36 articles
Zygentoma: 15 articles
Mantophasmatodea: 9 articles
Archaeognatha: 10 articles
Zoraptera: 14 articles


### 1.1.2. Starting from taxonomic journals
Since we want to find taxonomists for all animals, fungi and plants, searching for each order becomes unwieldy and potentially contentious since determining which orders should be searched could be difficult. Even in the insect example, Isoptera for example is often seen as an infraorder. Moreover, different clades may be studied (and named in articles) at different taxonomic levels, for example the family level is more important for botanists than the order. Even within insect taxonomy, Coleoptera and many other orders are far too large and expertise must be studied at lower clades.

Instead, we start from a list of taxonomic journals an retrieve their articles, then filter these articles for relevant content.

A list of journals was found by searching OpenAlex for sources with taxonomy as an associated concept, and by searching Wikidata for journals (instance of = scientific journal OR academic journal) whose main subject or field of study is taxonomy, biological nomenclature, phylogenetics, or related concepts.  

In [16]:
# search wikidata for journals

query = build_query_subjects(["Q8269924", # taxonomy
                              "Q11398", # biological classification
                              "Q1138178", # plant taxonomy
                              "Q1469725", # animal taxonomy
                              "Q3516404", # systematics
                              "Q522190", # biological nomenclature
                              "Q3310776", # botanical nomenclature
                              "Q3343211"]) # zoological nomenclature
query2 = build_query_subjects(["Q171184", # phylogenetics
                               "Q115135896"]) # animal phylogeny

results_df = get_sparql_results(query)
results_df2 = get_sparql_results(query2)

wd_journals = pd.concat([results_df, results_df2], ignore_index=True)
wd_journals

Unnamed: 0,item,issn_l,issn,itemLabel
0,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '1313-2970'}","{'type': 'literal', 'value': '1313-2970'}","{'type': 'literal', 'value': 'Q219980'}"
1,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '1313-2970'}","{'type': 'literal', 'value': '1313-2989'}","{'type': 'literal', 'value': 'Q219980'}"
2,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '1759-6831'}","{'type': 'literal', 'value': '1674-4918'}","{'type': 'literal', 'value': 'Q15733644'}"
3,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '1759-6831'}","{'type': 'literal', 'value': '1759-6831'}","{'type': 'literal', 'value': 'Q15733644'}"
4,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '2118-9773'}","{'type': 'literal', 'value': '2118-9773'}","{'type': 'literal', 'value': 'Q21386186'}"
5,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '1175-5326'}","{'type': 'literal', 'value': '1175-5326'}","{'type': 'literal', 'value': 'Q220370'}"
6,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '1175-5326'}","{'type': 'literal', 'value': '1175-5334'}","{'type': 'literal', 'value': 'Q220370'}"
7,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '0367-5041'}","{'type': 'literal', 'value': '0367-5041'}","{'type': 'literal', 'value': 'Q94579799'}"
8,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '0367-5041'}","{'type': 'literal', 'value': '1989-953X'}","{'type': 'literal', 'value': 'Q94579799'}"
9,"{'type': 'uri', 'value': 'http://www.wikidata....","{'type': 'literal', 'value': '2399-3421'}","{'type': 'literal', 'value': '2399-3421'}","{'type': 'literal', 'value': 'Q58315628'}"


In [17]:
# get ISSN-L of each journal
# a journal may have several ISSNs but only one ISSN-L, which is always the same as one of its ISSNs
issn_wikidata = [] 
for journal in wd_journals["issn_l"]:
    issn_wikidata.append(journal["value"])
    
issn_wikidata

['1313-2970',
 '1313-2970',
 '1759-6831',
 '1759-6831',
 '2118-9773',
 '1175-5326',
 '1175-5326',
 '0367-5041',
 '0367-5041',
 '2399-3421',
 '1063-5157',
 '1063-5157',
 '0511-9618',
 '0511-9618',
 '0075-5974',
 '0075-5974',
 '1435-1935',
 '1435-1935',
 '1346-7565',
 '1346-7565',
 '1225-8318',
 '1225-8318',
 '1394-5130',
 '0132-8069',
 '0307-6970',
 '0307-6970',
 '1179-7649',
 '1179-7649',
 '1030-1887',
 '1030-1887',
 '0085-4417',
 '0085-4417',
 '1055-7903',
 '1055-7903',
 '2373-7697',
 '1179-3155',
 '1179-3155',
 '1314-2003',
 '1314-2003',
 '2234-6953',
 '2234-6953',
 '0300-3256',
 '0300-3256',
 '1314-2488',
 '1314-2488',
 '0007-5167',
 '0007-5167',
 '0376-4443',
 '2703-3082',
 '2703-3082',
 '0004-8038',
 '0004-8038',
 '0077-1813',
 '0077-1813',
 '1522-8339',
 '1522-8339',
 '2095-6827',
 '1055-7903',
 '1055-7903']

In [18]:
# the journal Taxon could not be found this way, but it is an important player in taxonomy

issn_wikidata.append("0040-0262") # Taxon

In [19]:
# search OpenAlex for journals

tax_journals = requests.get("https://api.openalex.org/sources?filter=concepts.id:C58642233&per_page=200")
tax_journals = pd.DataFrame(tax_journals.json()["results"])
tax_journals

Unnamed: 0,id,issn_l,issn,display_name,publisher,host_organization,works_count,cited_by_count,is_oa,is_in_doaj,...,country_code,societies,alternate_titles,abbreviated_title,type,x_concepts,counts_by_year,works_api_url,updated_date,created_date
0,https://openalex.org/S171471881,1175-5326,"[1175-5334, 1175-5326]",Zootaxa,Q15088586,https://openalex.org/P4310321855,41241,187381,False,False,...,NZ,[],[],,journal,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'year': 2023, 'works_count': 304, 'cited_by_...",https://api.openalex.org/works?filter=host_ven...,2023-03-01T03:49:41.957930,2016-06-24
1,https://openalex.org/S100787194,1179-3155,"[1179-3163, 1179-3155]",Phytotaxa,Q15088586,https://openalex.org/P4310321855,11109,37458,False,False,...,NZ,[],[],,journal,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'year': 2023, 'works_count': 145, 'cited_by_...",https://api.openalex.org/works?filter=host_ven...,2023-03-02T19:43:02.101057,2016-06-24
2,https://openalex.org/S199213172,1313-2970,"[1313-2970, 1313-2989]",ZooKeys,Pensoft Publishers,https://openalex.org/P4310321056,6697,42210,True,True,...,BG,[],[],,journal,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'year': 2023, 'works_count': 71, 'cited_by_c...",https://api.openalex.org/works?filter=host_ven...,2023-03-01T15:35:58.151496,2016-06-24
3,https://openalex.org/S44318362,0093-4666,"[0093-4666, 2154-8889]",Mycotaxon,Mycotaxon Publications,https://openalex.org/P4310322165,5294,20084,False,False,...,US,[],[],,journal,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'year': 2023, 'works_count': 0, 'cited_by_co...",https://api.openalex.org/works?filter=host_ven...,2023-03-02T15:35:43.210608,2016-06-24
4,https://openalex.org/S2764437286,0007-5167,"[2057-0570, 0007-5167]",The Bulletin of zoological nomenclature,International Trust for Zoological Nomenclature,https://openalex.org/P4310322357,4298,5865,False,False,...,GB,[],[],,journal,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'year': 2023, 'works_count': 0, 'cited_by_co...",https://api.openalex.org/works?filter=host_ven...,2023-03-01T16:22:22.881280,2017-10-27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,https://openalex.org/S4306489654,,,Śląski Uniwersytet Medyczny w Katowicach/Medic...,Śląski Uniwersytet Medyczny w Katowicach/Medic...,,2,2,,,...,,[],[],,ebook platform,"[{'id': 'https://openalex.org/C18903297', 'wik...","[{'year': 2021, 'works_count': 2, 'cited_by_co...",https://api.openalex.org/works?filter=host_ven...,2023-02-24T03:53:57.865660,2022-10-17
128,https://openalex.org/S4306490621,,,VNIIP – FSC VIEV eBooks,VNIIP – FSC VIEV,,2,2,,,...,,[],[],,ebook platform,"[{'id': 'https://openalex.org/C86803240', 'wik...","[{'year': 2022, 'works_count': 2, 'cited_by_co...",https://api.openalex.org/works?filter=host_ven...,2023-02-28T14:17:10.209042,2022-10-17
129,https://openalex.org/S936918658,1015-8243,[1015-8243],Priamus,Centre for Entomological Studies Ankara,https://openalex.org/P4310318387,2,2,,,...,TR,[],[],,journal,"[{'id': 'https://openalex.org/C58642233', 'wik...","[{'year': 2013, 'works_count': 0, 'cited_by_co...",https://api.openalex.org/works?filter=host_ven...,2023-02-08T07:30:24.454571,2016-06-24
130,https://openalex.org/S4306485908,,,All-Russian Scientific Research Institute for ...,All-Russian Scientific Research Institute for ...,,1,1,,,...,,[],[],,ebook platform,"[{'id': 'https://openalex.org/C18903297', 'wik...","[{'year': 2021, 'works_count': 1, 'cited_by_co...",https://api.openalex.org/works?filter=host_ven...,2023-02-07T19:25:01.846105,2022-10-17


In [20]:
# WikiData and OpenAlex journals all in one neat list by ISSN-L

issns = list(filter(None, tax_journals["issn_l"])) # remove None values
issns.extend(issn_wikidata)
issns = set(issns) # remove duplicates
len(issns)

129

In [21]:
# get all articles from taxonomic journals

articles = []

for issn in issns:
    start = time.time()
    
    # OpenAlex can't search ISSN-L, but ISSN works just as well
    journal_articles = request_pubs_openalex("host_venue.issn:", issn, other_id=True)
    articles.append(journal_articles)
    
    end = time.time()
    print(issn+" done in "+str(end-start)+" seconds")

articles = pd.concat(articles)
articles

1179-7649 done in 1.1741840839385986 seconds
0164-7954 done in 4.669461011886597 seconds
2667-5684 done in 0.8384706974029541 seconds
1816-8396 done in 0.6536159515380859 seconds
2363-7684 done in 0.35227251052856445 seconds
2628-2429 done in 0.797914981842041 seconds
1313-2970 done in 19.964337825775146 seconds
1394-5130 done in 0.35872697830200195 seconds
0181-1584 done in 2.0823800563812256 seconds
2234-6953 done in 1.4130210876464844 seconds
1522-8339 done in 1.2734253406524658 seconds
0367-5041 done in 1.4215292930603027 seconds
0313-4245 done in 0.3930220603942871 seconds
0160-239X done in 0.5379266738891602 seconds
0737-8211 done in 0.6530144214630127 seconds
1063-5157 done in 9.161934614181519 seconds
0004-8038 done in 5.6893532276153564 seconds
0019-5227 done in 0.3684813976287842 seconds
0165-5752 done in 3.0998008251190186 seconds
1676-6180 done in 0.749126672744751 seconds
0027-4100 done in 0.8923940658569336 seconds
0749-6737 done in 1.8756358623504639 seconds
0373-2967 do

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,primary_location,host_venue,type,...,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2098169155,https://doi.org/10.11646/bionomina.7.1.1,<strong>Proposal of an integrated framework of...,<strong>Proposal of an integrated framework of...,2014,2014-06-06,{'openalex': 'https://openalex.org/W2098169155...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...","[https://openalex.org/W37250742, https://opena...","[https://openalex.org/W1281026, https://openal...",https://api.openalex.org/works/W2098169155/ngrams,"{'An': [0], 'integrated': [1], 'framework': [2...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2020, 'cited_by_count': 27}, {'year'...",2023-02-12T18:02:01.812245,2016-06-24,
1,https://openalex.org/W2598914299,https://doi.org/10.11646/bionomina.11.1.1,<p><strong>The nomenclatural status of <em>Hys...,<p><strong>The nomenclatural status of <em>Hys...,2017,2017-03-04,{'openalex': 'https://openalex.org/W2598914299...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2598914299/ngrams,"{'Dozens': [0], 'of': [1, 18, 43, 45, 64, 72, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-23T21:36:09.799334,2017-04-07,
2,https://openalex.org/W2555892112,https://doi.org/10.11646/bionomina.10.1.1,"&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...","&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...",2016,2016-11-10,{'openalex': 'https://openalex.org/W2555892112...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...","[https://openalex.org/W6974918, https://openal...","[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2555892112/ngrams,"{'The': [0, 25, 241, 583], 'species': [1, 30, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-21T12:07:31.454306,2016-11-30,
3,https://openalex.org/W2597855587,https://doi.org/10.11646/bionomina.12.1.2,<strong>The need for reference specimens in zo...,<strong>The need for reference specimens in zo...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2597855587...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W2315760709, https://ope...",https://api.openalex.org/works/W2597855587/ngrams,"{'A': [0], 'debate': [1], 'is': [2, 9, 94, 101...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-03-02T02:34:20.414492,2017-04-07,
4,https://openalex.org/W2602324962,https://doi.org/10.11646/bionomina.12.1.7,<strong>Loosing the connection between the obs...,<strong>Loosing the connection between the obs...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2602324962...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[https://openalex.org/W2143318053],"[https://openalex.org/W71136975, https://opena...",https://api.openalex.org/works/W2602324962/ngrams,"{'The': [0], 'original': [1, 156], 'efforts': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 5}, {'year':...",2023-02-13T09:01:42.183944,2017-04-07,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314,https://openalex.org/W4313308490,https://doi.org/10.31610/zsr/2022.31.2.304,A review of the Ponto-Caspian genus Caspiohydr...,A review of the Ponto-Caspian genus Caspiohydr...,2022,2022-12-29,{'openalex': 'https://openalex.org/W4313308490...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1981266307, https://ope...","[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W4313308490/ngrams,"{'The': [0, 24, 67, 116], 'paper': [1], 'revie...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-18T23:22:01.499511,2023-01-06,
315,https://openalex.org/W4313308504,https://doi.org/10.31610/zsr/2022.31.2.289,Synopsis of the genus Greeffiella (Nematoda: D...,Synopsis of the genus Greeffiella (Nematoda: D...,2022,2022-12-29,{'openalex': 'https://openalex.org/W4313308504...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1976605109, https://ope...","[https://openalex.org/W809671639, https://open...",https://api.openalex.org/works/W4313308504/ngrams,"{'The': [0, 61], 'genus': [1, 62], 'Greeffiell...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-18T23:22:11.961169,2023-01-06,
316,https://openalex.org/W4313308738,https://doi.org/10.31610/zsr/2022.31.2.332,"Takahashia japonica (Homoptera: Coccinea), a n...","Takahashia japonica (Homoptera: Coccinea), a n...",2022,2022-12-30,{'openalex': 'https://openalex.org/W4313308738...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2504258836, https://ope...","[https://openalex.org/W1988211463, https://ope...",https://api.openalex.org/works/W4313308738/ngrams,"{'The': [0, 33], 'adventive': [1], 'species': ...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-13T09:03:52.439472,2023-01-06,
317,https://openalex.org/W4313309051,https://doi.org/10.31610/zsr/2022.31.2.329,First record of Halyomorpha halys (Heteroptera...,First record of Halyomorpha halys (Heteroptera...,2022,2022-12-30,{'openalex': 'https://openalex.org/W4313309051...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2489982840, https://ope...","[https://openalex.org/W1993548424, https://ope...",https://api.openalex.org/works/W4313309051/ngrams,"{'The': [0], 'invasive': [1], 'stink': [2], 'b...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-13T09:03:52.456178,2023-01-06,


In [22]:
articles.to_pickle("./data/articles_global_with_taxonomy_concept_in_journal.pkl")

In [23]:
# filter for EU articles
eu_articles = filter_eu_articles(articles)
eu_articles

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,primary_location,host_venue,type,...,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2598914299,https://doi.org/10.11646/bionomina.11.1.1,<p><strong>The nomenclatural status of <em>Hys...,<p><strong>The nomenclatural status of <em>Hys...,2017,2017-03-04,{'openalex': 'https://openalex.org/W2598914299...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2598914299/ngrams,"{'Dozens': [0], 'of': [1, 18, 43, 45, 64, 72, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-23T21:36:09.799334,2017-04-07,
1,https://openalex.org/W2555892112,https://doi.org/10.11646/bionomina.10.1.1,"&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...","&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...",2016,2016-11-10,{'openalex': 'https://openalex.org/W2555892112...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...","[https://openalex.org/W6974918, https://openal...","[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2555892112/ngrams,"{'The': [0, 25, 241, 583], 'species': [1, 30, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-21T12:07:31.454306,2016-11-30,
2,https://openalex.org/W2597855587,https://doi.org/10.11646/bionomina.12.1.2,<strong>The need for reference specimens in zo...,<strong>The need for reference specimens in zo...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2597855587...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W2315760709, https://ope...",https://api.openalex.org/works/W2597855587/ngrams,"{'A': [0], 'debate': [1], 'is': [2, 9, 94, 101...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-03-02T02:34:20.414492,2017-04-07,
3,https://openalex.org/W2602324962,https://doi.org/10.11646/bionomina.12.1.7,<strong>Loosing the connection between the obs...,<strong>Loosing the connection between the obs...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2602324962...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[https://openalex.org/W2143318053],"[https://openalex.org/W71136975, https://opena...",https://api.openalex.org/works/W2602324962/ngrams,"{'The': [0], 'original': [1, 156], 'efforts': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 5}, {'year':...",2023-02-13T09:01:42.183944,2017-04-07,
4,https://openalex.org/W2611147945,https://doi.org/10.11646/bionomina.12.1.8,&lt;strong&gt;Diagnoses in zoological taxonomy...,&lt;strong&gt;Diagnoses in zoological taxonomy...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2611147945...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W1977074375, https://ope...",https://api.openalex.org/works/W2611147945/ngrams,"{'The': [0, 26], 'use': [1, 44, 97], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2023-02-27T23:25:06.093812,2017-05-12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17279,https://openalex.org/W3202031023,https://doi.org/10.31610/zsr/2021.30.2.190,New species and new records of cuckoo wasps (H...,New species and new records of cuckoo wasps (H...,2021,2021-10-05,{'openalex': 'https://openalex.org/W3202031023...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2050287949, https://ope...","[https://openalex.org/W1488927740, https://ope...",https://api.openalex.org/works/W3202031023/ngrams,"{'A': [0], 'contribution': [1], 'to': [2, 162]...",https://api.openalex.org/works?filter=cites:W3...,[],2023-02-25T03:00:06.051100,2021-10-11,
17280,https://openalex.org/W4226104797,https://doi.org/10.31610/zsr/2022.31.1.27,A new species Chrysotus hubenovi and new data ...,A new species Chrysotus hubenovi and new data ...,2022,2022-04-04,{'openalex': 'https://openalex.org/W4226104797...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2063243637, https://ope...","[https://openalex.org/W344369735, https://open...",https://api.openalex.org/works/W4226104797/ngrams,"{'Chrysotus': [0], 'hubenovi': [1], 'sp.': [2]...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-19T10:37:59.326525,2022-05-05,
17281,https://openalex.org/W4302015794,https://doi.org/10.31610/zsr/2022.31.2.182,New records of Bibionidae (Diptera) from Azerb...,New records of Bibionidae (Diptera) from Azerb...,2022,2022-10-04,{'openalex': 'https://openalex.org/W4302015794...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1977484214, https://ope...","[https://openalex.org/W1965469033, https://ope...",https://api.openalex.org/works/W4302015794/ngrams,"{'New': [0], 'records': [1], 'of': [2, 46], 'e...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-25T07:14:09.707666,2022-10-06,
17282,https://openalex.org/W4309431963,https://doi.org/10.31610/zsr/2022.31.2.195,Lepidochitona granpoderi sp. nov. (Mollusca: P...,Lepidochitona granpoderi sp. nov. (Mollusca: P...,2022,2022-11-20,{'openalex': 'https://openalex.org/W4309431963...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1982827023, https://ope...","[https://openalex.org/W2000463452, https://ope...",https://api.openalex.org/works/W4309431963/ngrams,"{'A': [0], 'new': [1], 'chiton': [2], 'species...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-13T07:04:38.051842,2022-11-27,


In [24]:
eu_articles.to_pickle("./data/articles_EU27_with_taxonomy_concept_in_journal.pkl")

### 1.1.3. Filter journal articles for accuracy

Not all articles in taxonomic journals are strictly "taxonomical" (describing new species). Some are about methodology or further research into specific organisms. To remedy this, we searched our list of articles for terms similar to the WoS comparison search terms, adding "nov" and dropping the order name. 

In [28]:
# filter all found articles
filtered_articles = query_articles(articles)
filtered_articles

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,primary_location,host_venue,type,...,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2098169155,https://doi.org/10.11646/bionomina.7.1.1,<strong>Proposal of an integrated framework of...,<strong>Proposal of an integrated framework of...,2014,2014-06-06,{'openalex': 'https://openalex.org/W2098169155...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...","[https://openalex.org/W37250742, https://opena...","[https://openalex.org/W1281026, https://openal...",https://api.openalex.org/works/W2098169155/ngrams,"{'An': [0], 'integrated': [1], 'framework': [2...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2020, 'cited_by_count': 27}, {'year'...",2023-02-12T18:02:01.812245,2016-06-24,
1,https://openalex.org/W2598914299,https://doi.org/10.11646/bionomina.11.1.1,<p><strong>The nomenclatural status of <em>Hys...,<p><strong>The nomenclatural status of <em>Hys...,2017,2017-03-04,{'openalex': 'https://openalex.org/W2598914299...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2598914299/ngrams,"{'Dozens': [0], 'of': [1, 18, 43, 45, 64, 72, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-23T21:36:09.799334,2017-04-07,
2,https://openalex.org/W2555892112,https://doi.org/10.11646/bionomina.10.1.1,"&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...","&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...",2016,2016-11-10,{'openalex': 'https://openalex.org/W2555892112...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...","[https://openalex.org/W6974918, https://openal...","[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2555892112/ngrams,"{'The': [0, 25, 241, 583], 'species': [1, 30, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-21T12:07:31.454306,2016-11-30,
3,https://openalex.org/W2597855587,https://doi.org/10.11646/bionomina.12.1.2,<strong>The need for reference specimens in zo...,<strong>The need for reference specimens in zo...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2597855587...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W2315760709, https://ope...",https://api.openalex.org/works/W2597855587/ngrams,"{'A': [0], 'debate': [1], 'is': [2, 9, 94, 101...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-03-02T02:34:20.414492,2017-04-07,
4,https://openalex.org/W2611147945,https://doi.org/10.11646/bionomina.12.1.8,&lt;strong&gt;Diagnoses in zoological taxonomy...,&lt;strong&gt;Diagnoses in zoological taxonomy...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2611147945...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W1977074375, https://ope...",https://api.openalex.org/works/W2611147945/ngrams,"{'The': [0, 26], 'use': [1, 44, 97], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2023-02-27T23:25:06.093812,2017-05-12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50490,https://openalex.org/W4312083256,https://doi.org/10.31610/zsr/2022.31.2.272,A new symbiotic scale worm (Polychaeta: Polyno...,A new symbiotic scale worm (Polychaeta: Polyno...,2022,2022-12-21,{'openalex': 'https://openalex.org/W4312083256...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1977521985, https://ope...","[https://openalex.org/W617456748, https://open...",https://api.openalex.org/works/W4312083256/ngrams,"{'A': [0], 'new': [1, 65], 'scale': [2, 45, 75...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-20T23:24:38.359937,2023-01-04,
50491,https://openalex.org/W4313308490,https://doi.org/10.31610/zsr/2022.31.2.304,A review of the Ponto-Caspian genus Caspiohydr...,A review of the Ponto-Caspian genus Caspiohydr...,2022,2022-12-29,{'openalex': 'https://openalex.org/W4313308490...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1981266307, https://ope...","[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W4313308490/ngrams,"{'The': [0, 24, 67, 116], 'paper': [1], 'revie...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-18T23:22:01.499511,2023-01-06,
50492,https://openalex.org/W4313308504,https://doi.org/10.31610/zsr/2022.31.2.289,Synopsis of the genus Greeffiella (Nematoda: D...,Synopsis of the genus Greeffiella (Nematoda: D...,2022,2022-12-29,{'openalex': 'https://openalex.org/W4313308504...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1976605109, https://ope...","[https://openalex.org/W809671639, https://open...",https://api.openalex.org/works/W4313308504/ngrams,"{'The': [0, 61], 'genus': [1, 62], 'Greeffiell...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-18T23:22:11.961169,2023-01-06,
50493,https://openalex.org/W4313308738,https://doi.org/10.31610/zsr/2022.31.2.332,"Takahashia japonica (Homoptera: Coccinea), a n...","Takahashia japonica (Homoptera: Coccinea), a n...",2022,2022-12-30,{'openalex': 'https://openalex.org/W4313308738...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2504258836, https://ope...","[https://openalex.org/W1988211463, https://ope...",https://api.openalex.org/works/W4313308738/ngrams,"{'The': [0, 33], 'adventive': [1], 'species': ...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-13T09:03:52.439472,2023-01-06,


In [29]:
filtered_articles.to_pickle("./data/keyword_filtered_articles_global_with_taxonomy_concept_in_journal.pkl")

In [30]:
# filter EU27 articles
filtered_eu_articles = query_articles(eu_articles)
filtered_eu_articles

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,primary_location,host_venue,type,...,alternate_host_venues,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2598914299,https://doi.org/10.11646/bionomina.11.1.1,<p><strong>The nomenclatural status of <em>Hys...,<p><strong>The nomenclatural status of <em>Hys...,2017,2017-03-04,{'openalex': 'https://openalex.org/W2598914299...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2598914299/ngrams,"{'Dozens': [0], 'of': [1, 18, 43, 45, 64, 72, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-23T21:36:09.799334,2017-04-07,
1,https://openalex.org/W2555892112,https://doi.org/10.11646/bionomina.10.1.1,"&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...","&lt;p&gt;&lt;strong&gt;Classes, taxa and categ...",2016,2016-11-10,{'openalex': 'https://openalex.org/W2555892112...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...","[https://openalex.org/W6974918, https://openal...","[https://openalex.org/W26558877, https://opena...",https://api.openalex.org/works/W2555892112/ngrams,"{'The': [0, 25, 241, 583], 'species': [1, 30, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-02-21T12:07:31.454306,2016-11-30,
2,https://openalex.org/W2597855587,https://doi.org/10.11646/bionomina.12.1.2,<strong>The need for reference specimens in zo...,<strong>The need for reference specimens in zo...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2597855587...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W2315760709, https://ope...",https://api.openalex.org/works/W2597855587/ngrams,"{'A': [0], 'debate': [1], 'is': [2, 9, 94, 101...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 2}, {'year':...",2023-03-02T02:34:20.414492,2017-04-07,
3,https://openalex.org/W2611147945,https://doi.org/10.11646/bionomina.12.1.8,&lt;strong&gt;Diagnoses in zoological taxonomy...,&lt;strong&gt;Diagnoses in zoological taxonomy...,2017,2017-03-24,{'openalex': 'https://openalex.org/W2611147945...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W1977074375, https://ope...",https://api.openalex.org/works/W2611147945/ngrams,"{'The': [0, 26], 'use': [1, 44, 97], 'of': [2,...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2022, 'cited_by_count': 4}, {'year':...",2023-02-27T23:25:06.093812,2017-05-12,
4,https://openalex.org/W2996133267,https://doi.org/10.11646/bionomina.17.1.1,"<p align=""left""><strong>The Linz <em>Zoocode</...","<p align=""left""><strong>The Linz <em>Zoocode</...",2019,2019-12-17,{'openalex': 'https://openalex.org/W2996133267...,"{'is_oa': True, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S4210230724', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S4210230724', 'd...",[],"[https://openalex.org/W844779971, https://open...",https://api.openalex.org/works/W2996133267/ngrams,"{'In': [0], 'July': [1], '2014,': [2], 'the': ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2021, 'cited_by_count': 5}, {'year':...",2023-02-16T19:26:42.400286,2019-12-26,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12115,https://openalex.org/W3193573934,https://doi.org/10.31610/zsr/2021.30.2.169,First record of the genus Colopterus (Coleopte...,First record of the genus Colopterus (Coleopte...,2021,2021-08-14,{'openalex': 'https://openalex.org/W3193573934...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1886369874, https://ope...","[https://openalex.org/W2090750, https://openal...",https://api.openalex.org/works/W3193573934/ngrams,"{'A': [0], 'sap': [1], 'beetle': [2], 'of': [3...",https://api.openalex.org/works?filter=cites:W3...,[],2023-02-13T06:37:14.733931,2021-08-30,
12116,https://openalex.org/W3202031023,https://doi.org/10.31610/zsr/2021.30.2.190,New species and new records of cuckoo wasps (H...,New species and new records of cuckoo wasps (H...,2021,2021-10-05,{'openalex': 'https://openalex.org/W3202031023...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2050287949, https://ope...","[https://openalex.org/W1488927740, https://ope...",https://api.openalex.org/works/W3202031023/ngrams,"{'A': [0], 'contribution': [1], 'to': [2, 162]...",https://api.openalex.org/works?filter=cites:W3...,[],2023-02-25T03:00:06.051100,2021-10-11,
12117,https://openalex.org/W4226104797,https://doi.org/10.31610/zsr/2022.31.1.27,A new species Chrysotus hubenovi and new data ...,A new species Chrysotus hubenovi and new data ...,2022,2022-04-04,{'openalex': 'https://openalex.org/W4226104797...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W2063243637, https://ope...","[https://openalex.org/W344369735, https://open...",https://api.openalex.org/works/W4226104797/ngrams,"{'Chrysotus': [0], 'hubenovi': [1], 'sp.': [2]...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-19T10:37:59.326525,2022-05-05,
12118,https://openalex.org/W4302015794,https://doi.org/10.31610/zsr/2022.31.2.182,New records of Bibionidae (Diptera) from Azerb...,New records of Bibionidae (Diptera) from Azerb...,2022,2022-10-04,{'openalex': 'https://openalex.org/W4302015794...,"{'is_oa': None, 'landing_page_url': 'https://d...","{'id': 'https://openalex.org/S2764648274', 'is...",journal-article,...,"[{'id': 'https://openalex.org/S2764648274', 'd...","[https://openalex.org/W1977484214, https://ope...","[https://openalex.org/W1965469033, https://ope...",https://api.openalex.org/works/W4302015794/ngrams,"{'New': [0], 'records': [1], 'of': [2, 46], 'e...",https://api.openalex.org/works?filter=cites:W4...,[],2023-02-25T07:14:09.707666,2022-10-06,


In [31]:
filtered_eu_articles.to_pickle("./data/keyword_filtered_articles_EU27_with_taxonomy_concept_in_journal.pkl")