In [1]:
import pandas as pd
import requests
import numpy as np

In [2]:
# GET ALL SOURCES (JOURNALS) FROM OPENALEX API WITH SPECIFIED REQUIREMENTS
def request_institutions(filter_string, email):
    # build query (e-mail included for "polite pool")
    query = "https://api.openalex.org/institutions?per-page=200&filter="+filter_string+"&mailto="+email
            
    # open persistent session to shorten processing time between requests
    s = requests.Session()
    # FIRST PAGE
    sources = s.get(query+"&cursor=*")
    next_sources = sources.json()
    next_cursor = next_sources["meta"]["next_cursor"]
    sources_results = next_sources["results"]
    
    # RETRIEVE ALL PAGES
    while next_sources["meta"]["next_cursor"] != None:
        # get next page with cursor
        next_sources = s.get(query+"&cursor="+next_cursor)
        next_sources = next_sources.json()
        next_cursor = next_sources["meta"]["next_cursor"] # remember next cursor
        sources_results.extend(next_sources["results"])
    
    sources_df = pd.DataFrame.from_dict(sources_results)
    return sources_df

In [3]:
data = pd.read_pickle("../data/processed/authors_disambiguated_truncated.pkl")
data.head()

Unnamed: 0,author_id,author_display_name,author_orcid,inst_id,inst_display_name,species_subject,truncatedName,strippedName,order,kingdom
0,https://openalex.org/A5036634573,N.E. O’Connell,https://orcid.org/0000-0001-6831-2041,https://openalex.org/I126231945,Queen's University Belfast,[],N O’Connell,NEO’Connell,[],[]
1,https://openalex.org/A5020000998,Steffen Handel,,https://openalex.org/I197323543,Johannes Gutenberg University Mainz,"[Sugiyamaella mastotermitis, Papiliotrema odon...",S Handel,SteffenHandel,"[Saccharomycetales, Tremellales, Blattodea]","[Fungi, Animalia]"
2,https://openalex.org/A5111365987,Hans‐Peter Tschorsnig,,https://openalex.org/I2799483237,Staatliches Museum für Naturkunde Stuttgart,"[Billaea irrorata, Billaea atkinsoni, Billaea ...",H Tschorsnig,HansPeterTschorsnig,[Diptera],[Animalia]
3,https://openalex.org/A5056871906,Petra Oražem,,https://openalex.org/I153976015,University of Ljubljana,[],P Oražem,PetraOražem,[],[]
4,https://openalex.org/A5000385579,Zophonı́as O. Jónsson,https://orcid.org/0000-0001-5798-9647,https://openalex.org/I165368041,University of Iceland,"[Buccinum undatum, Salmo trutta]",Z Jónsson,Zophonı́asOJónsson,"[Neogastropoda, Salmoniformes]",[Animalia]


In [4]:
email = input("Enter e-mail address for OpenAlex API: ").strip()
iso_countrycode = input("Enter ISO 3166-1 alpha-2 country code: ").strip()
be_inst = request_institutions("country_code:"+iso_countrycode, email)

Enter e-mail address for OpenAlex API:  quentin.groom@plantentuinmeise.be
Enter ISO 3166-1 alpha-2 country code:  BE


In [5]:
be_inst.head()

Unnamed: 0,id,ror,display_name,country_code,type,type_id,lineage,homepage_url,image_url,image_thumbnail_url,...,associated_institutions,counts_by_year,roles,topics,topic_share,x_concepts,is_super_system,works_api_url,updated_date,created_date
0,https://openalex.org/I99464096,https://ror.org/05f950310,KU Leuven,BE,funder,https://openalex.org/institution-types/funder,[https://openalex.org/I99464096],http://www.kuleuven.be/english,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210142472', 'r...","[{'year': 2025, 'works_count': 4397, 'cited_by...","[{'role': 'institution', 'id': 'https://openal...","[{'id': 'https://openalex.org/T10472', 'displa...","[{'id': 'https://openalex.org/T13064', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2025-07-17T00:52:50.711761,2016-06-24
1,https://openalex.org/I157674565,https://ror.org/00afp2z80,University of Liège,BE,funder,https://openalex.org/institution-types/funder,[https://openalex.org/I157674565],https://www.uliege.be,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I27895137', 'ror...","[{'year': 2025, 'works_count': 1131, 'cited_by...","[{'role': 'institution', 'id': 'https://openal...","[{'id': 'https://openalex.org/T11475', 'displa...","[{'id': 'https://openalex.org/T13152', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2025-07-12T06:50:49.306277,2016-06-24
2,https://openalex.org/I32597200,https://ror.org/00cv9y106,Ghent University,BE,funder,https://openalex.org/institution-types/funder,[https://openalex.org/I32597200],https://www.ugent.be,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210132857', 'r...","[{'year': 2025, 'works_count': 2562, 'cited_by...","[{'role': 'institution', 'id': 'https://openal...","[{'id': 'https://openalex.org/T10299', 'displa...","[{'id': 'https://openalex.org/T12483', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2025-07-16T13:17:56.187709,2016-06-24
3,https://openalex.org/I95674353,https://ror.org/02495e989,UCLouvain,BE,funder,https://openalex.org/institution-types/funder,[https://openalex.org/I95674353],https://uclouvain.be,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210121589', 'r...","[{'year': 2025, 'works_count': 1037, 'cited_by...","[{'role': 'funder', 'id': 'https://openalex.or...","[{'id': 'https://openalex.org/T10048', 'displa...","[{'id': 'https://openalex.org/T13387', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2025-07-14T03:10:38.343412,2016-06-24
4,https://openalex.org/I132053463,https://ror.org/01r9htc13,Université Libre de Bruxelles,BE,funder,https://openalex.org/institution-types/funder,[https://openalex.org/I132053463],https://www.ulb.be,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210139176', 'r...","[{'year': 2025, 'works_count': 1186, 'cited_by...","[{'role': 'funder', 'id': 'https://openalex.or...","[{'id': 'https://openalex.org/T10048', 'displa...","[{'id': 'https://openalex.org/T13458', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2025-07-17T08:49:09.265775,2016-06-24


In [6]:
inst_exp = {}
for inst in be_inst["id"]:
    inst_exp[inst] = []

for row in data.itertuples():
    if row.inst_id in list(be_inst["id"]):
        if type(row.order) is list or type(row.order) is set:
            inst_exp[row.inst_id].extend(row.order)
        else:
            inst_exp[row.inst_id].append(row.order)

In [7]:
inst_exp

{'https://openalex.org/I99464096': ['Diptera',
  'Diptera',
  'Asparagales',
  'Myrtales',
  'Diptera',
  'Asparagales',
  'Orthoptera',
  'Ericales',
  'Myrtales',
  'Asparagales',
  'Myrtales',
  'Hymenoptera',
  'Perciformes',
  'Orthoptera',
  'Poales',
  'Dactylogyridea',
  'Perciformes',
  'Mugiliformes',
  'Clupeiformes',
  'Gentianales',
  'Rodentia',
  'Myrtales',
  'Myrtales',
  'Dactylogyridea',
  'Characiformes',
  'Perciformes',
  'Rhabdocoela',
  'Rodentia',
  'Perciformes',
  'Myrtales',
  'Araneae',
  'Asparagales',
  'Clupeiformes',
  'Dactylogyridea',
  'Orthoptera',
  'Rhabdocoela',
  'Dactylogyridea',
  'Perciformes',
  'Decapoda',
  'Galliformes',
  'Gentianales',
  'Solanales',
  'Sapindales',
  'Diptera',
  'Oxalidales',
  'Ericales'],
 'https://openalex.org/I157674565': ['Peltigerales',
  'Peltigerales',
  'Caliciales',
  'Arctomiales',
  'Lecanorales',
  'Jungermanniales',
  'Orthotrichales',
  'Hypnales',
  'Ophidiiformes',
  'Sapindales',
  'Carnivora',
  'Cy

In [8]:
expertise = pd.DataFrame({"inst_id":inst_exp.keys(), "inst_name":be_inst["display_name"], "orders":inst_exp.values()})
expertise["orders_unique"] = [list(np.unique(x)) for x in expertise["orders"]]
expertise[expertise["orders"].map(lambda d: len(d)) > 0]

Unnamed: 0,inst_id,inst_name,orders,orders_unique
0,https://openalex.org/I99464096,KU Leuven,"[Diptera, Diptera, Asparagales, Myrtales, Dipt...","[Araneae, Asparagales, Characiformes, Clupeifo..."
1,https://openalex.org/I157674565,University of Liège,"[Peltigerales, Peltigerales, Caliciales, Arcto...","[Arctomiales, Caliciales, Carnivora, Cyprinifo..."
2,https://openalex.org/I32597200,Ghent University,"[Gentianales, Rhabditida, Zingiberales, Doryla...","[Agaricales, Amphisphaeriales, Anura, Apiales,..."
3,https://openalex.org/I95674353,UCLouvain,"[Lepidoptera, Hemiptera, Hemiptera, Agaricales...","[Acrospermales, Agaricales, Amphisphaeriales, ..."
4,https://openalex.org/I132053463,Université Libre de Bruxelles,"[Magnoliales, Rosales, Malpighiales, Sapindale...","[Alismatales, Amphipoda, Apiales, Asparagales,..."
5,https://openalex.org/I149213910,University of Antwerp,"[Perciformes, Chiroptera, Chiroptera, Spirobol...","[Architaenioglossa, Carnivora, Chiroptera, Lep..."
6,https://openalex.org/I13469542,Vrije Universiteit Brussel,"[Hymenoptera, Hymenoptera, Hymenoptera, Anura,...","[Alismatales, Anura, Chiroptera, Hymenoptera, ..."
7,https://openalex.org/I2801227569,Ghent University Hospital,"[Poales, Caryophyllales, Fabales, Malvales, Er...","[Caryophyllales, Ericales, Fabales, Magnoliale..."
9,https://openalex.org/I130929987,University of Mons,"[Hymenoptera, Hymenoptera, Valvatida, Paxillos...","[Diptera, Forcipulatida, Hymenoptera, Ophidiif..."
10,https://openalex.org/I878454856,Hasselt University,"[Rhabdocoela, Clupeiformes, Dactylogyridea, Rh...","[Clupeiformes, Dactylogyridea, Diversisporales..."


In [9]:
orders = list(np.unique(list(pd.core.common.flatten(expertise["orders"]))))
orders

[np.str_('Abrothallales'),
 np.str_('Acrospermales'),
 np.str_('Agaricales'),
 np.str_('Alismatales'),
 np.str_('Amphipoda'),
 np.str_('Amphisphaeriales'),
 np.str_('Amplistromatales'),
 np.str_('Anthoathecata'),
 np.str_('Anura'),
 np.str_('Apiales'),
 np.str_('Apodida'),
 np.str_('Araneae'),
 np.str_('Architaenioglossa'),
 np.str_('Arctomiales'),
 np.str_('Arecales'),
 np.str_('Arthoniales'),
 np.str_('Artiodactyla'),
 np.str_('Asparagales'),
 np.str_('Asterales'),
 np.str_('Asterinales'),
 np.str_('Atractiellales'),
 np.str_('Atractosporales'),
 np.str_('Auriculariales'),
 np.str_('Axinellida'),
 np.str_('Baeomycetales'),
 np.str_('Blattodea'),
 np.str_('Boletales'),
 np.str_('Boraginales'),
 np.str_('Botryosphaeriales'),
 np.str_('Brassicales'),
 np.str_('Bryopsidales'),
 np.str_('Buxales'),
 np.str_('Caliciales'),
 np.str_('Camarodonta'),
 np.str_('Cantharellales'),
 np.str_('Capnodiales'),
 np.str_('Carnivora'),
 np.str_('Caryophyllales'),
 np.str_('Caudata'),
 np.str_('Celastral

In [10]:
expertise2 = pd.DataFrame({"order":orders, "inst_ids":None, "inst_names":None})
expertise2 = expertise2.set_index("order")
expertise2["inst_ids"] = [[] for _ in orders]
expertise2["inst_names"] = [[] for _ in orders]

In [11]:
for row in expertise.itertuples():
    for order in row.orders_unique:
        expertise2.loc[order, "inst_ids"].append(row.inst_id)
        expertise2.loc[order, "inst_names"].append(row.inst_name)

In [12]:
expertise2

Unnamed: 0_level_0,inst_ids,inst_names
order,Unnamed: 1_level_1,Unnamed: 2_level_1
Abrothallales,[https://openalex.org/I4210151123],[French Community of Belgium]
Acrospermales,"[https://openalex.org/I95674353, https://opena...","[UCLouvain, French Community of Belgium]"
Agaricales,"[https://openalex.org/I32597200, https://opena...","[Ghent University, UCLouvain, Sciensano (Belgi..."
Alismatales,"[https://openalex.org/I132053463, https://open...","[Université Libre de Bruxelles, Vrije Universi..."
Amphipoda,[https://openalex.org/I132053463],[Université Libre de Bruxelles]
...,...,...
Vitales,"[https://openalex.org/I32597200, https://opena...","[Ghent University, UCLouvain, Meise Botanic Ga..."
Welwitschiales,[https://openalex.org/I95674353],[UCLouvain]
Xylariales,"[https://openalex.org/I32597200, https://opena...","[Ghent University, UCLouvain, Meise Botanic Ga..."
Zeloasperisporiales,[https://openalex.org/I95674353],[UCLouvain]


In [13]:
expertise.to_excel("../data/processed/national_taxonomische_expertise_per_instituut.xlsx")

In [14]:
expertise2.to_excel("../data/processed/national_taxonomische_expertise_per_orde.xlsx")