In [1]:
import pandas as pd
import requests
import numpy as np

In [2]:
# GET ALL INSTITUTIONS FROM OPENALEX API MATCHING THE SPECIFIED FILTER
def request_institutions(filter_string, email):
    """Download every OpenAlex institution matching a filter.

    Pages through the ``/institutions`` endpoint (200 records per page) with
    cursor paging until the API reports no further cursor.

    Parameters
    ----------
    filter_string : str
        Value of the ``filter`` query parameter, e.g. ``"country_code:BE"``.
        It is inserted into the URL verbatim, so the caller is responsible
        for any URL-encoding.
    email : str
        Address sent as the ``mailto`` query parameter ("polite pool").
        Also inserted verbatim.

    Returns
    -------
    pandas.DataFrame
        One row per institution record, one column per top-level key of the
        returned JSON objects.

    Raises
    ------
    requests.HTTPError
        If any page request returns an HTTP error status.
    """
    # build query (e-mail included for "polite pool")
    query = "https://api.openalex.org/institutions?per-page=200&filter="+filter_string+"&mailto="+email

    institutions = []
    cursor = "*"  # "*" requests the first page

    # persistent session to shorten processing time between requests;
    # the context manager closes it once all pages are retrieved
    with requests.Session() as session:
        while cursor is not None:
            response = session.get(query+"&cursor="+cursor)
            # fail loudly on HTTP errors instead of hitting a KeyError on
            # the missing "meta"/"results" keys of an error payload
            response.raise_for_status()
            page = response.json()
            institutions.extend(page["results"])
            # a next_cursor of None marks the end of the result set
            cursor = page["meta"]["next_cursor"]

    return pd.DataFrame.from_dict(institutions)

In [3]:
# Load the pre-processed author table; later cells read its inst_id and
# order columns.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
data = pd.read_pickle("../data/processed/authors_disambiguated_truncated.pkl")
# Preview the first rows.
data.head()

Unnamed: 0,author_id,author_display_name,author_orcid,inst_id,inst_display_name,species_subject,truncatedName,strippedName,order
0,https://openalex.org/A5086496821,Paolo Citton,https://orcid.org/0000-0002-6503-5541,https://openalex.org/I861853513,Sapienza University of Rome,[],P Citton,PaoloCitton,[]
1,https://openalex.org/A5071440263,Nadine Gautheron,https://orcid.org/0000-0001-9351-610X,https://openalex.org/I177064439,Université de Bourgogne,"[Fusarium incarnatum, Fusarium equiseti]",N Gautheron,NadineGautheron,[Hypocreales]
2,https://openalex.org/A5086851894,V. N. Fursov,https://orcid.org/0000-0002-3318-2491,https://openalex.org/I4210141199,I.I. Schmalhausen Institute of Zoology,[],V Fursov,VNFursov,[]
3,https://openalex.org/A5031720116,Ralf T. Voegele,https://orcid.org/0000-0002-6068-8244,https://openalex.org/I110079840,University of Hohenheim,[Vitis vinifera],R Voegele,RalfTVoegele,[Vitales]
4,https://openalex.org/A5017720305,Marcello Nicoletti,https://orcid.org/0000-0002-2471-1785,https://openalex.org/I861853513,Sapienza University of Rome,[],M Nicoletti,MarcelloNicoletti,[]


In [4]:
# Fetch all OpenAlex institutions matching the filter country_code:BE;
# the e-mail address is sent to the API as the mailto parameter.
be_inst = request_institutions("country_code:BE", email="melanie.denolf@plantentuinmeise.be")

In [5]:
# Preview the first rows of the institution table.
be_inst.head()

Unnamed: 0,id,ror,display_name,country_code,type,type_id,lineage,homepage_url,image_url,image_thumbnail_url,...,associated_institutions,counts_by_year,roles,topics,topic_share,x_concepts,is_super_system,works_api_url,updated_date,created_date
0,https://openalex.org/I99464096,https://ror.org/05f950310,KU Leuven,BE,education,https://openalex.org/institution-types/education,[https://openalex.org/I99464096],http://www.kuleuven.be/english,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210142472', 'r...","[{'year': 2024, 'works_count': 8196, 'cited_by...","[{'role': 'funder', 'id': 'https://openalex.or...","[{'id': 'https://openalex.org/T10472', 'displa...","[{'id': 'https://openalex.org/T13064', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2024-10-06T13:45:06.035414,2016-06-24
1,https://openalex.org/I157674565,https://ror.org/00afp2z80,University of Liège,BE,education,https://openalex.org/institution-types/education,[https://openalex.org/I157674565],https://www.uliege.be/cms/c_8699436/fr/portail...,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I27895137', 'ror...","[{'year': 2024, 'works_count': 2226, 'cited_by...","[{'role': 'funder', 'id': 'https://openalex.or...","[{'id': 'https://openalex.org/T11475', 'displa...","[{'id': 'https://openalex.org/T13152', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2024-10-03T01:44:31.317775,2016-06-24
2,https://openalex.org/I32597200,https://ror.org/00cv9y106,Ghent University,BE,education,https://openalex.org/institution-types/education,[https://openalex.org/I32597200],https://www.ugent.be/en,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210132857', 'r...","[{'year': 2024, 'works_count': 5004, 'cited_by...","[{'role': 'publisher', 'id': 'https://openalex...","[{'id': 'https://openalex.org/T10299', 'displa...","[{'id': 'https://openalex.org/T12483', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2024-10-06T04:12:51.484367,2016-06-24
3,https://openalex.org/I95674353,https://ror.org/02495e989,UCLouvain,BE,education,https://openalex.org/institution-types/education,[https://openalex.org/I95674353],https://uclouvain.be,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210093617', 'r...","[{'year': 2024, 'works_count': 2071, 'cited_by...","[{'role': 'institution', 'id': 'https://openal...","[{'id': 'https://openalex.org/T10048', 'displa...","[{'id': 'https://openalex.org/T13387', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2024-10-04T14:23:38.135400,2016-06-24
4,https://openalex.org/I132053463,https://ror.org/01r9htc13,Université Libre de Bruxelles,BE,education,https://openalex.org/institution-types/education,[https://openalex.org/I132053463],https://www.ulb.be,https://commons.wikimedia.org/w/index.php?titl...,https://commons.wikimedia.org/w/index.php?titl...,...,"[{'id': 'https://openalex.org/I4210139176', 'r...","[{'year': 2024, 'works_count': 2376, 'cited_by...","[{'role': 'funder', 'id': 'https://openalex.or...","[{'id': 'https://openalex.org/T10093', 'displa...","[{'id': 'https://openalex.org/T13458', 'displa...","[{'id': 'https://openalex.org/C86803240', 'wik...",False,https://api.openalex.org/works?filter=institut...,2024-10-06T17:43:06.979275,2016-06-24


In [6]:
# Collect, per institution in be_inst, the taxonomic orders of its authors.
# Every institution starts with an empty list, so institutions without any
# matching author stay in the mapping.
inst_exp = {inst: [] for inst in be_inst["id"]}

for row in data.itertuples():
    # Dict membership is equivalent to testing against list(be_inst["id"]),
    # but avoids rebuilding and scanning that list for every author row.
    if row.inst_id not in inst_exp:
        continue
    if isinstance(row.order, (list, set)):
        # a collection of orders contributes each of its elements
        inst_exp[row.inst_id].extend(row.order)
    else:
        # any other value is kept as a single entry
        inst_exp[row.inst_id].append(row.order)

In [7]:
# Inspect the institution id -> list of orders mapping.
inst_exp

{'https://openalex.org/I99464096': ['Orthoptera',
  'Myrtales',
  'Perciformes',
  'Sepiida',
  'Perciformes',
  'Myrtales',
  'Orthoptera',
  'Dactylogyridea',
  'Araneae',
  'Hymenoptera',
  'Perciformes',
  'Myrtales',
  'Cyprinodontiformes',
  'Coleoptera',
  'Cyprinodontiformes'],
 'https://openalex.org/I157674565': ['Diptera',
  'Leptopterygiidae',
  'Emydidae',
  'Rodentia',
  'Carnivora',
  'Scorpaeniformes',
  'Alismatales',
  'Ophidiiformes',
  'Magnoliales',
  'Sapindales',
  'Orthotrichales',
  'Rodentia',
  'Magnoliales',
  'Agaricales',
  'Rodentia',
  'Cypriniformes',
  'Cetacea',
  'Cypriniformes',
  'Cetacea',
  'Rodentia'],
 'https://openalex.org/I32597200': ['Agaricales',
  'Odonata',
  'Odonata',
  'Desmoscolecida',
  'Auriculariales',
  'Russulales',
  'Diptera',
  'Desmoscolecida',
  'Gentianales',
  'Scleractinia',
  'Gymnophthalmidae',
  'Monhysterida',
  'Hymenoptera',
  'Diptera',
  'Colubridae',
  'Siluriformes',
  'Chromadorida',
  'Carnivora',
  'Rhabditida

In [8]:
# One row per institution in be_inst: id, display name, every collected
# order (with repeats) and the sorted distinct orders.
# The orders are looked up by id for each be_inst row, so ids, names and
# orders are aligned explicitly rather than via dict insertion order.
expertise = pd.DataFrame({
    "inst_id": be_inst["id"],
    "inst_name": be_inst["display_name"],
    "orders": [inst_exp[inst] for inst in be_inst["id"]],
})
# .tolist() converts NumPy scalars to plain Python objects, so the lists do
# not stringify as "np.str_('...')" under NumPy >= 2 (e.g. when exported).
expertise["orders_unique"] = [np.unique(x).tolist() for x in expertise["orders"]]
# Show only the institutions with at least one collected order.
expertise[expertise["orders"].map(len) > 0]

Unnamed: 0,inst_id,inst_name,orders,orders_unique
0,https://openalex.org/I99464096,KU Leuven,"[Orthoptera, Myrtales, Perciformes, Sepiida, P...","[Araneae, Coleoptera, Cyprinodontiformes, Dact..."
1,https://openalex.org/I157674565,University of Liège,"[Diptera, Leptopterygiidae, Emydidae, Rodentia...","[Agaricales, Alismatales, Carnivora, Cetacea, ..."
2,https://openalex.org/I32597200,Ghent University,"[Agaricales, Odonata, Odonata, Desmoscolecida,...","[Agaricales, Alismatales, Anura, Araneae, Arch..."
3,https://openalex.org/I95674353,UCLouvain,"[Hymenoptera, Thysanoptera, Lamiales, Hemipter...","[Hemiptera, Hymenochaetales, Hymenoptera, Lami..."
4,https://openalex.org/I132053463,Université Libre de Bruxelles,"[Velatida, Blattodea, Sapindales, Apiales, Gen...","[Alismatales, Amphipoda, Apiales, Asparagales,..."
5,https://openalex.org/I13469542,Vrije Universiteit Brussel,"[Hymenoptera, Myrtales, Hymenoptera, Myrtales,...","[Alismatales, Anura, Hymenoptera, Iguanodontid..."
6,https://openalex.org/I149213910,University of Antwerp,"[Perciformes, Rhabditida, Rodentia, Caudata, R...","[Carnivora, Caudata, Diptera, Lacertidae, Pass..."
7,https://openalex.org/I2801227569,Ghent University Hospital,"[Rhabditida, Rhabditida, Fabales]","[Fabales, Rhabditida]"
9,https://openalex.org/I130929987,University of Mons,"[Ophidiiformes, Hymenoptera, Hymenoptera, Valv...","[Hymenoptera, Ophidiiformes, Paxillosida, Squa..."
10,https://openalex.org/I878454856,Hasselt University,"[Dactylogyridea, Perciformes, Rhabdocoela, Rha...","[Brassicales, Dactylogyridea, Diversisporales,..."


In [9]:
# Sorted list of every distinct order found across all institutions.
# A plain comprehension flattens the per-institution lists one level deep
# (assumes the entries themselves are scalars such as strings), avoiding
# the private pd.core.common.flatten helper; .tolist() yields plain Python
# objects instead of NumPy scalars.
orders = np.unique(
    [order for inst_orders in expertise["orders"] for order in inst_orders]
).tolist()
orders

['Agamidae',
 'Agaricales',
 'Alismatales',
 'Amphipoda',
 'Anura',
 'Apiales',
 'Araneae',
 'Architaenioglossa',
 'Arthoniales',
 'Asparagales',
 'Asterales',
 'Auriculariales',
 'Azhdarchidae',
 'Blattodea',
 'Boletales',
 'Bothremydidae',
 'Brassicales',
 'Campanilidae',
 'Capnodiales',
 'Carnivora',
 'Caryophyllales',
 'Caudata',
 'Cetacea',
 'Cheilostomatida',
 'Cheloniidae',
 'Chromadorida',
 'Coleoptera',
 'Colubridae',
 'Crassiclitellata',
 'Cypriniformes',
 'Cyprinodontiformes',
 'Dactylogyridea',
 'Decapoda',
 'Dendrochirotida',
 'Desmodorida',
 'Desmoscolecida',
 'Diplostraca',
 'Diptera',
 'Diversisporales',
 'Emydidae',
 'Ericales',
 'Erinaceomorpha',
 'Fabales',
 'Gekkonidae',
 'Gelidiales',
 'Gentianales',
 'Glomerales',
 'Gymnophthalmidae',
 'Hemiptera',
 'Hymenochaetales',
 'Hymenophyllales',
 'Hymenoptera',
 'Hypocreales',
 'Iguanodontidae',
 'Isopoda',
 'Lacertidae',
 'Lamiales',
 'Lepidoptera',
 'Leptopterygiidae',
 'Littorinimorpha',
 'Macroscelidea',
 'Magnoliales

In [10]:
# Empty per-order lookup table: indexed by order name, with one fresh
# (unshared) list per row for the institution ids and names.
expertise2 = pd.DataFrame(
    {
        "inst_ids": [[] for _ in orders],
        "inst_names": [[] for _ in orders],
    },
    index=pd.Index(orders, name="order"),
)

In [11]:
# Invert the per-institution table: for every order, list the institutions
# (ids and names) that have it among their distinct orders.
# The lists are rebuilt from scratch and assigned as whole columns, so
# re-running this cell does not append duplicate entries and nothing
# depends on .loc handing back the stored list object for in-place mutation.
ids_by_order = {order: [] for order in expertise2.index}
names_by_order = {order: [] for order in expertise2.index}

for row in expertise.itertuples():
    for order in row.orders_unique:
        ids_by_order[order].append(row.inst_id)
        names_by_order[order].append(row.inst_name)

expertise2["inst_ids"] = [ids_by_order[order] for order in expertise2.index]
expertise2["inst_names"] = [names_by_order[order] for order in expertise2.index]

In [12]:
# Inspect the per-order table of institution ids and names.
expertise2

Unnamed: 0_level_0,inst_ids,inst_names
order,Unnamed: 1_level_1,Unnamed: 2_level_1
Agamidae,[https://openalex.org/I2801223450],[Institute of Natural Sciences]
Agaricales,"[https://openalex.org/I157674565, https://open...","[University of Liège, Ghent University, Confed..."
Alismatales,"[https://openalex.org/I157674565, https://open...","[University of Liège, Ghent University, Univer..."
Amphipoda,"[https://openalex.org/I132053463, https://open...","[Université Libre de Bruxelles, Institute of N..."
Anura,"[https://openalex.org/I32597200, https://opena...","[Ghent University, Vrije Universiteit Brussel]"
...,...,...
Trochida,[https://openalex.org/I2801223450],[Institute of Natural Sciences]
Trombidiformes,"[https://openalex.org/I32597200, https://opena...","[Ghent University, Royal Museum for Central Af..."
Trypetheliales,[https://openalex.org/I1293066303],[Meise Botanic Garden]
Valvatida,[https://openalex.org/I130929987],[University of Mons]


In [13]:
# Write the per-institution expertise table to an Excel workbook.
expertise.to_excel("../data/processed/Belgische_taxonomische_expertise_per_instituut.xlsx")

In [14]:
# Write the per-order expertise table to an Excel workbook.
expertise2.to_excel("../data/processed/Belgische_taxonomische_expertise_per_orde.xlsx")