# 1. Gathering taxonomic journals

We gathered taxonomic journals through three sources:
 - We used Wikidata to find all academic or scientific journals with a main subject or field of work related to taxonomy, phylogeny, nomenclature, etc.
 - We used Wikidata to find all journals that had an IPNI or ZooBank publication ID.
 - We used the OpenAlex API to retrieve all journals related to "taxonomy", i.e. journals that have the concept "taxonomy" linked to them.

Here, we take a quick look at the results.

In [None]:
import pandas as pd
from matplotlib_venn import venn3
from matplotlib import pyplot as plt

In [None]:
# Read the processed journals table from CSV and show it as the cell output.
journals = pd.read_csv(filepath_or_buffer="../data/processed/journals.csv")
journals

In [None]:
# Row count for each distinct value of the "source" column,
# i.e. how many journals each source contributed.
journals.loc[:, "source"].value_counts()

In [None]:
# Rows sharing a title are collapsed to one before counting,
# so a journal reported by several sources is counted once.
print("Number of unique journals (by title): ",
      len(journals.drop_duplicates(subset="title", ignore_index=True)),
      sep="")

In [None]:
# One set of journal titles per source, so the sources can be compared
# below with set difference / intersection.
ipnizoo = set(journals.loc[journals["source"] == "IPNI or ZooBank ID", "title"])
openalex = set(journals.loc[journals["source"] == "OpenAlex taxonomy concept", "title"])
wikisubjects = set(journals.loc[journals["source"] == "Wikidata taxonomic subject", "title"])

In [None]:
# Titles in the IPNI/ZooBank set that are absent from each of the other two sets.
print("Number of journals found via IPNI or ZooBank ID, not found via OpenAlex: ",
      len(ipnizoo.difference(openalex)), sep="")
print("Number of journals found via IPNI or ZooBank ID, not found via Wikidata subjects: ",
      len(ipnizoo.difference(wikisubjects)), sep="")

In [None]:
# Titles in the Wikidata-subjects set that are absent from each of the other two sets.
print("Number of journals found via Wikidata subjects, not found via OpenAlex: ",
      len(wikisubjects.difference(openalex)), sep="")
print("Number of journals found via Wikidata subjects, not found via IPNI or ZooBank ID: ",
      len(wikisubjects.difference(ipnizoo)), sep="")

In [None]:
# Titles in the OpenAlex set that are absent from each of the other two sets.
# Both labels name the source simply "OpenAlex", matching the other cells.
print("Number of journals found via OpenAlex, not found via Wikidata subjects: " +
      str(len(openalex - wikisubjects)))
print("Number of journals found via OpenAlex, not found via IPNI or ZooBank ID: " +
      str(len(openalex - ipnizoo)))

In [None]:
# Sizes of the pairwise and three-way overlaps between the title sets.
print("Number of journals in common\n")
print("OpenAlex and IPNI/ZooBank IDs: ",
      len(openalex & ipnizoo), sep="")
print("OpenAlex and Wikidata subjects: ",
      len(openalex & wikisubjects), sep="")
print("Wikidata subjects and IPNI/ZooBank IDs: ",
      len(wikisubjects & ipnizoo), sep="")
print("OpenAlex, Wikidata subjects and IPNI/ZooBank IDs: ",
      len(openalex & wikisubjects & ipnizoo), sep="")

In [None]:
# number of journals with an OpenAlex ID per source
# notna() keeps only the rows whose openAlexID is not missing
journals[journals["openAlexID"].notna()]["source"].value_counts()

In [None]:
# number of journals that were not recently dissolved (or not confirmed dissolved) per source
# keep rows where "dissolved" is explicitly False or is missing
journals.loc[
    journals["dissolved"].eq(False) | journals["dissolved"].isna(),
    "source",
].value_counts()

In [None]:
# Venn diagram of the three title sets, one circle per source, with a title on the current axes.
venn3(subsets=[ipnizoo, openalex, wikisubjects],
      set_labels=("IPNI/ZooBank ID", "OpenAlex", "Wikidata subjects"))
plt.title(label="Unique journals per source", fontsize=15)