# Type specimens through GBIF

## Input Scientific Name

In [0]:
#@title String fields

# Scientific name to look up; exposed as a Colab form field through the #@param annotation.
scientificName = 'Solanum dillonii' #@param {type:"string"}

## Initialisation

### Importing Libraries

In [0]:
!pip install -q SPARQLWrapper

import requests
import json
import pandas as pd
from rdflib import *
from urllib.error import HTTPError

from google.colab import data_table
from IPython.display import display, Markdown, Latex, Image

import xml.etree.ElementTree as ET

from SPARQLWrapper import SPARQLWrapper, JSON

import warnings
warnings.filterwarnings('ignore') #suppress some warnings coming from RDFLib

### Defining the APIs

In [0]:
# Root of the GBIF REST API; the two endpoints below are derived from it.
gbif_base_url = 'https://api.gbif.org/v1/'

# Occurrence search endpoint.
gbif_occurence = f'{gbif_base_url}occurrence/search'
# Name matching endpoint, used to retrieve the taxonKey.
gbif_species = f'{gbif_base_url}species/match'

# Root under which the RDF documents of Plazi treatments are fetched.
plazi_base_url = 'http://tb.plazi.org/GgServer/rdf/'


### Define functions

In [0]:
def get_collection(code):
  """Fetch one institution record from the NYBG institutions API.

  Args:
    code: institution code; it is appended verbatim to the endpoint URL.

  Returns:
    A one-row pandas DataFrame with the columns 'code', 'name' and
    'website' when the API answers with HTTP 200, otherwise None.
  """
  response = requests.get('http://sweetgum.nybg.org/science/api/v1/institutions/' + code)

  # Guard clause: any status other than 200 means there is nothing to report.
  if response.status_code != 200:
    return None

  institution = json.loads(response.text)
  record = {
    'code': institution['code'],
    'name': institution['organization'],
    'website': institution['contact']['webUrl'],
  }
  return pd.DataFrame(record, index=[0])

## Search for specimens with taxonomic issues

In [0]:
# Query parameters that select the TAXONOMIC_RULES issue flag.
params = dict(issues='TAXONOMIC_RULES')

# One boolean flag per type status; every flag starts out as False.
types_found = dict.fromkeys(
  ['Holotype', 'Neotype', 'Lectotype', 'Isotype', 'Syntype', 'Paratype'],
  False,
)

### Get the taxonKey

In [0]:
# Percent-encoded form of the name. It is not used by the request below,
# which passes the plain name through `params`.
taxon_search_string = scientificName.replace(' ','%20')

taxonKey_search = {'name' : scientificName, 'strict' : True, 'verbose' : True}

request = requests.get(gbif_species, params=taxonKey_search)
# Fail with an explicit exception instead of `assert(False)`, so the reason
# for stopping the notebook is carried by the error itself.
if request.status_code != 200:
  raise RuntimeError('Something went wrong, try different Scientific Name')

req_json = json.loads(request.text)
if req_json['matchType'] == 'NONE':
  raise ValueError('Species not found, try another one')

# A match that lacks any of these fields cannot drive the rest of the
# notebook; report which fields are missing rather than raising a bare KeyError.
missing_fields = [field for field in ('usageKey', 'speciesKey', 'genusKey', 'genus', 'species')
                  if field not in req_json]
if missing_fields:
  raise ValueError('GBIF match for "' + scientificName + '" lacks: ' + ', '.join(missing_fields))

species_tokens = req_json['species'].split(' ')
if len(species_tokens) < 2:
  raise ValueError('Expected a binomial species name, got: ' + req_json['species'])

taxonKey = req_json['usageKey']
speciesKey = req_json['speciesKey']
genusKey = req_json['genusKey']
genus = req_json['genus']
# Second space-separated token of the matched species name
# (presumably the specific epithet of a binomial).
species = species_tokens[1]

### Get all the collection codes from Index Herbariorum

In [0]:
# Collect the 'code' of every entry under 'data' in the institutions listing.
IH_codes = []
herbaria = requests.get('http://sweetgum.nybg.org/science/api/v1/institutions')
if herbaria.status_code != 200:
  print('Error reading IH API')
else:
  jHerb = herbaria.json()
  IH_codes.extend(element['code'] for element in jHerb['data'])


### Get the types from the taxonomic treatment (PLAZI)
Use the Plazi API to check whether a taxonomic treatment is available.

#### Look for treatments

In [0]:
# SPARQL endpoint that is queried for Plazi treatments.
plazi_sparql = 'https://treatment.ld.plazi.org/sparql'

# Select the taxon concepts with the given genus and species, together with
# any treatments that define or augment them. Doubled braces are literal
# braces for str.format; {0} is the genus and {1} the species.
p_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX dwc: <http://rs.tdwg.org/dwc/terms/>
PREFIX treat: <http://plazi.org/vocab/treatment#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
SELECT * WHERE {{ 
  ?tc dwc:genus "{0}" .
  ?tc dwc:species "{1}" .
  ?tc a <http://filteredpush.org/ontologies/oa/dwcFP#TaxonConcept> . 
  OPTIONAL {{ ?tc treat:hasTaxonName ?tn . }}
  OPTIONAL {{ ?augmentingTreatment treat:augmentsTaxonConcept ?tc . 
  ?augmentingTreatment dc:creator ?augmentingTreatmentCreator .}}
  OPTIONAL {{ ?definingTreatment treat:definesTaxonConcept ?tc . 
  ?definingTreatment dc:creator ?definingTreatmentCreator .}}
  }}
""".format(genus,species)

sparql = SPARQLWrapper(plazi_sparql)
sparql.setQuery(p_query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Gather the treatment URIs; both variables are OPTIONAL in the query, so a
# binding may lack either of them.
treatments = []
for binding in results["results"]["bindings"]:
  for variable in ('definingTreatment', 'augmentingTreatment'):
    try:
      treatments.append(binding[variable]['value'])
    except KeyError:
      continue

# Drop duplicate URIs (the resulting order is not significant).
treatments = list(set(treatments))
print(treatments)



['http://treatment.plazi.org/id/D01D1214B639B31834BFBDF964EDC34B']


#### Try to extract the types from the treatments and print publication

In [0]:
# Figure URIs found in the treatments; filled by the loop below.
figures = []

# NOTE(review): the queries below use the prefixes trt, dc, bibo, fabio, spm
# and rdf without declaring them; presumably they are bound by the parsed
# graph. TODO confirm.
for treatment in treatments:
  # The RDF document is addressed by the treatment id, i.e. the URI without its base.
  url = plazi_base_url + treatment.replace('http://treatment.plazi.org/id/','')
  g = Graph()
  try:
    g.parse(url, format='xml')

    # Print the publication
    publication_query = g.query(
      """ SELECT * WHERE {{
        <{0}> trt:publishedIn ?b.
        OPTIONAL{{ ?b dc:title ?title.}}
        OPTIONAL{{ ?b dc:creator ?creator.}}
        OPTIONAL{{ ?b bibo:journal ?journal.}}
        OPTIONAL{{ ?b dc:date ?date.}}
        OPTIONAL{{ ?b bibo:volume ?volume.}}
        }}""".format(treatment))
    # Publication output is switched off; set pub_print to True to enable it.
    pub_print = False
    if pub_print:
      for item in publication_query:
        print("Publication:")
        print('------------')
        print('published in: ' + item.b)
        print('author: ' + item.creator)
        #print('title: ' + item.title)
        #print('journal: ' + item.journal)
        #print('date: ' + item.date)
        #print('volume: ' + item.volume)

    # Get the figures
    qfig = g.query(
      """ SELECT * WHERE {{
        <{0}> fabio:hasPart ?b.
        ?b rdf:type <http://purl.org/spar/fabio/Figure>.
        }}""".format(treatment))
    for fig in qfig:
      figures.append(fig[0])

    # Get the sections within the treatment
    qres = g.query(
      """ SELECT DISTINCT * WHERE {{
        <{0}> spm:hasInformation ?b.
        }}""".format(treatment))

    # For each section in the treatment, check if there is Type information
    for r in qres:
      q2res = g.query(
      """ SELECT DISTINCT * WHERE {{
       <{0}> spm:hasContent ?b.
       }}""".format(r[0]))
      for r2 in q2res:
        # Only content that mentions 'Type' is treated as type information.
        if 'Type' not in str(r2):
          continue
        print('=================')
        print('Type information:')
        print('-----------------')
        print(str(r2[0]))
        type_string = str(r2[0])
        type_string = type_string.split(" ")
        # Collection codes are only reported once the word 'holotype' has been seen.
        do_print = False
        for substring in type_string:
          # Strip punctuation so that tokens such as 'USM!;' can match an IH code.
          substring = substring.replace(',', '').replace('!', '').replace(')','').replace(';','').replace('.','')
          if 'holotype' in substring:
            do_print = True
          if substring in IH_codes and do_print:
            collection = get_collection(substring)
            # get_collection returns None when its request does not succeed;
            # skip the table instead of handing None to DataTable.
            if collection is None:
              print('No collection details available for ' + substring)
              continue
            display(data_table.DataTable(collection, include_index=False, num_rows_per_page=20))
  except HTTPError:
    # Best effort: a treatment that cannot be fetched is reported and skipped.
    print('treatment gave an HTTP error')

Type information:
-----------------
 Type.  Peru:Cajamarca: Prov. Celendin, on road from Celendin to Balsas, east of pass on descent to Balsas, 2002 m, 6°52.13S, 78°30.91W, 12 December 2007, S. Stern, E. Tepe, S. Leiva & M. Zapata 119 (holotype: USM!; isotypes: BM! [BM001016881], HAO†, NY! [NY 00986687], UT!).


Unnamed: 0,code,name,website
0,USM,Universidad Nacional Mayor de San Marcos,


Unnamed: 0,code,name,website
0,BM,The Natural History Museum,http://www.nhm.ac.uk/


Unnamed: 0,code,name,website
0,NY,The New York Botanical Garden,http://www.nybg.org


Unnamed: 0,code,name,website
0,UT,University of Utah,https://nhmu.utah.edu/botany


#### Pictures inside the treatments

In [0]:
# Show every collected figure, each rendered 500 pixels wide.
for figure_url in figures:
  treatment_image = Image(url=figure_url, width=500)
  display(treatment_image)

### Based on the taxonKey, look for type specimens

In [0]:
# Columns of interest; the result tables show whichever of these are present.
columns = [
  'scientificName',
  'acceptedScientificName',
  'recordedBy',
  'eventDate',
  'collectionCode',
  'institutionCode',
  'rightsHolder',
  'countryCode',
  'recordNumber',
]

In [0]:
holotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Holotype', 'limit' : 20}

# Start from an empty frame so the multiple-holotype check at the end of the
# cell is well defined even when the request fails.
df = pd.DataFrame()

holotype_request = requests.get(gbif_occurence, params=holotype_search)
if holotype_request.status_code == 200:
  holotypes = json.loads(holotype_request.text)
  endOfRecords = holotypes['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  holotype_pages = [pd.DataFrame(holotypes['results'])]
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    holotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Holotype', 'limit' : 20, 'offset' : iteration*20}
    holotype_request = requests.get(gbif_occurence, params=holotype_search)
    if holotype_request.status_code != 200:
      print('Something went wrong')
      break
    holotypes = json.loads(holotype_request.text)
    endOfRecords = holotypes['endOfRecords']
    holotype_pages.append(pd.DataFrame(holotypes['results']))
    iteration += 1
  df = pd.concat(holotype_pages, ignore_index=True)
  # Only show the columns of interest that are present in the result.
  adjusted_columns = [item for item in columns if item in df.columns]
  if df.size > 0:
    df1 = df[adjusted_columns]
    display(data_table.DataTable(df1, include_index=False, num_rows_per_page=20))
    types_found['Holotype'] = True
  else:
    print('No HOLOTYPE found')
else:
  print('Something went wrong')

# Count records (rows), not cells: df.size is rows * columns, so a single
# holotype record would otherwise be reported as an error.
if len(df) > 1:
  print('TAXONOMIC ERROR: THERE CAN BE ONLY ONE HOLOTYPE!')

No HOLOTYPE found


In [0]:
neotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Neotype', 'limit' : 20}

neotype_request = requests.get(gbif_occurence, params=neotype_search)
if neotype_request.status_code == 200:
  neotypes = json.loads(neotype_request.text)
  endOfRecords = neotypes['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  neotype_pages = [pd.DataFrame(neotypes['results'])]
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    neotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Neotype', 'limit' : 20, 'offset' : iteration*20}
    neotype_request = requests.get(gbif_occurence, params=neotype_search)
    if neotype_request.status_code != 200:
      print('Something went wrong')
      break
    neotypes = json.loads(neotype_request.text)
    endOfRecords = neotypes['endOfRecords']
    neotype_pages.append(pd.DataFrame(neotypes['results']))
    iteration += 1
  ndf = pd.concat(neotype_pages, ignore_index=True)
  if ndf.size > 0:
    # Only show the columns of interest that are present in the result.
    adjusted_columns = [item for item in columns if item in ndf.columns]
    ndf1 = ndf[adjusted_columns]
    display(data_table.DataTable(ndf1, include_index=False, num_rows_per_page=20))
    types_found['Neotype'] = True
  else:
    print('No NEOTYPES found')
else:
  print('Something went wrong')

No NEOTYPES found


In [0]:
lectotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Lectotype', 'limit' : 20}

lectotype_request = requests.get(gbif_occurence, params=lectotype_search)
if lectotype_request.status_code == 200:
  lectotypes = json.loads(lectotype_request.text)
  endOfRecords = lectotypes['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  lectotype_pages = [pd.DataFrame(lectotypes['results'])]
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    lectotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Lectotype', 'limit' : 20, 'offset' : iteration*20}
    lectotype_request = requests.get(gbif_occurence, params=lectotype_search)
    if lectotype_request.status_code != 200:
      print('Something went wrong')
      break
    lectotypes = json.loads(lectotype_request.text)
    endOfRecords = lectotypes['endOfRecords']
    lectotype_pages.append(pd.DataFrame(lectotypes['results']))
    iteration += 1
  ldf = pd.concat(lectotype_pages, ignore_index=True)
  if ldf.size > 0:
    # Only show the columns of interest that are present in the result.
    adjusted_columns = [item for item in columns if item in ldf.columns]
    ldf1 = ldf[adjusted_columns]
    display(data_table.DataTable(ldf1, include_index=False, num_rows_per_page=20))
    types_found['Lectotype'] = True
  else:
    print('No LECTOTYPES found')
else:
  print('Something went wrong')

No LECTOTYPES found


In [0]:
isotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Isotype', 'limit' : 20}

isotype_request = requests.get(gbif_occurence, params=isotype_search)
if isotype_request.status_code == 200:
  isotypes = json.loads(isotype_request.text)
  endOfRecords = isotypes['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  isotype_pages = [pd.DataFrame(isotypes['results'])]
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    isotype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Isotype', 'limit' : 20, 'offset' : iteration*20}
    isotype_request = requests.get(gbif_occurence, params=isotype_search)
    if isotype_request.status_code != 200:
      print('Something went wrong')
      break
    isotypes = json.loads(isotype_request.text)
    endOfRecords = isotypes['endOfRecords']
    isotype_pages.append(pd.DataFrame(isotypes['results']))
    iteration += 1
  idf = pd.concat(isotype_pages, ignore_index=True)
  if idf.size > 0:
    # Only show the columns of interest that are present in the result.
    adjusted_columns = [item for item in columns if item in idf.columns]
    idf1 = idf[adjusted_columns]
    display(data_table.DataTable(idf1, include_index=False, num_rows_per_page=20))
    types_found['Isotype'] = True
  else:
    print('No ISOTYPES found')
else:
  print('Something went wrong')

Unnamed: 0,scientificName,acceptedScientificName,recordedBy,eventDate,collectionCode,institutionCode,rightsHolder,countryCode,recordNumber
0,Solanum dillonii S.Knapp,Solanum dillonii S.Knapp,S. R. Stern,2007-12-12T00:00:00,NY,NY,The New York Botanical Garden,PE,119


In [0]:
syntype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Syntype', 'limit' : 20}

syntype_request = requests.get(gbif_occurence, params=syntype_search)
if syntype_request.status_code == 200:
  syntypes = json.loads(syntype_request.text)
  endOfRecords = syntypes['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  syntype_pages = [pd.DataFrame(syntypes['results'])]
  # Restart the page counter for this search; without this the offset would
  # depend on whatever value an earlier cell left behind.
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    syntype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Syntype', 'limit' : 20, 'offset' : iteration*20}
    syntype_request = requests.get(gbif_occurence, params=syntype_search)
    if syntype_request.status_code != 200:
      print('Something went wrong')
      break
    syntypes = json.loads(syntype_request.text)
    endOfRecords = syntypes['endOfRecords']
    syntype_pages.append(pd.DataFrame(syntypes['results']))
    iteration += 1
  sdf = pd.concat(syntype_pages, ignore_index=True)
  # Only show the columns of interest that are present in the result.
  adjusted_columns = [item for item in columns if item in sdf.columns]
  if sdf.size > 0:
    sdf1 = sdf[adjusted_columns]
    display(data_table.DataTable(sdf1, include_index=False, num_rows_per_page=20))
    types_found['Syntype'] = True
  else:
    print('No SYNTYPE found')
else:
  print('Something went wrong')

No SYNTYPE found


In [0]:
paratype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Paratype', 'limit' : 20}

paratype_request = requests.get(gbif_occurence, params=paratype_search)
if paratype_request.status_code == 200:
  paratypes = json.loads(paratype_request.text)
  endOfRecords = paratypes['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  paratype_pages = [pd.DataFrame(paratypes['results'])]
  # Restart the page counter for this search; without this the offset would
  # depend on whatever value an earlier cell left behind.
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    paratype_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Paratype', 'limit' : 20, 'offset' : iteration*20}
    paratype_request = requests.get(gbif_occurence, params=paratype_search)
    if paratype_request.status_code != 200:
      print('Something went wrong')
      break
    paratypes = json.loads(paratype_request.text)
    endOfRecords = paratypes['endOfRecords']
    # Accumulate the paratype page itself (not a frame from another search).
    paratype_pages.append(pd.DataFrame(paratypes['results']))
    iteration += 1
  pdf = pd.concat(paratype_pages, ignore_index=True)
  # Only show the columns of interest that are present in the result.
  adjusted_columns = [item for item in columns if item in pdf.columns]
  if pdf.size > 0:
    pdf1 = pdf[adjusted_columns]
    display(data_table.DataTable(pdf1, include_index=False, num_rows_per_page=20))
    types_found['Paratype'] = True
  else:
    print('No PARATYPE found')
else:
  print('Something went wrong')

Unnamed: 0,scientificName,acceptedScientificName,recordedBy,eventDate,collectionCode,institutionCode,rightsHolder,countryCode,recordNumber
0,Solanum dillonii S.Knapp,Solanum dillonii S.Knapp,S. R. Stern,2007-12-11T00:00:00,NY,NY,The New York Botanical Garden,PE,109
1,Solanum dillonii S.Knapp,Solanum dillonii S.Knapp,J. Santisteban C.,1987-05-31T00:00:00,NY,NY,The New York Botanical Garden,PE,125
2,Solanum dillonii S.Knapp,Solanum dillonii S.Knapp,M. O. Dillon,1986-03-16T00:00:00,NY,NY,The New York Botanical Garden,PE,4379
3,Solanum dillonii S.Knapp,Solanum dillonii S.Knapp,S. D. Knapp,1984-02-06T00:00:00,NY,NY,The New York Botanical Garden,EC,6258


In [0]:
type_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Type', 'limit' : 20}

type_request = requests.get(gbif_occurence, params=type_search)
if type_request.status_code == 200:
  types = json.loads(type_request.text)
  endOfRecords = types['endOfRecords']
  # Collect one frame per page and concatenate once; DataFrame.append is no
  # longer available in pandas 2.x.
  type_pages = [pd.DataFrame(types['results'])]
  # Restart the page counter for this search; without this the offset would
  # depend on whatever value an earlier cell left behind.
  iteration = 1
  # Keep requesting pages of 20 records until the API reports endOfRecords.
  while not endOfRecords:
    type_search = {'taxonKey' : taxonKey, 'typeStatus' : 'Type', 'limit' : 20, 'offset' : iteration*20}
    type_request = requests.get(gbif_occurence, params=type_search)
    if type_request.status_code != 200:
      print('Something went wrong')
      break
    types = json.loads(type_request.text)
    endOfRecords = types['endOfRecords']
    type_pages.append(pd.DataFrame(types['results']))
    iteration += 1
  # Stored under its own name (tdf) so this search does not overwrite the
  # neotype frame `ndf`.
  tdf = pd.concat(type_pages, ignore_index=True)
  # Only show the columns of interest that are present in the result.
  adjusted_columns = [item for item in columns if item in tdf.columns]
  if tdf.size > 0:
    tdf1 = tdf[adjusted_columns]
    display(data_table.DataTable(tdf1, include_index=False, num_rows_per_page=20))
  else:
    print('No TYPE found')
else:
  print('Something went wrong')

No TYPE found


## Search for similar specimens

In [0]:
# Take the collection year, month and country from the first record of the
# highest-priority type found: holotype, then neotype, lectotype, isotype.
if types_found['Holotype']:
  try:
    year = df.year[0] # for the moment I take the first record, but this needs to be changed
  except AttributeError:
    # Fallback when the holotype records carry no 'year' column.
    year = '2008' #@param {type:"string"}
  try:
    month = df.month[0]
  except AttributeError:
    # Fallback when the holotype records carry no 'month' column.
    month = '03' #@param {type:"string"}
  countryCode = df.countryCode[0]
elif types_found['Neotype']:
  year = ndf.year[0]
  month = ndf.month[0]
  countryCode = ndf.countryCode[0]
elif types_found['Lectotype']:
  year = ldf.year[0]
  month = ldf.month[0]
  countryCode = ldf.countryCode[0]
elif types_found['Isotype']:
  year = idf.year[0]
  month = idf.month[0]
  countryCode = idf.countryCode[0]
else:
  # Stop with an explicit, descriptive exception rather than `assert(False)`.
  raise NotImplementedError('Other cases need to be implemented later, including taxonomic rules')

# search parameters: same genus, collected in the same year, month and country

params = {'genusKey' : genusKey, 'year' : year, 'month': month, 'country' : countryCode}


specimen_request = requests.get(gbif_occurence, params=params)
if specimen_request.status_code == 200:
  specimens = json.loads(specimen_request.text)
  specimen_df = pd.DataFrame(specimens['results'])
  # Only show the columns of interest that are present in the result.
  adjusted_columns = [item for item in columns if item in specimen_df.columns]
  specimen_df1 = specimen_df[adjusted_columns]
  display(data_table.DataTable(specimen_df1, include_index=False, num_rows_per_page=20))
else:
  # Report a failed request, as the other search cells do, instead of ending silently.
  print('Something went wrong')

Unnamed: 0,scientificName,acceptedScientificName,recordedBy,eventDate,collectionCode,institutionCode,countryCode,recordNumber
0,Solanum saponaceum Dunal,Solanum saponaceum Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-13T00:00:00,BOT,NHMUK,PE,128
1,Solanum zahlbruckneri Bitter,Solanum aloysiifolium Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-12T00:00:00,BOT,NHMUK,PE,110
2,Solanum lepidotum Humb. & Bonpl. ex Dunal,Solanum lepidotum Humb. & Bonpl. ex Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-16T00:00:00,BOT,NHMUK,PE,167
3,Solanum oblongifolium Humb. & Bonpl. ex Dunal,Solanum oblongifolium Humb. & Bonpl. ex Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-12T00:00:00,BOT,NHMUK,PE,115
4,Solanum saponaceum Dunal,Solanum saponaceum Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-13T00:00:00,BOT,NHMUK,PE,126
5,Solanum acerifolium Humb. & Bonpl. ex Dunal,Solanum acerifolium Humb. & Bonpl. ex Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-17T00:00:00,BOT,NHMUK,PE,179
6,Solanum dillonii S.Knapp,Solanum dillonii S.Knapp,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-11T00:00:00,BOT,NHMUK,PE,109
7,Solanum albidum Dunal,Solanum albidum Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-14T00:00:00,BOT,NHMUK,PE,143
8,Solanum rubicaule S.R.Stern,Solanum rubicaule S.R.Stern,Stern Tepe Leiva Zapata,2007-12-17T00:00:00,BOT,NHMUK,PE,181
9,Solanum albidum Dunal,Solanum albidum Dunal,"Stephen R. Stern, E J. Tepe, S Leiva, M Zapata",2007-12-14T00:00:00,BOT,NHMUK,PE,143
