In [3]:
import json
import requests
import time
from data import *
import pickle

In [4]:
eiggRawData = eiggRawData()
eiggRawData = eiggRawData["Scientific name"].str.lower()
eiggRawData = set(eiggRawData)
len(eiggRawData)

  exec(code_obj, self.user_global_ns, self.user_ns)


3177

In [None]:
def preparedName(name,limit):
    name = name.split(" ")[:limit]
    return "+".join(name)

def validateSingleName(name,limit=100):
    callToValidateName = requests.get('http://resolver.globalnames.org/name_resolvers.json?names='+preparedName(name,limit))
    jsonRes = callToValidateName.json()['data'][0]
    #time.sleep(2)    =>  Politeness for server
    try:
        if not jsonRes['is_known_name']:
            return jsonRes['results']['canonical_form']
        else:
            return " ".join(name.split(" ")[:limit])
    except:
        print("error occured at "+name)
        return name+" error_loc"

def validateListedEiggSpecies(speciesList):
    validatedSpeciesResultDict = {}
    for species in eiggRawData:
        indivResult = validateSingleName(species)
        validatedSpeciesResultDict[species] = indivResult
    
    print(len(validatedSpeciesResultDict))
    with open("PartialEiggVerified",'wb') as f:
        pickle.dump(validatedSpeciesResultDict,f)
        
    failedComputations = list(filter(lambda x: "error_loc" in validatedSpeciesResultDict[x],validatedSpeciesResultDict.keys()))
    return failedComputations

In [16]:
failedComputations = validateListedEiggSpecies(eiggRawData)

error occured at circaea lutetiana x alpina = c. x intermedia
error occured at cladonia coccifera s. lat.
error occured at halichondria (halichondria) panicea
error occured at cochlicella (cochlicella) acuta
error occured at polygala vulgaris subsp. collina
error occured at salix caprea x myrsinifolia = s. x latifolia
error occured at parmelia saxatilis s. lat.
error occured at vertebrata fucoides
error occured at gymnadenia conopsea agg.
error occured at punctelia subrudecta s. lat.
error occured at porania (porania) pulvillus
error occured at anisus (anisus) leucostoma
error occured at zostera (zostera) marina
error occured at neuroterus albipes f. agamic
error occured at motacilla alba subsp. yarrellii
error occured at rhodophycota indet. (non-calc. crusts)
error occured at pectenia plumbea s. lat.
error occured at anthyllis vulneraria vulneraria langei
error occured at betula pendula x pubescens = b. x aurata
error occured at rosa spinosissima x mollis = r. x sabinii
error occured 

In [17]:
len(failedComputations)

163

# How do we deal with these 163 failed entries?

Doing it manually feels wrong...Google searching 113 records doesn't seem appealing

We can notice; the first two keywords of the phrase indicate the species. Anything after that, which is what destroys the APIs ability to tell if it's correct, is metadata.

Hence let's refire the requests, but only on the first two keywords

In [18]:
failedComputationDict = dict(zip(failedComputations, failedComputations))
len(failedComputationDict)

163

In [19]:
amendedDict = {}
failed = []
for failedName in failedComputationDict:
    validatedName = validateSingleName(failedName,2)
    if "error_loc" in validatedName:
        failed.append(failedName)
    else:
        amendedDict[failedName] = validatedName

error occured at cochlicella (cochlicella) acuta
error occured at vertebrata fucoides
error occured at porania (porania) pulvillus
error occured at anisus (anisus) leucostoma
error occured at zostera (zostera) marina
error occured at rhodophycota indet. (non-calc. crusts)
error occured at cirrhia icteritia
error occured at lichenicolous fungus (unidentified)
error occured at maerl indet
error occured at leiostyla (leiostyla) anglica
error occured at aphodius fimetarius/pedellus agg.
error occured at lithobius (lithobius) melanops
error occured at cochlicopa cf. lubrica
error occured at gyraulus (armiger) crista
error occured at euconulus (euconulus) cf. fulvus
error occured at platyhypnidium ripariodes
error occured at rubus sect. corylifolii
error occured at diatoms film
error occured at leiocolea collaris
error occured at filamentous brown algae
error occured at trochulus (trochulus) hispidus
error occured at leiocolea fitzgeraldiae
error occured at diatoms colonial
error occured at 

In [21]:
print(len(amendedDict))
print(amendedDict['carex hostiana x viridula = c. x fulva'])

125
carex hostiana


# Result

We saved 125 of the 163 errored plants. The tradeoff was technically some useful information- the metadata we saw, such as "x fragilis"- a specific type of the species we are studying.

However, we will proceed with the assumption that animals don't particularly care what type of species something is, if it will be their dinner. E.g. a grasshopper won't particularly care what specific type of wheat it eats; as long as it's the same species, it will be valid in the food chain.

## I will fix the rest manually

In [22]:
failed

['cochlicella (cochlicella) acuta',
 'vertebrata fucoides',
 'porania (porania) pulvillus',
 'anisus (anisus) leucostoma',
 'zostera (zostera) marina',
 'rhodophycota indet. (non-calc. crusts)',
 'cirrhia icteritia',
 'lichenicolous fungus (unidentified)',
 'maerl indet',
 'leiostyla (leiostyla) anglica',
 'aphodius fimetarius/pedellus agg.',
 'lithobius (lithobius) melanops',
 'cochlicopa cf. lubrica',
 'gyraulus (armiger) crista',
 'euconulus (euconulus) cf. fulvus',
 'platyhypnidium ripariodes',
 'rubus sect. corylifolii',
 'diatoms film',
 'leiocolea collaris',
 'filamentous brown algae',
 'trochulus (trochulus) hispidus',
 'leiocolea fitzgeraldiae',
 'diatoms colonial',
 'anthus spinoletta/petrosus agg.',
 'cochlicopa cf. lubricella',
 'encrusting algae indet.',
 'bryozoa indet crusts',
 'porifera indet crusts',
 'tenellia caerulea',
 'haliclona (rhizoniera) viscosa',
 'notonecta (notonecta) obliqua',
 'trochulus (trochulus) striolatus',
 'euconulus agg.',
 'lichen (unidentified)'

In [23]:
amendedDict['trochulus (trochulus) hispidus'] = 'trochulus hispidus'
amendedDict['porifera indet crusts'] = 'porifera indet crusts' #essentially a sea sponge- not recognized by resolver
amendedDict['cochlicopa cf. lubricella'] = 'cochlicopa lubricella'
amendedDict['anisus (anisus) leucostoma'] = 'anisus leucostoma'
amendedDict['rhodophycota indet. (non-calc. crusts)'] = 'rhodophycota'
amendedDict['taraxacum agg.'] = 'taraxacum officinale'
amendedDict['aphodius fimetarius/pedellus agg.'] = 'aphodius fimetarius'
amendedDict['cochlicopa cf. lubrica'] = 'cochlicopa lubrica'
amendedDict['trochulus (trochulus) striolatus'] = 'trochulus striolatus'
amendedDict['maerl indet'] = 'maerl'
amendedDict['bryozoa indet crusts'] = 'bryozoa'
amendedDict['euconulus (euconulus) cf. fulvus'] = 'euconulus fulvus'
amendedDict['notonecta (notonecta) obliqua'] = 'notonecta obliqua'
amendedDict['leiostyla (leiostyla) anglica'] = 'leiostyla anglica'
amendedDict['anthus spinoletta/petrosus agg.'] = 'anthus spinoletta'
amendedDict['lithobius (lithobius) forficatus'] = 'lithobius forficatus'
amendedDict['gyraulus (armiger) crista'] = 'gyraulus crista'
amendedDict['cochlicella (cochlicella) acuta'] = 'cochlicella acuta'
amendedDict['haliclona (rhizoniera) viscosa'] = 'haliclona viscosa'
amendedDict['columella agg.'] = 'columella'
amendedDict['zostera (zostera) marina'] = 'zostera marina'
amendedDict['porania (porania) pulvillus'] = 'porania pulvillus'
amendedDict['platyhypnidium ripariodes'] = 'platyhypnidium ripariodes'
amendedDict['vertebrata fucoides'] = 'vertebrata fucoides'

In [24]:
len(amendedDict)

149

In [25]:
fullSpeciesOfEiggDict = {}

with open("PartialEiggVerified",'rb') as f:
    partialDict = pickle.load(f)

for item in partialDict:
    if "error_loc" not in partialDict[item]:
        fullSpeciesOfEiggDict[item] = partialDict[item]
    
for item in amendedDict:
    fullSpeciesOfEiggDict[item] = amendedDict[item]
    
with open("EiggVerifiedSpeciesList",'wb') as f:
    pickle.dump(fullSpeciesOfEiggDict,f)


In [26]:
fullSpeciesOfEiggDict['carex hostiana x viridula = c. x fulva']

'carex hostiana'

In [77]:
len(fullSpeciesOfEiggDict)

2452

In [27]:
with open("EiggVerifiedSpeciesList",'rb') as f:
    testingPickleWrite = pickle.load(f)
    
print(len(testingPickleWrite))

3163
