In [1]:
# importing libraries
import os
from dotenv import load_dotenv
load_dotenv()
import requests
import json
import pandas as pd

In [2]:
# IUCN API token, read from the process environment (populated from .env
# by the load_dotenv() call in the imports cell); None when 'token' is unset.
api_key = os.environ.get('token')

In [30]:
# Request headers carrying the API token.
# NOTE(review): this dict is not referenced by any cell visible here;
# spec_info sends the token as a query parameter instead. Confirm it is needed.
parameters = {"Authorization": f"token {api_key}"}

In [31]:
# Load the outlier observations (one row per record, with a `species` column
# that later cells use as the lookup key).
out = pd.read_csv(filepath_or_buffer='gbif_out.csv')

In [38]:
# Distinct species names, in order of first appearance, as a plain Python list.
spec = [*out['species'].unique()]
type(spec)

list

In [39]:
# Call the IUCN API to get the animal threat category.
# We are also interested in the common name of every species and its population trend.

def _placeholder_record(name):
    """Return the row used when no IUCN record could be obtained for `name`.

    Every field is ``None`` except ``scientific_name`` (the input rendered as a
    string) and the hard-coded kingdom/phylum/class values, which label the
    entry as a bird (ANIMALIA / CHORDATA / AVES).
    """
    return {'taxonid': None,
            'scientific_name': f'{name}',
            'kingdom': 'ANIMALIA',
            'phylum': 'CHORDATA',
            'class': 'AVES',
            'order': None,
            'family': None,
            'genus': None,
            'main_common_name': None,
            'authority': None,
            'published_year': None,
            'assessment_date': None,
            'category': None,
            'criteria': None,
            'population_trend': None,
            'marine_system': None,
            'freshwater_system': None,
            'terrestrial_system': None,
            'assessor': None,
            'reviewer': None,
            'eoo_km2': None,
            'elevation_upper': None,
            'elevation_lower': None,
            'depth_upper': None,
            'depth_lower': None,
            'errata_flag': None,
            'errata_reason': None,
            'amended_flag': None,
            'amended_reason': None}


def spec_info(df, token, timeout=30):
    """Look up each species name on the IUCN Red List API (v3).

    Parameters
    ----------
    df : iterable of str
        Scientific names such as ``"Tadorna ferruginea"``. Only the first two
        whitespace-separated words (genus, species epithet) are sent to the API.
    token : str
        IUCN API token, sent as the ``token`` query parameter.
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per input name, in input order. A row is the first entry of the
        API's ``result`` list, or a placeholder row (see ``_placeholder_record``)
        when the name cannot be split into two words, the request fails, or the
        response contains no result.
    """
    result = []
    for d in df:
        try:
            # Raises ValueError for names with fewer than two words and
            # AttributeError for non-string entries (e.g. NaN); both fall
            # through to the placeholder row below.
            gen, sp = d.split()[:2]
            url = f"https://apiv3.iucnredlist.org/api/v3/species/{gen.lower()}%20{sp}"
            response = requests.get(url, params={'token': token}, timeout=timeout)
            records = response.json().get('result')
            # A missing or empty 'result' raises here and yields a placeholder.
            result.append(records[0])
        except Exception:
            # Best-effort lookup: any per-species failure becomes a placeholder
            # row, while KeyboardInterrupt/SystemExit still stop the loop.
            result.append(_placeholder_record(d))

    return pd.DataFrame(result)


In [41]:
# One IUCN record (or placeholder row) per unique species name.
iucn = spec_info(spec, api_key)

Unnamed: 0,taxonid,scientific_name,kingdom,phylum,class,order,family,genus,main_common_name,authority,...,aoo_km2,eoo_km2,elevation_upper,elevation_lower,depth_upper,depth_lower,errata_flag,errata_reason,amended_flag,amended_reason
0,22697369.0,Phoeniconaias minor,ANIMALIA,CHORDATA,AVES,PHOENICOPTERIFORMES,PHOENICOPTERIDAE,Phoeniconaias,Lesser Flamingo,"(Geoffroy Saint-Hilaire, 1798)",...,,27700000,,,,,,,,
1,22680003.0,Tadorna ferruginea,ANIMALIA,CHORDATA,AVES,ANSERIFORMES,ANATIDAE,Tadorna,Ruddy Shelduck,"(Pallas, 1764)",...,,37900000,,,,,,,,
2,22718179.0,Passer hispaniolensis,ANIMALIA,CHORDATA,AVES,PASSERIFORMES,PASSERIDAE,Passer,Spanish Sparrow,"(Temminck, 1820)",...,,16600000,,,,,,,,
3,22693363.0,Calidris canutus,ANIMALIA,CHORDATA,AVES,CHARADRIIFORMES,SCOLOPACIDAE,Calidris,Red Knot,"(Linnaeus, 1758)",...,,17800000,300.0,0.0,,,,,,
4,22696599.0,Podiceps grisegena,ANIMALIA,CHORDATA,AVES,PODICIPEDIFORMES,PODICIPEDIDAE,Podiceps,Red-necked Grebe,"(Boddaert, 1783)",...,,50300000,1000.0,,,,,,,


In [49]:
# Build one lookup dict per attribute of interest, keyed by scientific name.
_names = iucn['scientific_name']
category = dict(zip(_names, iucn['category']))
common_name = dict(zip(_names, iucn['main_common_name']))
population_trend = dict(zip(_names, iucn['population_trend']))


# Attach each attribute to the outlier rows by looking up their species name.
for _column, _lookup in (('category', category),
                         ('common_name', common_name),
                         ('population_trend', population_trend)):
    out[_column] = out['species'].map(_lookup)

out['category'].unique()

array(['NT', 'LC', 'VU', None], dtype=object)

- Extinct **(EX)** – beyond reasonable doubt that the species is no longer extant.
- Extinct in the wild **(EW)** – survives only in captivity, cultivation and/or outside native range, as presumed after exhaustive surveys.
- Critically endangered **(CR)** – faces an extremely high risk of extinction in the wild.
- Endangered **(EN)** – very high risk of extinction in the wild, meets any of criteria A to E for Endangered.
- Vulnerable **(VU)** – meets one of the 5 red list criteria and thus considered to be at high risk of unnatural (human-caused) extinction without further human intervention.
- Near threatened **(NT)** – close to being at high risk of extinction in the near future.
- Least concern **(LC)** – unlikely to become extinct in the near future.
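- Data deficient **(DD)** – not enough information is available to assess the risk of extinction.
- Not evaluated **(NE)** – has not yet been evaluated against the Red List criteria.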

In [51]:
# Persist the enriched outliers; the DataFrame index is written as the first
# (unnamed) column because `index` is left at its default.
out.to_csv(path_or_buf='outliers.csv')