<a href="https://colab.research.google.com/github/FFI-Vietnam/camtrap-tools/blob/main/Wildlife%20Insights/bulk-upload/04_collect-WI-taxon-database.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
This script fetches species information from the Wildlife Insights species database.
It also maps species whose staff-defined common name does not match the Wildlife Insights common name.
After running this script, a new file is added to the 'data cleaning' folder:

data cleaning
    |__ 4.1_collect_WI-taxon-database.csv
    
"""

In [1]:
import pandas as pd
import numpy as np
import os
import json
import requests

# Mount Google Drive inside the Colab runtime so the cells below can read and
# write files under '/content/drive'.
from google.colab import drive
drive.mount('/content/drive')

# Colab directories and file names used by the cells below.
# `root` is the mount point passed to drive.mount() above.
root = '/content/drive/'

# Folder paths relative to `root`.
# NOTE(review): `dataset_folder` is not referenced by any cell visible here — confirm it is still needed.
dataset_folder = 'My Drive/FFI/Wildlife Insights Bulk Upload Test/bulk-upload_template-autofill/dataset'
contain_folder = 'My Drive/FFI/Wildlife Insights Bulk Upload Test/bulk-upload_template-autofill/data cleaning'

# Name of the csv file this notebook writes into `contain_folder`.
WIdatabase_file_name = "4.1_collect_WI-taxon-database.csv"

# useful functions
def read_csv_Google_drive(root, contain_folder, file_name):
  """
  Load a csv file stored on Google Drive into a pandas DataFrame.

  The three arguments are joined, in order, into the full path of the file.
  param examples:
    root = '/content/drive/'
    contain_folder = 'My Drive/FFI/dataset'
    file_name = 'image_metadata(2020-06-26)_full.csv'

  returns the DataFrame produced by pd.read_csv for that path
  """
  return pd.read_csv(os.path.join(root, contain_folder, file_name))

def save_csv_Google_drive(df, root, contain_folder, file_name):
  """
  function to save a dataframe as a csv file on Google Drive

  The destination folder is created when it does not exist yet, and the
  dataframe is written straight to the destination without its index.
  No intermediate copy is written to the working directory.

  param examples:
    df = any pandas DataFrame
    root = '/content/drive/'
    contain_folder = 'My Drive/FFI/dataset'
    file_name = 'image_metadata(2020-06-26)_full.csv'

  raises OSError if the folder cannot be created or the file cannot be written
  """
  folder_path = os.path.join(root, contain_folder)
  file_path = os.path.join(folder_path, file_name)

  # exist_ok=True tolerates an already existing folder, while genuine
  # failures (permissions, a file in the way, ...) are still reported
  os.makedirs(folder_path, exist_ok=True)

  # write directly to the destination so the content never goes through a
  # text-mode read/write that depends on the locale encoding
  df.to_csv(file_path, index=False)

  print(f'File is saved to {file_name} in Google Drive at {file_path}')

Mounted at /content/drive


In [2]:
# 4.1) 

# download WI taxon id database
# NOTE(review): a single page of at most 30000 records is requested — confirm
# this still covers the whole Wildlife Insights taxonomy.
URL = "https://api.wildlifeinsights.org/api/v1/taxonomy?fields=class,order,family,genus,species,authority,taxonomyType,uniqueIdentifier,commonNameEnglish&page[size]=30000"

# the timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(URL, timeout=120)
# stop here with a clear HTTP error instead of failing later on a missing "data" key
response.raise_for_status()

# temporarily save the database to a json file
with open('taxonomy.json', 'w') as outfile:
    json.dump(response.json()["data"], outfile)

# one row per taxon record returned by the API
taxonomy_df = pd.read_json(r'taxonomy.json')

In [3]:
# find species that are not in the WI taxon db

df = read_csv_Google_drive(root, contain_folder, "1.4_clean_metadata_remove-no-animal-image.csv")

# list of species (missing names are dropped, as in the matching cell further
# down, so that every entry is a string)
species_list = df['species_common_name'].dropna().unique()

# build the lookup of Wildlife Insights common names once, instead of
# rebuilding a list for every species
wi_common_names = set(taxonomy_df['commonNameEnglish'].dropna())

# staff-defined names that have no identical commonNameEnglish
unfound = [s for s in species_list if s not in wi_common_names]

print("There are",len(unfound),"unfound species")
unfound

There are 41 unfound species


['Asian Palm Civet',
 'Ferret-badger',
 'Murid',
 'Asian Red-cheeked Squirrel',
 'Annamite Dark Muntjac',
 'Asian Black Bear',
 'Streak-breasted Scimitar-babbler',
 'Indochinese Wren-babbler',
 'Puff-throated Bulbul',
 'Macaque',
 'Squirrel',
 'Animal',
 'Rhesus Macaque',
 'Scaly Thrush',
 'Oriental Magpie-robin',
 'Coral-billed Scimitar-babbler',
 'Impressed Tortoise',
 'Maxomys',
 'Grey-capped Emerald Dove',
 'Greater Racquet-tailed Drongo',
 'Asian Water Monitor',
 'Japanese Robin',
 'Particolored Flying Squirrel',
 'Leopoldamys',
 'Shrew',
 'Butterfly',
 'Brownish-flanked Bush Warbler',
 'Snake',
 'Crested Serpent Eagle',
 'Moth',
 'Flying Squirrel',
 'Muntjac',
 'Lesser Mouse-deer',
 'Flying Insect',
 'White-throated Fantail',
 'Eastern Striped Squirrel',
 "Natalia's Agama",
 'Snowy-browed Flycatcher',
 "Tickell's Blue Flycatcher",
 'Hill Blue Flycatcher',
 'Banded Kingfisher']

In [4]:
# (optional) print one ready-to-edit `match_unfound_species[...]` line per
# unfound species, to be pasted into the mapping cell below and filled in
for name in unfound:
  left_hand_side = 'match_unfound_species["' + name + '"]'
  # pad the left-hand side to 60 characters so the '=' signs line up
  print(left_hand_side.ljust(60) + '= {"commonNameEnglish":""}')

match_unfound_species["Asian Palm Civet"]                   = {"commonNameEnglish":""}
match_unfound_species["Ferret-badger"]                      = {"commonNameEnglish":""}
match_unfound_species["Murid"]                              = {"commonNameEnglish":""}
match_unfound_species["Asian Red-cheeked Squirrel"]         = {"commonNameEnglish":""}
match_unfound_species["Annamite Dark Muntjac"]              = {"commonNameEnglish":""}
match_unfound_species["Asian Black Bear"]                   = {"commonNameEnglish":""}
match_unfound_species["Streak-breasted Scimitar-babbler"]   = {"commonNameEnglish":""}
match_unfound_species["Indochinese Wren-babbler"]           = {"commonNameEnglish":""}
match_unfound_species["Puff-throated Bulbul"]               = {"commonNameEnglish":""}
match_unfound_species["Macaque"]                            = {"commonNameEnglish":""}
match_unfound_species["Squirrel"]                           = {"commonNameEnglish":""}
match_unfound_species["Animal"]            

Use queries like the ones below to look up a species' common name in the Wildlife Insights taxonomy.

In [22]:
# exact-match lookup on the Wildlife Insights common name
taxonomy_df.loc[taxonomy_df['commonNameEnglish'].eq('Small Asian Mongoose')]

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier


In [24]:
# lookup by scientific name: genus and species must both match
taxonomy_df.loc[
    taxonomy_df['genus'].eq('Rimator') & taxonomy_df['species'].eq('danjoui')
]

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier
21704,2014455,Aves,Passeriformes,Pellorneidae,Rimator,danjoui,"Robinson & Kloss, 1919",Indochinese Wren-Babbler,biological,7054758f-7909-4467-995e-319c5e62ecbb


In [23]:
# lookup by species name only, across all genera
taxonomy_df.loc[taxonomy_df['species'].eq('impressa')]

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier
21712,2021737,Reptilia,Testudines,Testudinidae,Manouria,impressa,"(Günther, 1882)",Impressed tortoise,biological,a71b2322-da09-48d4-b101-3dc9884d3132


In [None]:
# list every record of one genus
taxonomy_df.loc[taxonomy_df['genus'].eq('Pterorhinus')]

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier


In [13]:
# list every record of one family
# NOTE(review): this returned no rows in the saved run; 'Sciurinae' looks like
# a subfamily name, while squirrel records elsewhere in this notebook carry
# family 'Sciuridae' — confirm which value was intended.
taxonomy_df.loc[taxonomy_df['family'].eq('Sciurinae')]

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier


In [None]:
# list every record of one order
taxonomy_df.loc[taxonomy_df['order'].eq('Carnivora')]

In [None]:
# list every record of one class
taxonomy_df.loc[taxonomy_df['class'].eq('Insecta')]

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier
4105,2021559,Insecta,Lepidoptera,,,,,Butterflies and Moths,biological,4a7126ec-2b0b-426d-bafd-356219eb487a
19924,2021552,Insecta,,,,,,Insect,biological,9fb844bd-26d1-49f1-a829-0025a48d3bdb


In [25]:
# match unfound species name with corresponding name in WI taxon id
#
# Keys are the staff-defined common names. Each value tells the matching cell
# how to find the Wildlife Insights record: either by its commonNameEnglish,
# or by its genus + species pair.
# "Natalia's Agama" has no mapping yet, so the matching cell reports it as not found.
match_unfound_species = {
    # --- matched by Wildlife Insights common name ---
    "Indochinese Wren-babbler":         {"commonNameEnglish": "Indochinese Wren-Babbler"},
    "Impressed Tortoise":               {"commonNameEnglish": "Impressed tortoise"},
    "Hill Blue Flycatcher":             {"commonNameEnglish": "Hill Blue-Flycatcher"},
    "Flying Insect":                    {"commonNameEnglish": "Insect"},
    "Animal":                           {"commonNameEnglish": "Mammal"},
    "Streaked Wren Babbler":            {"commonNameEnglish": "Streaked Wren-babbler"},
    "Asian Water Monitor":              {"commonNameEnglish": "Common Water Monitor"},
    "Asian Red-cheeked Squirrel":       {"commonNameEnglish": "Red-cheeked Squirrel"},
    "Muntjac":                          {"commonNameEnglish": "Muntiacus Species"},
    "Asian Black Bear":                 {"commonNameEnglish": "Asiatic Black Bear"},
    "Ferret-badger":                    {"commonNameEnglish": "Melogale Species"},
    "Rhesus Macaque":                   {"commonNameEnglish": "Rhesus Monkey"},
    "Murid":                            {"commonNameEnglish": "Muridae Family"},
    "Maxomys":                          {"commonNameEnglish": "Muridae Family"},
    "Leopoldamys":                      {"commonNameEnglish": "Muridae Family"},
    "Wild Pig":                         {"commonNameEnglish": "Wild Boar"},
    "Crested Serpent Eagle":            {"commonNameEnglish": "Crested Serpent-Eagle"},
    "Yellow-breasted Magpie":           {"commonNameEnglish": "Indochinese Green Magpie"},
    "Macaque":                          {"commonNameEnglish": "Macaca Species"},
    "Snake":                            {"commonNameEnglish": "Lizards and Snakes"},
    "Annamite Dark Muntjac":            {"commonNameEnglish": "Annamite Muntjac"},
    "White-Crowned Forktail":           {"commonNameEnglish": "White-crowned Forktail"},
    # NOTE(review): a flying squirrel mapped to "African Linsang" looks like a
    # mismatch — confirm the intended Wildlife Insights record.
    "Particolored Flying Squirrel":     {"commonNameEnglish": "African Linsang"},
    "Oriental Magpie Robin":            {"commonNameEnglish": "Oriental Magpie-Robin"},
    "Squirrel":                         {"commonNameEnglish": "Sciuridae Family"},
    "Butterfly":                        {"commonNameEnglish": "Butterflies and Moths"},
    "Moth":                             {"commonNameEnglish": "Butterflies and Moths"},
    "Asian Palm Civet":                 {"commonNameEnglish": "Common Palm Civet"},
    "Shrew":                            {"commonNameEnglish": "Soricidae Family"},
    "Racket-tailed Treepie":            {"commonNameEnglish": "Racquet-tailed Treepie"},
    "Emerald Dove":                     {"commonNameEnglish": "Common Emerald Dove"},
    "Domestic Buffalo":                 {"commonNameEnglish": "Bubalus Species"},
    "Malayan Night Heron":              {"commonNameEnglish": "Malay Night-heron"},
    "Lesser Mouse-deer":                {"commonNameEnglish": "Lesser Oriental Chevrotain"},
    "Eastern Striped Squirrel":         {"commonNameEnglish": "Maritime Striped Squirrel"},
    "Green magpie":                     {"commonNameEnglish": "Common Green Magpie"},
    "Flying squirrel":                  {"commonNameEnglish": "Sciuridae Family"},
    "White-tailed Robin":               {"commonNameEnglish": "White-tailed Blue Robin"},
    "Tickell's Blue Flycatcher":        {"commonNameEnglish": "Tickell's Blue-Flycatcher"},
    "Pale-footed Bush Warbler":         {"commonNameEnglish": "Pale-footed Bush-warbler"},
    "Streak-breasted Scimitar-babbler": {"commonNameEnglish": "Streak-breasted Scimitar Babbler"},
    "Oriental Magpie-robin":            {"commonNameEnglish": "Oriental Magpie-Robin"},
    "Coral-billed Scimitar-babbler":    {"commonNameEnglish": "Coral-billed Scimitar Babbler"},
    "Grey-capped Emerald Dove":         {"commonNameEnglish": "Common Emerald Dove"},
    "Flying Squirrel":                  {"commonNameEnglish": "Sciuridae Family"},

    # --- matched by scientific name (genus + species) ---
    "Banded Kingfisher":                {"genus": "Lacedo",     "species": "pulchella"},
    "Japanese Robin":                   {"genus": "Larvivora",  "species": "akahige"},
    "Ochraceous Bulbul":                {"genus": "Alophoixus", "species": "ochraceus"},
    "Scaly Thrush":                     {"genus": "Zoothera",   "species": "dauma"},
    "White-throated Fantail":           {"genus": "Rhipidura",  "species": "albicollis"},
    "Brownish-flanked Bush Warbler":    {"genus": "Horornis",   "species": "fortipes"},
    "Greater Racquet-tailed Drongo":    {"genus": "Dicrurus",   "species": "paradiseus"},
    "Greater Racket-tailed Drongo":     {"genus": "Dicrurus",   "species": "paradiseus"},
    "Snowy-browed Flycatcher":          {"genus": "Ficedula",   "species": "hyperythra"},
    "Puff-throated Bulbul":             {"genus": "Alophoixus", "species": "pallidus"},

    # --- rename some species ---
    "Chinese Serow":                    {"commonNameEnglish": "Sumatran Serow"},
}

In [26]:
# match species name with corresponding name in WI taxon id
#
# For every staff-defined species name, look up the Wildlife Insights record(s)
# and label them with that name in a new 'FFI_species_name' column.
# Names listed in match_unfound_species are looked up through their manual
# mapping; all other names must equal a commonNameEnglish exactly.

species_list = df['species_common_name'].dropna().unique()
# add Blank as a species
species_list = np.append(species_list, 'Blank')

# One already-labelled frame per matched species. Labelling each frame when it
# is found keeps names and rows aligned even if a lookup misses or returns
# several taxonomy rows.
matched_frames = []

for s in species_list:
  if s in match_unfound_species:
    wi_key = match_unfound_species[s]
    if 'commonNameEnglish' in wi_key:
      # manual mapping given as a Wildlife Insights common name
      hit_mask = taxonomy_df['commonNameEnglish'] == wi_key['commonNameEnglish']
    else:
      # manual mapping given as a genus + species pair
      hit_mask = (taxonomy_df['genus'] == wi_key['genus']) & \
                 (taxonomy_df['species'] == wi_key['species'])
    miss_message = f'Cannot find {s}'
  else:
    # no manual mapping: the staff name must equal the WI common name
    hit_mask = taxonomy_df['commonNameEnglish'] == s
    miss_message = f"{s} not found"

  hits = taxonomy_df[hit_mask]
  if hits.empty:
    # report the miss and skip the species; nothing is recorded for it
    print(miss_message)
    continue
  matched_frames.append(hits.assign(FFI_species_name=s))

if matched_frames:
  # concatenate once; the original taxonomy_df index is kept
  match_table = pd.concat(matched_frames)
else:
  # nothing matched: keep the taxonomy columns plus an empty label column
  match_table = taxonomy_df.iloc[0:0].assign(FFI_species_name=pd.Series(dtype='object'))

# kept as a plain list, one entry per row of match_table
FFI_species_name = match_table['FFI_species_name'].to_list()

# save match_table to Google Drive
save_csv_Google_drive(match_table, root, contain_folder, WIdatabase_file_name)
# preview a few random rows (fewer when the table is small)
match_table.sample(min(7, len(match_table)))

Natalia's Agama not found
File is saved to 4.1_collect_WI-taxon-database.csv in Google Drive at /content/drive/My Drive/FFI/Wildlife Insights Bulk Upload Test/bulk-upload_template-autofill/data cleaning/4.1_collect_WI-taxon-database.csv


Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier,FFI_species_name
2360,2003133,Mammalia,Carnivora,Mustelidae,Mustela,kathiah,"Hodgson, 1835",Yellow-bellied Weasel,biological,18df0149-eb16-4e46-a8ef-53f82bd14786,Yellow-bellied Weasel
6430,2008529,Aves,Strigiformes,Tytonidae,Phodilus,badius,"Horsfield, 1821",Oriental Bay Owl,biological,b4f26318-22be-4bdf-b4a8-b207c0ad90ef,Oriental Bay Owl
3591,2004757,Mammalia,Rodentia,Spalacidae,Rhizomys,pruinosus,"Blyth, 1851",Hoary Bamboo Rat,biological,89336aa7-efec-4e43-b0e9-c8be5bf91c76,Hoary Bamboo Rat
9437,2012701,Aves,Passeriformes,Rhipiduridae,Rhipidura,albicollis,"Vieillot, 1818",,biological,0f3f4083-15ca-4a13-96e2-32a5170e3a23,White-throated Fantail
14271,2005301,Mammalia,Rodentia,Sciuridae,Tamiops,maritimus,"Bonhote, 1900",Maritime Striped Squirrel,biological,d17ee71d-ac39-44cf-b189-5eabd120b26f,Eastern Striped Squirrel
21520,2021317,Mammalia,Rodentia,Sciuridae,,,,Sciuridae Family,biological,e4d1e892-0e4b-475a-a8ac-b5c3502e0d55,Flying Squirrel
16851,2015024,Aves,Passeriformes,Turdidae,Geokichla,citrina,"Latham, 1790",Orange-headed Thrush,biological,b1e4f65f-55d7-4074-af7d-561adb1e5524,Orange-headed Thrush
