<a href="https://colab.research.google.com/github/FFI-Vietnam/camtrap-tools/blob/main/MegaDetector/confusion_matrix/02_add-conservation-status.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
This script adds the IUCN Red List conservation status of each species to the
Wildlife Insights taxon match table.

After running this script, a new file is added to the 'data cleaning' folder

data cleaning
    |__ 02_taxon-database-with-conservation-status.csv
    
"""

"\nThis script creates a groundtruth table which specifies what species inside an \nimage and if three consecutive images are in the same batch.\n\nAfter runnning this script, a new file is added to 'data cleaning' folder\n\ndata cleaning\n    |__ 02_taxon-database-with-conservation-status.csv\n    \n"

In [2]:
import pandas as pd
import numpy as np
import os
import requests
from tqdm.notebook import tqdm

In [3]:
# mount Google Drive into the Colab runtime at /content/drive
# (the `root` path used by the read/save helpers points at this mount)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Colab mount point; the folders below are joined onto it by the read/save helpers
root = '/content/drive/'

# Google Drive folders
WI_folder = 'My Drive/FFI/Wildlife Insights Bulk Upload Test/bulk-upload_template-autofill/data cleaning'
dataset_folder = 'My Drive/FFI/MegaDetector Test/confusion-matrix/dataset'
contain_folder = 'My Drive/FFI/MegaDetector Test/confusion-matrix/data cleaning'

# file names
MD_result_file_name = 'MegaDetector_result_2021-08-27.json'
ground_truth_file_name = '01_ground-truth-table_Kon-Plong.csv'
# the taxon match table comes from the Wildlife Insights/bulk-upload/04_collect-WI-taxon-database.ipynb
# script, which saves it as 4.1_collect_WI-taxon-database.csv
WI_taxon_match_table_file_name = '4.1_collect_WI-taxon-database.csv'
# name of the csv written by this notebook
MD_taxon_match_table_file_name = '02_taxon-database-with-conservation-status.csv'

In [5]:
# read and save file functions
def read_csv_Google_drive(root, contain_folder, file_name):
  """
  function to read a csv file from Google Drive
  param examples:
    root = '/content/drive/'
    contain_folder = 'My Drive/FFI/dataset'
    file_name = 'image_metadata(2020-06-26)_full.csv'
  returns the file content as a pandas DataFrame
  """
  return pd.read_csv(os.path.join(root, contain_folder, file_name))

def save_csv_Google_drive(df, root, contain_folder, file_name):
  """
  function to save a csv file to Google Drive
  param examples:
    root = '/content/drive/'
    contain_folder = 'My Drive/FFI/dataset'
    file_name = 'image_metadata(2020-06-26)_full.csv'

  The destination folder is created when it does not exist yet, and df is
  written (without its index) to <root>/<contain_folder>/<file_name>.
  """
  folder_path = os.path.join(root, contain_folder)
  file_path = os.path.join(folder_path, file_name)

  # exist_ok tolerates an already existing folder, while real failures
  # (e.g. permissions) are raised here instead of being silently ignored
  os.makedirs(folder_path, exist_ok=True)

  # write straight to the destination: no scratch copy in the working
  # directory and no re-read/re-write of the text with the locale encoding
  df.to_csv(file_path, index=False)

  print(f'File is saved to {file_name} in Google Drive at {file_path}')

In [6]:
# read taxon_match_table (the Wildlife Insights taxon match table) from Google Drive
taxon_match_table = read_csv_Google_drive(root, WI_folder, WI_taxon_match_table_file_name)
# preview 5 random rows
taxon_match_table.sample(5)

Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier,FFI_species_name
102,2015020,Aves,Passeriformes,Turdidae,Geokichla,sibirica,"Pallas, 1776",Siberian Thrush,biological,c8ec11b2-2bf1-4466-b312-cb43cf20a1ee,Siberian Thrush
32,2014455,Aves,Passeriformes,Pellorneidae,Rimator,danjoui,"Robinson & Kloss, 1919",Indochinese Wren-Babbler,biological,7054758f-7909-4467-995e-319c5e62ecbb,Indochinese Wren-babbler
78,2009725,Aves,Piciformes,Picidae,Picus,rabieri,"Oustalet, 1898",Red-collared Woodpecker,biological,6cd3d89b-83db-4627-b837-ee65c61a010f,Red-collared Woodpecker
133,2006353,Aves,Columbiformes,Columbidae,Macropygia,unchall,"Wagler, 1827",Barred Cuckoo-dove,biological,9e1bf8e0-7185-4c20-b537-3c0acf16d6d7,Barred Cuckoo-dove
117,2014074,Aves,Passeriformes,Scotocercidae,Hemitesia,pallidipes,"Blanford, 1872",Pale-footed Bush-warbler,biological,06117775-606d-48a6-8203-0759588720f5,Pale-footed Bush-warbler


In [7]:
def get_scientific_name(taxon_match_table, species_common_name):
  """
  get genus and species name of a species

  params:
    taxon_match_table: DataFrame with 'FFI_species_name', 'genus' and 'species' columns
    species_common_name: value to look up in the 'FFI_species_name' column
  returns:
    (genus, species) of the first matching row,
    or (None, None) when no row matches
  """
  matches = taxon_match_table[taxon_match_table['FFI_species_name'] == species_common_name]
  if matches.empty:
    # report the miss and hand back explicit placeholders, so the caller gets
    # a value it can test instead of an error from unbound local names
    print('     Can not find taxonomy info for', species_common_name)
    return None, None
  return matches['genus'].iat[0], matches['species'].iat[0]

# test: look up one species by its FFI_species_name
get_scientific_name(taxon_match_table, "Owston's Civet")

('Chrotogale', 'owstoni')

In [8]:
def get_conservation_status(genus, species, api_token=None):
  """
  get conservation status of a species from IUCN red list API
  https://apiv3.iucnredlist.org/api/v3/docs#species-history-name

  params:
    genus, species: the two parts of the scientific name; when either one is
      not a non-empty string (e.g. NaN for a genus-level taxon) no request is
      made and None is returned
    api_token: optional IUCN API token; when omitted, the IUCN_API_TOKEN
      environment variable is used, then the token embedded below
  returns:
    the 'category' of the first entry of the 'result' list in the API
    response, or None when the response has no such entry
  raises:
    requests.RequestException when the request fails or times out
  """
  # an incomplete name cannot match a species, so skip the network round trip
  if not all(isinstance(part, str) and part.strip() for part in (genus, species)):
    return None

  if api_token is None:
    # NOTE(review): a token committed to the notebook is effectively public;
    # set IUCN_API_TOKEN (or pass api_token) and rotate/remove this fallback,
    # which is kept only so existing calls keep working
    api_token = os.environ.get('IUCN_API_TOKEN') or "eba8c1a83a9d25e8ff01b8acbfa153fe3317d90badd60e2ec1d87ac8b330fda6"

  # without a timeout a stalled connection would block the notebook forever
  response = requests.get(
      f"https://apiv3.iucnredlist.org/api/v3/species/history/name/{genus}%20{species}?token={api_token}",
      timeout=30,
  )
  try:
    status = response.json()['result'][0]['category']
  except (ValueError, KeyError, IndexError, TypeError):
    # body is not JSON, or it has no 'result' entry / no 'category' field
    status = None
  return status

# test: query the IUCN API for one species (sends a real HTTP request)
get_conservation_status('Chrotogale', 'owstoni')

'Endangered'

In [10]:
# manual corrections applied on top of the API result (FFI_species_name -> status)
manual_status_overrides = {
  # EN -> LC
  'Domestic Water Buffalo': 'Least Concern',
  # NA -> LC
  'Human': 'Least Concern',
  'Domestic Dog': 'Least Concern',
  'Javan Mongoose': 'Least Concern',
  'Green-legged Partridge': 'Least Concern',
}

# fail before the slow API loop if an override names a species that is not in the table
missing_overrides = sorted(set(manual_status_overrides) - set(taxon_match_table['FFI_species_name']))
if missing_overrides:
  raise KeyError(f'species to override not found in taxon_match_table: {missing_overrides}')

# get list of conservation status
conservation_status = []

# the loop variable has its own name: get_scientific_name returns a value
# that is bound to `species`, which must not overwrite the name being iterated
for common_name in tqdm(taxon_match_table['FFI_species_name']):
  genus, species = get_scientific_name(taxon_match_table, common_name)
  status = get_conservation_status(genus, species)
  if status:
    conservation_status.append(status)
  else:
    # no status returned by the API lookup
    conservation_status.append('Not Applicable')

# create conservation_status column
taxon_match_table['conservation_status'] = conservation_status

# manually change status of some species
# rows are selected with a boolean mask on the name, so the assignment does not
# depend on the frame having a default 0..n-1 index or on names being unique
for common_name, status in manual_status_overrides.items():
  taxon_match_table.loc[taxon_match_table['FFI_species_name'] == common_name, 'conservation_status'] = status

# save to Google Drive
save_csv_Google_drive(taxon_match_table, root, contain_folder, MD_taxon_match_table_file_name)

taxon_match_table

  0%|          | 0/139 [00:00<?, ?it/s]

File is saved to 02_taxon-database-with-conservation-status.csv in Google Drive at /content/drive/My Drive/FFI/MegaDetector Test/confusion-matrix/data cleaning/02_taxon-database-with-conservation-status.csv


Unnamed: 0,id,class,order,family,genus,species,authority,commonNameEnglish,taxonomyType,uniqueIdentifier,FFI_species_name,conservation_status
0,2002045,Mammalia,Primates,Hominidae,Homo,sapiens,"Linnaeus, 1758",Human,biological,990ae9dd-7a59-4344-afcb-1b7b21368000,Human,Least Concern
1,2015024,Aves,Passeriformes,Turdidae,Geokichla,citrina,"Latham, 1790",Orange-headed Thrush,biological,b1e4f65f-55d7-4074-af7d-561adb1e5524,Orange-headed Thrush,Least Concern
2,2003818,Mammalia,Carnivora,Viverridae,Paradoxurus,hermaphroditus,"Pallas, 1777",Common Palm Civet,biological,20e0c8d9-c445-44b5-87fc-d5bb9724e874,Asian Palm Civet,Least Concern
3,2005874,Aves,Galliformes,Phasianidae,Arborophila,rufogularis,"Blyth, 1850",Rufous-throated Partridge,biological,843ab0d1-e748-4fdd-957e-59357e99db01,Rufous-throated Partridge,Least Concern
4,2019996,Mammalia,Carnivora,Mustelidae,Melogale,,,Melogale Species,biological,a6bdb282-2d0c-4f63-866b-e60ffa410d19,Ferret-badger,Not Applicable
...,...,...,...,...,...,...,...,...,...,...,...,...
134,2021544,Mammalia,Artiodactyla,Suidae,Sus,scrofa scrofa,,Domestic Pig,biological,c150a21e-952d-4665-8a62-a319841c5a56,Domestic Pig,Not Applicable
135,2015372,Aves,Passeriformes,Muscicapidae,Ficedula,tricolor,"Hodgson, 1845",Slaty-blue Flycatcher,biological,f08c53c7-05ae-46b7-9017-f336a3e2f772,Slaty-blue Flycatcher,Least Concern
136,2003975,Mammalia,Rodentia,Sciuridae,Petaurista,philippensis,"Elliot, 1839",Indian Giant Flying Squirrel,biological,7fe7e31a-154d-424c-89f9-c46b4a34bcd4,Indian Giant Flying Squirrel,Least Concern
137,2014555,Aves,Passeriformes,Leiotrichidae,Garrulax,chinensis,"Scopoli, 1786",Black-throated Laughingthrush,biological,3189052a-b57f-421f-a251-873f5e93d76d,Black-throated Laughingthrush,Least Concern
