In [1]:
# Module imports 
import pandas as pd
import os
import json

import dotenv

# Auto reload your development packages:
# load IPython's autoreload extension and switch it to mode 2
# (per the IPython docs, mode 2 re-imports modules before each cell runs).
%load_ext autoreload
%autoreload 2

# Load secrets and config from optional .env file.
# The cells below look up their input/output file paths in environment variables.
dotenv.load_dotenv()

True

In [2]:
# Load the datasets.
# The CSV locations are read from environment variables (optionally populated from .env).
# os.environ[...] is indexed directly so that an unset variable raises a KeyError that
# names it, instead of handing None to pd.read_csv and failing with an opaque error.
train_stats_df = pd.read_csv(os.environ["TRAIN_STATS"])
test_stats_df = pd.read_csv(os.environ["TEST_STATS"])
gbif_moth_checklist_df = pd.read_csv(os.environ["GLOBAL_MOTH_CHECKLIST"])

In [3]:
# Step 1: narrow the checklist to rows whose 'taxonomicStatus' is "ACCEPTED".
is_accepted = gbif_moth_checklist_df["taxonomicStatus"].eq("ACCEPTED")
gbif_moth_checklist_df = gbif_moth_checklist_df.loc[is_accepted]

# Step 2: of those, keep the rows whose 'acceptedTaxonKey' appears in
# train_stats's 'accepted_taxon_key'.
trained_keys = train_stats_df["accepted_taxon_key"]
is_trained = gbif_moth_checklist_df["acceptedTaxonKey"].isin(trained_keys)
trained_gbif_moth_checklist_df = gbif_moth_checklist_df.loc[is_trained]

In [4]:
# Add the image-count and accuracy columns to the checklist, all starting at zero
# (integer 0 for the *_imgs columns, float 0.0 for the *_acc columns).
stat_column_defaults = {
    "gbif_train_imgs": 0,
    "gbif_test_imgs": 0,
    "gbif_test_top1_acc": 0.0,
    "traps_test_imgs": 0,
    "traps_test_top1_acc": 0.0,
    "traps_test_top5_acc": 0.0,
}
trained_gbif_moth_checklist_df = trained_gbif_moth_checklist_df.assign(**stat_column_defaults)


In [5]:
## Add GBIF images and accuracy number ##
# Work on a copy so that re-running this cell starts again from the zero-initialised columns.
checklist_df = trained_gbif_moth_checklist_df.copy()
taxon_keys = checklist_df["acceptedTaxonKey"]

# Each statistic is turned into a {taxon key: value} lookup and mapped over the checklist
# in one pass, rather than scanning the whole checklist once per stats row.
# Checklist rows whose key is absent from a lookup keep the default of 0.
# If a key occurs more than once in a stats table, the last occurrence wins.

# Add number of training images
train_imgs_by_key = dict(
    zip(train_stats_df["accepted_taxon_key"], train_stats_df["num_gbif_train_images"])
)
checklist_df["gbif_train_imgs"] = (
    taxon_keys.map(train_imgs_by_key).fillna(0).astype("int64")
)

# Add number of test images
test_imgs_by_key = dict(
    zip(test_stats_df["accepted_taxon_key"], test_stats_df["num_gbif_test_images"])
)
checklist_df["gbif_test_imgs"] = (
    taxon_keys.map(test_imgs_by_key).fillna(0).astype("int64")
)

# Add test accuracy
# os.environ[...] raises a KeyError naming the variable if it is unset
# (os.getenv would pass None on to open()).
with open(os.environ["GBIF_TEST_ACC"], "r") as file:
    gbif_test_acc = json.load(file)

# NOTE(review): each JSON value is read as [<top-1 numerator>, <number of images>],
# judging from how the two entries are combined below — confirm against the file producer.
# Keys are strings and are normalised with int(float(key)) before matching.
# A taxon with zero images gets 0.0 instead of raising ZeroDivisionError.
top1_acc_by_key = {
    int(float(key)): round(stats[0] / stats[1] * 100, 2) if stats[1] else 0.0
    for key, stats in gbif_test_acc.items()
}
checklist_df["gbif_test_top1_acc"] = (
    taxon_keys.map(top1_acc_by_key).fillna(0.0).astype("float64")
)

In [6]:
## Add AMI-Traps images and accuracy number ##
# Load AMI-Traps accuracy file
# os.environ[...] raises a KeyError naming the variable if it is unset
# (os.getenv would pass None on to open()).
with open(os.environ["AMI_TRAPS_ACC"], "r") as file:
    ami_traps_acc = json.load(file)
ami_traps_acc_sp = ami_traps_acc["SPECIES"]

# Append the image and accuracy numbers to the checklist
# NOTE(review): each species entry is read as [<top-1 numerator>, <top-5 numerator>,
# <number of images>], judging from the arithmetic below — confirm against the file producer.
traps_columns = ["traps_test_imgs", "traps_test_top1_acc", "traps_test_top5_acc"]
for sp, stats in ami_traps_acc_sp.items():
    imgs = stats[2]
    if imgs:
        top1_acc = round(stats[0] / imgs * 100, 2)
        top5_acc = round(stats[1] / imgs * 100, 2)
    else:
        # No images for this species: report 0.0 rather than raising ZeroDivisionError.
        top1_acc = top5_acc = 0.0
    # Rows are matched on the checklist's 'species' column; non-matching species are skipped.
    checklist_df.loc[checklist_df["species"] == sp, traps_columns] = [imgs, top1_acc, top5_acc]


In [7]:
# Save the checklist to disk (without the DataFrame index column).
# os.environ[...] is indexed directly: with os.getenv an unset variable yields None,
# and DataFrame.to_csv(None) returns the CSV as a string without writing any file,
# so the notebook would appear to succeed while saving nothing.
checklist_df.to_csv(os.environ["GLOBAL_MOTH_CHECKLIST_ACC"], index=False)