# 2. Fetch Taxon Keys

This notebook runs the `fetch_taxon_keys` code to get the taxonomic keys for a given species list.

Instead of running `fetch_taxon_keys` from the terminal and supplying command-line arguments, this notebook uses `SimpleNamespace` (from the standard-library `types` module) to build the `args` variable and passes it to `fetch_taxon_keys`.

In [None]:
from types import SimpleNamespace

import fetch_taxon_keys
import pandas as pd
import os

In [None]:
from datetime import datetime

# Label handed to every fetch call in this notebook as `place`:
# the fixed prefix 'Leeds' followed by today's date as DDMMYYYY.
date_stamp = datetime.today().strftime('%d%m%Y')
loc_date = f"Leeds{date_stamp}"
print(loc_date)

## Kenya and Uganda

In [None]:
# Kenya & Uganda: look up taxon keys for the preprocessed moth checklist.
# The options mirror what `fetch_taxon_keys` would otherwise receive on the command line.
kenya_uganda_options = {
    "species_filepath": "../species_checklists/kenya_uganda-gbif-moths-preprocessed.csv",
    "column_name_species": "species_name_provided",
    "column_name_authority": "authority_name_provided",
    "output_filepath": "../species_checklists/kenya-uganda-moths-keys.csv",
    "place": loc_date,
    "use_multithreading": True,
}
args = SimpleNamespace(**kenya_uganda_options)

fetch_taxon_keys.save_taxon_keys(args)

## Anguilla

In [None]:
# Anguilla: look up taxon keys for the preprocessed moth checklist.
# The options mirror what `fetch_taxon_keys` would otherwise receive on the command line.
anguilla_options = {
    "species_filepath": "../species_checklists/anguilla-moths-preprocessed.csv",
    "column_name_species": "species_name_provided",
    "column_name_authority": "authority_name_provided",
    "output_filepath": "../species_checklists/anguilla-moths-keys.csv",
    "place": loc_date,
    "use_multithreading": True,
}
args = SimpleNamespace(**anguilla_options)

fetch_taxon_keys.save_taxon_keys(args)

## Costa Rica

In [None]:
# Costa Rica: look up taxon keys for the preprocessed moth checklist.
# NOTE(review): the output goes to "...-keys2.csv", unlike the "-keys.csv"
# naming used for the other regions - confirm this is intentional.
costarica_options = {
    "species_filepath": "../species_checklists/costarica-moths-preprocessed.csv",
    "column_name_species": "species_name_provided",
    "column_name_authority": "authority_name_provided",
    "output_filepath": "../species_checklists/costarica-moths-keys2.csv",
    "place": loc_date,
    "use_multithreading": True,
}
args = SimpleNamespace(**costarica_options)

fetch_taxon_keys.save_taxon_keys(args)

## Thailand

In [None]:
# Thailand: look up taxon keys for the preprocessed moth checklist.
# NOTE(review): the species column is "Species" while the authority column keeps
# the "authority_name_provided" name used by other checklists - confirm both
# match the header of the Thailand CSV.
thailand_options = {
    "species_filepath": "../species_checklists/thailand-moths-preprocessed.csv",
    "column_name_species": "Species",
    "column_name_authority": "authority_name_provided",
    "output_filepath": "../species_checklists/thailand-moths-keys.csv",
    "place": loc_date,
    "use_multithreading": True,
}
args = SimpleNamespace(**thailand_options)

fetch_taxon_keys.save_taxon_keys(args)

## Madagascar

In [None]:
# The Madagascar checklist is split into parts; collect every file named
# "madagascar-moths-preprocessed-part*.csv" in ../species_checklists/.
checklist_dir = "../species_checklists/"
part_files = [
    name
    for name in os.listdir(checklist_dir)
    if name.startswith("madagascar-moths-preprocessed-part") and name.endswith(".csv")
]

for part_name in part_files:
    print(part_name)
    # Each part gets its own output file: same name with "preprocessed" swapped for "keys".
    keys_name = part_name.replace("preprocessed", "keys")
    args = SimpleNamespace(
        species_filepath=checklist_dir + part_name,
        column_name_species="Species",
        column_name_authority="Authority",
        output_filepath=checklist_dir + keys_name,
        place=loc_date,
        use_multithreading=True,
    )

    fetch_taxon_keys.save_taxon_keys(args)

In [None]:
# Combine the per-part key files "madagascar-moths-keys-part*.csv" into one table.
# The "-part" in the prefix filter keeps the combined "madagascar-moths-keys.csv"
# out of the list, so re-running this cell never re-ingests a previous result.
keys_dir = "../species_checklists/"

# os.listdir returns names in arbitrary order; sort them (lexicographically) so the
# row order of the combined table is the same on every run and machine.
part_files = sorted(
    name
    for name in os.listdir(keys_dir)
    if name.startswith("madagascar-moths-keys-part") and name.endswith(".csv")
)

# Fail with an explicit message rather than pandas' "No objects to concatenate".
if not part_files:
    raise FileNotFoundError(
        "No 'madagascar-moths-keys-part*.csv' files found in " + keys_dir
    )

# Read every part and stack them with a fresh 0..n-1 index.
madagascar_moths = pd.concat(
    [pd.read_csv(keys_dir + name) for name in part_files],
    ignore_index=True,
)
madagascar_moths.head()

In [None]:
# Write the combined Madagascar keys table to a single CSV, without the row index.
combined_keys_path = "../species_checklists/madagascar-moths-keys.csv"
madagascar_moths.to_csv(combined_keys_path, index=False)

# Check the outputs

In [None]:
import pandas as pd

In [None]:
# Percentage of all rows in the Anguilla keys file that fall under each `match_type` value.
# The file is read through the same relative path the fetch step writes to, rather
# than a user-specific home-directory path, so the check runs on any checkout.
# The denominator is the total row count, so rows with a missing `match_type`
# (not counted by value_counts) make the percentages sum to less than 100.
anguilla_keys = pd.read_csv("../species_checklists/anguilla-moths-keys.csv")
anguilla_keys['match_type'].value_counts() / anguilla_keys.shape[0] * 100

In [None]:
# Percentage of all rows in the Thailand keys file that fall under each `match_type` value.
# The file is read through the same relative path the fetch step writes to, rather
# than a user-specific home-directory path, so the check runs on any checkout.
# The denominator is the total row count, so rows with a missing `match_type`
# (not counted by value_counts) make the percentages sum to less than 100.
thailand_keys = pd.read_csv("../species_checklists/thailand-moths-keys.csv")
thailand_keys['match_type'].value_counts() / thailand_keys.shape[0] * 100

In [None]:
# Percentage of all rows in the combined Madagascar keys file that fall under each
# `match_type` value.
# The file is read through the same relative path the combined table is saved to,
# rather than a user-specific home-directory path, so the check runs on any checkout.
# The denominator is the total row count, so rows with a missing `match_type`
# (not counted by value_counts) make the percentages sum to less than 100.
madagascar_keys = pd.read_csv("../species_checklists/madagascar-moths-keys.csv")
madagascar_keys['match_type'].value_counts() / madagascar_keys.shape[0] * 100