# Preprocess the species lists

Short script to preprocess the species checklist.
Will be modified for each incoming species checklist, depending on what must be done. 

The aim is to transform the column names of the checklist, so that: 
- The column with species name is called "species_name_provided"
- The column with the authority is called "authority_name_provided". If such column doesn't exist, it should be created and left blank.
- The authority column is formatted as "Lastname, year" 

In [None]:
import pandas as pd
import os
import requests
from bs4 import BeautifulSoup

To date we have sources for: 
- Singapore
- Costa Rica
- The UK
- Thailand
- Madagascar
- Anguilla
- Kenya and Uganda
- Japan
- Nigeria
- Namibia

## Functions to curate data frames 

In [None]:
def split_dataframe(df, n, output_dir, list_name):
    """Split a data frame into ``n`` consecutive parts and save each as a CSV.

    Part ``k`` (counting from 1) is written, without the index, to
    ``<output_dir>/<list_name>-moths-preprocessed-part<k>.csv``. Every part
    holds ``len(df) // n`` rows; the last part also takes the remainder.

    Args:
        df: Data frame to split.
        n: Number of parts; must be at least 1.
        output_dir: Directory to write into; created if it does not exist.
        list_name: Prefix used in the output file names.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    # Writing would otherwise fail on the first part if the folder is missing
    os.makedirs(output_dir, exist_ok=True)

    split_size = len(df) // n
    for i in range(n):
        start_idx = i * split_size
        # Ensure the last part includes any remaining rows
        end_idx = (i + 1) * split_size if i < n - 1 else len(df)
        df_part = df.iloc[start_idx:end_idx]
        file_path = os.path.join(output_dir, f"{list_name}-moths-preprocessed-part{i + 1}.csv")
        df_part.to_csv(file_path, index=False)
        print(f"Saved part {i + 1} to {file_path}")

In [None]:
# For wiki sources, there are some entries formatted with repeating words.
# This helper removes commas and, for cells with more than two words, drops
# repeated words.
def clean_repeated_words(cell):
    """Strip commas from ``cell`` and remove repeated words from longer names.

    Cells with more than two whitespace-separated words have exact
    (case-sensitive) repeats removed, keeping the first occurrence of each
    word, and are re-joined with single spaces. Shorter cells are returned
    with only the commas removed. Non-string cells (e.g. NaN or None) are
    returned unchanged instead of raising.
    """
    if not isinstance(cell, str):
        return cell

    cell = cell.replace(',', '')

    words = cell.split()
    if len(words) > 2:
        # dict.fromkeys removes duplicates while preserving order
        return " ".join(dict.fromkeys(words))
    return cell

def clean_species_words(df, colname='Species'):
    """Run ``clean_repeated_words`` over every cell of ``df[colname]``.

    The column is overwritten on the data frame that was passed in, and that
    same data frame is returned.
    """
    cleaned_column = df[colname].apply(clean_repeated_words)
    df[colname] = cleaned_column
    return df

In [None]:
def scrape_wikipedia_to_csv(url, timeout=30):
    """Scrape a Wikipedia moth-list page into a data frame.

    For every level-2 heading (``div.mw-heading2``) other than 'Contents',
    'References' and 'See also', the first ``<ul>`` after the heading is read.
    For each ``<li>`` the text of its first link is taken as the species name;
    the authority is whatever remains of the item text after the first em
    dash, with surrounding whitespace and parentheses stripped. Items without
    any link are skipped. Despite the name, nothing is written to disk.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Data frame with columns 'Family', 'Genus' (always empty), 'Species',
        'Authority' and 'Source' (always 'wiki'); 'Species' has been passed
        through ``clean_species_words``. The frame is empty if no family
        section yields a species.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    # Without a timeout a stalled connection would block the notebook forever
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Check for request errors

    soup = BeautifulSoup(response.text, "html.parser")

    family_frames = []
    for heading in soup.find_all("div", class_="mw-heading2"):
        family_text = heading.get_text(strip=True).replace('[edit]', '')

        if family_text in ['Contents', 'References', 'See also']:
            continue

        # The bullet list that follows the heading holds the species
        bullet_list = heading.find_next("ul")
        if bullet_list is None:
            # Nothing to read for this heading; skipping avoids re-using the
            # previous family's lists (or a NameError on the first heading)
            continue

        species = []
        auth = []
        for li in bullet_list.find_all("li"):
            link_texts = [a.get_text(strip=True).strip() for a in li.find_all("a")]
            if not link_texts:
                # No link means no recognisable species name in this item
                continue

            name = link_texts[0]
            authority = (
                li.get_text(strip=True)
                .replace(name, ' ')
                .split('—', 1)[-1]
                .strip()
                .strip("()")
            )
            species.append(name)
            auth.append(authority)

        if species:
            family_frames.append(pd.DataFrame({
                'Family': [family_text] * len(species),
                'Genus': [''] * len(species),
                'Species': species,
                'Authority': auth,
            }))

    if family_frames:
        # One concat at the end instead of growing a frame inside the loop
        data = pd.concat(family_frames, ignore_index=True)
    else:
        data = pd.DataFrame(columns=['Family', 'Genus', 'Species', 'Authority'])

    data['Source'] = 'wiki'
    data = clean_species_words(data)

    return data

In [None]:
# We want moths only, so butterflies are removed from every list.
# These are the butterfly families:
butterfly_families = ['Papilionidae', 'Nymphalidae', 'Pieridae', 'Lycaenidae', 'Riodinidae', 'Hesperiidae']

def remove_butterflies(df, family_column='family'):
    """Return a copy of ``df`` without the rows belonging to butterfly families.

    Args:
        df: Data frame holding one row per species.
        family_column: Name of the column that holds the family name.

    Returns:
        An independent copy of the remaining rows (original index labels are
        kept), so callers can add columns to it without triggering pandas'
        SettingWithCopyWarning. The number of removed rows is printed.
    """
    is_butterfly = df[family_column].isin(butterfly_families)
    moths = df.loc[~is_butterfly].copy()
    print(f'Removing {len(df) - len(moths)} butterfly species.')
    return moths

def clean_gbif_download(df):
    """Reduce a GBIF species-list download to the standard checklist columns.

    Keeps only rows whose 'order' is 'Lepidoptera', derives the authority
    from 'scientificName' (everything from the third word onwards), removes
    butterfly families and tags the rows with Source 'gbif'.

    Args:
        df: GBIF download with at least the columns 'order', 'family',
            'genus', 'species' and 'scientificName'.

    Returns:
        A new data frame with the columns 'Family', 'Genus', 'Species',
        'Authority' and 'Source'.
    """
    # .copy() so the column added below is written to an independent frame
    # rather than to a slice of the caller's data
    df = df.loc[df['order'] == 'Lepidoptera'].copy()

    # The authority is whatever follows the first two words of the name
    df['Authority'] = df['scientificName'].str.split().str[2:].str.join(' ')

    df = remove_butterflies(df).assign(Source='gbif')

    df = df[['family', 'genus', 'species', 'Authority', 'Source']]
    df = df.rename(columns={'family': 'Family', 'genus': 'Genus', 'species': 'Species'})

    return df


## Singapore

From https://github.com/AMI-system/gbif_download_standalone/issues/22

In [None]:
# Read the species checklist
checklist_name = "singapore-moths"

df = pd.read_csv(os.path.join("../species_checklists", checklist_name+".csv"),
                 sep=',', encoding='latin-1')

# Copy the source columns into the standard column names
df['Species'] = df['Scientific Name']
df['species_name_provided'] = df['Species']
df['authority_name_provided'] = df['Authority']

# Genus is left blank for this list
df['Genus'] = ''
df.head()

In [None]:
# Drop the butterfly families so that only moths remain
df = remove_butterflies(df, 'Family')

In [None]:
# Save the preprocessed checklist next to the raw one (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                       checklist_name+"-preprocessed.csv"),
          index=False)

## Costa Rica

From: https://github.com/AMI-system/species_classifier/issues/28

In [None]:
# Read the species checklist
checklist_name = "costarica-moths"

df = pd.read_csv(os.path.join("../species_checklists", checklist_name+".csv"),
                 sep=',', encoding='latin-1')

# Rename the columns by position; this fails unless the file has exactly four
# columns (assumes they come in this order - TODO confirm)
df.columns = ['Family', 'Genus', 'Species', 'Subspecies']

In [None]:
df.head()

In [None]:
# Build the full species name from the genus and species-epithet columns.
# The result is stripped so that a missing genus or epithet does not leave a
# leading or trailing space in the name.
df["species_name_provided"] = (df["Genus"].fillna('') + " " + df["Species"].fillna('')).str.strip()

# This list has no authority column, so create it blank
df["authority_name_provided"] = ""

In [None]:
# Remove [] and () from the authority values.
# The pattern is a raw string: '\(' in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
df['authority_name_provided'] = df['authority_name_provided'].replace(r'[\(\)\[\]]', '', regex=True)

In [None]:
# Drop the butterfly families, printing the shape before and after
print(df.shape)
df = remove_butterflies(df, 'Family')
print(df.shape)

In [None]:
df.head()

In [None]:
# Save the preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                       checklist_name+"-preprocessed.csv"),
          index=False)

## United Kingdom

From: https://github.com/AMI-system/species_classifier/issues/4

In [None]:
# Read the species checklist
checklist_name = "uksi-moths"

df = pd.read_csv(os.path.join("../species_checklists", checklist_name+".csv"),
                 sep=',', encoding='latin-1')

# Genus is left blank for this list
df["Genus"] = ''

In [None]:
df.head()

In [None]:
# Map the UKSI columns onto the standard column names
df["species_name_provided"] = df["taxon"].fillna('')

# Strip [] and () from the authority. The pattern is a raw string: '\(' in a
# normal string literal is an invalid escape sequence.
df["authority_name_provided"] = df['preferred_authority'].replace(r'[\(\)\[\]]', '', regex=True)

In [None]:
# Drop the butterfly families; in this list the family is in 'family_taxon'
df = remove_butterflies(df, 'family_taxon')

In [None]:
df.head()

In [None]:
# Save the preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/", checklist_name+"-preprocessed.csv"),
          index=False)

## Thailand

From two sources: 

1. GBIF: https://www.gbif.org/occurrence/search?basis_of_record=HUMAN_OBSERVATION&basis_of_record=PRESERVED_SPECIMEN&country=TH&taxon_key=797&occurrence_status=present

    GBIF.org (18 August 2025) GBIF Occurrence Download https://doi.org/10.15468/dl.sfrd8f

2. Update 18/08/2025: Species list from deployment partners. See email on 6th August titled: 'Re: AMBER updates'

In [None]:
# Read the GBIF species list for Thailand (tab-separated)
checklist_name = "thailand-moths"

# NOTE(review): absolute, user-specific path, whereas most other sections read
# from the relative "../species_checklists" folder - confirm before running
# on another machine
df1 = pd.read_csv(os.path.join("/home/users/katriona/gbif_download_standalone/species_checklists/thailand-gbif-moths.tsv"),
                sep='\t', encoding='latin-1')

df1.head()

In [None]:
# tidy the species column: cast to string, then remove commas and repeated
# words from each name
df1['scientific_name'] = df1['scientificName'].astype(str)
df1 = clean_species_words(df1, 'scientific_name')

# inspect the names that still have more than two words:
df1.loc[df1['scientific_name'].str.split().str.len() > 2, 'scientific_name']

These appear to be subspecies - we will look at the species level

In [None]:
# remove duplicates in the scientific_name column
df1 = df1.drop_duplicates(subset='scientific_name')

# only keep rows where the scientific_name is two words or more and keep the
# first two (species, instead of subspecies).
# .copy() makes the filtered frame independent, so the column assignments
# below do not raise SettingWithCopyWarning.
df1 = df1[df1['scientific_name'].str.split().str.len() > 1].copy()
df1['scientific_name'] = df1['scientific_name'].str.split().str[:2].str.join(' ')

# NOTE: cutting names to two words can create new duplicates; these are
# removed when the combined Thailand list is de-duplicated on 'Species'.
df1['Species'] = df1['scientific_name']
df1['Genus'] = df1['genus']

df1["species_name_provided"] = df1["Species"].fillna('')
df1["authority_name_provided"] = ""

In [None]:
# Drop the butterfly families and record where these rows came from
df1 = remove_butterflies(df1, 'family')
df1['source'] = 'gbif'

### Updated species list

In [None]:
# Read the updated species list from the deployment partners (tab-separated)
# NOTE(review): absolute, user-specific path - confirm before running on
# another machine
df2 = pd.read_csv(os.path.join("/home/users/katriona/gbif_download_standalone/species_checklists/Thailand_moth_species_update.tsv"),
                sep='\t', encoding='latin-1')

df2.head()

In [None]:
# tidy the species column
df2['scientific_name'] = df2['species'].astype(str)


# remove a trailing ', YYYY' (the year of the authority); any author name
# before it stays in place here
df2['scientific_name'] = df2['scientific_name'].str.replace(r', \d{4}$', '', regex=True)

# remove anything in brackets from scientific name
df2['scientific_name'] = df2['scientific_name'].str.replace(r'\(.*?\)', '', regex=True)

# remove commas and repeated words from each name
df2 = clean_species_words(df2, 'scientific_name')

# inspect the names that still have more than two words:
df2.loc[df2['scientific_name'].str.split().str.len() > 2, 'scientific_name']

In [None]:
df2.head()

In [None]:
# remove duplicates in the scientific_name column
df2 = df2.drop_duplicates(subset='scientific_name')

# only keep rows where the scientific_name is two words or more and keep the
# first two (species, instead of subspecies).
# .copy() makes the filtered frame independent, so the column assignments
# below do not raise SettingWithCopyWarning.
df2 = df2[df2['scientific_name'].str.split().str.len() > 1].copy()
df2['scientific_name'] = df2['scientific_name'].str.split().str[:2].str.join(' ')

# NOTE: cutting names to two words can create new duplicates; these are
# removed when the combined Thailand list is de-duplicated on 'Species'.
df2['Species'] = df2['scientific_name']

df2["species_name_provided"] = df2["Species"].fillna('')
df2["authority_name_provided"] = ""

In [None]:
# Drop the butterfly families and record where these rows came from
df2 = remove_butterflies(df2, 'family')
df2['source'] = 'updated list'

In [None]:
# Columns present in both Thailand sources; only these are kept when the two
# lists are combined
common_cols = df1.columns.intersection(df2.columns)
common_cols

In [None]:
# Stack the GBIF list and the updated list, restricted to their shared columns
df = pd.concat([df1[common_cols], df2[common_cols]], ignore_index=True)

In [None]:
# Row counts per source before de-duplication
print(df['source'].value_counts())

# Where a species appears more than once keep the last occurrence, i.e. the
# row from the updated list when both sources have it (df2 comes after df1)
df = df.drop_duplicates(subset=['Species'], keep='last')

# Row counts per source after de-duplication
print(df['source'].value_counts())

In [None]:
# Save the combined, preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                       checklist_name+"-preprocessed.csv"),
          index=False)

## Madagascar

This comes from two sources: 
1. Moths from GBIF using the filter: 
   https://www.gbif.org/occurrence/download?continent=AFRICA&country=MG&taxon_key=797&advanced=1&dataset_name=iNaturalist%20Research-grade%20Observations

    species list download
    citation: GBIF.org (13 November 2024) GBIF Occurrence Download  https://doi.org/10.15468/dl.2naum6
   
2. From Wikipedia: https://en.wikipedia.org/wiki/List_of_moths_of_Madagascar

### 1. From GBIF

In [None]:
# Read the GBIF species list (tab-separated despite the .csv extension)
checklist_name = "madagascar"

mad_df1 = pd.read_csv(os.path.join("../species_checklists",
                            checklist_name+"_gbif_moths.csv"),
                sep='\t', encoding='latin-1')

# Reduce to Family / Genus / Species / Authority / Source, moths only
mad_df1 = clean_gbif_download(mad_df1)
mad_df1.head()

### 2. From Wikipedia

In [None]:
# Wikipedia URL for the moth species by family
wikipedia_url = 'https://en.wikipedia.org/wiki/List_of_moths_of_Madagascar'

# Scrape the page into a data frame (Family, Genus, Species, Authority, Source)
mad_df2 = scrape_wikipedia_to_csv(wikipedia_url)
mad_df2.head()

### Combine

In [None]:
# combine mad_df1 (GBIF) and mad_df2 (Wikipedia); GBIF rows come first
df = pd.concat([mad_df1, mad_df2], ignore_index=True)
df.head()

In [None]:
print(df.shape)

# Remove species listed by both sources. Genus is deliberately not part of
# the key: the Wikipedia rows have a blank Genus while the GBIF rows do not,
# so including it would never match a species present in both lists.
# The default keep='first' retains the GBIF row (GBIF rows come first).
df = df.drop_duplicates(subset=['Family', 'Species'])
df = df.dropna(subset=['Species'])
# .copy() so the columns added below go to an independent frame
df = remove_butterflies(df, 'Family').copy()

# Add the standard output columns
df["species_name_provided"] = df["Species"].fillna('')
df["authority_name_provided"] = df['Authority']
df['Subfamily'] = ""
df["GBIF accepted name"] = df["Species"].fillna('')

df = df[['Family', 'Subfamily', 'Species', 'Genus', 'GBIF accepted name', 'species_name_provided', 'authority_name_provided']]

print(df.shape)

In [None]:
df.head()

In [None]:
# Optional: write the list in five parts instead of one file
# split_dataframe(df=df, n=5, output_dir="../species_checklists/", list_name=checklist_name)

# Save the combined, preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                    checklist_name+"-moths-preprocessed.csv"),
        index=False)

## Anguilla

List provided by David Roy on 21/5/24.
Updated list on 24/10/24

In [None]:
# Read the species checklist
checklist_name = "anguilla-moths"

df = pd.read_csv(os.path.join("../species_checklists",
                            checklist_name+".csv"),
                sep=',', encoding='latin-1')

# Genus is left blank for this list
df["Genus"] = ""

# Map the source columns onto the standard column names
df["species_name_provided"] = df["Species"].fillna('')
df["authority_name_provided"] = df['Authority (GBIF)']

# Keep only the standard output columns, in the standard order
df = df[['Family', 'Subfamily', 'Species', 'Genus', 'GBIF accepted name', 'species_name_provided', 'authority_name_provided']]

df.head()

In [None]:
# Read the updated Anguilla list
df_update = pd.read_csv(os.path.join("../species_checklists",
                            checklist_name+"_update.csv"),
                sep=',', encoding='latin-1')

# Create the taxonomy columns blank so that the update has the same columns
# as the original list
df_update["Genus"] = ""
df_update["Family"] = ""
df_update["Subfamily"] = ""
df_update["GBIF accepted name"] = ""

df_update["species_name_provided"] = df_update["Species"].fillna('')
df_update["authority_name_provided"] = ""

df_update.head()

In [None]:
# combine the two lists, original first, using the original's column order
# (the index is not reset, so index labels may repeat)
df_combined = pd.concat([df, df_update[list(df.columns)]])

df_combined.head()

In [None]:
# Drop the " sp." placeholder from the provided names (e.g. "Genus sp." ->
# "Genus"). regex=False makes the match literal on every pandas version;
# with regex matching the '.' would stand for any character.
df_combined['species_name_provided'] = df_combined['species_name_provided'].str.replace(' sp.', '', regex=False)

In [None]:
# Remove species listed in both files; keep="first" retains the row from the
# original list, which comes first in df_combined
print(df_combined.shape)
df_combined = df_combined.drop_duplicates(subset='Species', keep="first")
print(df_combined.shape)

In [None]:
# Drop the butterfly families (rows from the update have a blank Family and
# are therefore never removed here)
df_combined = remove_butterflies(df_combined, 'Family')

In [None]:
# Save the combined, preprocessed checklist (index column omitted)
df_combined.to_csv(os.path.join("../species_checklists/",
                       checklist_name+"-preprocessed.csv"),
          index=False)

## Kenya and Uganda

### GBIF download

From: https://www.gbif.org/occurrence/download?continent=AFRICA&country=KE&country=UG&taxon_key=797&advanced=1&dataset_name=iNaturalist%20Research-grade%20Observations

species list download
citation: GBIF.org (13 November 2024) GBIF Occurrence Download https://doi.org/10.15468/dl.xuu4h2

In [None]:
# Read the species checklist from GBIF (tab-separated despite the .csv
# extension). NOTE: this section stores the list name in `checklist`, not in
# `checklist_name` as the other sections do.
checklist = 'kenya-uganda'
ku_df1 = pd.read_csv(os.path.join("../species_checklists", checklist + "_gbif_moths.csv"),
                sep='\t', encoding='latin-1')

# Reduce to Family / Genus / Species / Authority / Source, moths only
ku_df1 = clean_gbif_download(ku_df1)

ku_df1.head()

### From Wikipedia

In [None]:
# Wikipedia moth lists for the two countries
kenya_url = 'https://en.wikipedia.org/wiki/List_of_moths_of_Kenya'
uganda_url = 'https://en.wikipedia.org/wiki/List_of_moths_of_Uganda'

kenya_df_wiki = scrape_wikipedia_to_csv(kenya_url)
uganda_df_wiki = scrape_wikipedia_to_csv(uganda_url)

# Stack both country lists into one frame (Kenya rows first)
ku_df2 = pd.concat([kenya_df_wiki, uganda_df_wiki], ignore_index=True)
ku_df2.head()

### Combine dfs

In [None]:
# Stack the GBIF rows and the Wikipedia rows (GBIF first)
df = pd.concat([ku_df1, ku_df2], ignore_index=True)

# Remove species listed more than once (GBIF vs. Wikipedia, or Kenya vs.
# Uganda) and rows without a species name, as the other multi-source sections
# do. keep='first' retains the GBIF row when both sources have the species.
df = df.drop_duplicates(subset=['Family', 'Species'], keep='first')
df = df.dropna(subset=['Species'])
# .copy() so the columns added below go to an independent frame
df = remove_butterflies(df, 'Family').copy()

# Add the standard output columns
df["species_name_provided"] = df["Species"].fillna('')
df["authority_name_provided"] = df['Authority']
df['Subfamily'] = ""
df["GBIF accepted name"] = df["Species"].fillna('')

df = df[['Family', 'Subfamily', 'Species', 'Genus', 'GBIF accepted name', 'species_name_provided', 'authority_name_provided']]

In [None]:
# Save the combined, preprocessed checklist (index column omitted).
# This section keeps its list name in `checklist`; `checklist_name` still
# holds the name of an earlier section, so using it here would write the
# Kenya/Uganda data under the wrong file name.
df.to_csv(os.path.join("../species_checklists/",
                    checklist+"-moths-preprocessed.csv"),
        index=False)

df.head()

## Japan

### GBIF download

From: https://www.gbif.org/occurrence/download?continent=ASIA&country=JA&taxon_key=797&advanced=1&dataset_name=iNaturalist%20Research-grade%20Observations

species list download
citation: GBIF.org (13 November 2024) GBIF Occurrence Download https://doi.org/10.15468/dl.dvekqv

In [None]:
# Read the species checklist from GBIF (tab-separated)
checklist_name='japan'
jp_df1 = pd.read_csv(os.path.join("../species_checklists", checklist_name + "-gbif-moths.tsv"),
                sep='\t', encoding='latin-1')

# Reduce to Family / Genus / Species / Authority / Source, moths only
jp_df1 = clean_gbif_download(jp_df1)
jp_df1.head()

### From Wikipedia

In [None]:
# The Japanese list is split over several Wikipedia pages, one per group of
# superfamilies. The top-level page
# https://en.wikipedia.org/wiki/List_of_moths_of_Japan is not scraped.
japan_urls = ['https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Noctuoidea)',
             'https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Bombycoidea-Geometroidea)',
             'https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Pyraloidea-Drepanoidea)',
             'https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Choreutoidea-Thyridoidea)',
             'https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Zygaenoidea-Tortricoidea)',
             'https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Gelechioidea)',
             'https://en.wikipedia.org/wiki/List_of_moths_of_Japan_(Micropterigoidea-Yponomeutoidea)']

# Scrape every page first and concatenate once at the end, rather than
# growing jp_df2 from an empty frame inside the loop
japan_frames = []

for url in japan_urls:
    print(url)

    japan_df_wiki = scrape_wikipedia_to_csv(url)
    japan_frames.append(japan_df_wiki)

jp_df2 = pd.concat(japan_frames, ignore_index=True)

jp_df2.head()

### From Jenna's list

Additional source from: http://listmj.mothprog.com/list.html

In [None]:
url = 'http://listmj.mothprog.com/list.html'

# Send a GET request to the URL. The timeout stops the cell from hanging
# indefinitely if the server does not respond.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.text, 'html.parser')

In [None]:
# Initialize lists to store data
families, genera, species_list = [], [], []

current_family = None
current_genus = None

# Walk every tag in document order, remembering the most recent family and
# genus so that each species can be labelled with them
for tag in soup.find_all(True):
    tag_classes = tag.get("class", [])

    if 'family' in tag_classes:
        # Update the current family; a missing name span is recorded as
        # 'undefined' (as for the genus) instead of raising AttributeError
        family_span = tag.find("span", class_="highername")
        if family_span is not None:
            current_family = family_span.get_text(strip=True)
        else:
            current_family = 'undefined'
    elif 'genus' in tag_classes:
        # Update the current genus
        genus_span = tag.find("span", class_="genusname")
        if genus_span is not None:
            current_genus = genus_span.get_text(strip=True)
        else:
            current_genus = 'undefined'
    elif 'species' in tag_classes:
        # Record one row per species tag under the current family and genus
        sciname_span = tag.find("span", class_="sciname")
        if sciname_span is not None:
            # take the species name only, without any additional text
            species_name = sciname_span.get_text(strip=True)
        else:
            species_name = 'Undefined: ' + tag.get_text(strip=True)

        families.append(current_family)
        genera.append(current_genus)
        species_list.append(species_name)

# Create DataFrame
jp_df3 = pd.DataFrame({
    "Family": families,
    "Genus": genera,
    "Species": species_list
})

# List-MJ rows carry no authority; tag them with their source
jp_df3['Authority'] = ''
jp_df3['Source'] = 'List-MJ'

jp_df3

### Combine df1, df2 and df3

In [None]:
# Stack the three sources; the order matters for the de-duplication below
df = pd.concat([jp_df2, jp_df1, jp_df3], ignore_index=True)
print(df['Source'].value_counts())

# remove duplicated rows based on family, and species; keep='last' prefers
# List-MJ, then GBIF, then Wikipedia (the reverse of the concatenation order)
df = df.drop_duplicates(subset=['Family', 'Species'], keep='last')

# remove rows with missing species names
df = df.dropna(subset=['Species'])

df = remove_butterflies(df, 'Family')
print(df['Source'].value_counts())

In [None]:
df.loc[df['Species'] == 'Neodrymonia marginata']

In [None]:
# Reduce every name to its first two words ("Genus species"), dropping
# subspecies and any trailing text. assign() returns a new frame.
df = df.assign(Species=df['Species'].str.split().str[:2].str.join(' '))

# Truncation can make previously distinct rows identical, so de-duplicate
# again with the same key and preference as the earlier de-duplication
df = df.drop_duplicates(subset=['Family', 'Species'], keep='last')
df.head()

In [None]:
# Add the standard output columns
df["species_name_provided"] = df["Species"].fillna('')
df["authority_name_provided"] = df['Authority']
df['Subfamily'] = ""
df["GBIF accepted name"] = df["Species"].fillna('')

# Keep only the standard output columns, in the standard order
df = df[['Family', 'Subfamily', 'Species', 'Genus', 'GBIF accepted name', 'species_name_provided', 'authority_name_provided']]

In [None]:
# Drop the rows whose genus is the 'undefined' placeholder (set while parsing
# List-MJ when no genus name was found), printing the shape before and after
print(df.shape)
df = df[df['Genus'] != "undefined"]
print(df.shape)

In [None]:
# Save the combined, preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                    checklist_name+"-moths-preprocessed.csv"),
        index=False)

In [None]:
df.head()

## Nigeria

#### GBIF download

From: https://www.gbif.org/occurrence/download?continent=AFRICA&country=NG&taxon_key=797&advanced=1&dataset_name=iNaturalist%20Research-grade%20Observations

species list download
citation: GBIF.org (13 November 2024) GBIF Occurrence Download https://doi.org/10.15468/dl.42bm6u 

In [None]:
# Read the species checklist from GBIF (tab-separated)
checklist_name="nigeria"
ng_df1 = pd.read_csv(os.path.join("../species_checklists", checklist_name + "-gbif-moths.tsv"),
                sep='\t', encoding='latin-1')

# Reduce to Family / Genus / Species / Authority / Source, moths only
ng_df1 = clean_gbif_download(ng_df1)

ng_df1.head()

#### From Wikipedia

In [None]:
# Wikipedia moth list for Nigeria
nigeria_url = 'https://en.wikipedia.org/wiki/List_of_moths_of_Nigeria'

# Scrape the page into a data frame (Family, Genus, Species, Authority, Source)
ng_df2 = scrape_wikipedia_to_csv(nigeria_url)

ng_df2.head()

#### combine df1 and df2

In [None]:
# Stack the GBIF rows and the Wikipedia rows (GBIF first)
df = pd.concat([ng_df1, ng_df2], ignore_index=True)
print(df['Source'].value_counts())

# remove duplicated rows based on family and species; keep='first' retains
# the GBIF row when both sources list the species
df = df.drop_duplicates(subset=['Family', 'Species'], keep='first')

# remove rows with missing species names
df = df.dropna(subset=['Species'])
df = remove_butterflies(df, 'Family')

print(df['Source'].value_counts())

In [None]:
df.head()

In [None]:
# Add the standard output columns
df["species_name_provided"] = df["Species"].fillna('')
df["authority_name_provided"] = df['Authority']
df['Subfamily'] = ""
df["GBIF accepted name"] = df["Species"].fillna('')


# Keep only the standard output columns, in the standard order
df = df[['Family', 'Subfamily', 'Species', 'Genus', 'GBIF accepted name', 'species_name_provided', 'authority_name_provided']]
df.head()

In [None]:
# Save the combined, preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                    checklist_name+"-moths-preprocessed.csv"),
        index=False)

## Namibia

#### GBIF download

From: https://www.gbif.org/occurrence/download?continent=AFRICA&country=NA&taxon_key=797&advanced=1&dataset_name=iNaturalist%20Research-grade%20Observations

species list download
citation: GBIF.org (18 August 2025) GBIF Occurrence Download https://doi.org/10.15468/dl.76esw2

In [None]:
# Read the species checklist from GBIF (tab-separated)
# NOTE(review): the input comes from an absolute, user-specific folder while
# the output of this section goes to the relative "../species_checklists"
# folder - confirm before running on another machine
checklist_name="namibia"
na_df1 = pd.read_csv(os.path.join("/home/users/katriona/gbif_download_standalone/species_checklists", checklist_name + "-gbif-moths.tsv"),
                sep='\t', encoding='latin-1', low_memory=True)
# Reduce to Family / Genus / Species / Authority / Source, moths only
na_df1 = clean_gbif_download(na_df1)

na_df1.head()

### From Wikipedia

Wiki: https://en.wikipedia.org/wiki/List_of_moths_of_Namibia

In [None]:
# Wikipedia moth list for Namibia
namibia_url = 'https://en.wikipedia.org/wiki/List_of_moths_of_Namibia'

# Scrape the page into a data frame (Family, Genus, Species, Authority, Source)
na_df2 = scrape_wikipedia_to_csv(namibia_url)

na_df2.head()

### Combine df1 and df2

In [None]:
# Stack the GBIF rows and the Wikipedia rows (GBIF first)
df = pd.concat([na_df1, na_df2], ignore_index=True)
print(df['Source'].value_counts())

# remove duplicated rows based on family and species; keep='first' retains
# the GBIF row when both sources list the species
df = df.drop_duplicates(subset=['Family', 'Species'], keep='first')

# remove rows with missing species names
df = df.dropna(subset=['Species'])
df = remove_butterflies(df, 'Family')

print(df['Source'].value_counts())

In [None]:
df.head()

In [None]:
# Add the standard output columns
df["species_name_provided"] = df["Species"].fillna('')
df["authority_name_provided"] = df['Authority']
df['Subfamily'] = ""
df["GBIF accepted name"] = df["Species"].fillna('')


# Keep only the standard output columns, in the standard order
df = df[['Family', 'Subfamily', 'Species', 'Genus', 'GBIF accepted name', 'species_name_provided', 'authority_name_provided']]
df.head()

In [None]:
# Save the combined, preprocessed checklist (index column omitted)
df.to_csv(os.path.join("../species_checklists/",
                    checklist_name+"-moths-preprocessed.csv"),
        index=False)