In [1]:
import logging

import pandas as pd
import requests
import taxonmatch as txm
from bs4 import BeautifulSoup

In [None]:
# Suppress warnings from the requests library. The connection/retry messages
# are emitted by urllib3 (the HTTP layer requests is built on), so that
# logger is raised to ERROR as well.
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)

In [2]:
# Look up the IUCN dataset by name; the call prints the matched title and ID.
# The result is passed as `source` to the GBIF download in a later cell
# (presumably it is the dataset ID — confirm against taxonmatch).
iucn_code = txm.find_dataset_id_by_name("iucn")

Title: The IUCN Red List of Threatened Species, ID: 19491596-35ae-4a91-9a98-85cf505f1bd3


In [3]:
# Fetch the taxonomy for the dataset identified by `iucn_code`
# (the recorded output shows an already-downloaded copy being reused).
iucn_dataset = txm.download_gbif_taxonomy(source=iucn_code)

GBIF backbone taxonomy data already downloaded.
Processing samples...
Done.


In [4]:
# Fetch the NCBI taxonomy (the recorded output shows an already-downloaded
# copy being reused).
ncbi_dataset = txm.download_ncbi_taxonomy()

NCBI taxonomy data already downloaded.
Processing samples...
Done.


In [5]:
# Keep only the rows of the first IUCN table whose phylum is Arthropoda.
iucn_arthropoda = iucn_dataset[0].loc[lambda table: table.phylum == "Arthropoda"]

In [8]:
# Run the IUCN status helper on the arthropod subset (it prints a progress
# percentage while running).
# NOTE(review): `add_iucn_status_column` is not defined or imported in the
# visible cells — it may live in a deleted cell or in `txm`; confirm so this
# cell works after Restart & Run All.
df_with_iucn_status = add_iucn_status_column(iucn_arthropoda)

Progress: 100.00%


In [9]:
# Display the result; the rendered table includes an `iucnRedListCategory`
# column as its last column.
df_with_iucn_status

Unnamed: 0,taxonID,datasetID,parentNameUsageID,acceptedNameUsageID,canonicalName,taxonRank,taxonomicStatus,kingdom,phylum,class,order,family,genus,gbif_taxonomy,gbif_taxonomy_ids,iucnRedListCategory
56234,11441042,19491596-35ae-4a91-9a98-85cf505f1bd3,1926040,,Durbaniella clarki phaea,subspecies,accepted,Animalia,Arthropoda,Insecta,Lepidoptera,Lycaenidae,Durbaniella,arthropoda;insecta;lepidoptera;lycaenidae;durb...,11441042,LEAST_CONCERN
56236,11840699,19491596-35ae-4a91-9a98-85cf505f1bd3,1926040,,Durbaniella clarki clarki,subspecies,accepted,Animalia,Arthropoda,Insecta,Lepidoptera,Lycaenidae,Durbaniella,arthropoda;insecta;lepidoptera;lycaenidae;durb...,11840699,LEAST_CONCERN
56237,11778839,19491596-35ae-4a91-9a98-85cf505f1bd3,1926040,,Durbaniella clarki jenniferae,subspecies,accepted,Animalia,Arthropoda,Insecta,Lepidoptera,Lycaenidae,Durbaniella,arthropoda;insecta;lepidoptera;lycaenidae;durb...,11778839,LEAST_CONCERN
79584,5791081,19491596-35ae-4a91-9a98-85cf505f1bd3,4644568,,Sundathelphusa leschenaultii,species,accepted,Animalia,Arthropoda,Malacostraca,Decapoda,Gecarcinucidae,Sundathelphusa,arthropoda;malacostraca;decapoda;gecarcinucida...,5791081,DATA_DEFICIENT
81296,5791055,19491596-35ae-4a91-9a98-85cf505f1bd3,5791054,,Cylindrothelphusa steniops,species,accepted,Animalia,Arthropoda,Malacostraca,Decapoda,Gecarcinucidae,Cylindrothelphusa,arthropoda;malacostraca;decapoda;gecarcinucida...,5791054;5791055,LEAST_CONCERN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7636077,4449486,19491596-35ae-4a91-9a98-85cf505f1bd3,4449483,,Leipaspis lauricola,species,accepted,Animalia,Arthropoda,Insecta,Coleoptera,Trogossitidae,Leipaspis,arthropoda;insecta;coleoptera;trogossitidae;le...,4449486,VULNERABLE
7645391,10723220,19491596-35ae-4a91-9a98-85cf505f1bd3,4406444,,Nemozoma breviatum,species,accepted,Animalia,Arthropoda,Insecta,Coleoptera,Trogossitidae,Nemozoma,arthropoda;insecta;coleoptera;trogossitidae;ne...,10723220,DATA_DEFICIENT
7646407,4449458,19491596-35ae-4a91-9a98-85cf505f1bd3,4405178,,Grynocharis oblonga,species,accepted,Animalia,Arthropoda,Insecta,Coleoptera,Trogossitidae,Grynocharis,arthropoda;insecta;coleoptera;trogossitidae;gr...,4449458,LEAST_CONCERN
7676646,11370760,19491596-35ae-4a91-9a98-85cf505f1bd3,1424939,,Rhionaeschna serrania,species,accepted,Animalia,Arthropoda,Insecta,Odonata,Aeshnidae,Rhionaeschna,arthropoda;insecta;odonata;aeshnidae;rhionaesc...,11370760,LEAST_CONCERN


In [57]:
def load_a3cat_dataframe(ncbi_filterd):
    """Return the rows of an NCBI taxonomy table whose taxon is listed in A3Cat.

    The current A3Cat release is discovered by reading the element with id
    ``header-version`` on the A3Cat home page; the text following ``v.`` is
    used to build the URL of the release TSV, which is loaded with pandas.

    Parameters
    ----------
    ncbi_filterd : pandas.DataFrame
        Table to filter. It must have an ``ncbi_id`` column whose values can
        be cast to ``int``. (The parameter keeps its original spelling so
        existing keyword callers keep working.)

    Returns
    -------
    pandas.DataFrame or None
        The rows of ``ncbi_filterd`` whose ``ncbi_id`` appears in the
        ``TaxId`` column of the A3Cat TSV, or ``None`` (after printing a
        hint) when the release version cannot be read from the page.

    Raises
    ------
    requests.HTTPError
        If the home page responds with an error status.
    """
    url = 'https://a3cat.unil.ch'

    # Fetch the home page; a timeout keeps the cell from hanging forever if
    # the site is unreachable.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Make sure the request succeeded

    # Parse the HTML and locate the element that carries the version label.
    soup = BeautifulSoup(response.content, 'html.parser')
    version_tag = soup.find(id="header-version")

    # The label is expected to look like "v.<date>"; anything else (missing
    # tag, missing "v." marker, empty date) goes to the not-found path
    # instead of raising IndexError.
    version = version_tag.text.strip() if version_tag else ""
    parts = version.split('v.')
    date = parts[1] if len(parts) > 1 else ""
    if not date:
        print("Last version not found, please download the dataset manually")
        return None

    # Build the link of the release file, download it and load it with pandas.
    download_link = f"{url}/data/a3cat/{date}.tsv"
    df = pd.read_csv(download_link, sep='\t')

    # Filter the table that was passed in (not a same-named global) down to
    # the taxon IDs present in A3Cat.
    a3cat = ncbi_filterd[ncbi_filterd.ncbi_id.astype(int).isin(df.TaxId)]
    return a3cat



In [58]:
# Use the function: keep only the NCBI taxa that are listed in A3Cat.
# NOTE(review): assumes ncbi_dataset[0] is the NCBI table carrying the
# `ncbi_id` column, by analogy with how iucn_dataset[0] is used — confirm.
a3cat = load_a3cat_dataframe(ncbi_dataset[0])

In [59]:
# Display the NCBI rows that matched an A3Cat taxon ID.
a3cat

Unnamed: 0,ncbi_id,ncbi_lineage_names,ncbi_lineage_ids,ncbi_canonicalName,ncbi_rank,ncbi_lineage_ranks,ncbi_target_string
5338,6661,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Artemia franciscana,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;branchiopoda;anostraca;artemiidae;a...
5343,6669,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Daphnia pulex,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;branchiopoda;diplostraca;daphniidae...
5352,6678,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Balanus nubilus,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;thecostraca;balanomorpha;balanidae;...
5360,6687,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Penaeus monodon,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;malacostraca;decapoda;penaeidae;pen...
5362,6689,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Penaeus vannamei,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;malacostraca;decapoda;penaeidae;pen...
...,...,...,...,...,...,...,...
2563734,3098083,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Chrysina gloriosa,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;insecta;coleoptera;scarabaeidae;chr...
2565541,3102772,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Druceiella hillmani,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;insecta;lepidoptera;hepialidae;druc...
2568333,3109984,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Polylopha cassiicola,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;insecta;lepidoptera;tortricidae;pol...
2572787,3116773,cellular organisms;Eukaryota;Opisthokonta;Meta...,131567;2759;33154;33208;6072;33213;33317;12067...,Rosalia funebris,species,no rank;superkingdom;clade;kingdom;clade;clade...,arthropoda;insecta;coleoptera;cerambycidae;ros...
