In [1]:
import pandas as pd
import numpy as np
import pickle

## 1. Backbone

In [2]:
backbone = pd.read_csv("../../data/external/backbone/Taxon.tsv", sep="\t", on_bad_lines='skip')
backbone = backbone[backbone["taxonRank"]=="species"]
# drop species with no canonical name
backbone = backbone.dropna(subset="canonicalName").set_index("canonicalName")
# and no full taxonomic lineage to the family
#backbone = backbone.dropna(subset=['kingdom', 'phylum', 'class', 'order', 'family'])
backbone = backbone[['taxonomicStatus',
                     'kingdom', 'phylum', 'class', 'order']]

#backbone["numberOfAuthors"] = [0,]*len(backbone.index)

  backbone = pd.read_csv("../../data/external/backbone/Taxon.tsv", sep="\t", on_bad_lines='skip')


## 2. Author counts

In [3]:
# version with dictionary, faster
# get disambiguated, European authors of taxonomic articles
authors = pd.read_pickle("../../data/processed/european_authors_disambiguated_truncated.pkl")

# link the author's expertise to the taxonomic backbone
available_species = set(backbone.index)
species_authors = {}

for subjects in authors["species_subject"]:
    if len(subjects) != 0: 
        for species in subjects:
            if species in available_species:
                if species not in species_authors:
                    species_authors[species] =  1
                else:
                    species_authors[species] += 1

In [4]:
sp_authors_df = pd.DataFrame(species_authors.keys(), species_authors.values()).reset_index()
sp_authors_df.columns = ["nr_authors", "canonicalName"]
sp_authors_df.set_index("canonicalName")

Unnamed: 0_level_0,nr_authors
canonicalName,Unnamed: 1_level_1
Canis aureus,24
Passiflora kikiana,2
Passiflora pachyantha,2
Passiflora killipiana,2
Passiflora guazumaefolia,2
...,...
Cercospora zeina,1
Fusarium pininemorale,1
Ophiostoma ips,1
Terminalia chebula,1


In [5]:
backbone = backbone.merge(sp_authors_df, on="canonicalName", how="left")

## 3. Demand counts

In [14]:
redlist = pd.read_csv("../../data/external/IUCN_eu_region_tax_research_needed/assessments.csv")
cwr = pd.read_excel("../../data/external/crop wild relatives europe.xlsx", skiprows=1)
horizon = pd.read_csv("../../data/external/invasive species on the horizon for Europe.csv", sep="\t")

In [15]:
# get canonical names
#redlist = redlist.rename(columns={"scientificName":"canonicalName"})
cwr["canonicalName"] = [" ".join(x.split()[:2]) for x in cwr["CROP WILD RELATIVE"]]
#horizon = horizon.rename(columns={"Species Name":"canonicalName"})

In [16]:
def count_species(backbone, species_list, countname):
    available_species = set(backbone["canonicalName"])
    species_count = {}

    for species in species_list:
        if species in available_species:
            if species not in species_count:
                species_count[species] =  1
            else:
                species_count[species] += 1
                
    count_df = pd.DataFrame(species_count.keys(), species_count.values()).reset_index()
    count_df.columns = [countname, "canonicalName"]
    count_df.set_index("canonicalName")
    
    backbone = backbone.merge(count_df, on="canonicalName", how="left")
    return backbone

In [17]:
horizon

Unnamed: 0,taxonKey,scientificName,acceptedTaxonKey,acceptedScientificName,numberOfOccurrences,taxonRank,taxonomicStatus,kingdom,kingdomKey,phylum,...,classKey,order,orderKey,family,familyKey,genus,genusKey,species,speciesKey,iucnRedListCategory
0,2768817,Asparagus tenuifolius Lam.,2768817,Asparagus tenuifolius Lam.,17,SPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,196,Asparagales,1169,Asparagaceae,7683,Asparagus,9527393.0,Asparagus tenuifolius,2768817.0,LC
1,2874569,Cucumis sativus L.,2874569,Cucumis sativus L.,2554,SPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,220,Cucurbitales,7224005,Cucurbitaceae,6634,Cucumis,2874568.0,Cucumis sativus,2874569.0,NE
2,2930755,Solanum triquetrum Cav.,2930755,Solanum triquetrum Cav.,3,SPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,220,Solanales,1176,Solanaceae,7717,Solanum,2928997.0,Solanum triquetrum,2930755.0,NE
3,2975139,Vicia narbonensis var. aegyptiaca Asch. & Schw...,2975139,Vicia narbonensis var. aegyptiaca Asch. & Schw...,28,VARIETY,ACCEPTED,Plantae,6,Tracheophyta,...,220,Fabales,1370,Fabaceae,5386,Vicia,2974751.0,Vicia narbonensis,2975132.0,
4,2975196,Vicia cappadocica Boiss.,2975196,Vicia cappadocica Boiss.,34,SPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,220,Fabales,1370,Fabaceae,5386,Vicia,2974751.0,Vicia cappadocica,2975196.0,NE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3702,5373507,Crambe fruticosa L.f.,5373507,Crambe fruticosa L.f.,8,SPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,220,Brassicales,7225535,Brassicaceae,3112,Crambe,3041334.0,Crambe fruticosa,5373507.0,NT
3703,5567951,Beta macrocarpa Guss.,5567951,Beta macrocarpa Guss.,572,SPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,220,Caryophyllales,422,Amaranthaceae,3064,Beta,3083586.0,Beta macrocarpa,5567951.0,NE
3704,7227623,Allium senescens subsp. senescens,7227623,Allium senescens subsp. senescens,2,SUBSPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,196,Asparagales,1169,Amaryllidaceae,7682,Allium,9624496.0,Allium senescens,2855719.0,
3705,7262390,Ficus crassiramea subsp. crassiramea,7262390,Ficus crassiramea subsp. crassiramea,1,SUBSPECIES,ACCEPTED,Plantae,6,Tracheophyta,...,220,Rosales,691,Moraceae,6640,Ficus,2984588.0,Ficus crassiramea,7262389.0,


In [20]:
backbone = count_species(backbone, redlist["scientificName"], "taxonomicResearchNeeded")
backbone = count_species(backbone, cwr["canonicalName"], "cropWildRelatives")
backbone = count_species(backbone, horizon["species"], "horizonInvasives")

backbone

Unnamed: 0,canonicalName,taxonomicStatus,kingdom,phylum,class,order,nr_authors,taxonomicResearchNeeded_x,cropWildRelatives_x,taxonomicResearchNeeded_y,cropWildRelatives_y,horizonInvasives
0,Veronicastrum lungtsuanense,synonym,Plantae,Tracheophyta,Magnoliopsida,Lamiales,,,,,,
1,Thysananthus minor,accepted,Plantae,Marchantiophyta,Jungermanniopsida,Porellales,,,,,,
2,Erigone cristata,homotypic synonym,Animalia,Arthropoda,Insecta,Diptera,,,,,,
3,Ahomana chilensis,accepted,Animalia,Arthropoda,Insecta,Hemiptera,,,,,,
4,Costatoverruca baxteri,accepted,Animalia,Arthropoda,Maxillopoda,Sessilia,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
4976831,Pottia paraguensis,synonym,Plantae,Bryophyta,Bryopsida,Orthotrichales,,,,,,
4976832,Lestodiplosis polypori,synonym,Animalia,Arthropoda,Insecta,Diptera,,,,,,
4976833,Stictia gravida,synonym,Animalia,Arthropoda,Insecta,Hymenoptera,,,,,,
4976834,Epialtoides hiltoni,accepted,Animalia,Arthropoda,Malacostraca,Decapoda,,,,,,


In [None]:
set(list(backbone["horizonInvasives"]))

## 4. Count on order level

In [None]:
order = backbone[["kingdom", "phylum", "class", "order"]]
order = order[order["kingdom"]!="Bacteria"]
order = order[order["kingdom"]!="Archaea"].drop_duplicates(ignore_index=True)
order["nr_authors"] = [0.0,]*len(order)
order

In [None]:
for row in backbone[backbone["nr_authors"]==backbone["nr_authors"]].itertuples():
    order.loc[order["order"]==row.order,"nr_authors"] += row.nr_authors

In [None]:
order["taxonomicResearchNeeded"] = [0.0,]*len(order)
for row in backbone[backbone["taxonomicResearchNeeded"]==backbone["taxonomicResearchNeeded"]].itertuples():
    order.loc[order["order"]==row.order,"taxonomicResearchNeeded"] += row.taxonomicResearchNeeded

In [None]:
order["cropWildRelatives"] = [0.0,]*len(order)
for row in backbone[backbone["cropWildRelatives"]==backbone["cropWildRelatives"]].itertuples():
    order.loc[order["order"]==row.order,"cropWildRelatives"] += row.cropWildRelatives

In [None]:
order["horizonInvasives"] = [0.0,]*len(order)
for row in backbone[backbone["horizonInvasives"]==backbone["horizonInvasives"]].itertuples():
    order.loc[order["order"]==row.order,"horizonInvasives"] += row.horizonInvasives

In [None]:
order

In [None]:
order.to_pickle("../../data/processed/supply_and_demand_order_level.pkl")
order.to_csv("../../data/processed/supply_and_demand_order_level.tsv", sep="\t")