# iNaturalist status updates by state - TAS

Using `inat-aust-status-taxa.csv` (iNaturalist statuses joined to taxon names), the file produced by `collate-status-taxa.ipynb`, generate the lists used to update iNaturalist statuses.

## Prep - common to all states
1. Read in the iNaturalist statuses & filter to this state's entries
2. Read in the state conservation and sensitive lists
3. Prep fields incl IUCN equivalent mappings and matching to iNat taxonomy  
4. Merge and compare the state and inaturalist lists
5. Create update/removals list
6. Create additions list
7. Save files

## 1. Read in the iNaturalist statuses & filter to the TAS entries

In [1]:
import pandas as pd
import sys
import os
projectdir = os.path.dirname(os.getcwd()) + "/" # parent dir of cwd
sourcedir = projectdir + "data/in/"
sys.path.append(os.path.abspath(projectdir + "notebooks/includes/"))
import list_functions  as lf

# Load the collated iNaturalist statuses file. Every column is read as a
# string, and na_filter=False leaves blank cells as '' instead of NaN.
taxastatus = pd.read_csv(
    sourcedir + "inat-aust-status-taxa.csv",
    dtype=str,
    na_filter=False,
    encoding='UTF-8',
)

# keep only the statuses that relate to one state (TAS in this notebook)
def filter_state_statuses(stateregex: str, urlregex: str, statuses: pd.DataFrame = None):
    """Return the status rows that relate to a single state.

    A row is kept when at least one of the following holds:
      * ``place_display_name``, ``place_name`` or ``authority`` matches ``stateregex``
      * ``url`` matches ``urlregex``

    Both patterns are regular expressions searched with ``Series.str.contains``
    (case sensitive), so an unescaped ``.`` matches any character.

    Args:
        stateregex: pattern applied to the place name and authority columns.
        urlregex: pattern applied to the url column.
        statuses: frame to filter. Defaults to the notebook-level ``taxastatus``
            so existing two-argument calls keep working.

    Returns:
        The matching rows, with exact duplicate rows removed, sorted by
        ``taxon_id`` then ``user_id``.
    """
    if statuses is None:
        statuses = taxastatus
    # One boolean mask over the rows; na=False treats a missing value as "no match".
    matches = (
        statuses['place_display_name'].str.contains(stateregex, na=False)
        | statuses['place_name'].str.contains(stateregex, na=False)
        | statuses['url'].str.contains(urlregex, na=False)
        | statuses['authority'].str.contains(stateregex, na=False)
    )
    statedf = statuses[matches].drop_duplicates()
    return statedf.sort_values(['taxon_id', 'user_id'])

# Select the Tasmanian statuses and prefix every column name with "inat_".
inatstatuses = filter_state_statuses("Tasmania|TAS", ".tas.gov").add_prefix("inat_")
# Tally of the status labels currently recorded in iNaturalist.
inatstatuses.groupby(['inat_status']).size()

inat_status
Endangered          197
Presumed Extinct     19
Presumed extinct      1
Rare                255
Vulnerable          108
endangered            1
dtype: int64

### 3. State lists

In [4]:
# %%script echo skipping # comment this line to download dataset from lists.ala.org.au the web and save locally

# ALA species list web service endpoints for the two Tasmanian lists
sensitive_url = "https://lists.ala.org.au/ws/speciesListItems/dr491?max=10000&includeKVP=true"
conservation_url = "https://lists.ala.org.au/ws/speciesListItems/dr654?max=10000&includeKVP=true"

# Sensitive list: saved exactly as downloaded. lf.kvp_to_columns is not
# applied to this list (original note: kvp values null).
sensitivelist = lf.download_ala_specieslist(sensitive_url)
sensitivelist.to_csv(sourcedir + "state-lists/tas-ala-sensitive.csv", index=False)

# Conservation list: passed through lf.kvp_to_columns before saving.
conservationlist = lf.kvp_to_columns(lf.download_ala_specieslist(conservation_url))
conservationlist.to_csv(sourcedir + "state-lists/tas-ala-conservation.csv", index=False)

In [15]:
# Read the saved lists and stamp each one with the constant values it contributes.
# NOTE(review): the sensitive list authority is still the placeholder "todo".
sensitivelist = pd.read_csv(sourcedir + "state-lists/tas-ala-sensitive.csv", dtype=str).assign(
    geoprivacy='obscured',
    status='Sensitive',
    authority="todo",
)
conservationlist = pd.read_csv(sourcedir + "state-lists/tas-ala-conservation.csv", dtype=str).assign(
    geoprivacy='open',
    authority="Threatened Species Protection Act 1995",
)

# Outer join on the name so taxa that appear on only one list are kept.
statelist = conservationlist[['id','name','lsid','status','geoprivacy','authority']].merge(
    sensitivelist[['id','name','lsid','geoprivacy','status','authority']],
    how="outer",
    on='name',
    suffixes=('_conservation', '_sensitive'),
)
# Status and authority prefer the conservation list; geoprivacy prefers the sensitive list.
statelist = statelist.assign(
    status=statelist['status_conservation'].fillna(statelist['status_sensitive']),
    authority=statelist['authority_conservation'].fillna(statelist['authority_sensitive']),
    geoprivacy=statelist['geoprivacy_sensitive'].fillna(statelist['geoprivacy_conservation']),
)
statelist = statelist.rename(columns={'name': 'scientificName'}).add_prefix("state_")

print("Conservation list entries:" + str(len(conservationlist)))
print("Sensitive list entries:" + str(len(sensitivelist)))
statelist

Conservation list entries:685
Sensitive list entries:3


Unnamed: 0,state_id_conservation,state_scientificName,state_lsid_conservation,state_status_conservation,state_geoprivacy_conservation,state_authority_conservation,state_id_sensitive,state_lsid_sensitive,state_geoprivacy_sensitive,state_status_sensitive,state_authority_sensitive,state_status,state_authority,state_geoprivacy
0,4380224,Acacia axillaris,https://id.biodiversity.org.au/node/apni/2911902,Vulnerable,open,Threatened Species Protection Act 1995,,,,,,Vulnerable,Threatened Species Protection Act 1995,open
1,4379763,Acacia pataczekii,https://id.biodiversity.org.au/node/apni/2913609,Rare,open,Threatened Species Protection Act 1995,,,,,,Rare,Threatened Species Protection Act 1995,open
2,4380367,Acacia siculiformis,https://id.biodiversity.org.au/node/apni/2888960,Rare,open,Threatened Species Protection Act 1995,,,,,,Rare,Threatened Species Protection Act 1995,open
3,4380388,Acacia ulicifolia,https://id.biodiversity.org.au/node/apni/2907350,Rare,open,Threatened Species Protection Act 1995,,,,,,Rare,Threatened Species Protection Act 1995,open
4,4380283,Acacia uncifolia,https://id.biodiversity.org.au/node/apni/2916270,Rare,open,Threatened Species Protection Act 1995,,,,,,Rare,Threatened Species Protection Act 1995,open
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,4380076,Thylacinus cynocephalus,https://biodiversity.org.au/afd/taxa/97764bed-...,Extinct,open,Threatened Species Protection Act 1995,,,,,,Extinct,Threatened Species Protection Act 1995,open
682,4380182,Thymichthys politus,https://biodiversity.org.au/afd/taxa/14e659fa-...,Endangered,open,Threatened Species Protection Act 1995,,,,,,Endangered,Threatened Species Protection Act 1995,open
683,4379921,Tyto novaehollandiae castanops,https://biodiversity.org.au/afd/taxa/2c30d58b-...,Endangered,open,Threatened Species Protection Act 1995,,,,,,Endangered,Threatened Species Protection Act 1995,open
684,4380426,Zearaja maugeana,https://biodiversity.org.au/afd/taxa/0d9d194f-...,Endangered,open,Threatened Species Protection Act 1995,,,,,,Endangered,Threatened Species Protection Act 1995,open


In [16]:
# Tally the combined list by status; dropna=False keeps rows with a missing status as their own group.
statelist.groupby(by='state_status', dropna=False).size()

state_status
Endangered    226
Extinct        28
Rare          310
Sensitive       1
Vulnerable    121
dtype: int64

### 4. Equivalent IUCN statuses
Ensure these match up to the `state_status` values

In [7]:
# Maps a state status label to the iNaturalist `iucn` equivalent code.
# iNaturalist codes: 10 LC, 20 NT, 30 VU, 40 EN, 50 CR, 60 EW, 70 EX.
# Keys are case sensitive and are looked up with the `state_status` values;
# a status with no key here maps to NaN. Codes are kept as strings.
iucnStatusMappings = {
    'Least concern':'10',
    'Special least concern':'10',
    'Critically Endangered':'50',
    'Endangered':'40',
    'Vulnerable':'30',
    'Extinct':'70',
    'Extinct in the wild':'60',   # EW has its own code (60); it was previously mapped to 70 (EX)
    'Near Threatened':'20',
    'Sensitive':'30',
    'Rare':'30'
}

### 5. Determine best place ID to use

In [8]:
# Count the filtered statuses held against each iNaturalist place.
inatstatuses.groupby(
    ['inat_place_id', 'inat_place_name', 'inat_place_display_name']
)['inat_place_id'].count()
# looks like 6829

inat_place_id  inat_place_name     inat_place_display_name
6829           Tasmania            Tasmania, AU               580
9994           Northern Territory  Northern Territory, AU       1
Name: inat_place_id, dtype: int64

## Merge iNaturalist statuses with the state conservation and sensitive lists on scientificName

In [17]:
# iNaturalist place id to write statuses against
place_id = 6829

# Active iNaturalist taxa, used to find a taxon for each state list name.
inattaxa = (
    pd.read_csv(
        sourcedir + "inaturalist-australia-9/inaturalist-australia-9-taxa.csv",
        dtype=str,
        usecols=['id','name','rank','observations_count','is_active'],
    )
    .loc[lambda taxa: taxa['is_active'] == 't']
    .rename(columns={'id': 'taxon_id', 'name': 'taxon_name'})
    .add_prefix("inat_")
)

# Left join on the exact name: state list entries with no matching taxon keep NaN in the inat_ columns.
statelist = statelist[
    ['state_scientificName','state_status','state_geoprivacy', 'state_lsid_conservation','state_lsid_sensitive','state_authority']
].merge(
    inattaxa,
    how="left",
    left_on='state_scientificName',
    right_on='inat_taxon_name',
    suffixes=(None,'_inat'),
)
statelist


Unnamed: 0,state_scientificName,state_status,state_geoprivacy,state_lsid_conservation,state_lsid_sensitive,state_authority,inat_taxon_id,inat_taxon_name,inat_rank,inat_observations_count,inat_is_active
0,Acacia axillaris,Vulnerable,open,https://id.biodiversity.org.au/node/apni/2911902,,Threatened Species Protection Act 1995,775192,Acacia axillaris,species,2,t
1,Acacia pataczekii,Rare,open,https://id.biodiversity.org.au/node/apni/2913609,,Threatened Species Protection Act 1995,1254534,Acacia pataczekii,species,1,t
2,Acacia siculiformis,Rare,open,https://id.biodiversity.org.au/node/apni/2888960,,Threatened Species Protection Act 1995,567724,Acacia siculiformis,species,39,t
3,Acacia ulicifolia,Rare,open,https://id.biodiversity.org.au/node/apni/2907350,,Threatened Species Protection Act 1995,139880,Acacia ulicifolia,species,2249,t
4,Acacia uncifolia,Rare,open,https://id.biodiversity.org.au/node/apni/2916270,,Threatened Species Protection Act 1995,560392,Acacia uncifolia,species,170,t
...,...,...,...,...,...,...,...,...,...,...,...
681,Thylacinus cynocephalus,Extinct,open,https://biodiversity.org.au/afd/taxa/97764bed-...,,Threatened Species Protection Act 1995,,,,,
682,Thymichthys politus,Endangered,open,https://biodiversity.org.au/afd/taxa/14e659fa-...,,Threatened Species Protection Act 1995,1316897,Thymichthys politus,species,15,t
683,Tyto novaehollandiae castanops,Endangered,open,https://biodiversity.org.au/afd/taxa/2c30d58b-...,,Threatened Species Protection Act 1995,627135,Tyto novaehollandiae castanops,subspecies,17,t
684,Zearaja maugeana,Endangered,open,https://biodiversity.org.au/afd/taxa/0d9d194f-...,,Threatened Species Protection Act 1995,115239,Zearaja maugeana,species,1,t


In [18]:
# prepare the export fields, common to New template and Update template
# Outer join so rows that exist on only one side (state list or iNaturalist) are kept.
mergedstatuses = statelist.merge(inatstatuses,how="outer",left_on='state_scientificName',right_on='inat_scientificName')

# add some extra fields
mergedstatuses['place_id'] = str(place_id)
mergedstatuses['username'] = 'peggydnew'
# NOTE(review): this text previously pointed at the South Australian page
# (environment.sa.gov.au); confirm the Tasmanian URL below before uploading.
mergedstatuses['description'] = "Listed - refer to https://nre.tas.gov.au/conservation/threatened-species-and-communities"
# Taxa that are only on the sensitive list have no conservation lsid, so fall
# back to the sensitive lsid. fillna returns a new Series, so it must be assigned.
state_lsid = mergedstatuses['state_lsid_conservation'].fillna(mergedstatuses['state_lsid_sensitive'])
mergedstatuses['state_url'] = "https://bie.ala.org.au/species/" + state_lsid
mergedstatuses['state_iucn_equivalent'] = mergedstatuses['state_status'].map(iucnStatusMappings)
# Both sides of the merge carry inat_taxon_id (suffixed _x / _y); prefer the
# one from the existing iNaturalist status, else the taxonomy match.
mergedstatuses['inat_taxon_id'] = mergedstatuses['inat_taxon_id_y'].fillna(mergedstatuses['inat_taxon_id_x'])
mergedstatuses['inat_scientificName'] = mergedstatuses['inat_scientificName'].fillna(mergedstatuses['inat_taxon_name'])

# UPDATE: inat status and state status both exist
# REMOVE: inat status exists, state status does not
# ADD: state status exists, inat status does not (matching taxon)
# NO MATCH: state status exists, inat taxa not found
has_inat_status = mergedstatuses['inat_id'].notnull()
on_state_list = mergedstatuses['state_scientificName'].notnull()
has_inat_taxon = mergedstatuses['inat_taxon_id'].notnull()
mergedstatuses['action'] = 'na'
mergedstatuses.loc[has_inat_status & on_state_list, 'action'] = "UPDATE"
mergedstatuses.loc[has_inat_status & ~on_state_list, 'action'] = "REMOVE"
mergedstatuses.loc[~has_inat_status & has_inat_taxon, 'action'] = "ADD"
mergedstatuses.loc[~has_inat_status & ~has_inat_taxon, 'action'] = "NO MATCH"

# only update those with different values: an UPDATE whose status, geoprivacy
# and IUCN equivalent already agree becomes NO CHANGE. NaN never compares equal,
# so a row with a missing value on either side stays an UPDATE.
unchanged = (
    (mergedstatuses['action'] == "UPDATE")
    & (mergedstatuses['state_status'] == mergedstatuses['inat_status'])
    & (mergedstatuses['state_geoprivacy'] == mergedstatuses['inat_geoprivacy'])
    & (mergedstatuses['state_iucn_equivalent'] == mergedstatuses['inat_iucn'])
)
mergedstatuses.loc[unchanged, 'action'] = "NO CHANGE"

# display
mergedstatusesprintfriendly = mergedstatuses[['action','inat_id','inat_taxon_id','state_scientificName','inat_scientificName', 'state_status','inat_status','state_geoprivacy','inat_geoprivacy','state_iucn_equivalent','inat_iucn','state_authority','inat_authority','state_url','inat_url','inat_description','inat_place_display_name','inat_current_synonymous_taxon_ids']]
mergedstatuses.groupby('action').size()


action
ADD           10
NO CHANGE    411
NO MATCH     249
REMOVE       154
UPDATE        16
dtype: int64

### Output files

In [19]:
# UPDATES
# Headers: action,taxon_name,id,taxon_id,status,iucn_equivalent,authority,url,geoprivacy,place_id,username,description
updates = mergedstatuses[mergedstatuses['action'].isin(['UPDATE','REMOVE'])].copy()
# REMOVE rows are not on the state list, so they have no state name; use the
# iNaturalist name instead of writing a blank taxon_name.
updates['state_scientificName'] = updates['state_scientificName'].fillna(updates['inat_scientificName'])
updates = updates[['action','state_scientificName','inat_id','inat_taxon_id','state_status','state_iucn_equivalent','state_authority','state_url','state_geoprivacy','place_id','username','description']]
updates = updates.rename(columns={'state_scientificName':'taxon_name',
                                  'inat_id':'id',
                                  'inat_taxon_id':'taxon_id',
                                  'state_status':'status',
                                  'state_iucn_equivalent':'iucn_equivalent',
                                  'state_authority':'authority',
                                  'state_url':'url',
                                  'state_geoprivacy':'geoprivacy'})

# ADDITIONS
# Headers: Taxon_Name,Status,Authority,IUCN_equivalent,Description,iNaturalist_Place_ID,url,Taxon_Geoprivacy,Username,taxon_id
# The file also carries `action` and `inat_id`, which are not template columns.
additions = mergedstatuses[mergedstatuses['action'] == "ADD"].copy()
additions = additions[['action','state_scientificName','inat_id','inat_taxon_id','state_status','state_iucn_equivalent','state_authority','state_url','state_geoprivacy','place_id','username','description']]
# Column names follow the template header above exactly (spelling and case).
additions = additions.rename(columns={'state_scientificName':'Taxon_Name',
                                      'state_status':'Status',
                                      'state_authority':'Authority',
                                      'state_iucn_equivalent':'IUCN_equivalent',
                                      'description':'Description',
                                      'place_id':'iNaturalist_Place_ID',
                                      'state_url':'url',
                                      'state_geoprivacy':'Taxon_Geoprivacy',
                                      'username':'Username',
                                      'inat_taxon_id':'taxon_id'})

# WRITE TO FILE
# Create the output folders first so to_csv does not fail on a fresh checkout.
os.makedirs(projectdir + "data/out/summaries/", exist_ok=True)
mergedstatusesprintfriendly.to_csv(projectdir + "data/out/summaries/tas.csv",index=False)
updates.to_csv(projectdir + "data/out/updates-tas.csv", index=False)
additions.to_csv(projectdir + "data/out/additions-tas.csv", index=False)