# iNaturalist status updates by state - VIC

Using the file produced by `collate-status-taxa.ipynb`, `inat-aust-status-taxa.csv` (statuses joined to taxa names), generate the lists used to update iNaturalist statuses.

## Prep - common to all states
1. Read in the iNaturalist statuses and keep only this state's entries
2. Read in the state conservation and sensitive lists
3. Prep fields incl IUCN equivalent mappings and matching to iNat taxonomy  
4. Merge and compare the state and inaturalist lists
5. Create update/removals list
6. Create additions list
7. Save files

## 1. Read in the iNaturalist statuses and keep only the VIC entries

In [1]:
import pandas as pd
import sys
import os
# Project root: the parent directory of the current working directory, with a trailing slash
projectdir = os.path.dirname(os.getcwd()) + "/"
# All input data is read from <project>/data/in/
sourcedir = projectdir + "data/in/"
# Make the shared helper modules under notebooks/includes importable
includes_path = os.path.abspath(projectdir + "notebooks/includes/")
sys.path.append(includes_path)
import list_functions  as lf

# Load the collated iNaturalist statuses: every column is read as text and
# NA detection is switched off (na_filter=False)
statuses_csv = sourcedir + "inat-aust-status-taxa.csv"
taxastatus = pd.read_csv(
    statuses_csv,
    dtype=str,
    na_filter=False,
    encoding='UTF-8',
)

# keep only the status rows that relate to this state
def filter_state_statuses(stateregex: str, urlregex: str, statuses: "pd.DataFrame | None" = None) -> "pd.DataFrame":
    """Return the status rows that mention the state in any identifying column.

    A row is kept when its ``place_display_name``, ``place_name`` or
    ``authority`` matches ``stateregex``, or its ``url`` matches ``urlregex``.
    Rows matched by more than one column are returned once.

    Parameters
    ----------
    stateregex : str
        Regular expression tested against the place and authority columns.
    urlregex : str
        Regular expression tested against the ``url`` column.
    statuses : pandas.DataFrame, optional
        Frame to filter. Defaults to the notebook-level ``taxastatus`` frame,
        so existing two-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Matching rows with their original index labels, sorted by
        ``taxon_id`` then ``user_id``. When those columns hold strings (as
        they do for a frame read with ``dtype=str``) the sort is lexical,
        not numeric.
    """
    if statuses is None:
        statuses = taxastatus

    # (column, pattern) pairs, in the order their matches are stacked; the
    # order decides which copy of a multiply-matched row survives de-duplication
    column_patterns = [
        ('place_display_name', stateregex),
        ('place_name', stateregex),
        ('url', urlregex),
        ('authority', stateregex),
    ]
    # na=False: a missing cell counts as "no match" instead of producing a
    # NaN in the mask, which boolean indexing rejects
    matches = [
        statuses[statuses[column].str.contains(pattern, na=False)]
        for column, pattern in column_patterns
    ]
    statedf = pd.concat(matches).drop_duplicates()
    return statedf.sort_values(['taxon_id', 'user_id'])

# Patterns identifying Victorian entries: one for place/authority text, one for source URLs
state_pattern = " VIC |Victoria|VICTORIA|Vic"
state_url_pattern = "vic.gov.au"

# Prefix every column with inat_ so the iNaturalist side is distinguishable after later merges
inatstatuses = filter_state_statuses(state_pattern, state_url_pattern).add_prefix("inat_")

# Row count per distinct status label currently held in iNaturalist
inatstatuses.groupby(['inat_status']).size()


inat_status
CR                                    2
Conservation Dependent                2
Critically Endangered               479
Critically Endangered                 1
EN                                    4
Endangered                          924
Endangered (Extinct in Victoria)      1
Extinct                              44
NT                                    1
Poorly known                          1
Sensitive                             8
Threatened                            4
Vulnerable                          260
endangered                            1
dtype: int64

### 2. iNaturalist taxonomy

### 3. State lists

Get the ALA Conservation and Sensitive lists


In [4]:
#  %%script echo skipping # comment this line to download dataset from lists.ala.org.au the web and save locally

def _refresh_ala_list(list_url, csv_name):
    """Fetch one ALA species list, run it through lf.kvp_to_columns and save it under sourcedir.

    Returns the processed frame so the caller can keep working with it.
    """
    species = lf.kvp_to_columns(lf.download_ala_specieslist(list_url))
    species.to_csv(sourcedir + csv_name, index=False)
    return species


# dr490 is saved as the sensitive list, dr655 as the conservation list
sensitivelist = _refresh_ala_list(
    "https://lists.ala.org.au/ws/speciesListItems/dr490?max=10000&includeKVP=true",
    "state-lists/vic-ala-sensitive.csv",
)
conservationlist = _refresh_ala_list(
    "https://lists.ala.org.au/ws/speciesListItems/dr655?max=10000&includeKVP=true",
    "state-lists/vic-ala-conservation.csv",
)

In [14]:
# Sensitive list: every entry is obscured and carries the fixed 'Restricted' status
sensitivelist = pd.read_csv(sourcedir + "state-lists/vic-ala-sensitive.csv", dtype=str).assign(
    geoprivacy='obscured',
    authority='Victorian Biodiversity Atlas',
    status='Restricted',
)
# Conservation list: keeps the status from the file; geoprivacy is left blank here
conservationlist = pd.read_csv(sourcedir + "state-lists/vic-ala-conservation.csv", dtype=str).assign(
    geoprivacy="",
    authority='Victorian Department of Energy, Environment and Climate Action',
)

# Outer join on name so taxa present on only one of the two lists are kept
conservation_part = conservationlist[['id','name','lsid','status','geoprivacy','authority']]
sensitive_part = sensitivelist[['id','name','lsid','geoprivacy','status','authority']]
combined = pd.merge(
    conservation_part,
    sensitive_part,
    how="outer",
    on='name',
    suffixes=('_conservation', '_sensitive'),
)

statelist = (
    combined
    .assign(
        # conservation values win; sensitive values fill the gaps
        status=lambda df: df['status_conservation'].fillna(df['status_sensitive']),
        authority=lambda df: df['authority_conservation'].fillna(df['authority_sensitive']),
        # anything not on the sensitive list is treated as open
        geoprivacy=lambda df: df['geoprivacy_sensitive'].fillna('open'),
    )
    .rename(columns={'name': 'scientificName'})
    .add_prefix("state_")
)

print("Conservation list entries:", len(conservationlist), sep="")
print("Sensitive list entries:", len(sensitivelist), sep="")
# alternative summary: statelist.groupby('state_geoprivacy',dropna=False).size()
statelist.groupby('state_status',dropna=False).size()

Conservation list entries:1999
Sensitive list entries:136


state_status
Conservation Dependent       3
Critically Endangered      559
Endangered                1074
Extinct                     54
Restricted                  12
Threatened                   5
Vulnerable                 304
dtype: int64

### 4. Equivalent IUCN statuses

In [21]:
# Status labels grouped by the IUCN-equivalent code (kept as text) they map to.
# NOTE(review): 'Extinct in the wild' shares code '70' with 'Extinct'; iNaturalist's
# own scale is believed to use 60 for extinct-in-the-wild - confirm before relying on it.
_labels_by_iucn_code = {
    '10': ('Least concern', 'Special least concern'),
    '20': ('Near Threatened', 'Conservation Dependent'),
    '30': ('Vulnerable', 'Threatened', 'Restricted', 'Sensitive', 'Rare'),
    '40': ('Endangered',),
    '50': ('Critically Endangered',),
    '70': ('Extinct', 'Extinct in the wild'),
}

# Flatten to the label -> code lookup applied to the state status column
iucnStatusMappings = {
    label: code
    for code, labels in _labels_by_iucn_code.items()
    for label in labels
}

### 5. Determine best place ID to use

In [17]:
# Count existing statuses per place to see which place id dominates
# (7830 in the recorded output - that id is used when building the export)
place_columns = ['inat_place_id', 'inat_place_name', 'inat_place_display_name']
inatstatuses.groupby(place_columns)['inat_place_id'].count()


inat_place_id  inat_place_name  inat_place_display_name
6744           Australia        Australia                     2
7830           Victoria         Victoria, AU               1730
Name: inat_place_id, dtype: int64

## Merge iNaturalist statuses with State lists on scientificName


In [22]:
# iNaturalist place id written to every exported row
place_id = 7830

# Active iNaturalist taxa only, with id/name renamed and every column prefixed inat_
inattaxa = (
    pd.read_csv(
        sourcedir + "inaturalist-australia-9/inaturalist-australia-9-taxa.csv",
        dtype=str,
        usecols=['id','name','rank','observations_count','is_active'],
    )
    .loc[lambda taxa: taxa['is_active'] == 't']
    .rename(columns={'id': 'taxon_id', 'name': 'taxon_name'})
    .add_prefix("inat_")
)

# Left join: every state list row is kept; rows with no exact name match get empty inat_ columns
state_columns = ['state_scientificName','state_status','state_geoprivacy', 'state_lsid_conservation','state_lsid_sensitive','state_authority']
statelist = pd.merge(
    statelist[state_columns],
    inattaxa,
    how="left",
    left_on='state_scientificName',
    right_on='inat_taxon_name',
    suffixes=(None, '_inat'),
)
statelist

Unnamed: 0,state_scientificName,state_status,state_geoprivacy,state_lsid_conservation,state_lsid_sensitive,state_authority,inat_taxon_id,inat_taxon_name,inat_rank,inat_observations_count,inat_is_active
0,Ambassis agassizii,Extinct,open,https://biodiversity.org.au/afd/taxa/b0ff773c-...,,"Victorian Department of Energy, Environment an...",93797,Ambassis agassizii,species,34,t
1,Bidyanus bidyanus,Endangered,open,https://biodiversity.org.au/afd/taxa/05866f31-...,,"Victorian Department of Energy, Environment an...",95759,Bidyanus bidyanus,species,38,t
2,Chelodina expansa,Endangered,open,https://biodiversity.org.au/afd/taxa/fc7d0724-...,,"Victorian Department of Energy, Environment an...",39599,Chelodina expansa,species,189,t
3,Craterocephalus fluviatilis,Critically Endangered,open,https://biodiversity.org.au/afd/taxa/50568ccf-...,,"Victorian Department of Energy, Environment an...",98614,Craterocephalus fluviatilis,species,0,t
4,Emydura macquarii,Critically Endangered,open,https://biodiversity.org.au/afd/taxa/39c22a1e-...,,"Victorian Department of Energy, Environment an...",99882,Emydura macquarii,species,2934,t
...,...,...,...,...,...,...,...,...,...,...,...
2009,Prasophyllum retroflexum,Restricted,obscured,,https://id.biodiversity.org.au/taxon/apni/5140...,Victorian Biodiversity Atlas,,,,,
2010,Pterostylis tenuissima,Restricted,obscured,,https://id.biodiversity.org.au/taxon/apni/5141...,Victorian Biodiversity Atlas,553266,Pterostylis tenuissima,species,6,t
2011,Callocephalon fimbriatum,Restricted,obscured,,https://biodiversity.org.au/afd/taxa/6c646af8-...,Victorian Biodiversity Atlas,116842,Callocephalon fimbriatum,species,4448,t
2012,Oecetis quadrula,Restricted,obscured,,https://biodiversity.org.au/afd/taxa/1b3ac25a-...,Victorian Biodiversity Atlas,,,,,


In [23]:
# prepare the export fields, common to New template and Update template
# outer join: keeps state-only rows (candidates to add) and iNaturalist-only rows (candidates to remove)
mergedstatuses = statelist.merge(inatstatuses,how="outer",left_on='state_scientificName',right_on='inat_scientificName')

# constant fields written to every exported row
mergedstatuses['place_id'] = str(place_id)
mergedstatuses['username'] = 'peggydnew'
# FIXME(review): this description links to the South Australian threatened-species page,
# but this notebook builds the Victorian lists - replace with the Victorian reference once confirmed
mergedstatuses['description'] = "Listed - refer to https://www.environment.sa.gov.au/topics/plants-and-animals/threatened-species-and-ecological-communities/threatened-species"

# state_url: use the conservation-list lsid, falling back to the sensitive-list lsid.
# fillna returns a new Series, so the result has to be captured for the fallback to take effect.
state_lsid = mergedstatuses['state_lsid_conservation'].fillna(mergedstatuses['state_lsid_sensitive'])
mergedstatuses['state_url'] = "https://bie.ala.org.au/species/" + state_lsid
mergedstatuses['state_iucn_equivalent'] = mergedstatuses['state_status'].map(iucnStatusMappings)

# both frames carry inat_taxon_id, so the merge suffixes them:
# _y comes from the existing status record, _x from the taxonomy name match
mergedstatuses['inat_taxon_id'] = mergedstatuses['inat_taxon_id_y'].fillna(mergedstatuses['inat_taxon_id_x'])
mergedstatuses['inat_scientificName'] = mergedstatuses['inat_scientificName'].fillna(mergedstatuses['inat_taxon_name'])

# UPDATE: inat status and state status both exist
# REMOVE: inat status exists, state status does not
# ADD: state status exists, inat status does not (matching taxon)
# NO MATCH: state status exists, inat taxa not found
has_inat_status = mergedstatuses['inat_id'].notnull()
on_state_list = mergedstatuses['state_scientificName'].notnull()
has_inat_taxon = mergedstatuses['inat_taxon_id'].notnull()

mergedstatuses['action'] = 'na'
mergedstatuses.loc[has_inat_status & on_state_list, 'action'] = "UPDATE"
mergedstatuses.loc[has_inat_status & ~on_state_list, 'action'] = "REMOVE"
mergedstatuses.loc[~has_inat_status & has_inat_taxon, 'action'] = "ADD"
mergedstatuses.loc[~has_inat_status & ~has_inat_taxon, 'action'] = "NO MATCH"

# only update those with different values: an UPDATE whose status, geoprivacy and
# IUCN equivalent already agree is downgraded to NO CHANGE
# (a missing value on either side never compares equal, so such rows stay UPDATE)
unchanged = (
    (mergedstatuses['state_status'] == mergedstatuses['inat_status'])
    & (mergedstatuses['state_geoprivacy'] == mergedstatuses['inat_geoprivacy'])
    & (mergedstatuses['state_iucn_equivalent'] == mergedstatuses['inat_iucn'])
)
mergedstatuses.loc[(mergedstatuses['action'] == "UPDATE") & unchanged, 'action'] = "NO CHANGE"

# display: side-by-side state/iNaturalist columns for human review
mergedstatusesprintfriendly = mergedstatuses[['action','inat_id','inat_taxon_id','state_scientificName','inat_scientificName', 'state_status','inat_status','state_geoprivacy','inat_geoprivacy','state_iucn_equivalent','inat_iucn','state_authority','inat_authority','state_url','inat_url','inat_description','inat_place_display_name','inat_current_synonymous_taxon_ids']]
mergedstatuses.groupby('action').size()


action
ADD            32
NO CHANGE    1062
NO MATCH      513
REMOVE        265
UPDATE        408
dtype: int64

## Updates

In [24]:
# UPDATES
# Headers: action,taxon_name,id,taxon_id,status,iucn_equivalent,authority,url,geoprivacy,place_id,username,description
update_source_columns = ['action','state_scientificName','inat_id','inat_taxon_id','state_status','state_iucn_equivalent','state_authority','state_url','state_geoprivacy','place_id','username','description']
updates = mergedstatuses.loc[mergedstatuses['action'].isin(['UPDATE','REMOVE']), update_source_columns].copy()
# strip the state_/inat_ prefix only where it starts the column name
updates.columns = updates.columns.str.replace(r"^(state_|inat_)", "", regex=True)
updates = updates.rename(columns={'scientificName':'taxon_name'})

# ADDITIONS
# Headers: Taxon_Name,Status,Authority,IUCN_equivalent,Description,iNaturalist_Place_ID,url,Taxon_Geoprivacy,Username,taxon_id
# source column -> template header, listed in template order so the written file matches the header line above
addition_headers = {
    'state_scientificName': 'Taxon_Name',
    'state_status': 'Status',
    'state_authority': 'Authority',
    'state_iucn_equivalent': 'IUCN_equivalent',
    'description': 'Description',
    'place_id': 'iNaturalist_Place_ID',
    'state_url': 'url',
    'state_geoprivacy': 'Taxon_Geoprivacy',
    'username': 'Username',
    'inat_taxon_id': 'taxon_id',
}
# 'action' and 'inat_id' are left out: for ADD rows action is always "ADD" and inat_id is always empty
additions = (
    mergedstatuses.loc[mergedstatuses['action'] == "ADD", list(addition_headers)]
    .rename(columns=addition_headers)
)

# WRITE TO FILE
# make sure the output folders exist so a fresh checkout can run the notebook end to end
os.makedirs(projectdir + "data/out/summaries", exist_ok=True)
mergedstatusesprintfriendly.to_csv(projectdir + "data/out/summaries/vic.csv",index=False)
updates.to_csv(projectdir + "data/out/updates-vic.csv", index=False)
additions.to_csv(projectdir + "data/out/additions-vic.csv", index=False)