In [11]:
import pandas as pd
import requests
import io
from ftfy import fix_encoding
import os
import sys
import datetime
import os.path as path

# Repository root: three directories above the current working directory.
# The trailing slash is kept so later paths can be built by plain concatenation.
projectDir = f'{path.abspath(path.join(os.getcwd(), "../../.."))}/'

# Make the shared helper module importable, then load it.
sys.path.append(path.abspath(f"{projectDir}source-code/includes"))
import list_functions as lf

# Jurisdiction handled by this notebook and the YYYYMM stamp used in output names.
state = 'QLD'
monthStr = f"{datetime.datetime.now():%Y%m}"

# Input and output folders (all relative to the repository root).
sourceDataDir = f"{projectDir}source-data/QLD/"
statusDir = f"{projectDir}source-data/status-codes/"
processedDataDir = f"{projectDir}current-lists/"
codesfile = f"{statusDir}{state}-codes.csv"

# Remote CSV sources: status code lookup and the confidential species list.
codesurl = "https://apps.des.qld.gov.au/data-sets/wildlife/wildnet/species-status-codes.csv"
sensitivelisturl = "https://apps.des.qld.gov.au/data-sets/wildlife/wildnet/qld-confidential-species.csv"

# Identifiers ("druids") of the production and test species lists.
prodListDruid = "dr493"
testListDruid = "dr18404"

## Download the raw files from data.qld.gov.au
Fetch the status codes and the confidential species list, then save local copies.

In [2]:
# Both downloads follow the same steps: fetch the CSV, repair mis-encoded
# characters with ftfy, parse it, and save a local copy under sourceDataDir.
#
# timeout=60          -> a stalled server cannot hang the kernel indefinitely.
# raise_for_status()  -> an HTTP error (404/500 page) stops the cell instead of
#                        being parsed as CSV and overwriting the local copy.

# Status codes
response = requests.get(codesurl, timeout=60)
response.raise_for_status()
rtext = fix_encoding(response.text)
speciescodes = pd.read_csv(io.StringIO(rtext))
speciescodes.to_csv(sourceDataDir + "species-status-codes.csv", index=False)

# Confidential/Sensitive List
response = requests.get(sensitivelisturl, timeout=60)
response.raise_for_status()
rtext = fix_encoding(response.text)
sensitivelist = pd.read_csv(io.StringIO(rtext))
sensitivelist.to_csv(sourceDataDir + "qld-confidential-species.csv", index=False)

Read the locally saved files.

In [17]:
# Load the locally saved copies of the status code lookup and the confidential species list.
speciescodes = pd.read_csv(f"{sourceDataDir}species-status-codes.csv")
sensitivelist = pd.read_csv(f"{sourceDataDir}qld-confidential-species.csv")

In [18]:
# Build the NCA status lookup (Code -> Code_description) from the status code table.
# A single .loc selection followed by .copy() gives an independent frame, so the
# edits below cannot raise SettingWithCopyWarning or touch `speciescodes`.
ncastatuscodes = speciescodes.loc[
    speciescodes['Field'] == "NCA_status", ['Code', 'Code_description']
].copy()

# Remove the text " wildlife" from the descriptions, then title-case the two
# multi-word statuses. regex=False keeps the match literal regardless of the
# installed pandas version's default for `regex`.
ncastatuscodes['Code_description'] = (
    ncastatuscodes['Code_description']
    .str.replace(" wildlife", "", regex=False)
    .replace({"Critically endangered": "Critically Endangered",
              "Near threatened": "Near Threatened"})
)
ncastatuscodes

Unnamed: 0,Code,Code_description
20,C,Least concern
21,CR,Critically Endangered
22,E,Endangered
23,EX,Extinct
24,I,International
25,NT,Near Threatened
26,P,Prohibited
27,PE,Extinct in the wild
28,SL,Special least concern
29,V,Vulnerable


## List processing
* Expand the NCA status codes into their descriptions
* Rename fields to DwC terms
* Replace kingdom values with scientific terms

In [19]:
# Attach the NCA status description to each species.
# validate="many_to_one" makes the merge fail loudly if the lookup ever holds a
# duplicated Code, instead of silently duplicating species rows in the export.
sensitivelist = pd.merge(
    sensitivelist,
    ncastatuscodes,
    left_on=['NCA status'],
    right_on=['Code'],
    how="left",
    validate="many_to_one",
)

# Keep only the exported columns and rename them to the output field names.
sensitivelist = sensitivelist[
    ['Scientific name', 'Common name', 'NCA status', 'Taxon Id', 'Code_description', 'Kingdom', 'Family']
].rename(columns={
    'Scientific name': 'scientificName',
    'Common name': 'vernacularName',
    'NCA status': 'category',
    'Taxon Id': 'WildNetTaxonID',
    'Code_description': 'status',
    'Kingdom': 'kingdom',
    'Family': 'family',
})

# The same generalisation value is applied to every record.
sensitivelist['generalisation'] = "2 km"

# Map NCA status codes to category codes. Codes that are not keys of this map
# are left unchanged by .replace().
codeMap = {'C': 'LC', 'CR': 'CR', 'E': 'EN',
           'NT': 'NT', 'PE': 'EW', 'SL': 'SL',
           'V': 'VU'}
# Map the source kingdom labels to scientific kingdom names.
kingdomMap = {'animals': 'Animalia', 'plants': 'Plantae', 'fungi': 'Fungi'}
sensitivelist['kingdom'] = sensitivelist['kingdom'].replace(kingdomMap)
sensitivelist['category'] = sensitivelist['category'].replace(codeMap)
# Records without an NCA status get the placeholder 'UK' (presumably "unknown" - confirm).
sensitivelist['category'] = sensitivelist['category'].fillna('UK')

# Write the processed list; the file name is built from `state` so it matches
# the naming used for the production backup.
sensitivelist.to_csv(
    processedDataDir + 'sensitive-lists/' + state + '-' + prodListDruid + '-sensitive.csv',
    encoding="UTF-8",
    index=False,
)
sensitivelist

Unnamed: 0,scientificName,vernacularName,category,WildNetTaxonID,status,kingdom,family,generalisation
0,Rhinolophus robertsi,greater large-eared horseshoe bat,EN,969,Endangered,Animalia,Rhinolophidae,2 km
1,Chloebia gouldiae,Gouldian finch,EN,1376,Endangered,Animalia,Estrildidae,2 km
2,Erythrura trichroa,blue-faced parrot-finch,NT,1378,Near Threatened,Animalia,Estrildidae,2 km
3,Neochmia phaeton evangelinae,crimson finch (white-bellied subspecies),LC,1370,Least concern,Animalia,Estrildidae,2 km
4,Poephila cincta cincta,black-throated finch (white-rumped subspecies),EN,1365,Endangered,Animalia,Estrildidae,2 km
...,...,...,...,...,...,...,...,...
955,Pneumatopteris pennigera,lime fern,UK,11700,,Plantae,Thelypteridaceae,2 km
956,Reholttumia costata,,NT,42079,Near Threatened,Plantae,Thelypteridaceae,2 km
957,Thelypteris confluens,,VU,16042,Vulnerable,Plantae,Thelypteridaceae,2 km
958,Macadamia jansenii,,CR,8185,Critically Endangered,Plantae,Proteaceae,2 km


### Change Logs
Upload the file to the test environment before running the cell below, which compares it to the list in production.
- Check record counts (old vs new) and verify the count in the change log
- Send to domain experts for verification

In [20]:
# Settings for the change-log run.
ltype = "S"  # list type passed to the helper - presumably "S" = sensitive; confirm in list_functions
changeDir = "Monitoring/Change-logs/"
filename = "QLD-sensitive.csv"

# Build the change list from the test and production lists, then save it
# under the change-log folder with the YYYYMM prefix.
changelist = lf.get_changelist(testListDruid, prodListDruid, ltype)
changelist.to_csv(
    f"{projectDir}{changeDir}{monthStr}-{filename}",
    encoding="UTF-8",
    index=False,
)
changelist

Unnamed: 0,name,scientificName,commonName,listUpdate


## Back up the list in production to `historical-lists`
If everything checks out, run the following cell to keep a copy of the production list before overwriting it.

In [16]:
# Download the current production list, convert its key-value pairs to columns
# via the helper module, and store it as the historical copy.
filename = f"{state}-{prodListDruid}-sensitive.csv"
prodListUrl = f"https://lists.ala.org.au/ws/speciesListItems/{prodListDruid}?max=10000&includeKVP=true"
prodList = lf.kvp_to_columns(lf.download_ala_specieslist(prodListUrl))
prodList.to_csv(
    f"{projectDir}historical-lists/sensitive/{filename}",
    encoding="UTF-8",
    index=False,
)