# Test of whether parameters were successfully applied
Provide the path to an ouput sqlite database and this code will check the attributes of results against the parameters that were supposed to be applied.

In [53]:
import sqlite3
import pandas as pd

database = "T:/Occurrence_Records/All_params_1.sqlite"

In [54]:
def validate_output(database):
    '''
    Compares the occurrence record attributes to the filters that were supposed to be applied.  
    
    PARAMTERS
    ---------
    database: path to a wrangler output database
    
    RESULTS
    -------
    prints messages if tests are failed
    '''
    import sqlite3
    import pandas as pd
    
    return

In [55]:
# Connect to a database
conn = sqlite3.connect(database)
cursor = conn.cursor()

# Get the taxon concept
taxon_concept = (pd.read_sql(sql="SELECT * FROM taxon_concept;", con=conn)
                 .rename({"index": "key", "0": "value"}, axis=1)
                 .set_index("key"))
print(taxon_concept)

# Get the filter set that was applied
filter_set = (pd.read_sql(sql="SELECT * FROM filter_set", con=conn)
              .rename({"index": "parameter", "0": "value"}, axis=1)
              .set_index("parameter"))
print(filter_set)

                            value
key                              
ID                        bwitux0
GBIF_ID                   9606290
EBIRD_ID              Wild Turkey
detection_distance_m          300
TAXON_EOO                    None
                                                                        value
parameter                                                                    
name                                                               Test_EBD_1
query_polygon                                                            None
issues_omit                 ['TAXON_MATCH_HIGHERRANK', 'COORDINATE_UNCERTA...
sampling_protocols_omit                  ['eBird Pelagic Protocol', 'Random']
bases_omit                      ['PRESERVED_SPECIMEN', 'MACHINE_OBSERVATION']
has_coordinate_uncertainty                                              False
geoissue                                                                 None
default_coordUncertainty                                    

In [56]:
# Presence only
presabs = [x[0] for x in conn.execute("SELECT DISTINCT occurrenceStatus FROM occurrence_records;").fetchall()]
if presabs != ["PRESENT"]:
    print("!!! Failed occurrenceStatus test: " + str(presabs))

In [57]:
# DWCA archive
if filter_set.loc["get_dwca", "value"] == "True":
    try:
        conn.execute("SELECT download_key FROM GBIF_download_info;").fetchall()
    except:
        print("!!! Failed DWCA download key test")

In [58]:
# Latitude range
lat_range = filter_set.loc["lat_range", "value"]
if lat_range != "None":
    lat_range = [float(x) for x in lat_range.split(",")]
    max_lat = float(conn.execute("SELECT MAX(decimalLatitude) FROM occurrence_records;").fetchall()[0][0])
    min_lat = float(conn.execute("SELECT MIN(decimalLatitude) FROM occurrence_records;").fetchall()[0][0])
    if (min_lat < lat_range[0] or max_lat > lat_range[1]):
        print("!!! Failed test of decimalLatitude values")

# Longitude range
lon_range = filter_set.loc["lon_range", "value"]
if lon_range != "None":
    lon_range = [float(x) for x in lon_range.split(",")]
    max_lon = float(conn.execute("SELECT MAX(decimalLongitude) FROM occurrence_records;").fetchall()[0][0])
    min_lon = float(conn.execute("SELECT MIN(decimalLongitude) FROM occurrence_records;").fetchall()[0][0])
    if (min_lon < lon_range[0] or max_lon > lon_range[1]):
        print("!!! Failed test of decimalLongitude values")

In [72]:
# Years range
yrs_range = filter_set.loc["years_range", "value"]
if yrs_range != "None":
    yrs_range = [float(x) for x in yrs_range.split(",")]
    max_yr = float(conn.execute("SELECT MAX(strftime('%Y', eventDate)) FROM occurrence_records;").fetchall()[0][0])
    min_yr = float(conn.execute("SELECT MIN(strftime('%Y', eventDate)) FROM occurrence_records;").fetchall()[0][0])
    if (min_yr < yrs_range[0] or max_yr > yrs_range[1]):
        print("!!! Failed test of year (eventDate) values")

In [102]:
# Months range
months_range = filter_set.loc["months_range", "value"]
if months_range != "None":
    months_range = [float(x) for x in months_range.split(",")]
    max_month = float(conn.execute("SELECT MAX(strftime('%m', eventDate)) FROM occurrence_records;").fetchall()[0][0])
    min_month = float(conn.execute("SELECT MIN(strftime('%m', eventDate)) FROM occurrence_records;").fetchall()[0][0])
    # Months range could be like 1,12
    if months_range[0] < months_range[1]:
        if (min_month < months_range[0] or max_month > months_range[1]):
            print("!!! Failed test of month (eventDate) values")
    # Months range could be like 11,3
    if months_range[0] > months_range[1]:
        no_months = list(range(months_range[1] + 1, months_range[0]) -1)
        months = conn.execute("SELECT DISTINCT strftime('%m', eventDate) FROM occurrence_records;").fetchall()
        months = [int(x[0]) for x in months]
        if len(set(months) & set(no_months)) != 0:
            print("!!! Failed test of month (eventDate) values")

In [95]:
a = 11
b = 3
no_months = list(range(b + 1, a - 1))
print(no_months)
months = conn.execute("SELECT DISTINCT strftime('%m', eventDate) FROM occurrence_records;").fetchall()
months = [int(x[0]) for x in months]
print(sorted(months))
if len(set(months) & set(no_months)) != 0:
    print("!!! Failed test of month (eventDate) values")

[4, 5, 6, 7, 8, 9]
[5, 6, 7, 8, 9, 10, 11, 12]
!!! Failed test of month (eventDate) values


In [59]:
multilists = {"issues_omit": "issues", "sampling_protocols_omit": "samplingProtocol", "bases_omit": "basisOfRecord", 
              "datasets_omit": "datasetName", "collection_codes_omit": "collectionCode", "institutions_omit": "institutionID"}

# Issues
values = [x[0] for x in conn.execute("SELECT DISTINCT issues FROM occurrence_records").fetchall()]
print(values)
parameter = filter_set.loc["issues_omit", "value"]
print(parameter)
a = []
a.append(parameter.replace("'", "").replace("[", "").replace("]", ""))
print(a)



['nan']
['TAXON_MATCH_HIGHERRANK', 'COORDINATE_UNCERTAINTY_METERS_INVALID']
['TAXON_MATCH_HIGHERRANK, COORDINATE_UNCERTAINTY_METERS_INVALID']
