##  Example of how to create the CSV File that the NABat bulk uploader is looking for, based on a folder of wav files (that contain Guano metadata)

#### Guano metadata is great for interoperability and data sharing.

#### Between the content that is stored in the Guano MD and the content in the NABat file naming convention, nearly all the info we need is at hand.

In [1]:
from guano import GuanoFile

import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
dname = r"F:\1181\NE"
d = Path(dname)
wavs = d.glob('*.wav')

In [3]:
%%time

rows = []
mds = []
for wav in wavs:
    g = GuanoFile(str(wav))
    md = dict(g.items())
    md['filepath'] = wav.parent
    md['filename'] = wav.name
    md['seconds'] = g.wav_params.nframes / g.wav_params.framerate
    mds.append(md)


Wall time: 2.33 s


### This gives us a dataframe of the Guano MD in that folder

In [4]:
df = pd.DataFrame(mds)
df.columns = [col.replace('|', '_').replace(' ', '_') for col in df.columns]
df.replace('noID', '', inplace=True)
df.head()

Unnamed: 0,GUANO_Version,Length,Note,SB_Classifier,SB_Filter_HP,SB_Region,SB_Species_Auto_ID,SB_Version,Samplerate,Species_Auto_ID,Species_Manual_ID,TE,Timestamp,filename,filepath,seconds
0,1.0,0.0,,south Great Basin,0,Great Basin,,4.2.1,192000,,,1,2017-07-12 21:40:04,NABAT-1181NE_20170712_214004.wav,F:\1181\NE,4.761333
1,1.0,0.0,,south Great Basin,0,Great Basin,,4.2.1,192000,,,1,2017-07-12 21:40:12,NABAT-1181NE_20170712_214012.wav,F:\1181\NE,2.677333
2,1.0,5.0,,south Great Basin,30,Great Basin,HiF,4.2.1,192000,HiF,Myev,1,2017-07-12 21:40:55,NABAT-1181NE_20170712_214055.wav,F:\1181\NE,4.984
3,1.0,0.0,,south Great Basin,0,Great Basin,,4.2.1,192000,,,1,2017-07-12 21:41:01,NABAT-1181NE_20170712_214101.wav,F:\1181\NE,5.004
4,1.0,5.0,,south Great Basin,5,Great Basin,,4.2.1,192000,,,1,2017-07-12 21:41:48,NABAT-1181NE_20170712_214148.wav,F:\1181\NE,5.004


#### A quick plot of the classification results

In [5]:

data = df[(df.Species_Manual_ID!='') & (df.Species_Auto_ID!='')]

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.scatter(data.Species_Auto_ID, data.Species_Manual_ID)

right= data[data.Species_Manual_ID==data.Species_Auto_ID]
ax.scatter(right.Species_Auto_ID, right.Species_Manual_ID, color='red')

ax.set_xlabel('Auto ID result')
ax.set_ylabel('Manual Result (Truth)')

plt.show()

<Figure size 640x480 with 1 Axes>

## Now let's convert this to the format that NABat needs

#### A couple of helper functions we'll need...

In [6]:
from datetime import datetime, timedelta

def monitoring_night(dt):
    # return the sample night from a date time
    # if after midnight return previous date
    if dt.hour < 12:
        monitoringnight = dt.date() - timedelta(days=1)
    else:
        monitoringnight = dt.date()
        
    return monitoringnight
    

def parse_nabat_fname(fname):
    """If a file is in the format specified by NABat (grtsid_sitename_date_time.wav)
    parse it and return the components.  
    
    Allows for some discrepansies an malformations
    """
    f = Path(fname)
    name = f.stem
    if name.lower().startswith('nabat'):
        name = name[5:]
    if name.startswith('-'):
        name = name [1:]
        
    print()
        
    digit = name[0]
    grtsid = ''
    while digit.isnumeric():
        name = name [1:]
        grtsid += digit
        digit = name[0]
        
    if name.startswith('-'):
        name = name [1:]
        
    sitename, datestr, timestr = name.split('_')
    dt = datetime.strptime('T'.join([datestr, timestr]), "%Y%m%dT%H%M%S")

    
    return(grtsid, sitename, dt, monitoring_night(dt))

In [7]:
template = pd.read_csv(r"C:\Users\talbertc\Downloads\Bulk_Stationary_Acoustic_Template.csv")

In [8]:
template_lookup = {k:'' for k in template.columns}

In [9]:
def get_softwaretype(row):
    if 'SB_Version' in row:
        sb_version = row['SB_Version']
        if sb_version.startswith('4.2'):
            return 'SonoBat 4.2'
        elif sb_version.startswith('4'):
            return 'SonoBat 4.x'
        elif sb_version.startswith('3'):
            return 'SonoBat 3.x'
        
        else:
            raise Exception ("Need to add handles for Kaleidoscope")
            
get_softwaretype(df.iloc[0])

'SonoBat 4.2'

In [10]:
species_list = "ANPA|CHME|CORA|COTO|EPFU|EUMA|EUFL|EUPE|EUUN|IDPH|LANO|LABL|LABO|LACI|LAEG|LAIN|LASE|LAXA|LENI|LEYE|MACA|MOMO|MOME|MYAU|MYAS|MYCA|MYCI|MYEV|MYGR|MYKE|MYLE|MYLU|MYSE|MYSO|MYTH|MYVE|MYVO|MYYU|NYHU|NYFE|NYMA|PAHE|PESU|TABR|ARJA|BRCA|DIEC|LAMI|MYAUR|MYAUS|MYOC|NOLE|STRU".split('|')

In [15]:
def get_nabat_row(guano_row):
    row = template_lookup.copy()
    fname = guano_row['filename']
    grtsid, sitename, dt, monitoring_night = parse_nabat_fname(fname)
    row['Grts Id'] = grtsid
    row['Location Name'] = sitename
    row['Activation Start Time'] = str(guano_row.Timestamp)
    row['Activation Stop Time'] = str(guano_row.Timestamp + timedelta(seconds=round(guano_row.seconds)))
    row['Detector'] = 'foo'
    row['Microphone'] ='bar'
    row['Audio Recording Name (*.wav *.zc)'] = fname
    row['Software Type'] = get_softwaretype(row)
    
    species = guano_row['Species_Auto_ID']
    if species.upper() in species_list: 
        row['Auto Id'] = species.upper()
#     else:
#         print(f'dropping species:{species}')
    
    species = guano_row['Species_Manual_ID']
    if species.upper() in species_list: 
        row['manual id'] = species.upper()
#     else:
#         print(f'dropping species:{species}')
    
    return row

In [16]:
get_nabat_row(df.iloc[10])




{'Grts Id': '1181',
 'Location Name': 'NE',
 'Latitude Decimal Degrees (NAD83)': '',
 'Longitude Decimal Degrees (NAD83)': '',
 'Activation Start Time': '2017-07-12 21:42:34',
 'Activation Stop Time': '2017-07-12 21:42:39',
 'Detector': 'foo',
 'Microphone': 'bar',
 'Microphone Orientation': '',
 'Microphone Height (meters)': '',
 'Distance to Nearest Clutter (meters)': '',
 'Clutter Type': '',
 'Distance to Nearest Water (meters)': '',
 'Water Type': '',
 'Percent Clutter': '',
 'Broad Habitat Type': '',
 'Audio Recording Name (*.wav *.zc)': 'NABAT-1181NE_20170712_214234.wav',
 'Software Type': None,
 'Auto Id': '',
 'manual Id': ''}

In [17]:
nabat_df = pd.DataFrame(list(df.apply(get_nabat_row, axis=1)))

nabat_df = nabat_df[template.columns]












































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































Unnamed: 0,Grts Id,Location Name,Latitude Decimal Degrees (NAD83),Longitude Decimal Degrees (NAD83),Activation Start Time,Activation Stop Time,Detector,Microphone,Microphone Orientation,Microphone Height (meters),Distance to Nearest Clutter (meters),Clutter Type,Distance to Nearest Water (meters),Water Type,Percent Clutter,Broad Habitat Type,Audio Recording Name (*.wav *.zc),Software Type,Auto Id,manual Id
0,1181,NE,,,2017-07-12 21:40:04,2017-07-12 21:40:09,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214004.wav,,,
1,1181,NE,,,2017-07-12 21:40:12,2017-07-12 21:40:15,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214012.wav,,,
2,1181,NE,,,2017-07-12 21:40:55,2017-07-12 21:41:00,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214055.wav,,,
3,1181,NE,,,2017-07-12 21:41:01,2017-07-12 21:41:06,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214101.wav,,,
4,1181,NE,,,2017-07-12 21:41:48,2017-07-12 21:41:53,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214148.wav,,,


In [20]:
nabat_df.head()

Unnamed: 0,Grts Id,Location Name,Latitude Decimal Degrees (NAD83),Longitude Decimal Degrees (NAD83),Activation Start Time,Activation Stop Time,Detector,Microphone,Microphone Orientation,Microphone Height (meters),Distance to Nearest Clutter (meters),Clutter Type,Distance to Nearest Water (meters),Water Type,Percent Clutter,Broad Habitat Type,Audio Recording Name (*.wav *.zc),Software Type,Auto Id,manual Id
0,1181,NE,,,2017-07-12 21:40:04,2017-07-12 21:40:09,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214004.wav,,,
1,1181,NE,,,2017-07-12 21:40:12,2017-07-12 21:40:15,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214012.wav,,,
2,1181,NE,,,2017-07-12 21:40:55,2017-07-12 21:41:00,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214055.wav,,,
3,1181,NE,,,2017-07-12 21:41:01,2017-07-12 21:41:06,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214101.wav,,,
4,1181,NE,,,2017-07-12 21:41:48,2017-07-12 21:41:53,foo,bar,,,,,,,,,NABAT-1181NE_20170712_214148.wav,,,
