In [1]:
%load_ext autoreload
%autoreload 2


## Step 2 (we have a set of clean files with good file names, we also have a set of site/deployment metadata in the NABat bulk upload csv format)

### We want to update the GuanoMD in each file to contain all of the information we have for that recording.
### We will be linkinging the individual files with the site/deployment metadata by the GRTS id and sitename


In [1]:
import time
from pathlib import Path
import pandas as pd

from datetime import datetime, timedelta

from guano import GuanoFile

from osgeo import gdal
import nabatpy

In [2]:
all_site_md_fname = r"Z:\TSH\DD274_NABat\CNHP_data_processing\Source\CO_NABat\CO_NABAT_2017_Stationary_Acoustic_deployment_md_2019Apr19_final.csv"
all_site_md = nabatpy.utils.bulkupload_to_df(all_site_md_fname).set_index(['grts_cell_id', 'location_name'])

all_site_md.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,latitude,longitude,start_time,end_time,detector,microphone,microphone_orientation,microphone_height,distance_to_nearest_clutter,clutter_type,distance_to_nearest_water,water_type,percent_clutter,broad_habitat_type,audio_recording_name,software_type
grts_cell_id,location_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2,NW,39.290652,-108.821024,2017-06-28,2017-07-01,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 4.x
2,SW,39.279238,-108.775622,2017-06-28,2017-07-01,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 4.x
2,NE,39.332177,-108.748456,2017-06-28,2017-07-01,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 4.x
2,SE,39.271585,-108.72806,2017-06-28,2017-07-01,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 4.x
61,NE,38.052719,-104.448246,2017-06-21,2017-06-24,WILDLIFE ACOUSTICS SM2Bat192,Wildlife Acoustics SMX-US,,3.0,,vegetation,,,,prairie,FS,Sonobat 4.x


In [3]:
input_dname = r"D:\CNHP_Output\2017"

#### This dictionary contains some Project Level metadata items that will be consistent across all of these files

In [4]:
project_md = {'sample_frame':'conus',
             'project_name':'Colorado NABat Monitoring',
             'project_id':33,
             'land_unit_code':'',
             'contact_info':'Jeremy Siemers, Colorado Natural Heritage Program',
             'classifier':'south Great Basin'}

to_remove = []

In [5]:
def pull_site_md(fname):
    # returns a dictionary of all of the site/deployment MD associated with a single file name
    parts = nabatpy.utils.parse_nabat_fname(fname)
    site_md_row = all_site_md.loc[int(parts['GrtsId'])].loc[parts['SiteName']]
       
    site_md = {}
    site_md['latitude'] = site_md_row['latitude']
    site_md['longitude'] = site_md_row['longitude']
    site_md['grts_cell_id'] = parts['GrtsId']
    site_md['location_name'] = parts['SiteName']
    site_md['detector'] = site_md_row['detector']
    site_md['microphone'] = site_md_row['microphone']
    site_md['microphone_height'] = site_md_row['microphone_height']
    site_md['distance_to_nearest_clutter'] = site_md_row['distance_to_nearest_water']
    site_md['clutter_type'] = site_md_row['clutter_type']
    site_md['broad_habitat_type'] = site_md_row['broad_habitat_type']
    
    start, stop = nabatpy.utils.get_auto_times(fname)
    site_md['start_time'] = nabatpy.utils.time_to_timestr(start)
    site_md['end_time'] = nabatpy.utils.time_to_timestr(stop)
    
    return site_md


In [32]:
all_site_md.iloc[all_site_md.index.get_level_values('grts_cell_id') == 2461]

Unnamed: 0_level_0,Unnamed: 1_level_0,latitude,longitude,start_time,end_time,detector,microphone,microphone_orientation,microphone_height,distance_to_nearest_clutter,clutter_type,distance_to_nearest_water,water_type,percent_clutter,broad_habitat_type,audio_recording_name,software_type
grts_cell_id,location_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2461,Redlands,39.063848,-108.649977,2017-06-27,2017-06-30,WILDLIFE ACOUSTICS SM4Bat-FS,Wildlife Acoustics SMM-U1,,3.0,,building or other man-structure,,,,agriculture,FS,Sonobat 4.x
2461,Ribbon,39.0096,-108.620676,2017-06-27,2017-06-30,WILDLIFE ACOUSTICS SM4Bat-FS,Wildlife Acoustics SMM-U1,,1.5,,vegetation,,,,shrubland,FS,Sonobat 4.x
2461,WLSWA,39.079615,-108.579023,2017-06-27,2017-06-30,WILDLIFE ACOUSTICS SM4Bat-FS,Wildlife Acoustics SMM-U1,,3.0,,vegetation,,,,forest-deciduous,FS,Sonobat 4.x
2461,CLSP,39.082878,-108.610485,2017-06-27,2017-06-30,WILDLIFE ACOUSTICS SM4Bat-FS,Wildlife Acoustics SMM-U1,,3.0,,vegetation,,,,forest-deciduous,FS,Sonobat 4.x


In [31]:
def get_row_from_fname(fname):
    # returns an ordered dictionary of all of the metadata we have for a single file
    row = nabatpy.utils.get_empty_row(version=2)
    site_md = pull_site_md(fname)
    row.update(site_md)
    row.update(project_md)
    
    row['audio_recording_name'] = Path(fname).name
    
    parts = nabatpy.utils.parse_nabat_fname(fname)
    row['grts_cell_id'] = parts['GrtsId']
    row['location_name'] = parts['SiteName']
    
    return row

# test it:
get_row_from_fname(r"D:\CNHP_Output\2017\2\NE\2_NE_20170628_211252.wav")

OrderedDict([('grts_cell_id', '2'),
             ('location_name', 'NE'),
             ('latitude', 39.33217724),
             ('longitude', -108.74845649999999),
             ('start_time', '2017-06-28T20:50:58'),
             ('end_time', '2017-06-29T05:15:45'),
             ('detector', 'WILDLIFE ACOUSTICS SM2Bat+'),
             ('microphone', 'Wildlife Acoustics SMX-US'),
             ('microphone_orientation', ''),
             ('microphone_height', 2.0),
             ('distance_to_nearest_clutter', nan),
             ('clutter_type', 'vegetation'),
             ('distance_to_nearest_water', ''),
             ('water_type', ''),
             ('percent_clutter', ''),
             ('broad_habitat_type', 'shrubland'),
             ('audio_recording_name', '2_NE_20170628_211252.wav'),
             ('software_type', ''),
             ('auto_id', ''),
             ('manual_id', ''),
             ('project_name', 'Colorado NABat Monitoring'),
             ('project_id', 33),
           

In [9]:
from nabatpy.utils import parse_nabat_fname, row_lookup_v2

def update_single_md(fname, to_delete=[]):
    # Updates the guano MD for a single file.
    # to_delete is a list of guano tags that we want to delete (This will be empty unless a previous run has added something unwanted to the MD)
    
    g = GuanoFile(fname)

    for thing in to_delete:
        try:
            del g[thing]
        except KeyError:
            pass
        
    row = get_row_from_fname(fname)
    for k,v in row.items():
#         print(k, v)
        if pd.isna(v):
            v = ''
        
        nabat_tag = row_lookup_v2[row_lookup_v2.df_columns==k]['nabat_tag'].iloc[0]
        if nabat_tag.startswith('NABat|'):
            g[nabat_tag] = v
            
    g.write(make_backup=False)
    
    
# test it:
update_single_md(r"D:\CNHP_Output\2017\2\NE\2_NE_20170628_211252.wav")

In [36]:
def update_all_md(dname, redo_all=False):
    input_dir = Path(dname)
    
    wavs = list(input_dir.glob("**/*.wav"))
    
    from ipywidgets import FloatProgress, Button
    from IPython.display import display
    fp = FloatProgress(min=0, max=len(wavs))
    label = Button(description = '...')
    label.style.button_color='#ffffcc'
    label.description = 'warming up...'
    display(fp, label)
    
    
    last_grt = ''
    print(len(wavs))
    
    for i, wav in enumerate(wavs):
        if i%100 == 0:
            parts = nabatpy.utils.parse_nabat_fname(str(wav))
            label.description = f"{parts['GrtsId']} {parts['SiteName']} {i}"
            
        try:        
            g = GuanoFile(wav)
        except:
            print(f"Problem with guanoMD in {wav}")
            g is None    
        
        if redo_all or g is None or not 'NABat' in g.get_namespaces():
            label.style.button_color='lightgreen'
            guano_md = None 
            
            try:
                update_single_md(fname=str(wav), to_delete=to_remove)
            except Exception as e:
                print(f"There was a problem with:\n\t{wav}")

                import traceback
                traceback.print_exc()
                dropped_drive = False
                while not wav.parent.exists():
                    dropped_drive = True
                    print('waiting for drive to reconect ...')
                    time.sleep(30)

                if dropped_drive:
                    try:
                        update_single_md(fname=str(wav), to_delete=to_remove)
                    except:
                        print(f"still didn't work")
        else:
            label.style.button_color = '#ffffcc'
            
        fp.value += 1
 

In [37]:
update_all_md(input_dname, redo_all=False)

FloatProgress(value=0.0, max=136697.0)

Button(description='warming up...', style=ButtonStyle(button_color='#ffffcc'))

136697
There was a problem with:
	D:\CNHP_Output\2017\2717\SE\2717_SE_20170805_045044.wav


Traceback (most recent call last):
  File "C:\Anaconda3\envs\pymdwizard\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'SE'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "<ipython-input-36-0e4893b01a5e>", line 34, in update_all_md
    update_single_md(fname=str(wav), to_delete=to_remove)
  File "<ipython-input-9-927894f73fd8>", line 15, in update_single_md
    row = get_row_from_fname(fname)
  File "<ipython-input-31-f851ca1efe05>", line 4, in get_row_from_fname

Problem with guanoMD in D:\CNHP_Output\2017\509\SE\509_SE_20170609_221438.wav
