In [1]:
%load_ext autoreload
%autoreload 2

## Step 1

* Make a working copy of the files in the source directory in our output directory.
* The filenames of these will be normalized to match the NABat file naming convention.
* The original file name will be inserted into the GuanoMD of each file.
* Files in a subfolder called 'Noise' will be skipped.

In [1]:
import time

from osgeo import gdal
import nabatpy

from pathlib import Path
import shutil

from guano import GuanoFile

# these are just being used to display a nice progress bar in the notebook.
# they would not be needed in other contexts (script, service, desktop application)
# if they were removed update our main function accordingly.
from ipywidgets import FloatProgress
from IPython.display import display

In [4]:
# We were having an issue with the network drive we're working off of dropping off and this stopping the script.
# This function was used to make the process more robust.
def wait_for_network_copy(in_file, out_file, retry_delay=30, max_retries=None):
    """Copy ``in_file`` to ``out_file``, retrying while the copy keeps failing.

    Parameters
    ----------
    in_file : path-like
        File to copy (converted with ``str`` before copying).
    out_file : pathlib.Path
        Destination file; ``out_file.parent`` is created when missing.
    retry_delay : float, optional
        Seconds to sleep between attempts (default 30, the previous
        hard-coded value).
    max_retries : int or None, optional
        ``None`` (default) retries forever, as before.  Otherwise the last
        ``OSError`` is re-raised once this many retries have failed.

    Raises
    ------
    OSError
        Only when ``max_retries`` is set and has been exhausted.
    """
    failures = 0
    while True:
        try:
            # Re-create the destination directory on *every* attempt.  The
            # previous version only waited for it to appear, which could
            # never happen when the first mkdir itself had failed.
            out_file.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(str(in_file), str(out_file))
            return
        except OSError:
            # Only I/O failures are retried; programming errors propagate.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print('waiting for drive to reconect ...')
            time.sleep(retry_delay)

        
# This function contains all of our steps.
def cleanup_fnames(indname, outdname, max_attempts=3, retry_delay=30):
    """Make copies of each file to the output directory, with names normalized.

    Every ``*.wav`` found recursively under ``indname`` is parsed with
    ``nabatpy.utils.parse_nabat_fname`` and copied to
    ``outdname/<GrtsId>/<SiteName>/<correct_fname>``.  Files whose immediate
    parent folder name contains 'noise' (case-insensitive) are not copied,
    and a previously written copy of such a file is deleted.  The original
    location (``grandparent/parent/filename``) is stored in the copy's GUANO
    metadata under 'Original Filename'.

    Parameters
    ----------
    indname : str or path-like
        Source directory that is searched recursively for wav files.
    outdname : str or path-like
        Root of the output directory tree.
    max_attempts : int, optional
        How many times the metadata update is tried per file before the
        file is reported as failed and skipped.  The previous version
        retried forever without pausing when the error was persistent.
    retry_delay : float, optional
        Seconds between checks while the output sub-directory is missing
        (default 30, the previous hard-coded value).
    """
    import traceback

    output_dir = Path(outdname)
    input_dir = Path(indname)

    wavs = list(input_dir.glob("**/*.wav"))

    # Notebook-only progress bar.
    fp = FloatProgress(min=0, max=len(wavs))
    display(fp)

    for wav in wavs:
        parts = nabatpy.utils.parse_nabat_fname(wav)
        subdir = output_dir.joinpath(parts['GrtsId'], parts['SiteName'])
        out_file = subdir.joinpath(parts['correct_fname'])

        if 'noise' in wav.parent.name.lower():
            # Noise files are not wanted in the output; remove stale copies.
            if out_file.exists():
                print(f"Deleting: {out_file.name}")
                out_file.unlink()
        elif not out_file.exists():
            wait_for_network_copy(wav, out_file)

        if out_file.exists():
            original_name = "/".join([wav.parent.parent.name, wav.parent.name, wav.name])
            last_error = None
            for _ in range(max_attempts):
                try:
                    g = GuanoFile(out_file)
                    g['Original Filename'] = original_name
                    g.write(make_backup=False)
                    break
                except ValueError:
                    # Treated as non-retryable, as before: report and move on.
                    # NOTE(review): presumably raised by guano for files it
                    # cannot parse - confirm.
                    print('ve:', out_file)
                    break
                except Exception as e:
                    traceback.print_exc()
                    last_error = e
                    # If the output location vanished (drive dropped), wait
                    # for it to come back before the next attempt.
                    while not subdir.exists():
                        print('waiting for drive to reconect ...')
                        time.sleep(retry_delay)
            else:
                # Every attempt failed with the directory present: give up on
                # this file instead of spinning forever.
                print('failed', out_file, last_error)

        fp.value += 1

            

In [3]:
# Source directory holding the 2016 acoustic recordings, and the output
# directory that receives the normalized working copies.
input_dname = r"Z:\TSH\DD274_NABat\CNHP_data_processing\Source\CO_NABat\CO 2016 acoustic"
output_dname = r"D:\CNHP_Output\2016"

In [None]:
# Run the copy / rename / metadata step on the directories assigned in the previous cell.
cleanup_fnames(input_dname, output_dname)

FloatProgress(value=0.0, max=152941.0)

### Do the Same for 2017

In [None]:
# Same as for 2016, but pointing at the 2017 source and output directories.
# Note: this rebinds input_dname / output_dname used by the 2016 cells.
input_dname = r"Z:\TSH\DD274_NABat\CNHP_data_processing\Source\CO_NABat\CO 2017 acoustic"
output_dname = r"D:\CNHP_Output\2017"

In [None]:
# Process 2017; relies on input_dname / output_dname having been reassigned in the cell above.
cleanup_fnames(input_dname, output_dname)

##### Dang it.  There are typos in some of the site names.  For the crosswalk to work, the filenames must exactly match the values in the bulk upload form.

We need to rename 338 SF to SE, 2461 REDLAN to Redlands, and 2461 RIBBON to Ribbon

Other options here would be to update the bulk upload form to reflect the names in the files.  We could also make the other notebooks use case-insensitive matching, which would handle the RIBBON vs Ribbon case.

In [14]:
# Correct the site name 'SF' -> 'SE' for GRTS cell 338.
d = Path(r"D:\CNHP_Output\2017\338\SE")

# If the directory still carries the misspelled name, rename it first (the
# original cell only had a commented-out, non-working attempt at this).
misnamed_dir = d.with_name('SF')
if misnamed_dir.exists() and not d.exists():
    misnamed_dir.rename(d)

# Take a snapshot of the listing before renaming so the directory is not
# modified while it is being iterated.
wavs = list(d.glob("**/*.wav"))
for wav in wavs:
    # Only touch the file name, never other components of the path.
    corrected_name = wav.name.replace('SF', 'SE')
    if corrected_name != wav.name:
        wav.rename(wav.with_name(corrected_name))

In [10]:
# Correct the site name 'REDLAN' -> 'Redlands' for GRTS cell 2461:
# first the site directory, then every wav file inside it.
d = Path(r"D:\CNHP_Output\2017\2461\REDLAN")
corrected_dir = d.with_name(d.name.replace('REDLAN', 'Redlands'))

# Guarded so that re-running the cell after a successful rename does not
# raise FileNotFoundError.
if d.exists():
    d.rename(corrected_dir)
d = corrected_dir

# Take a snapshot of the listing before renaming so the directory is not
# modified while it is being iterated.
wavs = list(d.glob("**/*.wav"))
for wav in wavs:
    # Only touch the file name, never other components of the path.
    corrected_name = wav.name.replace('REDLAN', 'Redlands')
    if corrected_name != wav.name:
        wav.rename(wav.with_name(corrected_name))
    


D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_210603.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_210646.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_210825.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_210938.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_210945.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211056.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211314.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211440.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211515.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211552.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211639.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211720.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211731.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211750.wav
D:\CNHP_Output\2017\2461\Redlands\2461_REDLAN_20170627_211851.wav
D:\CNHP_Ou

In [9]:
# Arg! Because of the way Windows handles case-insensitive file names, we rename these twice
# (RIBBON -> Ribbonx -> Ribbon) to correct just the case.  Maybe there's a better way to handle this?
site_root = Path(r"D:\CNHP_Output\2017\2461")

for old_name, new_name in (('RIBBON', 'Ribbonx'), ('Ribbonx', 'Ribbon')):
    d = site_root / old_name
    # Guarded so a stage whose source directory is absent is skipped instead
    # of raising FileNotFoundError.
    if d.exists():
        d.rename(site_root / new_name)
    d = site_root / new_name

    # Take a snapshot of the listing before renaming so the directory is not
    # modified while it is being iterated.
    wavs = list(d.glob("**/*.wav"))
    for wav in wavs:
        # Only touch the file name, never other components of the path.
        corrected_name = wav.name.replace(old_name, new_name)
        if corrected_name != wav.name:
            wav.rename(wav.with_name(corrected_name))