In [1]:
#============================== EXPERIMENT IMPORTS ==============================
#===== Standard imports
import warnings                   # This block prevents display of harmless warnings, but should be
warnings.filterwarnings('ignore') # commented out till the final version, to avoid missing "real" warnings 

#===== 3rd party imports
# None

#===== Repository imports
import proxycodelib               # Mandatory. Allow access to shared python code in the upper 'codelib' directory
from jupytools import mooltipath  # Magic absolute path builder
from dataset import Dataset

#============================= EXPERIMENT PARAMETERS =============================
# Location of the initial annotated dataset (audio and lab files).
# It is handed to build_dataset_labs() and build_dataset_chunks() further down.
INPUT_PATH = 'D:/datasets/sounds/Nolasco'

# Dataset object every chunker helper in this notebook operates on.
# NOTE(review): the recorded outputs below show a dataset path ending in
# 'MAIN1000', so they were presumably produced with another DATASET value
# than 'TEST' -- confirm and re-run the notebook from a fresh kernel.
DATASET = Dataset('TEST')

In [3]:
# Load the manifest of DATASET and echo each of its six fields.
from chunker import load_dataset_manifest

(dataset_path, filenames, sr,
 duration, overlap, chunks_md5) = load_dataset_manifest(DATASET)

# One (label, value) pair per output line, in manifest order. Labels are
# padded by hand so that the colons line up in the cell output.
manifest_report = (
    ('DATASET PATH   :', dataset_path),
    ('NB AUDIO FILES :', len(filenames)),   # only the count, not the names
    ('SAMPLE RATE    :', sr),
    ('DURATION       :', duration),
    ('OVERLAP        :', overlap),
    ('CHUNKS MD5     :', chunks_md5),
)
for field_label, field_value in manifest_report:
    print(field_label, field_value)

DATASET PATH   : D:\Jupyter\ShowBees\datasets\MAIN1000
NB AUDIO FILES : 48
SAMPLE RATE    : 22050
DURATION       : 1.0
OVERLAP        : 0.0
CHUNKS MD5     : 0ceac55bcc5572c597994d08f09df48d


In [4]:
# Run the lab-building step of the chunker for DATASET, reading from INPUT_PATH.
# NOTE(review): the recorded run returned 48, the same figure the manifest
# cell reports as number of audio files -- presumably a count of lab files
# handled; confirm against chunker.build_dataset_labs.
from chunker import build_dataset_labs

labs_result = build_dataset_labs(DATASET, INPUT_PATH)
labs_result  # last expression of the cell, so the notebook displays it

48

In [2]:
# Cut the target dataset into chunks, then summarise what the call returned.
# In the recorded run the chunk directory already existed, so the helper only
# logged a checksum verification and "Nothing to be done".
from chunker import build_dataset_chunks

chunking_result = build_dataset_chunks(DATASET, INPUT_PATH)
chunk_dir, nb_files, nb_chunks, md5h = chunking_result

# One (caption, value) pair per printed line; captions are hand-padded so the
# colons stay aligned.
chunking_report = (
    ("OUTPUT DIR      :", chunk_dir),
    ("PROCESSED FILES :", nb_files),
    ("CHUNKS BUILT    :", nb_chunks),
    ("MD5 HASH        :", md5h),
)
for caption, reported in chunking_report:
    print(caption, reported)

In features 
 ['D:\\Jupyter\\ShowBees\\experiments\\EXP002', 'D:\\Anaconda3\\python37.zip', 'D:\\Anaconda3\\DLLs', 'D:\\Anaconda3\\lib', 'D:\\Anaconda3', '', 'C:\\Users\\jp\\AppData\\Roaming\\Python\\Python37\\site-packages', 'D:\\Anaconda3\\lib\\site-packages', 'D:\\Anaconda3\\lib\\site-packages\\win32', 'D:\\Anaconda3\\lib\\site-packages\\win32\\lib', 'D:\\Anaconda3\\lib\\site-packages\\Pythonwin', 'D:\\Anaconda3\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\jp\\.ipython', 'D:\\Jupyter\\ShowBees\\codelib']
[2020-07-28 10:36:27 RAM77.5% 0.14GB] Dataset chunk directory already exists and is not empty.
[2020-07-28 10:36:27 RAM77.5% 0.14GB] Checking checksum...
[2020-07-28 10:36:41 RAM76.6% 0.14GB] Checksum is ok. Nothing to be done
OUTPUT DIR      : D:\Jupyter\ShowBees\datasets\MAIN1000\chunks
PROCESSED FILES : 48
CHUNKS BUILT    : 24788
MD5 HASH        : 0ceac55bcc5572c597994d08f09df48d


In [4]:
# Ask the chunker whether DATASET passes its validity check.
# proxycodelib is already imported by the first cell; importing it again is
# harmless because Python returns the cached module.
import proxycodelib
from chunker import is_valid_dataset

dataset_ok = is_valid_dataset(DATASET)
dataset_ok  # displayed as the cell result (True in the recorded run)

True

In [16]:
# Run the threshold-building step of the chunker for DATASET.
# NOTE(review): what the three values mean is defined inside
# chunker.build_dataset_thresholds, which is not visible from this notebook.
from chunker import build_dataset_thresholds

threshold_values = [0, .5, 1]
build_dataset_thresholds(DATASET, threshold_values)

[2020-07-27 20:48:59 RAM77.5% 0.09GB] CF001 - Missing Queen - Day -
[2020-07-27 20:48:59 RAM77.5% 0.09GB] GH001 - Active - Day - 141022_0659_0751
[2020-07-27 20:49:06 RAM77.2% 0.09GB] CF003 - Active - Day - (214)
[2020-07-27 20:49:07 RAM77.2% 0.09GB] CF003 - Active - Day - (215)
[2020-07-27 20:49:07 RAM77.2% 0.09GB] CF003 - Active - Day - (216)
[2020-07-27 20:49:08 RAM77.2% 0.09GB] CF003 - Active - Day - (217)
[2020-07-27 20:49:08 RAM77.2% 0.09GB] CF003 - Active - Day - (218)
[2020-07-27 20:49:09 RAM77.2% 0.09GB] CF003 - Active - Day - (219)
[2020-07-27 20:49:09 RAM77.1% 0.09GB] CF003 - Active - Day - (220)
[2020-07-27 20:49:09 RAM77.1% 0.09GB] CF003 - Active - Day - (221)
[2020-07-27 20:49:10 RAM77.1% 0.09GB] CF003 - Active - Day - (222)
[2020-07-27 20:49:10 RAM77.1% 0.09GB] CF003 - Active - Day - (223)
[2020-07-27 20:49:11 RAM77.1% 0.09GB] CF003 - Active - Day - (224)
[2020-07-27 20:49:11 RAM77.1% 0.09GB] CF003 - Active - Day - (225)
[2020-07-27 20:49:11 RAM77.1% 0.09GB] CF003 - Acti

In [18]:
# Build the labels of DATASET and look at the result.
# proxycodelib is already imported by the first cell; importing it again is
# harmless because Python returns the cached module.
import proxycodelib
from chunker import build_dataset_labels

# Judging by the recorded output, the call returns a list of strings shaped
# like '<audio file name>_chunkNNNN'.
# NOTE(review): what './workdir' is used for is decided inside
# chunker.build_dataset_labels -- not visible from this notebook.
liste = build_dataset_labels(DATASET, './workdir')

# Displaying the whole list floods the notebook (the chunk-building cell
# reports 24788 chunks for this dataset). Show the size and a short preview
# instead; the complete list stays available in `liste`.
# Slicing never raises, even when the list holds fewer than 20 entries.
print('NB ENTRIES :', len(liste))
liste[:20]

['CF001 - Missing Queen - Day -_chunk0108',
 'CF001 - Missing Queen - Day -_chunk0109',
 'CF001 - Missing Queen - Day -_chunk0110',
 'CF001 - Missing Queen - Day -_chunk0111',
 'CF001 - Missing Queen - Day -_chunk0112',
 'CF001 - Missing Queen - Day -_chunk0113',
 'CF001 - Missing Queen - Day -_chunk0114',
 'CF001 - Missing Queen - Day -_chunk0115',
 'GH001 - Active - Day - 141022_0659_0751_chunk0603',
 'GH001 - Active - Day - 141022_0659_0751_chunk0604',
 'GH001 - Active - Day - 141022_0659_0751_chunk0605',
 'GH001 - Active - Day - 141022_0659_0751_chunk0606',
 'GH001 - Active - Day - 141022_0659_0751_chunk0607',
 'GH001 - Active - Day - 141022_0659_0751_chunk0608',
 'GH001 - Active - Day - 141022_0659_0751_chunk0609',
 'GH001 - Active - Day - 141022_0659_0751_chunk0610',
 'GH001 - Active - Day - 141022_0659_0751_chunk0611',
 'GH001 - Active - Day - 141022_0659_0751_chunk0612',
 'GH001 - Active - Day - 141022_0659_0751_chunk0613',
 'GH001 - Active - Day - 141022_0659_0751_chunk0688',
