In [1]:
from koogu.data import preprocess, feeder
from koogu.model import architectures
from koogu import train, assessments, recognize

from matplotlib import pyplot as plt           # used for plotting graphs


In [2]:
# List class-specific subdirectories to process
class_dirs = ['CrestedGibbons', 'GreyGibbons', 'noise']

# Root directory of the raw training audio (INPUT to pre-processing). It holds
# one subdirectory per entry in `class_dirs`.
raw_audio_dir = '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/data/AcousticData/Jahoo_trainingdata_manual_annotations_addDanum/'

# Path to the directory where pre-processed data will be written (OUTPUT).
# Directory will be created if it doesn't exist.
# Kept under its own name, distinct from the raw-audio root above, so that
# re-running this cell never feeds the prepared-data directory back in as
# `audio_root`.
prepared_audio_dir = '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/results/koogu_updated/multi/'

data_settings = {
    # Settings for handling raw audio
    'audio_settings': {
        # NOTE(review): the log below reports files with durations under this
        # value as "Ignoring" -- confirm that dropping them is intended.
        'clip_length': 12.0,
        'clip_advance': 0.4,
        'desired_fs': 32000
    },

    # Settings for converting audio to a time-frequency representation
    'spec_settings': {
        'win_len': 0.128,
        'win_overlap_prc': 0.75,
        'bandwidth_clip': [500, 3000]
    }
}

# Convert audio files into prepared data. The returned mapping gives the
# number of clips produced per class and is printed as a sanity check.
clip_counts = preprocess.from_top_level_dirs(
    data_settings['audio_settings'],
    class_dirs=class_dirs,
    audio_root=raw_audio_dir,
    output_root=prepared_audio_dir,
    negative_class_label='noise')

print(clip_counts)

/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/data/AcousticData/Jahoo_trainingdata_manual_annotations_addDanum/GreyGibbons/Gibbons_SW10_20180223_060002_6543.48121111204_6551.48388033511_.wav: duration = 8.0026875 s. Ignoring.
/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/data/AcousticData/Jahoo_trainingdata_manual_annotations_addDanum/GreyGibbons/Gibbons_SW10_20180223_060002_6609.60320306769_6615.605204985_.wav: duration = 6.0020625 s. Ignoring.
/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/data/AcousticData/Jahoo_trainingdata_manual_annotations_addDanum/GreyGibbons/Gibbons_SW10_20180223_060002_6750.55021475909_6757.75261705985_.wav: duration = 7.2024375 s. Ignoring.
/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/data/AcousticData/Jahoo_trainingdata_manual_annotations_addDanum/GreyGibbons/Gibbons_SW10_20180223_060002_6681.62722607536_6686.72892770507_.wav: duration = 5.10175 s. Ignoring.
/Volumes/DJC Files/Benchmarking_MS_Data/benc

{'CrestedGibbons': 213, 'GreyGibbons': 60, 'noise': 0}


In [3]:
# Directory holding the prepared clips. This is the same path that the
# preprocessing cell passed as `output_root` to preprocess.from_top_level_dirs.
prepared_audio_dir = '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/results/koogu_updated/multi/'

# Build the data feeder over the prepared clips. It is given the sampling rate
# used during preprocessing and the settings for converting audio to a
# time-frequency representation.
data_feeder = feeder.SpectralDataFeeder(
    prepared_audio_dir,                        # where the prepared clips are at
    data_settings['audio_settings']['desired_fs'],
    data_settings['spec_settings'],
    validation_split=0.3,                     # set aside 30% for validation
    max_clips_per_class=20000                  # use up to 20k inputs per class
)

# Prints only the object's default repr; it confirms the feeder was built.
print(data_feeder)

<koogu.data.feeder.SpectralDataFeeder object at 0x34a6eb430>


In [None]:
# Network architecture to train
model = architectures.DenseNet(
    [4, 4, 4],                                 # 3 dense-blocks, 4 layers each
    preproc=[ ('Conv2D', {'filters': 16}) ],   # Add a 16-filter pre-conv layer
    dense_layers=[32]                          # End with a 32-node dense layer
)

# Settings that control the training process
training_settings = {
    'batch_size': 64,
    'epochs': 80,                              # run for 80 epochs

    # Start with a learning rate of 0.01, and drop it to a tenth of its value,
    # successively, at epochs 20 & 40.
    'learning_rate': 0.01,
    'lr_change_at_epochs': [20, 40],
    'lr_update_factors': [1.0, 1e-1, 1e-2],    # up to 20, beyond 20, beyond 40

    'dropout_rate': 0.05                       # Helps model generalize better
}

# Path to the directory where model files will be written
# NOTE(review): this is the same directory the prepared clips were written to;
# confirm that mixing model files with prepared data there is intended.
model_dir = '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/results/koogu_updated/multi/'

# Perform training
# The returned history is indexed below by metric name to draw the curves.
history = train(
    data_feeder,
    model_dir,
    data_settings,
    model,
    training_settings
)

# Plot training & validation history
# Top panel: accuracy; bottom panel: loss on a log scale. The two panels share
# the epoch axis. Red = training curve, green = validation curve.
fig, ax = plt.subplots(2, sharex=True, figsize=(12, 9))
ax[0].plot(
    history['train_epochs'], history['binary_accuracy'], 'r',
    history['eval_epochs'], history['val_binary_accuracy'], 'g')
ax[0].set_ylabel('Accuracy')
ax[1].plot(
    history['train_epochs'], history['loss'], 'r',
    history['eval_epochs'], history['val_loss'], 'g')
ax[1].set_yscale('log')
ax[1].set_xlabel('Epoch')
ax[1].set_ylabel('Loss')
plt.show()

Data: 3 classes, 191 training & 82 eval samples
Model: "DenseNet"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 321, 372)]           0         []                            
                                                                                                  
 tf.expand_dims (TFOpLambda  (None, 321, 372, 1)          0         ['input_1[0][0]']             
 )                                                                                                
                                                                                                  
 Pre_Conv (Conv2D)           (None, 321, 372, 16)         144       ['tf.expand_dims[0][0]']      
                                                                                                  
 B1_CF1_BatchNorm (BatchNor  (None, 321, 37

  data_feeder.training_samples /


Epoch 1/80
3/3 - 33s - loss: 0.5278 - binary_accuracy: 0.5672 - lr: 0.0100 - 33s/epoch - 11s/step
Epoch 2/80


In [None]:
from koogu import recognize

# Audio to analyse: a single file, or a directory that may contain
# subdirectories.
test_audio_root = '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/data/AcousticData/Jahoo_testdata_clips/ImagesIgnoreWindowsWavs/'

# Output directory for the raw detections
raw_detections_root = '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/results/koogu_updated/multi/detections/'

# Threshold value forwarded to recognize()
chosen_threshold = 0.1

# All keyword arguments for the recognition run, gathered in one place.
recognition_options = {
    'model_dir': '/Volumes/DJC Files/Benchmarking_MS_Data/benchmarking_zenodo/results/koogu_updated/multi/',
    'audio_root': test_audio_root,
    'output_dir': raw_detections_root,
    'threshold': chosen_threshold,
    # Increasing this may improve speed on computers having higher resources
    'batch_size': 64,
    # Process subdirectories also
    'recursive': True,
    'show_progress': True,
}

# Run the trained model over the test audio.
recognize(**recognition_options)