In [1]:
from ava.segmenting.utils import copy_segments_to_standard_format
from ava.preprocessing.utils import get_spec # makes spectrograms
from ava.models.vae import X_SHAPE # spectrogram dimensions
from ava.preprocessing.preprocess import tune_syll_preprocessing_params

In [2]:
# Print the signature and docstring of the segment-conversion helper so the
# arguments used in the next cell can be checked against it.
help(copy_segments_to_standard_format)

Help on function copy_segments_to_standard_format in module ava.segmenting.utils:

copy_segments_to_standard_format(orig_seg_dirs, new_seg_dirs, seg_ext, delimiter, usecols, skiprows, max_duration=None)
    Copy onsets/offsets from SAP, MUPET, or Deepsqueak into a standard format.

    Note
    ----
    - `delimiter`, `usecols`, and `skiprows` are all passed to `numpy.loadtxt`.

    Parameters
    ----------
    orig_seg_dirs : list of str
            Directories containing original segments.
    new_seg_dirs : list of str
            Corresponding directories for new segments.
    seg_ext : str
            Input filename extension.
    delimiter : str
            Input filename delimiter. For a CSV file, for example, this would be a
            comma: `','`
    usecols : tuple
            Input file onset and offset columns, zero-indexed.
    skiprows : int
            Number of rows to skip. For example, if there is a single-line header
            set `skiprows=1`.
    max_duration 

In [3]:
# Copy onsets/offsets from the original segmentation files into the standard
# format via `copy_segments_to_standard_format`.
# NOTE: these are machine-specific absolute Windows paths; edit them before
# running the notebook on another machine.
# The original literals began with "C:\\\\Users", i.e. a doubled separator
# after the drive letter, unlike every other path in this notebook. They are
# normalized here so all directory strings share one form.
orig_seg_dirs = [r"C:\Users\door1\Documents\Work\Frankfurt\autoencoded-vocal-analysis\data\c2_br"]
new_seg_dirs = [r"C:\Users\door1\Documents\Work\Frankfurt\autoencoded-vocal-analysis\data\c2_br_out"]

seg_ext = '.txt'    # input filename extension
delimiter = '\t'    # input files are tab-delimited
usecols = (0, 1)    # onset and offset columns, zero-indexed
skiprows = 0        # skip no rows (presumably the files have no header)

copy_segments_to_standard_format(
    orig_seg_dirs, new_seg_dirs, seg_ext, delimiter, usecols, skiprows
)


In [4]:
# Preprocessing configuration passed to the syllable preprocessing functions.
preprocess_params = {
    # Spectrogram construction.
    'get_spec': get_spec,  # function that makes the spectrograms
    'max_dur': 0.4,  # maximum syllable duration
    'min_freq': 2000,  # minimum frequency
    'max_freq': 12000,  # maximum frequency
    'num_freq_bins': X_SHAPE[0],  # hard-coded: taken from the VAE input shape
    'num_time_bins': X_SHAPE[1],  # hard-coded: taken from the VAE input shape
    'nperseg': 1024,  # FFT
    'noverlap': 512,  # FFT
    'spec_min_val': 0.1,  # minimum log-spectrogram value
    'spec_max_val': 6.0,  # maximum log-spectrogram value
    'fs': 44100,  # audio samplerate
    # Spectrogram options.
    'mel': False,  # frequency spacing: mel (True) or linear (False)
    'time_stretch': True,  # stretch short syllables?
    # Normalize spectrogram values on a spectrogram-by-spectrogram basis?
    'within_syll_normalize': False,
    # Dataset size limits.
    'max_num_syllables': None,  # maximum number of syllables per directory
    'sylls_per_file': 20,  # syllables per file
    # Names of the tunable parameters, grouped by value type.
    'real_preprocess_params': (
        'min_freq', 'max_freq', 'spec_min_val', 'spec_max_val', 'max_dur',
    ),
    'int_preprocess_params': ('nperseg', 'noverlap'),
    'binary_preprocess_params': (
        'time_stretch', 'mel', 'within_syll_normalize',
    ),
}


In [5]:
# Directories containing audio.
audio_dirs = [
    r"C:\Users\door1\Documents\Work\Frankfurt\autoencoded-vocal-analysis\data\audio",
]
# Directories containing onset/offset decisions: reuse the standard-format
# segment directories defined earlier in the notebook.
seg_dirs = new_seg_dirs
# Parameter tuning step, left disabled:
#preprocess_params = tune_syll_preprocessing_params(audio_dirs, seg_dirs, preprocess_params)

In [6]:
from itertools import repeat

from joblib import Parallel, delayed

from ava.preprocessing.preprocess import process_sylls

# Output directories for the spectrograms, one per audio directory.
spec_dirs = [
    r"C:\Users\door1\Documents\Work\Frankfurt\autoencoded-vocal-analysis\data\specs",
]

# Pair each audio/segment/spectrogram directory triple with the shared
# preprocessing parameters, then run `process_sylls` on each in parallel.
gen = zip(audio_dirs, seg_dirs, spec_dirs, repeat(preprocess_params))
Parallel(n_jobs=4)(
    delayed(process_sylls)(audio_dir, seg_dir, spec_dir, params)
    for audio_dir, seg_dir, spec_dir, params in gen
)

[None]

In [7]:
from ava.models.vae_dataset import (
    get_syllable_data_loaders,
    get_syllable_partition,
)

# 80/20 train/test split.
split = 0.8

# Build a random train/test partition over the spectrogram directories,
# then wrap it in Dataloaders.
partition = get_syllable_partition(spec_dirs, split)
loaders = get_syllable_data_loaders(partition)

In [8]:
from ava.models.vae import VAE

# Directory handed to the model as its save location.
save_dir = r'C:\Users\door1\Documents\Work\Frankfurt\autoencoded-vocal-analysis\model'

model = VAE(save_dir=save_dir)

# Train for 101 epochs (the training log reports this as epochs 0 to 100).
model.train_loop(loaders, epochs=101)

Training: epochs 0 to 100
Training set: 720
Test set: 180
Epoch: 0 Average loss: 107561.6418
Test loss: 5982.7954
Epoch: 1 Average loss: 35918.6121
Epoch: 2 Average loss: 19636.1168
Test loss: 8430.4753
Epoch: 3 Average loss: 12922.3218
Epoch: 4 Average loss: 9433.6224
Test loss: 7491.6844
Epoch: 5 Average loss: 7403.5926
Epoch: 6 Average loss: 6126.9557
Test loss: 5237.5189
Epoch: 7 Average loss: 5218.0044
Epoch: 8 Average loss: 4429.8805
Test loss: 3816.0930
Epoch: 9 Average loss: 3712.3244
Epoch: 10 Average loss: 3074.0919
Test loss: 2745.8310
Epoch: 11 Average loss: 2554.6710
Epoch: 12 Average loss: 2190.2065
Test loss: 1811.3368
Epoch: 13 Average loss: 1916.2137
Epoch: 14 Average loss: 1679.6009
Test loss: 1424.4205
Epoch: 15 Average loss: 1585.5272
Epoch: 16 Average loss: 1418.1087
Test loss: 1199.7971
Epoch: 17 Average loss: 1354.7388
Epoch: 18 Average loss: 1259.8386
Test loss: 1071.8376
Epoch: 19 Average loss: 1175.8477
Epoch: 20 Average loss: 1135.9929
Test loss: 1186.8224
Ep

In [12]:
import os

from ava.data.data_container import DataContainer

# Directory handed to the DataContainer for projections.
projection_dirs = [
    r"C:\Users\door1\Documents\Work\Frankfurt\autoencoded-vocal-analysis\data\projs",
]
# Checkpoint file inside the model's save directory.
model_filename = os.path.join(save_dir, 'checkpoint_100.tar')

dc = DataContainer(
    audio_dirs=audio_dirs,
    spec_dirs=spec_dirs,
    projection_dirs=projection_dirs,
    model_filename=model_filename,
)
# Ask the container for the 'latent_means' field.
latent_means = dc.request('latent_means')

Making field: latent_means
Making field: latent_means
	Done with: latent_means


In [13]:
from ava.plotting.latent_projection import latent_projection_plot_DC
# Plot the 'latent_mean_umap' embedding from the DataContainer; per the cell
# output, the field is built from 'latent_means' by running UMAP.
latent_projection_plot_DC(dc, embedding_type='latent_mean_umap')

Making field: latent_mean_umap
Reading field: latent_means
	Done with: latent_means
Running UMAP... (n=900)


  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


	Done.
Making field: latent_mean_umap
	Done with: latent_mean_umap


  im = ax.scatter(X, Y, c=color, alpha=alpha, s=s, cmap=colormap)
