In [35]:
# Load IPython's autoreload extension and select mode 2.
# NOTE(review): mode 2 is documented as re-importing all modules before each
# cell executes, so edits to the imported project modules are picked up
# without restarting the kernel -- confirm against the installed IPython.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Config file

Only the `config_training.yaml` file contained in `./VAD_algorithms/ecovad/` needs to be updated to run the following pipeline.

In [1]:
import yaml
from yaml import FullLoader

# Parse the training configuration into `cfg`; the dataset-generation and
# training cells below read their hyper-parameters from this mapping.
# The path is resolved relative to the notebook's working directory.
#
# The encoding is pinned to UTF-8 so the file decodes identically on every
# platform instead of depending on the locale's default text encoding.
#
# NOTE(review): FullLoader can build more than plain scalars/lists/dicts.
# If the config only holds plain values, yaml.safe_load is the stricter choice.
with open("../config_training.yaml", encoding="utf-8") as config_file:
    cfg = yaml.load(config_file, Loader=FullLoader)

In [2]:
# Input folder whose entries are listed and split into training segments.
audio_path = "../assets/demo_data/training_model/soundscape_data/"

# Folders handed to preprocess_file as its speech_dir / noise_dir arguments.
speech_dir, noise_dir = (
    "../assets/demo_data/training_model/human_voices/",
    "../assets/demo_data/training_model/natural_sounds/",
)

# Destination folder handed to save_processed_arrays for the processed segments.
audio_out_dir = "../assets/demo_data/training_model/synthetic_dataset"


## Generate the training / validation dataset

In [3]:
import glob
from VAD_algorithms.ecovad.make_data import preprocess_file, save_processed_arrays

In [5]:
# List every entry directly under audio_path.
# - The trailing slash is stripped before appending "/*" so the pattern (and
#   therefore every returned path) does not contain a doubled separator.
# - glob returns matches in arbitrary order; sorting makes the processing
#   order identical from run to run and across machines.
audio_glob_pattern = audio_path.rstrip("/") + "/*"
list_audio_files = sorted(glob.glob(audio_glob_pattern))
print("Found {} files to split into training segments".format(len(list_audio_files)))

# For each listed file: build its processed array with the settings from the
# config, then write the result under audio_out_dir.
for file in list_audio_files:
    processed_arr, sr = preprocess_file(
        file,
        cfg["LENGTH_SEGMENTS"],
        overlap=0,
        # min_length is deliberately set to the same config value as the
        # segment length passed above.
        min_length=cfg["LENGTH_SEGMENTS"],
        speech_dir=speech_dir,
        noise_dir=noise_dir,
        proba_speech=cfg["PROBA_SPEECH"],
        proba_noise_speech=cfg["PROBA_NOISE_WHEN_SPEECH"],
        proba_noise_nospeech=cfg["PROBA_NOISE_WHEN_NO_SPEECH"],
    )

    save_processed_arrays(file, audio_out_dir, processed_arr, sr)

Found 244 files to split into training segments




## Train ecoVAD

In [18]:
# Folder passed to trainingApp as its first argument; it is the same location
# the dataset-generation step writes to (audio_out_dir).
train_val_path = "../assets/demo_data/training_model/synthetic_dataset"

# Output files passed to trainingApp: model weights and checkpoint.
model_save_path = "../assets/model_weights/ecoVAD_weights_demo.pt"
ckpt_save_path = "../assets/model_weights/ecoVAD_ckpts_demo.pt"

In [36]:
from VAD_algorithms.ecovad.train_model import trainingApp

# Build the trainer from the paths defined above and the hyper-parameters in
# `cfg`, then start it through main(). All arguments are positional, so their
# order must stay aligned with trainingApp's signature.
ecovad_trainer = trainingApp(
    train_val_path,
    model_save_path,
    ckpt_save_path,
    cfg["BATCH_SIZE"],
    cfg["NUM_EPOCH"],
    cfg["TB_PREFIX"],
    cfg["TB_COMMENT"],
    cfg["LR"],
    cfg["MOMENTUM"],
    cfg["DECAY"],
    0,  # NOTE(review): bare positional literal -- confirm its meaning against trainingApp's signature
    cfg["USE_GPU"],
)
ecovad_trainer.main()

Model training on cpu
[TRAIN] Epoch: 0, Loss: 0.01, Accuracy/no speech: 1.00, Accuracy/speech: 0.90
[VAL] Epoch: 0, Loss: 0.03, Accuracy/no speech: 0.98, Accuracy/speech: 0.88
Validation loss decreased (inf --> 0.025523).  Saving model ...
[TRAIN] Epoch: 1, Loss: 0.01, Accuracy/no speech: 1.00, Accuracy/speech: 0.94
[VAL] Epoch: 1, Loss: 0.02, Accuracy/no speech: 0.98, Accuracy/speech: 0.92
Validation loss decreased (0.025523 --> 0.024808).  Saving model ...
[TRAIN] Epoch: 2, Loss: 0.01, Accuracy/no speech: 1.00, Accuracy/speech: 0.90
[VAL] Epoch: 2, Loss: 0.03, Accuracy/no speech: 0.98, Accuracy/speech: 1.00
EarlyStopping counter: 1 out of 10
[TRAIN] Epoch: 3, Loss: 0.01, Accuracy/no speech: 1.00, Accuracy/speech: 0.95
[VAL] Epoch: 3, Loss: 0.02, Accuracy/no speech: 0.99, Accuracy/speech: 0.95
Validation loss decreased (0.024808 --> 0.022199).  Saving model ...
[TRAIN] Epoch: 4, Loss: 0.01, Accuracy/no speech: 1.00, Accuracy/speech: 1.00
[VAL] Epoch: 4, Loss: 0.02, Accuracy/no speech: