In [1]:
%load_ext autoreload
%autoreload 2

# Import config file and set the path

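In this notebook we have to set the paths manually because the notebook lives in the `./notebooks` subfolder, so every path below is written relative to it (hence the `../` prefixes).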

In [2]:
import glob
import os
import soundfile

import yaml
from yaml import FullLoader

# Parse the inference settings stored one level above this notebook
with open("../config_inference.yaml") as config_stream:
    cfg = yaml.load(config_stream, Loader=FullLoader)

In [3]:
# Checkpoint handed to the ecoVAD predictor
ecovad_weights_path = "../assets/model_weights/ecoVAD_ckpt.pt"

# Input: folder that is searched recursively for audio files
path_input_data = "../assets/demo_data/data_to_anonymised"

# Outputs: per-model detection files and the anonymised audio
path_json_detections = "../assets/demo_data/detections/json/"
path_anonymised_data = "../assets/demo_data/anonymised_data"

In [4]:
# Recursively collect every audio file under the input folder that needs predictions
types = ('/**/*.WAV', '/**/*.wav', '/**/*.mp3')  # glob suffixes appended to the input folder

# Where glob matching is case-insensitive (e.g. on Windows) the '*.WAV' and '*.wav'
# patterns return the same files, so the hits are gathered in a set to keep each
# file only once. Sorting makes the processing order reproducible between runs.
audiofiles = sorted({
    audio_path
    for pattern in types
    for audio_path in glob.glob(path_input_data + pattern, recursive=True)
})

print("Found {} files to analyze".format(len(audiofiles)))

Found 6 files to analyze


# Demo on using the different VAD algorithms

In this first section we demonstrate how to use the VAD algorithms to detect human speech in soundscapes. We standardized the output of the VAD models so that a `.json` file is created for each input file. The demo output can be found in `./assets/demo_data/detections/json`

### Predict with Pyannote

Pyannote brings good performance overall, even though it returns more false positives on environmental soundscapes.

In [5]:
from pyannote.audio import Pipeline
from VAD_algorithms.pyannote.pyannote_predict import PyannotePredict

# Load the pretrained pyannote voice-activity-detection pipeline
pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection")

# Make the prediction for every audio file
for audiofile in audiofiles:
    # os.path.basename also handles the backslash separators that glob produces on
    # Windows, where splitting on '/' would leave parent folders in the name.
    # Everything after the first dot of the file name is dropped.
    out_name = os.path.basename(audiofile).split('.')[0]
    # Output location handed to the predictor: <detections>/pyannote/<out_name>
    out_path = os.path.join(path_json_detections, "pyannote", out_name)
    PyannotePredict(pipeline, audiofile, out_path).main()

  from .autonotebook import tqdm as notebook_tqdm
Downloading: 100%|██████████| 277/277 [00:00<00:00, 199kB/s]
Downloading: 100%|██████████| 17.7M/17.7M [00:00<00:00, 57.6MB/s]
Downloading: 100%|██████████| 1.98k/1.98k [00:00<00:00, 1.03MB/s]


### Predict with ecoVAD

ecoVAD does best on environmental soundscapes but might return more false positives on urban soundscapes.

In [6]:
from VAD_algorithms.ecovad.ecoVAD_predict import ecoVADpredict

# Run ecoVAD on every audio file, using the threshold and GPU setting from the config
for audiofile in audiofiles:
    # os.path.basename also handles the backslash separators that glob produces on
    # Windows, where splitting on '/' would leave parent folders in the name.
    # Everything after the first dot of the file name is dropped.
    out_name = os.path.basename(audiofile).split('.')[0]
    # Output location handed to the predictor: <detections>/ecoVAD/<out_name>
    out_path = os.path.join(path_json_detections, "ecoVAD", out_name)

    ecoVADpredict(audiofile,
                out_path,
                ecovad_weights_path,
                cfg["THRESHOLD"],
                cfg["USE_GPU"]).main()

### Predict with webrtcVAD

Webrtc VAD does not perform as well as pyannote or ecoVAD, but it makes predictions very quickly.

In [7]:
from VAD_algorithms.webrtcvad.webrtc_predict import WebrtcvadPredict

# Run webrtcVAD on every audio file, using the frame length and aggressiveness from the config
for audiofile in audiofiles:
    # os.path.basename also handles the backslash separators that glob produces on
    # Windows, where splitting on '/' would leave parent folders in the name.
    # Everything after the first dot of the file name is dropped.
    out_name = os.path.basename(audiofile).split('.')[0]
    # Output location handed to the predictor: <detections>/webrtcVAD/<out_name>
    out_path = os.path.join(path_json_detections, "webrtcVAD", out_name)

    WebrtcvadPredict(audiofile,
                    out_path,
                    cfg["FRAME_LENGTH"],
                    cfg["AGGRESSIVENESS"]).main()

# Data anonymisation

Now that the VAD models have detected the human speech in the audio files, we can use the resulting `.json` files to anonymise the audio. In our case, the anonymisation script consists of removing the segments where human speech has been detected.

In [8]:
from anonymise_data import parseFolders, audio_anonymisation

# Match the audio files with the detection files found under the two folders
parsed_folders = parseFolders(path_input_data, path_json_detections)

# soundfile.write fails if the destination folder is missing, so create it up front
os.makedirs(path_anonymised_data, exist_ok=True)

# Anonymise the files
for pair in parsed_folders:

    afile = pair['audio']
    rfile = pair['result']

    # os.path.basename also handles backslash-separated paths (Windows), where
    # splitting on "/" would leave parent folders in the name.
    # Everything after the first dot of the file name is dropped.
    audio_name = os.path.basename(afile).split(".")[0]
    # NOTE(review): the output name depends only on the audio file name, so if
    # parsed_folders holds several result files for one recording (presumably one
    # per VAD model), later outputs overwrite earlier ones - TODO confirm this is intended.
    save_name = os.path.join(path_anonymised_data, "ANONYMISED_" + audio_name)

    anonymised_arr, sr = audio_anonymisation(rfile, afile)
    soundfile.write(save_name + ".wav", anonymised_arr, sr)
    # FIXME: when run from the notebook this cell has ended with
    # "TypeError: list indices must be integers or slices, not str" even though the
    # files are anonymised; the error does not occur with the main script.
    # Root cause not identified yet - TODO investigate.

Found 18 audio files with valid result file.


TypeError: list indices must be integers or slices, not str