This notebook computes the average SNR of a given voice dataset. If the SNR is too low, it might reduce performance or prevent the model from learning.

To use this notebook, you need:
- WADA SNR estimation: http://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/
    1. extract in the same folder as this notebook
    2. on macOS you'll have to rebuild the executable: in the build folder, 1) remove the existing .o files and 2) run make


- FFMPEG: ```sudo apt-get install ffmpeg ```     


In [None]:
# !wget https://www.cs.cmu.edu/~robust/archive/algorithms/WADA_SNR_IS_2008/WadaSNR.tar.gz

In [None]:
!tar -xzf  WadaSNR.tar.gz

In [None]:
ls

In [None]:
import os, sys
import glob
import subprocess
import tempfile
import IPython
import soundfile as sf
import numpy as np
from tqdm import tqdm
import random
import librosa
from multiprocessing import Pool
from matplotlib import pylab as plt


In [None]:
# !pip install tqdm

In [None]:
# Set the meta parameters

# Directory whose *.wav files are listed and analysed further down.
DATA_PATH = "../../data/commonvoice/en/"
# Size of the multiprocessing pools used below; the value 1 selects the serial code path.
NUM_PROC = 100
# Working directory of the notebook; the WadaSNR/Exe folder is looked up underneath it.
CURRENT_PATH = os.getcwd()


In [None]:
def compute_file_snr(file_path):
    """Estimate the SNR of one audio file with the WADA-SNR executable.

    The file is first converted with FFMPEG to a mono, 16 kHz, 16-bit PCM WAV
    and the converted copy is passed to the ``WADASNR`` binary located under
    ``CURRENT_PATH/WadaSNR/Exe``.

    The converted copy lives in a private temporary directory, so the input
    file can never be overwritten or deleted (which could happen when the
    temporary name was derived from the input path) and nothing is left behind
    when a step fails.

    Args:
        file_path (str): path of the audio file to analyse.

    Returns:
        tuple: ``(snr, file_path)``. ``snr`` is the float parsed from the
        tool's output, or ``nan`` when the conversion, the WADA run or the
        parsing of its output failed.
    """
    wada_dir = os.path.join(CURRENT_PATH, "WadaSNR", "Exe")
    with tempfile.TemporaryDirectory() as tmp_dir:
        new_file = os.path.join(tmp_dir, "converted.wav")
        # Argument lists (no shell) keep paths with quotes or spaces intact.
        convert_cmd = [
            "ffmpeg", "-i", file_path,
            "-ac", "1", "-acodec", "pcm_s16le", "-y", "-ar", "16000",
            new_file,
        ]
        wada_cmd = [
            os.path.join(wada_dir, "WADASNR"),
            "-i", new_file,
            "-t", os.path.join(wada_dir, "Alpha0.400000.txt"),
            "-ifmt", "mswav",
        ]
        try:
            subprocess.run(convert_cmd, check=True)
            output = subprocess.check_output(wada_cmd)
            # The SNR value is the third token from the end of the output.
            snr = float(output.split()[-3].decode("utf-8"))
        except (OSError, subprocess.CalledProcessError, IndexError, ValueError):
            # Missing executable, failed command or unparsable output:
            # report the file as "no estimate" instead of aborting the run.
            snr = float("nan")
    return snr, file_path


In [None]:
# Upper bound on the number of clips analysed below; set to None to process
# every clip found under DATA_PATH.
MAX_FILES = 5
# glob returns matches in arbitrary order, so sort to select the same subset
# on every run.
wav_files = sorted(glob.glob(os.path.join(DATA_PATH, "*.wav")))[:MAX_FILES]
print(f" > Number of wav files {len(wav_files)}")

In [None]:
# Compute (snr, file_path) for every selected clip, serially or in a pool.
if NUM_PROC == 1:
    # Iterating the list directly lets tqdm know the total up front.
    file_snrs = [compute_file_snr(wav_file) for wav_file in tqdm(wav_files)]
else:
    # Never start more workers than there are files, and always at least one
    # because Pool rejects a size of zero.
    num_workers = max(1, min(NUM_PROC, len(wav_files)))
    with Pool(num_workers) as pool:
        file_snrs = list(tqdm(pool.imap(compute_file_snr, wav_files), total=len(wav_files)))

In [None]:
len(file_snrs)

In [None]:
file_snrs

In [None]:
# Derive the names from the results so this cell also works on a fresh
# top-to-bottom run, where file_names has not been assigned yet.
file_names = [name for _, name in file_snrs]
unique_file_names = list(set(file_names))
len(unique_file_names)

In [None]:
# Built from file_snrs so the cell does not depend on file_names already
# existing; entries for temporary "_tmp" files are dropped.
file_names = [name for _, name in file_snrs if "_tmp" not in name]

In [None]:
len(file_names)

In [None]:
# Drop results that belong to temporary "_tmp" files, then separate failed
# (NaN) estimates from valid ones. All lists below are derived from the same
# filtered sequence so that snrs, file_names and file_idxs stay aligned.
kept = [tup for tup in file_snrs if "_tmp" not in tup[1]]
failed = np.isnan(np.array([tup[0] for tup in kept], dtype=float))

# Positions (within the "_tmp"-filtered results) and names of failed files.
error_idxs = np.where(failed)[0]
error_files = [tup[1] for tup, bad in zip(kept, failed) if bad]

file_snrs = [tup for tup, bad in zip(kept, failed) if not bad]
snrs = [tup[0] for tup in file_snrs]
file_names = [tup[1] for tup in file_snrs]
# Indices of the valid clips ordered from lowest to highest SNR.
file_idxs = np.argsort(snrs)


print(f" > Average SNR of the dataset:{np.mean(snrs)}")

In [None]:
# Number of clips whose SNR is not strictly above 15.
l = [snr for snr in snrs if snr > 15]
len(snrs) - len(l)

In [None]:
snrs

In [None]:
len(snrs)

In [None]:
def output_snr_with_audio(idx):
    """Print the name and SNR of one clip and embed an audio player for it.

    Args:
        idx (int): position in ``file_idxs``; the value stored there selects
            the entry of ``file_names`` / ``snrs`` to show.
    """
    file_idx = file_idxs[idx]
    file_name = file_names[file_idx]
    samples, sample_rate = sf.read(file_name)
    if samples.ndim == 2:
        # more than one channel: keep only the first one
        samples = samples[:, 0]
    print(f" > {file_name} - snr:{snrs[file_idx]}")
    player = IPython.display.Audio(samples, rate=sample_rate)
    IPython.display.display(player)

In [None]:
# find the files with the worst SNR
# number of files to fetch, capped so the loop never indexes past file_idxs
N = min(10, len(file_idxs))
for i in range(N):
    output_snr_with_audio(i)
    

In [None]:
# find best recordings
# number of files to fetch, capped so the loop never indexes past file_idxs
N = min(10, len(file_idxs))
for i in range(N):
    # negative positions walk file_idxs from its end
    output_snr_with_audio(-i-1)

In [None]:
plt.hist(snrs, bins=100)

In [None]:
def output_snr_with_audio(idx, max_thr, min_thr):
    """Show one clip, but only if its SNR lies strictly inside a range.

    Args:
        idx (int): position in ``file_idxs``; the value stored there selects
            the entry of ``file_names`` / ``snrs`` to inspect.
        max_thr (float): exclusive upper SNR bound.
        min_thr (float): exclusive lower SNR bound.

    Returns:
        None. When the SNR is inside the range, the file name and SNR are
        printed and an audio player is displayed; otherwise nothing happens.
    """
    file_idx = file_idxs[idx]
    snr = snrs[file_idx]
    # Test the SNR before touching the disk so that clips outside the range
    # are never read and decoded.
    if not (min_thr < snr < max_thr):
        return

    file_name = file_names[file_idx]
    wav, sr = sf.read(file_name)
    # multi channel to single channel
    if wav.ndim == 2:
        wav = wav[:, 0]

    print(f" > {file_name} - snr:{snr}")
    IPython.display.display(IPython.display.Audio(wav, rate=sr))

In [None]:
len(file_names)

In [None]:
# show every clip whose SNR lies strictly between 0 and 16
# iterate over file_idxs, which is what output_snr_with_audio indexes
N = len(file_idxs)  # number of files to fetch
for i in range(N):
    output_snr_with_audio(i, 16, 0)

## Remove noisy clips

In [None]:
def output_snr_with_audio(idx):
    """Return the file name selected by ``file_idxs[idx]`` if its SNR is above 15.

    Args:
        idx (int): position in ``file_idxs``.

    Returns:
        The matching entry of ``file_names`` when the corresponding value in
        ``snrs`` is greater than 15, otherwise ``None``.
    """
    position = file_idxs[idx]
    candidate = file_names[position]
    return candidate if snrs[position] > 15 else None

In [None]:
len([i for i in snrs if i>15])

In [None]:
len([i for i in snrs if i<15])

In [None]:
N = len(file_idxs)  # number of files to fetch
# Each call is only a couple of list lookups, so a plain loop gives the same
# result without the cost of starting worker processes and shipping results
# between them.
clean_wav_files = [output_snr_with_audio(i) for i in tqdm(range(N))]

In [None]:
len(clean_wav_files)

In [None]:
# Keep only real file names. An explicit identity test avoids relying on the
# truthiness of NotImplemented, which is what (None).__ne__ returns for
# non-None values.
clean_wav_files = [wav_file for wav_file in clean_wav_files if wav_file is not None]

In [None]:
len(clean_wav_files)

# Move clean clips to another folder

In [None]:
from shutil import copyfile
import pathlib


In [None]:
dst = "VCTK-Corpus/wav22_clean/"

In [None]:
def move_clips(src_wav):
    """Copy one clip to the matching location with "wav22" replaced by "wav22_clean".

    Despite the name, the source file is copied, not moved. Any "_tmp" in the
    given path is stripped first, and the destination's parent folders are
    created when missing.

    Args:
        src_wav (str): '/'-separated path of the clip to copy.
    """
    src_wav = src_wav.replace("_tmp", "")
    wav_path = src_wav.replace("wav22", "wav22_clean")

    # Folder that will hold the copy, and the folder one level above it.
    parent_dir = wav_path.rpartition("/")[0]
    grandparent_dir = parent_dir.rpartition("/")[0]
    for folder in (grandparent_dir, parent_dir):
        pathlib.Path(folder).mkdir(parents=True, exist_ok=True)

    copyfile(src_wav, wav_path)
 

In [None]:
clean_wav_files[0]

In [None]:
path = "VCTK-Corpus/wav22_clean/"
os.path.exists(path)

In [None]:
# Copy every clean clip using a pool of worker processes; consuming the
# iterator through tqdm drives the progress bar, and `l` keeps the results.
with Pool(NUM_PROC) as pool:
    copy_jobs = pool.imap(move_clips, clean_wav_files)
    l = list(tqdm(copy_jobs, total=len(clean_wav_files)))