In [1]:
import os
import numpy as np
import random
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from tqdm import tqdm

Define list of keywords

In [2]:
# Words that must not appear in any transcript sentence used as a negative sample.
keywords = [
    'up',
    'down',
    'left',
    'right',
]

Get filenames of audio to be used for negative samples

In [3]:
# Root directory holding the audio recordings used as negative material.
path = os.path.abspath("Datasets/TEDLIUM_release1/train/wav/")
filenames = []

# os.walk yields (directory, sub-directories, files) for every directory in the
# tree. Each file name is joined with the directory it was actually found in,
# so files located in nested folders still get a valid path.
for root, _, files in os.walk(path):
    for file in files:
        filenames.append(os.path.join(root, file))

Define some helper functions

In [4]:
def compute_mfcc(signal, num_features=40, min_window_size=98):
    """Compute the MFCC matrix of ``signal`` and zero-pad it at the front if short.

    Parameters
    ----------
    signal : array-like
        Audio samples passed straight to ``mfcc``. The sample rate is fixed
        at 16000 in the call below.
    num_features : int
        Used for both ``numcep`` and ``nfilt``; also the number of columns
        of the zero padding.
    min_window_size : int
        Minimum number of rows the returned matrix must have.

    Returns
    -------
    numpy.ndarray
        The ``mfcc`` output. When it has ``min_window_size`` rows or fewer,
        rows of zeros are prepended so it has exactly ``min_window_size`` rows.
    """
    coefficients = mfcc(
        signal,
        samplerate=16000,
        winlen=0.030,
        winstep=0.01,
        numcep=num_features,
        lowfreq=20,
        highfreq=4000,
        appendEnergy=False,
        nfilt=num_features,
    )

    n_rows = coefficients.shape[0]
    if n_rows > min_window_size:
        # Long enough already: hand the matrix back untouched.
        return coefficients

    # Padding goes *before* the real rows, so the actual features end up at
    # the tail of the returned matrix.
    padding = np.zeros((min_window_size - n_rows, num_features))
    return np.concatenate((padding, coefficients), axis=0)

def extract_frames(features, shift=5, max_frames=20, frame_length=98, n_mffc=40):
    """Cut a feature matrix into fixed-length frames taken every ``shift`` rows.

    Frames start at rows ``0, shift, 2 * shift, ...`` for as long as a frame
    starting there ends strictly before the last row. One final frame made of
    the last ``frame_length`` rows is always appended, so the end of the
    matrix is covered.

    Parameters
    ----------
    features : numpy.ndarray
        2-D matrix; frames are taken along the first axis.
    shift : int
        Step, in rows, between the starts of consecutive frames. Must be >= 1.
    max_frames : int
        Upper bound on the number of frames returned. When more are produced,
        ``max_frames`` of them are drawn with ``random.sample``.
    frame_length : int
        Number of rows per frame.
    n_mffc : int
        Unused; kept so existing callers that pass it keep working.

    Returns
    -------
    list of numpy.ndarray
        Slices of ``features``. If ``features`` has fewer than ``frame_length``
        rows, the single returned frame is the whole (shorter) matrix.

    Raises
    ------
    ValueError
        If ``shift`` is smaller than 1. A non-positive step would never
        advance through the matrix.
    """
    if shift < 1:
        raise ValueError("shift must be a positive integer, got {!r}".format(shift))

    window_size = features.shape[0]

    # A start index is kept only while start + frame_length < window_size,
    # i.e. start < window_size - frame_length, which is what range() enforces.
    frames = [
        features[start:start + frame_length, :]
        for start in range(0, window_size - frame_length, shift)
    ]
    # Closing frame, aligned with the end of the matrix.
    frames.append(features[-frame_length:, :])

    if len(frames) > max_frames:
        frames = random.sample(frames, k=max_frames)

    return frames
    

Define the function that generates negative samples for the model to train on

In [5]:
def _sample_keyword_free_windows(stm_path, keywords, n_sentences):
    """Return (start, end) times in seconds of sampled transcript lines that contain no keyword."""
    with open(stm_path, 'rt') as f:
        records = f.readlines()

    if n_sentences > len(records):
        sampled_sentences = records
    else:
        sampled_sentences = random.sample(records, k=n_sentences)

    windows = []
    for sentence in sampled_sentences:
        fields = sentence.split()
        # Blank, truncated or ';;' comment lines have no usable start/end
        # fields; skip them instead of failing on fields[3] / float().
        if len(fields) < 5 or fields[0].startswith(';;'):
            continue
        words = fields[6:]
        if not any(kw in words for kw in keywords):
            windows.append((float(fields[3]), float(fields[4])))
    return windows


def generate_negative_samples(filenames, keywords, n_files=30, n_sentences=10, max_frames=20, shift=1, frame_shape=(98, 40)):
    """Build MFCC frames from randomly chosen sentences that contain no keyword.

    For each sampled wav file, the transcript file obtained by replacing
    every ``'wav'`` in the path with ``'stm'`` is read, up to ``n_sentences``
    lines are sampled from it, and the lines containing a keyword are
    dropped. The audio of each remaining sentence is converted with
    ``compute_mfcc`` and cut into frames with ``extract_frames``.

    Parameters
    ----------
    filenames : list of str
        Paths of the candidate wav files.
    keywords : iterable of str
        Words that must not appear (as whole tokens) in a selected sentence.
    n_files : int
        Number of files to sample; capped at ``len(filenames)``.
    n_sentences : int
        Number of transcript lines sampled per file before keyword filtering.
        All lines are used when the file has fewer.
    max_frames : int
        Maximum number of frames kept per sentence.
    shift : int
        Step between consecutive frames, forwarded to ``extract_frames``.
    frame_shape : tuple of int
        ``(frame_length, number_of_mfcc_features)``.

    Returns
    -------
    list
        The frames of all selected sentences, in processing order.
    """
    # random.sample raises ValueError when asked for more items than exist;
    # cap the request, mirroring how the sentence count is handled.
    n_selected = min(n_files, len(filenames))
    sampled_filenames = random.sample(filenames, k=n_selected)
    negative_samples = []

    for file in tqdm(sampled_filenames):
        fs, signal = wav.read(file)

        # Sentence boundaries, in seconds, of the keyword-free sampled sentences.
        windows = _sample_keyword_free_windows(file.replace('wav', 'stm'), keywords, n_sentences)

        # Compute up to max_frames frames for each sentence.
        for start_sec, end_sec in windows:
            start = int(start_sec * fs)
            end = min(int(end_sec * fs), len(signal))
            if end <= start:
                # The window holds no audio samples once clamped to the signal.
                continue

            mfcc_features = compute_mfcc(signal[start:end], num_features=frame_shape[1])
            frames = extract_frames(mfcc_features, shift=shift, max_frames=max_frames, frame_length=frame_shape[0], n_mffc=frame_shape[1])

            negative_samples.extend(frames)

    return negative_samples

In [6]:
# Sample 100 recordings and keep at most 60 frames of shape (98, 40) per sentence.
negative_samples = generate_negative_samples(
    filenames,
    keywords,
    n_files=100,
    n_sentences=10,
    max_frames=60,
    shift=1,
    frame_shape=(98, 40),
)

100%|██████████| 100/100 [00:21<00:00,  4.73it/s]


In [8]:
# Sanity check: displays True if at least one frame does not have 98 rows.
not all(s.shape[0] == 98 for s in negative_samples)

False

In [9]:
# Total number of negative frames generated above.
len(negative_samples)

49153

In [10]:
def save_negative_samples(neg_samples, folder_name, path):
    """Save every sample as its own ``.npy`` file inside ``path/folder_name``.

    Parameters
    ----------
    neg_samples : iterable of numpy.ndarray
        Samples to write; the i-th one is stored as ``neg_frame_<i>.npy``
        (``np.save`` appends the ``.npy`` extension).
    folder_name : str
        Name of the destination folder inside ``path``.
    path : str
        Directory in which the destination folder lives.

    The destination folder is created when missing and reused when it already
    exists, so the function can be called again for the same folder.
    """
    # Resolve the destination unconditionally: it is needed by the loop below
    # whether or not the folder had to be created.
    path2neg_samples = os.path.join(path, folder_name)
    os.makedirs(path2neg_samples, exist_ok=True)

    for i, sample in enumerate(neg_samples):
        filename = 'neg_frame_' + str(i)
        np.save(os.path.join(path2neg_samples, filename), sample, allow_pickle=False)
    

In [11]:
# Write every generated frame to disk as an individual .npy file.
save_negative_samples(
    negative_samples,
    folder_name='negative_samples',
    path='/aimlx/Datasets/',
)