## Generate .npy input files for YAMNet

#### Setup

In [3]:
import os
os.sys.path.append('./audioset/yamnet')
import params
import features as features_lib

import numpy as np
import resampy
import soundfile as sf
import tensorflow as tf

# Directory with wav files for each class
# (raw strings: '\d' is not a valid escape sequence in a normal string literal)
train_dir = r'I:\datasets/vehicle/dataset/test_set/slice/'
# Directory that will store the spectrogram inputs for each class
out_dir = r'I:\datasets/vehicle/dataset/test_set/npy_mov_avg/'

# Mirror the first-level entries of train_dir under out_dir.
# exist_ok=True makes this safe to re-run and also fills in sub-directories
# that are missing from an out_dir that already exists; makedirs creates any
# missing parent directories as well.
os.makedirs(out_dir, exist_ok=True)
for i in os.listdir(train_dir):
    os.makedirs(os.path.join(out_dir, i), exist_ok=True)

#### Moving-average filter applied to the waveform

In [4]:
def moving_average(x, w):
    """Smooth a 1-D signal with an unweighted sliding window of ``w`` samples.

    Only window positions that lie completely inside the signal are kept
    ('valid' convolution), so for ``len(x) >= w`` the result holds
    ``len(x) - w + 1`` values.

    Args:
        x: 1-D sequence of samples.
        w: window length in samples.

    Returns:
        numpy array with the mean of each full window.
    """
    window = np.ones(w)
    window_sums = np.convolve(x, window, mode='valid')
    return window_sums / w

#### Generate npy (90-s test set)

In [None]:
# Convert every clip of the 90-s test set into a .npy file of YAMNet patches.
# Layout read here: train_dir/<site>/<time>/<clip>; the same tree is recreated
# under out_dir and each clip is stored as <clip>.npy.
patch_sample = []   # patches of the most recently processed clip (reported below)
moving_avg = True   # smooth each waveform with a 3-sample moving average first
n_processed = 0     # number of clips converted so far

for site in os.listdir(train_dir):
    print(site)
    site_dir = os.path.join(train_dir, site)
    for time in os.listdir(site_dir):
        audio_path = os.path.join(site_dir, time)
        dst_path = os.path.join(out_dir, site, time)
        os.makedirs(dst_path, exist_ok=True)
        for smp in os.listdir(audio_path):
            wav_data, sr = sf.read(os.path.join(audio_path, smp), dtype=np.int16)
            assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype

            # Mix multi-channel audio down to mono *before* smoothing:
            # np.convolve (used by moving_average) only accepts 1-D input and
            # would raise on a (frames, channels) array.
            if wav_data.ndim > 1:
                wav_data = np.mean(wav_data, axis=1)
            if moving_avg:
                wav_data = moving_average(wav_data, 3)

            # Keep the centre 0.975 s of the clip.
            # NOTE(review): 0.975 s presumably corresponds to exactly one
            # YAMNet patch -- confirm against params.
            input_width = round(sr*.975)
            shft = max(0, round((len(wav_data)-input_width)/2))
            wav_data = wav_data[shft:shft+input_width]
            # Scale the int16 sample range to [-1.0, 1.0).
            waveform = wav_data / 32768.0

            if sr != params.SAMPLE_RATE:
                waveform = resampy.resample(waveform, sr, params.SAMPLE_RATE)

            waveform = waveform.astype(np.float32)
            spectrogram = features_lib.waveform_to_log_mel_spectrogram(
                tf.convert_to_tensor(waveform), params)
            patches = features_lib.spectrogram_to_patches(spectrogram, params)

            # Drop singleton dimensions once and reuse the result for saving.
            patch_sample = tf.squeeze(patches)
            np.save(os.path.join(dst_path, smp + '.npy'), patch_sample)
            n_processed += 1

# Sanity check: report the shape of the last saved patch array.
if n_processed:
    print(patch_sample.shape)
    print(len(patch_sample))
else:
    print('No clips found under ' + train_dir)

pooks-26-may-2020


#### Generate npy (normal dataset)

In [None]:
# Convert every clip of the per-class dataset into a .npy file of YAMNet patches.
# Layout read here: train_dir/<class>/<clip>; output: out_dir/<class>/<clip>.npy
for cls in os.listdir(train_dir):
    print(cls)
    cls_dir = os.path.join(train_dir, cls)
    cls_out_dir = os.path.join(out_dir, cls)
    # Make sure the destination exists even when out_dir was created earlier
    # without this class sub-directory; otherwise np.save fails.
    os.makedirs(cls_out_dir, exist_ok=True)
    for smp in os.listdir(cls_dir):
        wav_data, sr = sf.read(os.path.join(cls_dir, smp), dtype=np.int16)
        assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype

        # Keep the centre 0.975 s of the clip.
        # NOTE(review): 0.975 s presumably corresponds to exactly one
        # YAMNet patch -- confirm against params.
        input_width = round(sr*.975)
        shft = max(0, round((len(wav_data)-input_width)/2))
        wav_data = wav_data[shft:shft+input_width]
        # Scale the int16 sample range to [-1.0, 1.0).
        waveform = wav_data / 32768.0

        # Average the channels of multi-channel audio down to mono.
        if len(waveform.shape) > 1:
            waveform = np.mean(waveform, axis=1)
        if sr != params.SAMPLE_RATE:
            waveform = resampy.resample(waveform, sr, params.SAMPLE_RATE)

        waveform = waveform.astype(np.float32)
        spectrogram = features_lib.waveform_to_log_mel_spectrogram(
            tf.convert_to_tensor(waveform), params)
        patches = features_lib.spectrogram_to_patches(spectrogram, params)

        np.save(os.path.join(cls_out_dir, smp + '.npy'), tf.squeeze(patches))