In [8]:
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 28 19:55:26 2019

@author: ericl
"""

import os
import librosa
import numpy as np
import scipy
import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import pandas as pd
from tqdm import tqdm, tqdm_notebook
import platform

if platform.system() == 'Linux':
    #These two dir are used to track the name of the files we want to convert, as only 200 test audios were selected, we track them from
    # the ones generated with matlab
    clean_train_folder = './Noise_Addition/timit_128/timit/train'
    clean_test_folder='./Noise_Addition/timit_128/timit/test'

    #These are the folders where we have our noisy data stored
    noisy_test_folder = './Noise_Addition/results/test'
    noisy_train_folder = './Noise_Addition/results/train'

    #output stft features in numpy form and save in below %dirs
    output_test_folder='./features/test'
    output_train_folder='./features/train'

    #the type of noise and SNR we want to deal with, add in dirs to ensure consistency
    noisy_types=['babble','white','factory1','hfchannel']
    SNRs=['5db','10db','15db','20db','0db','-5db']

else:
    #These two dir are used to track the name of the files we want to convert, as only 200 test audios were selected, we track them from
    # the ones generated with matlab
    clean_train_folder = r'.\Noise_Addition\timit_128\timit\train'
    clean_test_folder=r'.\Noise_Addition\results\test\babble\0db'

    #These are the folders where we have our noisy data stored
    noisy_test_folder = r'.\Noise_Addition\results\test'
    noisy_train_folder = r'.\Noise_Addition\results\train'

    #output stft features in numpy form and save in below dirs
    output_test_folder=r'.\features\test'
    output_train_folder=r'.\features\train'

    #the type of noise and SNR we want to deal with, add in dirs to ensure consistency
    noisy_types=[r'\babble',r'\white',r'\factory1',r'\hfchannel']
    SNRs=[r'\5db',r'\10db',r'\15db',r'\20db',r'\0db',r'\-5db']
    #SNRs=[r'\5db']
    #noisy_types=[r'\babble']


window_size = 2 ** 14  # about 1 second of samples
#sample_rate = 16000


def saveConvert_info(file):
    """
    input a wav file, return np array after stft
    """
    
    y, fs = librosa.load(file, sr=8000)
    D = librosa.core.stft(y, n_fft = 128)
    #sample_rate, samples = wavfile.read(file)
    #x=scipy.signal.stft(samples,sample_rate)
    D_a = np.abs(D)
    D_db = librosa.core.amplitude_to_db(D_a, ref=np.max)
    phase=np.angle(D)
    mean=np.mean(D_db,1)
    std=np.std(D_db,1)
    max_value=np.max(D_a)
    return [phase,mean,std,max_value]

def saveConvert_data(file):
    """
    input a wav file, return np array after stft
    """
    y, fs = librosa.load(file, sr=8000)
    D = librosa.core.stft(y, n_fft = 128)
    D_a = np.abs(D)

    D_db = librosa.core.amplitude_to_db(D_a, ref=np.max)
    return D_db
    

def normalize(data):
    """
    normalize data by each row
    
    intype: np array (n_fft // 2 + 1) * n
    rtype: np array (n_fft // 2 + 1) * n
    
    """
    #this function should not be utilized until we get the mean and std of our data
    return (data-np.mean(data,axis=1).reshape(-1, 1)) / np.std(data,axis=1).reshape(-1, 1)

test_dict={}
def processData(data_type):
    """
    Serialize, down-sample the sliced signals and save on separate folder.
    """
    mean=np.array([])
    count=0
    
    for snr in SNRs:
        for noise in noisy_types:
            #max_idxs=[]
            
            if data_type == 'train':
                clean_folder = clean_train_folder
                noisy_folder = noisy_train_folder+noise+snr
                serialized_folder = output_train_folder+noise+snr
            else:
                clean_folder = clean_test_folder
                noisy_folder = noisy_test_folder+noise+snr
                serialized_folder = output_test_folder+noise+snr
            if not os.path.exists(serialized_folder):
                os.makedirs(serialized_folder)
            
            #clean_folder = clean_test_folder
            #noisy_folder = noisy_test_folder+noise+snr
            
            """
            #this piece of code is used to generate converted data along with their phases,angle,etc.
            for root, dirs, files in os.walk(clean_folder):
                if len(files) == 0:
                    continue
                #print('current folder',dirs)
                for filename in tqdm(files, desc='Converting {} audios'.format(data_type)):
                    noisy_file = os.path.join(noisy_folder, filename)
                    if '.wav' in filename:
                    #[phase,mean,std,max_value]


                        data=saveConvert_data(noisy_file)
                        data=normalize(data)
                        (a,b,c,d)=saveConvert_info(noisy_file)
                        test_dict['phase']=a
                        test_dict['mean']=b
                        test_dict['std']=c
                        test_dict['max_value']=d
                        test_dict['data']=data
                        np.save(os.path.join(serialized_folder, '{}'.format(filename)),test_dict)
                        #np.save(os.path.join(serialized_folder, '{}'.format(filename)), arr=test_dict)
                        #print(noisy_file)

                        
            """
            
            for root, dirs, files in os.walk(clean_folder):
                for filename in tqdm_notebook(files, desc='Converting {} audios'.format(data_type)):
                    if '.wav' in filename:
                        noisy_file = os.path.join(noisy_folder, filename)
                        converted_noisy=saveConvert_data(noisy_file)
                        
                        #get the mean
                        if len(mean)==0:
                            mean=np.sum(converted_noisy,axis=1)
                        else:
                            mean+=np.sum(converted_noisy,axis=1)
                        
                        count+=len(converted_noisy[0])
                            
                        #normalization of data will be performed at the training stage
                        #test=normalize(converted_noisy)
                        np.save(os.path.join(serialized_folder, '{}'.format(filename)), arr=converted_noisy)
                        #print('saving dir',serialized_folder)
            

                    #print(np.sum(np.isnan(converted_noisy)))
                    #max_idxs.append((filename,converted_noisy.shape[1]))
            mean = mean / count
            np.save('test_mean.npy',mean)
            #print(serialized_folder)
            
    

In [9]:
processData('train')



Converting train audios:   0%|                                                                | 0/4620 [00:00<?, ?it/s]

Converting train audios:   0%|▏                                                     | 20/4620 [00:00<00:23, 194.64it/s]

Converting train audios:   1%|▍                                                     | 38/4620 [00:00<00:24, 188.97it/s]

Converting train audios:   1%|▋                                                     | 57/4620 [00:00<00:24, 188.87it/s]

Converting train audios:   2%|▉                                                     | 76/4620 [00:00<00:24, 188.22it/s]

Converting train audios:   2%|█                                                     | 91/4620 [00:00<00:33, 134.97it/s]

Converting train audios:   2%|█▎                                                   | 111/4620 [00:00<00:30, 147.99it/s]

Converting train audios:   3%|█▍                                                   | 126/4620 [00:00<00:37, 120.70it/s]

Converting train audios:   3%|

KeyboardInterrupt: 

In [2]:

#get the mean and std for each feature, and then feed in normalized ones only in the traininig process, done by pytorch
def get_std(data_type):
    """
    Serialize, down-sample the sliced signals and save on separate folder.
    """
    data=np.array([])
    for snr in SNRs:
        for noise in noisy_types:
            #max_idxs=[]
            
            if data_type == 'train':
                clean_folder = clean_train_folder
                noisy_folder = noisy_train_folder+noise+snr
                serialized_folder = serialized_train_folder+noise+snr
            else:
                clean_folder = clean_test_folder
                noisy_folder = noisy_test_folder+noise+snr
                serialized_folder = serialized_test_folder+noise+snr
            if not os.path.exists(serialized_folder):
                os.makedirs(serialized_folder)
            
            #clean_folder = clean_test_folder
            #noisy_folder = noisy_test_folder+noise+snr
            phase_max=0
            phase_min=0
            
            for root, dirs, files in os.walk(clean_folder):
                if len(files) == 0:
                    continue
                #print('current folder',dirs)
                for filename in tqdm_notebook(files, desc='Converting {} audios'.format(data_type)):
                    noisy_file = os.path.join(noisy_folder, filename)
                    if '.wav' in filename:
                    #[phase,mean,std,max_value]
                        if len(data)==0:
                            data=saveConvert_data(noisy_file)
                        else:
                            data=np.hstack((data,saveConvert_data(noisy_file)))



    return data


In [97]:
data=get_std('s')

Converting s audios: 100%|██████████████████████████████████████████████████████████| 201/201 [00:01<00:00, 159.55it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:03<00:00, 54.32it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:05<00:00, 36.27it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:07<00:00, 27.62it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:08<00:00, 22.49it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:10<00:00, 18.56it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:12<00:00, 15.78it/s]
Converting s audios: 100%|███████████████████████████████████████████████████████████| 201/201 [00:14<00:00, 13.92it/s]
Converting s audios: 100%|██████████████