In [1]:
import pickle
from glob import iglob
import numpy as np
import librosa
from shutil import rmtree
from constants import *
import speech_recognition as speech_r

DATA_AUDIO_DIR = '../augmentation3_train_data'
DATA_RESAMPLED_AUDIO_DIR = '../resampled_augmentation3_1_train_data'

list_dir = os.listdir(DATA_AUDIO_DIR)
list_dir.sort()

r = speech_r.Recognizer() 

OUTPUT_DIR = '../output_una_re_augmentation3_1'
OUTPUT_DIR_TRAIN = os.path.join(OUTPUT_DIR, 'train')
OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR, 'test')
OUTPUT_DIR_RESAMPLED_TRAIN = os.path.join(OUTPUT_DIR, 'resampled_train')
OUTPUT_DIR_RESAMPLED_TEST = os.path.join(OUTPUT_DIR, 'resampled_test')

PAD_SIZE = 0
TARGET_LENGTH = 200000
TARGET_SR = 8000
MAX_AUDIO_LENGTH = 200
AUDIO_LENGTH = TARGET_LENGTH
SEGMENT_NUM = 1

In [2]:
def mkdir_p(path):
    import errno
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise


def del_folder(path):
    try:
        rmtree(path)
    except:
        pass

### Delete pickle file & Make directory

In [3]:
del_folder(OUTPUT_DIR_TRAIN)
del_folder(OUTPUT_DIR_TEST)
mkdir_p(OUTPUT_DIR_TRAIN)
mkdir_p(OUTPUT_DIR_TEST)

In [4]:
del_folder(OUTPUT_DIR_RESAMPLED_TRAIN)
del_folder(OUTPUT_DIR_RESAMPLED_TEST)
mkdir_p(OUTPUT_DIR_RESAMPLED_TRAIN)
mkdir_p(OUTPUT_DIR_RESAMPLED_TEST)

In [5]:
del_folder(DATA_RESAMPLED_AUDIO_DIR)
mkdir_p(DATA_RESAMPLED_AUDIO_DIR)

## Audio Resampling 

음성 길이를 TARGET_LENGTH로 맞추기 위한 전처리 진행   
Default Samplerate는 TARGET_SR 이며, TARGET_SR로 Audio를 load 하여 Audio Duration을 측정한다.  

**Audio Duration X Sample Rate = Result Audio Length**   

Audio Duration과 Result Audio Length(TARGET_LENGTH)를 통해 Sample Rate를 구한다.  


In [6]:
def resampling_audio():
     for i, wav_filename in enumerate(iglob(os.path.join(DATA_AUDIO_DIR, '**/**.wav'), recursive=True)):
        print(wav_filename)
        y, sr = librosa.load(wav_filename, sr=TARGET_SR, mono=True, duration=MAX_AUDIO_LENGTH)
        audio_duration = len(y) / sr
        
        resample_sr = TARGET_LENGTH / audio_duration        
        resample = librosa.resample(y, sr, resample_sr)
        print(i, wav_filename)
        print("Default SR : {}, Audio Length(Default SR) : {}, Audio Duration : {}".format(TARGET_SR, len(y), audio_duration))
        print("Audio Duration : {}, Resampling SR : {}, Result Audio Length : {}".format(audio_duration, resample_sr, len(resample)))
        
        if len(resample) < TARGET_LENGTH:
            resample = np.concatenate((resample, np.zeros(shape=(TARGET_LENGTH - len(resample), 1))))
        resample = resample[:TARGET_LENGTH]
        
#         print("Final Audio Length : {}".format(len(resample)))

        
        output_folder = DATA_RESAMPLED_AUDIO_DIR
        if not os.path.exists(os.path.join(DATA_RESAMPLED_AUDIO_DIR, wav_filename.split('/')[2])):
                mkdir_p(os.path.join(DATA_RESAMPLED_AUDIO_DIR, wav_filename.split('/')[2]))
                
        output_filename = os.path.join(DATA_RESAMPLED_AUDIO_DIR, wav_filename.split('/')[2], str(wav_filename.replace('.','/').split('/')[5])+".pkl")
        print("Output File Name: {}\n".format(output_filename))
        
        result = {'resampled_audio': resample,
               'resample_sr': resample_sr}
        
#         np.savetxt( DATA_RESAMPLED_AUDIO_DIR+"/"+str(wav_filename.replace('.','/').split('/')[5])+".txt", resample, delimiter=',')
        
        with open(output_filename, 'wb') as w:
                  pickle.dump(result, w)


In [7]:
resampling_audio()

../augmentation3_train_data/2017019770040_kimdajeong/5.wav
0 ../augmentation3_train_data/2017019770040_kimdajeong/5.wav
Default SR : 8000, Audio Length(Default SR) : 201088, Audio Duration : 25.136
Audio Duration : 25.136, Resampling SR : 7956.7154678548695, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770040_kimdajeong/5.pkl

../augmentation3_train_data/2017019770040_kimdajeong/4.wav
1 ../augmentation3_train_data/2017019770040_kimdajeong/4.wav
Default SR : 8000, Audio Length(Default SR) : 195712, Audio Duration : 24.464
Audio Duration : 24.464, Resampling SR : 8175.277959450622, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770040_kimdajeong/4.pkl

../augmentation3_train_data/2017019770040_kimdajeong/8.wav
2 ../augmentation3_train_data/2017019770040_kimdajeong/8.wav
Default SR : 8000, Audio Length(Default SR) : 298514, Audio Duration : 37.31425
Audio Duration : 37.31425, Resampling SR : 53

23 ../augmentation3_train_data/2017019740042_shinbeom/6.wav
Default SR : 8000, Audio Length(Default SR) : 176780, Audio Duration : 22.0975
Audio Duration : 22.0975, Resampling SR : 9050.797601538636, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740042_shinbeom/6.pkl

../augmentation3_train_data/2017019740042_shinbeom/7.wav
24 ../augmentation3_train_data/2017019740042_shinbeom/7.wav
Default SR : 8000, Audio Length(Default SR) : 170954, Audio Duration : 21.36925
Audio Duration : 21.36925, Resampling SR : 9359.242837254465, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740042_shinbeom/7.pkl

../augmentation3_train_data/2017019740042_shinbeom/3.wav
25 ../augmentation3_train_data/2017019740042_shinbeom/3.wav
Default SR : 8000, Audio Length(Default SR) : 254976, Audio Duration : 31.872
Audio Duration : 31.872, Resampling SR : 6275.100401606425, Result Audio Length : 200000
Output File Name: ../re

45 ../augmentation3_train_data/2017019880008_jangsoojin/3.wav
Default SR : 8000, Audio Length(Default SR) : 222079, Audio Duration : 27.759875
Audio Duration : 27.759875, Resampling SR : 7204.643392666573, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880008_jangsoojin/3.pkl

../augmentation3_train_data/2017019880008_jangsoojin/2.wav
46 ../augmentation3_train_data/2017019880008_jangsoojin/2.wav
Default SR : 8000, Audio Length(Default SR) : 223669, Audio Duration : 27.958625
Audio Duration : 27.958625, Resampling SR : 7153.427609548037, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880008_jangsoojin/2.pkl

../augmentation3_train_data/2017019880008_jangsoojin/10.wav
47 ../augmentation3_train_data/2017019880008_jangsoojin/10.wav
Default SR : 8000, Audio Length(Default SR) : 169751, Audio Duration : 21.218875
Audio Duration : 21.218875, Resampling SR : 9425.570394283392, Result Audio Length : 20

68 ../augmentation3_train_data/2017019770029_jueunhong/1.wav
Default SR : 8000, Audio Length(Default SR) : 240064, Audio Duration : 30.008
Audio Duration : 30.008, Resampling SR : 6664.889362836577, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770029_jueunhong/1.pkl

../augmentation3_train_data/2017019770029_jueunhong/9.wav
69 ../augmentation3_train_data/2017019770029_jueunhong/9.wav
Default SR : 8000, Audio Length(Default SR) : 183040, Audio Duration : 22.88
Audio Duration : 22.88, Resampling SR : 8741.258741258742, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770029_jueunhong/9.pkl

../augmentation3_train_data/2017019740021_kwakbokyeong/5.wav
70 ../augmentation3_train_data/2017019740021_kwakbokyeong/5.wav
Default SR : 8000, Audio Length(Default SR) : 330923, Audio Duration : 41.365375
Audio Duration : 41.365375, Resampling SR : 4834.961607382987, Result Audio Length : 200000
Output File 

90 ../augmentation3_train_data/2017019880002_kimkihyeon/5.wav
Default SR : 8000, Audio Length(Default SR) : 190682, Audio Duration : 23.83525
Audio Duration : 23.83525, Resampling SR : 8390.933596249253, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880002_kimkihyeon/5.pkl

../augmentation3_train_data/2017019880002_kimkihyeon/4.wav
91 ../augmentation3_train_data/2017019880002_kimkihyeon/4.wav
Default SR : 8000, Audio Length(Default SR) : 210045, Audio Duration : 26.255625
Audio Duration : 26.255625, Resampling SR : 7617.41531576567, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880002_kimkihyeon/4.pkl

../augmentation3_train_data/2017019880002_kimkihyeon/8.wav
92 ../augmentation3_train_data/2017019880002_kimkihyeon/8.wav
Default SR : 8000, Audio Length(Default SR) : 292776, Audio Duration : 36.597
Audio Duration : 36.597, Resampling SR : 5464.928819302128, Result Audio Length : 200000
Output

113 ../augmentation3_train_data/2017019770012_ladakyeong/6.wav
Default SR : 8000, Audio Length(Default SR) : 192246, Audio Duration : 24.03075
Audio Duration : 24.03075, Resampling SR : 8322.669912507932, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770012_ladakyeong/6.pkl

../augmentation3_train_data/2017019770012_ladakyeong/7.wav
114 ../augmentation3_train_data/2017019770012_ladakyeong/7.wav
Default SR : 8000, Audio Length(Default SR) : 185518, Audio Duration : 23.18975
Audio Duration : 23.18975, Resampling SR : 8624.500048512813, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770012_ladakyeong/7.pkl

../augmentation3_train_data/2017019770012_ladakyeong/3.wav
115 ../augmentation3_train_data/2017019770012_ladakyeong/3.wav
Default SR : 8000, Audio Length(Default SR) : 232811, Audio Duration : 29.101375
Audio Duration : 29.101375, Resampling SR : 6872.527500848328, Result Audio Length : 20000

135 ../augmentation3_train_data/2017019880005_kimjihyeon/3.wav
Default SR : 8000, Audio Length(Default SR) : 245475, Audio Duration : 30.684375
Audio Duration : 30.684375, Resampling SR : 6517.975353905693, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880005_kimjihyeon/3.pkl

../augmentation3_train_data/2017019880005_kimjihyeon/2.wav
136 ../augmentation3_train_data/2017019880005_kimjihyeon/2.wav
Default SR : 8000, Audio Length(Default SR) : 227043, Audio Duration : 28.380375
Audio Duration : 28.380375, Resampling SR : 7047.1232321630705, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880005_kimjihyeon/2.pkl

../augmentation3_train_data/2017019880005_kimjihyeon/10.wav
137 ../augmentation3_train_data/2017019880005_kimjihyeon/10.wav
Default SR : 8000, Audio Length(Default SR) : 194954, Audio Duration : 24.36925
Audio Duration : 24.36925, Resampling SR : 8207.064230536434, Result Audio Length : 

157 ../augmentation3_train_data/2017019740025_ahnjeongsuk/10.wav
Default SR : 8000, Audio Length(Default SR) : 276617, Audio Duration : 34.577125
Audio Duration : 34.577125, Resampling SR : 5784.170893329043, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740025_ahnjeongsuk/10.pkl

../augmentation3_train_data/2017019740025_ahnjeongsuk/1.wav
158 ../augmentation3_train_data/2017019740025_ahnjeongsuk/1.wav
Default SR : 8000, Audio Length(Default SR) : 222080, Audio Duration : 27.76
Audio Duration : 27.76, Resampling SR : 7204.610951008645, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740025_ahnjeongsuk/1.pkl

../augmentation3_train_data/2017019740025_ahnjeongsuk/9.wav
159 ../augmentation3_train_data/2017019740025_ahnjeongsuk/9.wav
Default SR : 8000, Audio Length(Default SR) : 279771, Audio Duration : 34.971375
Audio Duration : 34.971375, Resampling SR : 5718.963009032387, Result Audio Length : 

180 ../augmentation3_train_data/2017019770008_parksomi/5.wav
Default SR : 8000, Audio Length(Default SR) : 207193, Audio Duration : 25.899125
Audio Duration : 25.899125, Resampling SR : 7722.268609460744, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770008_parksomi/5.pkl

../augmentation3_train_data/2017019770008_parksomi/4.wav
181 ../augmentation3_train_data/2017019770008_parksomi/4.wav
Default SR : 8000, Audio Length(Default SR) : 211399, Audio Duration : 26.424875
Audio Duration : 26.424875, Resampling SR : 7568.626152441591, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770008_parksomi/4.pkl

../augmentation3_train_data/2017019770008_parksomi/8.wav
182 ../augmentation3_train_data/2017019770008_parksomi/8.wav
Default SR : 8000, Audio Length(Default SR) : 303060, Audio Duration : 37.8825
Audio Duration : 37.8825, Resampling SR : 5279.482610704151, Result Audio Length : 200000
Output File 

202 ../augmentation3_train_data/2017019880012_kimsongyi/8.wav
Default SR : 8000, Audio Length(Default SR) : 301714, Audio Duration : 37.71425
Audio Duration : 37.71425, Resampling SR : 5303.035324844058, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880012_kimsongyi/8.pkl

../augmentation3_train_data/2017019880012_kimsongyi/6.wav
203 ../augmentation3_train_data/2017019880012_kimsongyi/6.wav
Default SR : 8000, Audio Length(Default SR) : 173506, Audio Duration : 21.68825
Audio Duration : 21.68825, Resampling SR : 9221.583115281315, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880012_kimsongyi/6.pkl

../augmentation3_train_data/2017019880012_kimsongyi/7.wav
204 ../augmentation3_train_data/2017019880012_kimsongyi/7.wav
Default SR : 8000, Audio Length(Default SR) : 174277, Audio Duration : 21.784625
Audio Duration : 21.784625, Resampling SR : 9180.78690819787, Result Audio Length : 200001
Output

225 ../augmentation3_train_data/2017019770017_hansohee/3.wav
Default SR : 8000, Audio Length(Default SR) : 264203, Audio Duration : 33.025375
Audio Duration : 33.025375, Resampling SR : 6055.949402542742, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770017_hansohee/3.pkl

../augmentation3_train_data/2017019770017_hansohee/2.wav
226 ../augmentation3_train_data/2017019770017_hansohee/2.wav
Default SR : 8000, Audio Length(Default SR) : 255752, Audio Duration : 31.969
Audio Duration : 31.969, Resampling SR : 6256.060558666208, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770017_hansohee/2.pkl

../augmentation3_train_data/2017019770017_hansohee/10.wav
227 ../augmentation3_train_data/2017019770017_hansohee/10.wav
Default SR : 8000, Audio Length(Default SR) : 188095, Audio Duration : 23.511875
Audio Duration : 23.511875, Resampling SR : 8506.339881442887, Result Audio Length : 200000
Output File 

247 ../augmentation3_train_data/2017019740039_choiwooshik/10.wav
Default SR : 8000, Audio Length(Default SR) : 307399, Audio Duration : 38.424875
Audio Duration : 38.424875, Resampling SR : 5204.961629673487, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740039_choiwooshik/10.pkl

../augmentation3_train_data/2017019740039_choiwooshik/1.wav
248 ../augmentation3_train_data/2017019740039_choiwooshik/1.wav
Default SR : 8000, Audio Length(Default SR) : 196800, Audio Duration : 24.6
Audio Duration : 24.6, Resampling SR : 8130.081300813008, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740039_choiwooshik/1.pkl

../augmentation3_train_data/2017019740039_choiwooshik/9.wav
249 ../augmentation3_train_data/2017019740039_choiwooshik/9.wav
Default SR : 8000, Audio Length(Default SR) : 310507, Audio Duration : 38.813375
Audio Duration : 38.813375, Resampling SR : 5152.86289842097, Result Audio Length : 200

270 ../augmentation3_train_data/2017019770036_yuminji/5.wav
Default SR : 8000, Audio Length(Default SR) : 230240, Audio Duration : 28.78
Audio Duration : 28.78, Resampling SR : 6949.270326615705, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770036_yuminji/5.pkl

../augmentation3_train_data/2017019770036_yuminji/4.wav
271 ../augmentation3_train_data/2017019770036_yuminji/4.wav
Default SR : 8000, Audio Length(Default SR) : 231712, Audio Duration : 28.964
Audio Duration : 28.964, Resampling SR : 6905.123601712471, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770036_yuminji/4.pkl

../augmentation3_train_data/2017019770036_yuminji/8.wav
272 ../augmentation3_train_data/2017019770036_yuminji/8.wav
Default SR : 8000, Audio Length(Default SR) : 307931, Audio Duration : 38.491375
Audio Duration : 38.491375, Resampling SR : 5195.969226872254, Result Audio Length : 200000
Output File Name: ../resample

293 ../augmentation3_train_data/2017019880031_ahnjiwoo/6.wav
Default SR : 8000, Audio Length(Default SR) : 190023, Audio Duration : 23.752875
Audio Duration : 23.752875, Resampling SR : 8420.033364382207, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880031_ahnjiwoo/6.pkl

../augmentation3_train_data/2017019880031_ahnjiwoo/7.wav
294 ../augmentation3_train_data/2017019880031_ahnjiwoo/7.wav
Default SR : 8000, Audio Length(Default SR) : 187520, Audio Duration : 23.44
Audio Duration : 23.44, Resampling SR : 8532.423208191125, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880031_ahnjiwoo/7.pkl

../augmentation3_train_data/2017019880031_ahnjiwoo/3.wav
295 ../augmentation3_train_data/2017019880031_ahnjiwoo/3.wav
Default SR : 8000, Audio Length(Default SR) : 232790, Audio Duration : 29.09875
Audio Duration : 29.09875, Resampling SR : 6873.147471970446, Result Audio Length : 200000
Output File Name: 

315 ../augmentation3_train_data/2017019770009_yuminji/3.wav
Default SR : 8000, Audio Length(Default SR) : 215552, Audio Duration : 26.944
Audio Duration : 26.944, Resampling SR : 7422.802850356295, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770009_yuminji/3.pkl

../augmentation3_train_data/2017019770009_yuminji/2.wav
316 ../augmentation3_train_data/2017019770009_yuminji/2.wav
Default SR : 8000, Audio Length(Default SR) : 227104, Audio Duration : 28.388
Audio Duration : 28.388, Resampling SR : 7045.230379033394, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770009_yuminji/2.pkl

../augmentation3_train_data/2017019770009_yuminji/10.wav
317 ../augmentation3_train_data/2017019770009_yuminji/10.wav
Default SR : 8000, Audio Length(Default SR) : 177108, Audio Duration : 22.1385
Audio Duration : 22.1385, Resampling SR : 9034.03572961131, Result Audio Length : 200000
Output File Name: ../resampled

338 ../augmentation3_train_data/2017019770016_parkjongae/1.wav
Default SR : 8000, Audio Length(Default SR) : 295542, Audio Duration : 36.94275
Audio Duration : 36.94275, Resampling SR : 5413.782135872398, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770016_parkjongae/1.pkl

../augmentation3_train_data/2017019770016_parkjongae/9.wav
339 ../augmentation3_train_data/2017019770016_parkjongae/9.wav
Default SR : 8000, Audio Length(Default SR) : 194437, Audio Duration : 24.304625
Audio Duration : 24.304625, Resampling SR : 8228.88647736799, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770016_parkjongae/9.pkl

../augmentation3_train_data/2017019770028_kimminyoung/5.wav
340 ../augmentation3_train_data/2017019770028_kimminyoung/5.wav
Default SR : 8000, Audio Length(Default SR) : 209381, Audio Duration : 26.172625
Audio Duration : 26.172625, Resampling SR : 7641.57206241254, Result Audio Length : 200

360 ../augmentation3_train_data/2017019740018_eundano/5.wav
Default SR : 8000, Audio Length(Default SR) : 179708, Audio Duration : 22.4635
Audio Duration : 22.4635, Resampling SR : 8903.332072027957, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740018_eundano/5.pkl

../augmentation3_train_data/2017019740018_eundano/4.wav
361 ../augmentation3_train_data/2017019740018_eundano/4.wav
Default SR : 8000, Audio Length(Default SR) : 181700, Audio Duration : 22.7125
Audio Duration : 22.7125, Resampling SR : 8805.723720418273, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740018_eundano/4.pkl

../augmentation3_train_data/2017019740018_eundano/8.wav
362 ../augmentation3_train_data/2017019740018_eundano/8.wav
Default SR : 8000, Audio Length(Default SR) : 298619, Audio Duration : 37.327375
Audio Duration : 37.327375, Resampling SR : 5357.997984053258, Result Audio Length : 200000
Output File Name: ../re

383 ../augmentation3_train_data/2017019880014_leedanbee/6.wav
Default SR : 8000, Audio Length(Default SR) : 183972, Audio Duration : 22.9965
Audio Duration : 22.9965, Resampling SR : 8696.975626725805, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880014_leedanbee/6.pkl

../augmentation3_train_data/2017019880014_leedanbee/7.wav
384 ../augmentation3_train_data/2017019880014_leedanbee/7.wav
Default SR : 8000, Audio Length(Default SR) : 208548, Audio Duration : 26.0685
Audio Duration : 26.0685, Resampling SR : 7672.094673648273, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880014_leedanbee/7.pkl

../augmentation3_train_data/2017019880014_leedanbee/3.wav
385 ../augmentation3_train_data/2017019880014_leedanbee/3.wav
Default SR : 8000, Audio Length(Default SR) : 269984, Audio Duration : 33.748
Audio Duration : 33.748, Resampling SR : 5926.277112717791, Result Audio Length : 200001
Output File Nam

405 ../augmentation3_train_data/2017019770027_kwonyulim/3.wav
Default SR : 8000, Audio Length(Default SR) : 230667, Audio Duration : 28.833375
Audio Duration : 28.833375, Resampling SR : 6936.406161262773, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770027_kwonyulim/3.pkl

../augmentation3_train_data/2017019770027_kwonyulim/2.wav
406 ../augmentation3_train_data/2017019770027_kwonyulim/2.wav
Default SR : 8000, Audio Length(Default SR) : 237675, Audio Duration : 29.709375
Audio Duration : 29.709375, Resampling SR : 6731.881771326391, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770027_kwonyulim/2.pkl

../augmentation3_train_data/2017019770027_kwonyulim/10.wav
407 ../augmentation3_train_data/2017019770027_kwonyulim/10.wav
Default SR : 8000, Audio Length(Default SR) : 174655, Audio Duration : 21.831875
Audio Duration : 21.831875, Resampling SR : 9160.917236838339, Result Audio Length : 200000

428 ../augmentation3_train_data/2017019770031_kwonnahui/1.wav
Default SR : 8000, Audio Length(Default SR) : 234016, Audio Duration : 29.252
Audio Duration : 29.252, Resampling SR : 6837.139340899767, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770031_kwonnahui/1.pkl

../augmentation3_train_data/2017019770031_kwonnahui/9.wav
429 ../augmentation3_train_data/2017019770031_kwonnahui/9.wav
Default SR : 8000, Audio Length(Default SR) : 310477, Audio Duration : 38.809625
Audio Duration : 38.809625, Resampling SR : 5153.3607964519115, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770031_kwonnahui/9.pkl

../augmentation3_train_data/2017019740024_choijiwon/5.wav
430 ../augmentation3_train_data/2017019740024_choijiwon/5.wav
Default SR : 8000, Audio Length(Default SR) : 241536, Audio Duration : 30.192
Audio Duration : 30.192, Resampling SR : 6624.271330153683, Result Audio Length : 200000
Output File 

450 ../augmentation3_train_data/2017019880042_namyeji/5.wav
Default SR : 8000, Audio Length(Default SR) : 226454, Audio Duration : 28.30675
Audio Duration : 28.30675, Resampling SR : 7065.452586397238, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880042_namyeji/5.pkl

../augmentation3_train_data/2017019880042_namyeji/4.wav
451 ../augmentation3_train_data/2017019880042_namyeji/4.wav
Default SR : 8000, Audio Length(Default SR) : 231392, Audio Duration : 28.924
Audio Duration : 28.924, Resampling SR : 6914.6729359701285, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880042_namyeji/4.pkl

../augmentation3_train_data/2017019880042_namyeji/8.wav
452 ../augmentation3_train_data/2017019880042_namyeji/8.wav
Default SR : 8000, Audio Length(Default SR) : 181218, Audio Duration : 22.65225
Audio Duration : 22.65225, Resampling SR : 8829.14500767032, Result Audio Length : 200000
Output File Name: ../resa

472 ../augmentation3_train_data/2017019880033_kimnoeun/8.wav
Default SR : 8000, Audio Length(Default SR) : 301227, Audio Duration : 37.653375
Audio Duration : 37.653375, Resampling SR : 5311.608853124057, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880033_kimnoeun/8.pkl

../augmentation3_train_data/2017019880033_kimnoeun/6.wav
473 ../augmentation3_train_data/2017019880033_kimnoeun/6.wav
Default SR : 8000, Audio Length(Default SR) : 268907, Audio Duration : 33.613375
Audio Duration : 33.613375, Resampling SR : 5950.012457838584, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880033_kimnoeun/6.pkl

../augmentation3_train_data/2017019880033_kimnoeun/7.wav
474 ../augmentation3_train_data/2017019880033_kimnoeun/7.wav
Default SR : 8000, Audio Length(Default SR) : 291657, Audio Duration : 36.457125
Audio Duration : 36.457125, Resampling SR : 5485.896103985161, Result Audio Length : 200001
Output F

495 ../augmentation3_train_data/2017019740022_ahnhyojin/3.wav
Default SR : 8000, Audio Length(Default SR) : 238379, Audio Duration : 29.797375
Audio Duration : 29.797375, Resampling SR : 6712.000637640061, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740022_ahnhyojin/3.pkl

../augmentation3_train_data/2017019740022_ahnhyojin/2.wav
496 ../augmentation3_train_data/2017019740022_ahnhyojin/2.wav
Default SR : 8000, Audio Length(Default SR) : 232459, Audio Duration : 29.057375
Audio Duration : 29.057375, Resampling SR : 6882.934194847263, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740022_ahnhyojin/2.pkl

../augmentation3_train_data/2017019740022_ahnhyojin/10.wav
497 ../augmentation3_train_data/2017019740022_ahnhyojin/10.wav
Default SR : 8000, Audio Length(Default SR) : 192246, Audio Duration : 24.03075
Audio Duration : 24.03075, Resampling SR : 8322.669912507932, Result Audio Length : 200000
O

517 ../augmentation3_train_data/2017019770004_leebyeongjin/10.wav
Default SR : 8000, Audio Length(Default SR) : 234291, Audio Duration : 29.286375
Audio Duration : 29.286375, Resampling SR : 6829.114221203547, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770004_leebyeongjin/10.pkl

../augmentation3_train_data/2017019770004_leebyeongjin/1.wav
518 ../augmentation3_train_data/2017019770004_leebyeongjin/1.wav
Default SR : 8000, Audio Length(Default SR) : 201414, Audio Duration : 25.17675
Audio Duration : 25.17675, Resampling SR : 7943.837071901656, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770004_leebyeongjin/1.pkl

../augmentation3_train_data/2017019770004_leebyeongjin/9.wav
519 ../augmentation3_train_data/2017019770004_leebyeongjin/9.wav
Default SR : 8000, Audio Length(Default SR) : 292527, Audio Duration : 36.565875
Audio Duration : 36.565875, Resampling SR : 5469.580585723711, Result Au

539 ../augmentation3_train_data/2017019740038_choihyeonseok/9.wav
Default SR : 8000, Audio Length(Default SR) : 292044, Audio Duration : 36.5055
Audio Duration : 36.5055, Resampling SR : 5478.626508334361, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740038_choihyeonseok/9.pkl

../augmentation3_train_data/2017019770011_kwonyeonwoo/5.wav
540 ../augmentation3_train_data/2017019770011_kwonyeonwoo/5.wav
Default SR : 8000, Audio Length(Default SR) : 181920, Audio Duration : 22.74
Audio Duration : 22.74, Resampling SR : 8795.074758135444, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770011_kwonyeonwoo/5.pkl

../augmentation3_train_data/2017019770011_kwonyeonwoo/4.wav
541 ../augmentation3_train_data/2017019770011_kwonyeonwoo/4.wav
Default SR : 8000, Audio Length(Default SR) : 189526, Audio Duration : 23.69075
Audio Duration : 23.69075, Resampling SR : 8442.113483110496, Result Audio Length : 2000

561 ../augmentation3_train_data/2017019770019_leewooju/4.wav
Default SR : 8000, Audio Length(Default SR) : 216799, Audio Duration : 27.099875
Audio Duration : 27.099875, Resampling SR : 7380.1078418258385, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770019_leewooju/4.pkl

../augmentation3_train_data/2017019770019_leewooju/8.wav
562 ../augmentation3_train_data/2017019770019_leewooju/8.wav
Default SR : 8000, Audio Length(Default SR) : 226027, Audio Duration : 28.253375
Audio Duration : 28.253375, Resampling SR : 7078.800320315715, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770019_leewooju/8.pkl

../augmentation3_train_data/2017019770019_leewooju/6.wav
563 ../augmentation3_train_data/2017019770019_leewooju/6.wav
Default SR : 8000, Audio Length(Default SR) : 199492, Audio Duration : 24.9365
Audio Duration : 24.9365, Resampling SR : 8020.371744230345, Result Audio Length : 200001
Output File

583 ../augmentation3_train_data/2017019770033_kwonsundo/6.wav
Default SR : 8000, Audio Length(Default SR) : 185060, Audio Duration : 23.1325
Audio Duration : 23.1325, Resampling SR : 8645.844590943478, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770033_kwonsundo/6.pkl

../augmentation3_train_data/2017019770033_kwonsundo/7.wav
584 ../augmentation3_train_data/2017019770033_kwonsundo/7.wav
Default SR : 8000, Audio Length(Default SR) : 202429, Audio Duration : 25.303625
Audio Duration : 25.303625, Resampling SR : 7904.005848964328, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770033_kwonsundo/7.pkl

../augmentation3_train_data/2017019770033_kwonsundo/3.wav
585 ../augmentation3_train_data/2017019770033_kwonsundo/3.wav
Default SR : 8000, Audio Length(Default SR) : 257355, Audio Duration : 32.169375
Audio Duration : 32.169375, Resampling SR : 6217.0931203978935, Result Audio Length : 200000
Outp

605 ../augmentation3_train_data/2017019880043_leegawon/3.wav
Default SR : 8000, Audio Length(Default SR) : 217483, Audio Duration : 27.185375
Audio Duration : 27.185375, Resampling SR : 7356.89686090407, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880043_leegawon/3.pkl

../augmentation3_train_data/2017019880043_leegawon/2.wav
606 ../augmentation3_train_data/2017019880043_leegawon/2.wav
Default SR : 8000, Audio Length(Default SR) : 209942, Audio Duration : 26.24275
Audio Duration : 26.24275, Resampling SR : 7621.152508788141, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880043_leegawon/2.pkl

../augmentation3_train_data/2017019880043_leegawon/10.wav
607 ../augmentation3_train_data/2017019880043_leegawon/10.wav
Default SR : 8000, Audio Length(Default SR) : 307429, Audio Duration : 38.428625
Audio Duration : 38.428625, Resampling SR : 5204.453711263414, Result Audio Length : 200001
Output Fi

628 ../augmentation3_train_data/2017019770035_leehyojin/1.wav
Default SR : 8000, Audio Length(Default SR) : 267968, Audio Duration : 33.496
Audio Duration : 33.496, Resampling SR : 5970.862192500596, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770035_leehyojin/1.pkl

../augmentation3_train_data/2017019770035_leehyojin/9.wav
629 ../augmentation3_train_data/2017019770035_leehyojin/9.wav
Default SR : 8000, Audio Length(Default SR) : 202191, Audio Duration : 25.273875
Audio Duration : 25.273875, Resampling SR : 7913.309692320628, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770035_leehyojin/9.pkl

../augmentation3_train_data/2017019880009_kimhongjoo/5.wav
630 ../augmentation3_train_data/2017019880009_kimhongjoo/5.wav
Default SR : 8000, Audio Length(Default SR) : 278176, Audio Duration : 34.772
Audio Duration : 34.772, Resampling SR : 5751.7542850569425, Result Audio Length : 200000
Output Fil

650 ../augmentation3_train_data/2017019770038_kanghyeyun/5.wav
Default SR : 8000, Audio Length(Default SR) : 217803, Audio Duration : 27.225375
Audio Duration : 27.225375, Resampling SR : 7346.087978586153, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770038_kanghyeyun/5.pkl

../augmentation3_train_data/2017019770038_kanghyeyun/4.wav
651 ../augmentation3_train_data/2017019770038_kanghyeyun/4.wav
Default SR : 8000, Audio Length(Default SR) : 212627, Audio Duration : 26.578375
Audio Duration : 26.578375, Resampling SR : 7524.914521674105, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770038_kanghyeyun/4.pkl

../augmentation3_train_data/2017019770038_kanghyeyun/8.wav
652 ../augmentation3_train_data/2017019770038_kanghyeyun/8.wav
Default SR : 8000, Audio Length(Default SR) : 175680, Audio Duration : 21.96
Audio Duration : 21.96, Resampling SR : 9107.468123861567, Result Audio Length : 200000
Ou

673 ../augmentation3_train_data/2017019740010_kwakmirae/6.wav
Default SR : 8000, Audio Length(Default SR) : 169428, Audio Duration : 21.1785
Audio Duration : 21.1785, Resampling SR : 9443.53943858158, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740010_kwakmirae/6.pkl

../augmentation3_train_data/2017019740010_kwakmirae/7.wav
674 ../augmentation3_train_data/2017019740010_kwakmirae/7.wav
Default SR : 8000, Audio Length(Default SR) : 170658, Audio Duration : 21.33225
Audio Duration : 21.33225, Resampling SR : 9375.476098395622, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740010_kwakmirae/7.pkl

../augmentation3_train_data/2017019740010_kwakmirae/3.wav
675 ../augmentation3_train_data/2017019740010_kwakmirae/3.wav
Default SR : 8000, Audio Length(Default SR) : 229440, Audio Duration : 28.68
Audio Duration : 28.68, Resampling SR : 6973.5006973500695, Result Audio Length : 200000
Output File Nam

695 ../augmentation3_train_data/2017019770037_jeonghojun/3.wav
Default SR : 8000, Audio Length(Default SR) : 224832, Audio Duration : 28.104
Audio Duration : 28.104, Resampling SR : 7116.424708226587, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770037_jeonghojun/3.pkl

../augmentation3_train_data/2017019770037_jeonghojun/2.wav
696 ../augmentation3_train_data/2017019770037_jeonghojun/2.wav
Default SR : 8000, Audio Length(Default SR) : 236640, Audio Duration : 29.58
Audio Duration : 29.58, Resampling SR : 6761.32521974307, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770037_jeonghojun/2.pkl

../augmentation3_train_data/2017019770037_jeonghojun/10.wav
697 ../augmentation3_train_data/2017019770037_jeonghojun/10.wav
Default SR : 8000, Audio Length(Default SR) : 184320, Audio Duration : 23.04
Audio Duration : 23.04, Resampling SR : 8680.555555555557, Result Audio Length : 200001
Output File Nam

717 ../augmentation3_train_data/2017019880011_kimjihyun/10.wav
Default SR : 8000, Audio Length(Default SR) : 301777, Audio Duration : 37.722125
Audio Duration : 37.722125, Resampling SR : 5301.928245028614, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880011_kimjihyun/10.pkl

../augmentation3_train_data/2017019880011_kimjihyun/1.wav
718 ../augmentation3_train_data/2017019880011_kimjihyun/1.wav
Default SR : 8000, Audio Length(Default SR) : 210448, Audio Duration : 26.306
Audio Duration : 26.306, Resampling SR : 7602.828252109784, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880011_kimjihyun/1.pkl

../augmentation3_train_data/2017019880011_kimjihyun/9.wav
719 ../augmentation3_train_data/2017019880011_kimjihyun/9.wav
Default SR : 8000, Audio Length(Default SR) : 298841, Audio Duration : 37.355125
Audio Duration : 37.355125, Resampling SR : 5354.017688335938, Result Audio Length : 200001
Outpu

740 ../augmentation3_train_data/2017019880018_wonjoonho/5.wav
Default SR : 8000, Audio Length(Default SR) : 222080, Audio Duration : 27.76
Audio Duration : 27.76, Resampling SR : 7204.610951008645, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880018_wonjoonho/5.pkl

../augmentation3_train_data/2017019880018_wonjoonho/4.wav
741 ../augmentation3_train_data/2017019880018_wonjoonho/4.wav
Default SR : 8000, Audio Length(Default SR) : 205173, Audio Duration : 25.646625
Audio Duration : 25.646625, Resampling SR : 7798.297046882387, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880018_wonjoonho/4.pkl

../augmentation3_train_data/2017019880018_wonjoonho/8.wav
742 ../augmentation3_train_data/2017019880018_wonjoonho/8.wav
Default SR : 8000, Audio Length(Default SR) : 172492, Audio Duration : 21.5615
Audio Duration : 21.5615, Resampling SR : 9275.792500521764, Result Audio Length : 200000
Output File N

762 ../augmentation3_train_data/2017019740031_jominseok/8.wav
Default SR : 8000, Audio Length(Default SR) : 288214, Audio Duration : 36.02675
Audio Duration : 36.02675, Resampling SR : 5551.430534255796, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740031_jominseok/8.pkl

../augmentation3_train_data/2017019740031_jominseok/6.wav
763 ../augmentation3_train_data/2017019740031_jominseok/6.wav
Default SR : 8000, Audio Length(Default SR) : 184895, Audio Duration : 23.111875
Audio Duration : 23.111875, Resampling SR : 8653.560128721707, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740031_jominseok/6.pkl

../augmentation3_train_data/2017019740031_jominseok/7.wav
764 ../augmentation3_train_data/2017019740031_jominseok/7.wav
Default SR : 8000, Audio Length(Default SR) : 292191, Audio Duration : 36.523875
Audio Duration : 36.523875, Resampling SR : 5475.870235565093, Result Audio Length : 200001
Out

784 ../augmentation3_train_data/2017019740035_kimjaeyoung/7.wav
Default SR : 8000, Audio Length(Default SR) : 266423, Audio Duration : 33.302875
Audio Duration : 33.302875, Resampling SR : 6005.4875142161145, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740035_kimjaeyoung/7.pkl

../augmentation3_train_data/2017019740035_kimjaeyoung/3.wav
785 ../augmentation3_train_data/2017019740035_kimjaeyoung/3.wav
Default SR : 8000, Audio Length(Default SR) : 214592, Audio Duration : 26.824
Audio Duration : 26.824, Resampling SR : 7456.009543692216, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740035_kimjaeyoung/3.pkl

../augmentation3_train_data/2017019740035_kimjaeyoung/2.wav
786 ../augmentation3_train_data/2017019740035_kimjaeyoung/2.wav
Default SR : 8000, Audio Length(Default SR) : 186496, Audio Duration : 23.312
Audio Duration : 23.312, Resampling SR : 8579.27247769389, Result Audio Length : 200000

806 ../augmentation3_train_data/2017019880013_kotaewan/2.wav
Default SR : 8000, Audio Length(Default SR) : 194682, Audio Duration : 24.33525
Audio Duration : 24.33525, Resampling SR : 8218.530732168358, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880013_kotaewan/2.pkl

../augmentation3_train_data/2017019880013_kotaewan/10.wav
807 ../augmentation3_train_data/2017019880013_kotaewan/10.wav
Default SR : 8000, Audio Length(Default SR) : 245201, Audio Duration : 30.650125
Audio Duration : 30.650125, Resampling SR : 6525.258869254204, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880013_kotaewan/10.pkl

../augmentation3_train_data/2017019880013_kotaewan/1.wav
808 ../augmentation3_train_data/2017019880013_kotaewan/1.wav
Default SR : 8000, Audio Length(Default SR) : 207090, Audio Duration : 25.88625
Audio Duration : 25.88625, Resampling SR : 7726.109421024675, Result Audio Length : 200000
Output Fi

829 ../augmentation3_train_data/2017019740020_kimseongje/9.wav
Default SR : 8000, Audio Length(Default SR) : 298697, Audio Duration : 37.337125
Audio Duration : 37.337125, Resampling SR : 5356.598827574431, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740020_kimseongje/9.pkl

../augmentation3_train_data/2017019880010_kimhyorin/5.wav
830 ../augmentation3_train_data/2017019880010_kimhyorin/5.wav
Default SR : 8000, Audio Length(Default SR) : 245272, Audio Duration : 30.659
Audio Duration : 30.659, Resampling SR : 6523.369972928015, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880010_kimhyorin/5.pkl

../augmentation3_train_data/2017019880010_kimhyorin/4.wav
831 ../augmentation3_train_data/2017019880010_kimhyorin/4.wav
Default SR : 8000, Audio Length(Default SR) : 256383, Audio Duration : 32.047875
Audio Duration : 32.047875, Resampling SR : 6240.663382517562, Result Audio Length : 200001
Outpu

851 ../augmentation3_train_data/2017019770006_jennie/4.wav
Default SR : 8000, Audio Length(Default SR) : 197888, Audio Duration : 24.736
Audio Duration : 24.736, Resampling SR : 8085.3816300129365, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770006_jennie/4.pkl

../augmentation3_train_data/2017019770006_jennie/8.wav
852 ../augmentation3_train_data/2017019770006_jennie/8.wav
Default SR : 8000, Audio Length(Default SR) : 285257, Audio Duration : 35.657125
Audio Duration : 35.657125, Resampling SR : 5608.977167957316, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770006_jennie/8.pkl

../augmentation3_train_data/2017019770006_jennie/6.wav
853 ../augmentation3_train_data/2017019770006_jennie/6.wav
Default SR : 8000, Audio Length(Default SR) : 283704, Audio Duration : 35.463
Audio Duration : 35.463, Resampling SR : 5639.680794067056, Result Audio Length : 200000
Output File Name: ../resampled_au

873 ../augmentation3_train_data/2017019880029_kimminji/6.wav
Default SR : 8000, Audio Length(Default SR) : 182154, Audio Duration : 22.76925
Audio Duration : 22.76925, Resampling SR : 8783.776365053745, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880029_kimminji/6.pkl

../augmentation3_train_data/2017019880029_kimminji/7.wav
874 ../augmentation3_train_data/2017019880029_kimminji/7.wav
Default SR : 8000, Audio Length(Default SR) : 176862, Audio Duration : 22.10775
Audio Duration : 22.10775, Resampling SR : 9046.601304972239, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880029_kimminji/7.pkl

../augmentation3_train_data/2017019880029_kimminji/3.wav
875 ../augmentation3_train_data/2017019880029_kimminji/3.wav
Default SR : 8000, Audio Length(Default SR) : 220485, Audio Duration : 27.560625
Audio Duration : 27.560625, Resampling SR : 7256.729482731251, Result Audio Length : 200000
Output File 

895 ../augmentation3_train_data/2017019740013_kwakyiheon/3.wav
Default SR : 8000, Audio Length(Default SR) : 242390, Audio Duration : 30.29875
Audio Duration : 30.29875, Resampling SR : 6600.932381698915, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740013_kwakyiheon/3.pkl

../augmentation3_train_data/2017019740013_kwakyiheon/2.wav
896 ../augmentation3_train_data/2017019740013_kwakyiheon/2.wav
Default SR : 8000, Audio Length(Default SR) : 234379, Audio Duration : 29.297375
Audio Duration : 29.297375, Resampling SR : 6826.5501602106, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740013_kwakyiheon/2.pkl

../augmentation3_train_data/2017019740013_kwakyiheon/10.wav
897 ../augmentation3_train_data/2017019740013_kwakyiheon/10.wav
Default SR : 8000, Audio Length(Default SR) : 181711, Audio Duration : 22.713875
Audio Duration : 22.713875, Resampling SR : 8805.190659894008, Result Audio Length : 200

917 ../augmentation3_train_data/2017019740036_leeyongseok/10.wav
Default SR : 8000, Audio Length(Default SR) : 273326, Audio Duration : 34.16575
Audio Duration : 34.16575, Resampling SR : 5853.81559017437, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740036_leeyongseok/10.pkl

../augmentation3_train_data/2017019740036_leeyongseok/1.wav
918 ../augmentation3_train_data/2017019740036_leeyongseok/1.wav
Default SR : 8000, Audio Length(Default SR) : 226656, Audio Duration : 28.332
Audio Duration : 28.332, Resampling SR : 7059.155724975293, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740036_leeyongseok/1.pkl

../augmentation3_train_data/2017019740036_leeyongseok/9.wav
919 ../augmentation3_train_data/2017019740036_leeyongseok/9.wav
Default SR : 8000, Audio Length(Default SR) : 286766, Audio Duration : 35.84575
Audio Duration : 35.84575, Resampling SR : 5579.462000376613, Result Audio Length : 200

939 ../augmentation3_train_data/2017019740017_kwakmihyang/9.wav
Default SR : 8000, Audio Length(Default SR) : 291541, Audio Duration : 36.442625
Audio Duration : 36.442625, Resampling SR : 5488.078863693271, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740017_kwakmihyang/9.pkl

../augmentation3_train_data/2017019740034_kimdongwook/5.wav
940 ../augmentation3_train_data/2017019740034_kimdongwook/5.wav
Default SR : 8000, Audio Length(Default SR) : 200619, Audio Duration : 25.077375
Audio Duration : 25.077375, Resampling SR : 7975.316395755138, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740034_kimdongwook/5.pkl

../augmentation3_train_data/2017019740034_kimdongwook/4.wav
941 ../augmentation3_train_data/2017019740034_kimdongwook/4.wav
Default SR : 8000, Audio Length(Default SR) : 190080, Audio Duration : 23.76
Audio Duration : 23.76, Resampling SR : 8417.508417508418, Result Audio Length : 20

961 ../augmentation3_train_data/2017019880030_hanseungoh/4.wav
Default SR : 8000, Audio Length(Default SR) : 200192, Audio Duration : 25.024
Audio Duration : 25.024, Resampling SR : 7992.3273657289, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880030_hanseungoh/4.pkl

../augmentation3_train_data/2017019880030_hanseungoh/8.wav
962 ../augmentation3_train_data/2017019880030_hanseungoh/8.wav
Default SR : 8000, Audio Length(Default SR) : 294004, Audio Duration : 36.7505
Audio Duration : 36.7505, Resampling SR : 5442.102828532945, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880030_hanseungoh/8.pkl

../augmentation3_train_data/2017019880030_hanseungoh/6.wav
963 ../augmentation3_train_data/2017019880030_hanseungoh/6.wav
Default SR : 8000, Audio Length(Default SR) : 295147, Audio Duration : 36.893375
Audio Duration : 36.893375, Resampling SR : 5421.027487997507, Result Audio Length : 200001
Output

983 ../augmentation3_train_data/2017019770010_parksojin/6.wav
Default SR : 8000, Audio Length(Default SR) : 289866, Audio Duration : 36.23325
Audio Duration : 36.23325, Resampling SR : 5519.7919038452255, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770010_parksojin/6.pkl

../augmentation3_train_data/2017019770010_parksojin/7.wav
984 ../augmentation3_train_data/2017019770010_parksojin/7.wav
Default SR : 8000, Audio Length(Default SR) : 270521, Audio Duration : 33.815125
Audio Duration : 33.815125, Resampling SR : 5914.513106191386, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770010_parksojin/7.pkl

../augmentation3_train_data/2017019770010_parksojin/3.wav
985 ../augmentation3_train_data/2017019770010_parksojin/3.wav
Default SR : 8000, Audio Length(Default SR) : 185660, Audio Duration : 23.2075
Audio Duration : 23.2075, Resampling SR : 8617.90369492621, Result Audio Length : 200000
Output 

1005 ../augmentation3_train_data/2017019770039_ohjiwon/3.wav
Default SR : 8000, Audio Length(Default SR) : 193600, Audio Duration : 24.2
Audio Duration : 24.2, Resampling SR : 8264.462809917355, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770039_ohjiwon/3.pkl

../augmentation3_train_data/2017019770039_ohjiwon/2.wav
1006 ../augmentation3_train_data/2017019770039_ohjiwon/2.wav
Default SR : 8000, Audio Length(Default SR) : 209023, Audio Duration : 26.127875
Audio Duration : 26.127875, Resampling SR : 7654.660013491339, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770039_ohjiwon/2.pkl

../augmentation3_train_data/2017019770039_ohjiwon/10.wav
1007 ../augmentation3_train_data/2017019770039_ohjiwon/10.wav
Default SR : 8000, Audio Length(Default SR) : 267200, Audio Duration : 33.4
Audio Duration : 33.4, Resampling SR : 5988.023952095808, Result Audio Length : 200000
Output File Name: ../resampled

1028 ../augmentation3_train_data/2017019770030_leejeongju/1.wav
Default SR : 8000, Audio Length(Default SR) : 215807, Audio Duration : 26.975875
Audio Duration : 26.975875, Resampling SR : 7414.031982280464, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770030_leejeongju/1.pkl

../augmentation3_train_data/2017019770030_leejeongju/9.wav
1029 ../augmentation3_train_data/2017019770030_leejeongju/9.wav
Default SR : 8000, Audio Length(Default SR) : 307583, Audio Duration : 38.447875
Audio Duration : 38.447875, Resampling SR : 5201.847956486541, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770030_leejeongju/9.pkl

../augmentation3_train_data/2017019740002_kimjiho/5.wav
1030 ../augmentation3_train_data/2017019740002_kimjiho/5.wav
Default SR : 8000, Audio Length(Default SR) : 196907, Audio Duration : 24.613375
Audio Duration : 24.613375, Resampling SR : 8125.6633842372285, Result Audio Length : 200

1050 ../augmentation3_train_data/2017019740019_limjinju/5.wav
Default SR : 8000, Audio Length(Default SR) : 186080, Audio Duration : 23.26
Audio Duration : 23.26, Resampling SR : 8598.452278589853, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740019_limjinju/5.pkl

../augmentation3_train_data/2017019740019_limjinju/4.wav
1051 ../augmentation3_train_data/2017019740019_limjinju/4.wav
Default SR : 8000, Audio Length(Default SR) : 184675, Audio Duration : 23.084375
Audio Duration : 23.084375, Resampling SR : 8663.868958981995, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740019_limjinju/4.pkl

../augmentation3_train_data/2017019740019_limjinju/8.wav
1052 ../augmentation3_train_data/2017019740019_limjinju/8.wav
Default SR : 8000, Audio Length(Default SR) : 292429, Audio Duration : 36.553625
Audio Duration : 36.553625, Resampling SR : 5471.413573893151, Result Audio Length : 200001
Output File N

1073 ../augmentation3_train_data/2017019770023_kimsoyung/6.wav
Default SR : 8000, Audio Length(Default SR) : 195200, Audio Duration : 24.4
Audio Duration : 24.4, Resampling SR : 8196.72131147541, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019770023_kimsoyung/6.pkl

../augmentation3_train_data/2017019770023_kimsoyung/7.wav
1074 ../augmentation3_train_data/2017019770023_kimsoyung/7.wav
Default SR : 8000, Audio Length(Default SR) : 188760, Audio Duration : 23.595
Audio Duration : 23.595, Resampling SR : 8476.37211273575, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019770023_kimsoyung/7.pkl

../augmentation3_train_data/2017019770023_kimsoyung/3.wav
1075 ../augmentation3_train_data/2017019770023_kimsoyung/3.wav
Default SR : 8000, Audio Length(Default SR) : 232160, Audio Duration : 29.02
Audio Duration : 29.02, Resampling SR : 6891.7987594762235, Result Audio Length : 200000
Output File Name: ../re

1095 ../augmentation3_train_data/2017019880020_yoonhyeeun/3.wav
Default SR : 8000, Audio Length(Default SR) : 198720, Audio Duration : 24.84
Audio Duration : 24.84, Resampling SR : 8051.529790660225, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880020_yoonhyeeun/3.pkl

../augmentation3_train_data/2017019880020_yoonhyeeun/2.wav
1096 ../augmentation3_train_data/2017019880020_yoonhyeeun/2.wav
Default SR : 8000, Audio Length(Default SR) : 207328, Audio Duration : 25.916
Audio Duration : 25.916, Resampling SR : 7717.240314863405, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880020_yoonhyeeun/2.pkl

../augmentation3_train_data/2017019880020_yoonhyeeun/10.wav
1097 ../augmentation3_train_data/2017019880020_yoonhyeeun/10.wav
Default SR : 8000, Audio Length(Default SR) : 298011, Audio Duration : 37.251375
Audio Duration : 37.251375, Resampling SR : 5368.92933482321, Result Audio Length : 200000
Outp

1117 ../augmentation3_train_data/2017019880028_kimsunghan/10.wav
Default SR : 8000, Audio Length(Default SR) : 293486, Audio Duration : 36.68575
Audio Duration : 36.68575, Resampling SR : 5451.708088290413, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880028_kimsunghan/10.pkl

../augmentation3_train_data/2017019880028_kimsunghan/1.wav
1118 ../augmentation3_train_data/2017019880028_kimsunghan/1.wav
Default SR : 8000, Audio Length(Default SR) : 230400, Audio Duration : 28.8
Audio Duration : 28.8, Resampling SR : 6944.444444444444, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880028_kimsunghan/1.pkl

../augmentation3_train_data/2017019880028_kimsunghan/9.wav
1119 ../augmentation3_train_data/2017019880028_kimsunghan/9.wav
Default SR : 8000, Audio Length(Default SR) : 297143, Audio Duration : 37.142875
Audio Duration : 37.142875, Resampling SR : 5384.612795859233, Result Audio Length : 200001
O

1139 ../augmentation3_train_data/2017019880032_seoyeonjoo/9.wav
Default SR : 8000, Audio Length(Default SR) : 298606, Audio Duration : 37.32575
Audio Duration : 37.32575, Resampling SR : 5358.23124786508, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880032_seoyeonjoo/9.pkl

../augmentation3_train_data/2017019880027_leesol/5.wav
1140 ../augmentation3_train_data/2017019880027_leesol/5.wav
Default SR : 8000, Audio Length(Default SR) : 203446, Audio Duration : 25.43075
Audio Duration : 25.43075, Resampling SR : 7864.49475536506, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880027_leesol/5.pkl

../augmentation3_train_data/2017019880027_leesol/4.wav
1141 ../augmentation3_train_data/2017019880027_leesol/4.wav
Default SR : 8000, Audio Length(Default SR) : 198472, Audio Duration : 24.809
Audio Duration : 24.809, Resampling SR : 8061.590551815873, Result Audio Length : 200000
Output File Name: ../re

1161 ../augmentation3_train_data/2017019880006_choisuyeon/4.wav
Default SR : 8000, Audio Length(Default SR) : 254390, Audio Duration : 31.79875
Audio Duration : 31.79875, Resampling SR : 6289.555407052164, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880006_choisuyeon/4.pkl

../augmentation3_train_data/2017019880006_choisuyeon/8.wav
1162 ../augmentation3_train_data/2017019880006_choisuyeon/8.wav
Default SR : 8000, Audio Length(Default SR) : 182015, Audio Duration : 22.751875
Audio Duration : 22.751875, Resampling SR : 8790.484300744445, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880006_choisuyeon/8.pkl

../augmentation3_train_data/2017019880006_choisuyeon/6.wav
1163 ../augmentation3_train_data/2017019880006_choisuyeon/6.wav
Default SR : 8000, Audio Length(Default SR) : 174245, Audio Duration : 21.780625
Audio Duration : 21.780625, Resampling SR : 9182.472954747625, Result Audio Length : 

1183 ../augmentation3_train_data/2017019740014_leesumin/6.wav
Default SR : 8000, Audio Length(Default SR) : 297860, Audio Duration : 37.2325
Audio Duration : 37.2325, Resampling SR : 5371.651111260323, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740014_leesumin/6.pkl

../augmentation3_train_data/2017019740014_leesumin/7.wav
1184 ../augmentation3_train_data/2017019740014_leesumin/7.wav
Default SR : 8000, Audio Length(Default SR) : 176132, Audio Duration : 22.0165
Audio Duration : 22.0165, Resampling SR : 9084.09601889492, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740014_leesumin/7.pkl

../augmentation3_train_data/2017019740014_leesumin/3.wav
1185 ../augmentation3_train_data/2017019740014_leesumin/3.wav
Default SR : 8000, Audio Length(Default SR) : 239875, Audio Duration : 29.984375
Audio Duration : 29.984375, Resampling SR : 6670.140698280355, Result Audio Length : 200000
Output File Na

1205 ../augmentation3_train_data/2017019740032_choiyeongmi/3.wav
Default SR : 8000, Audio Length(Default SR) : 216982, Audio Duration : 27.12275
Audio Duration : 27.12275, Resampling SR : 7373.8835479440695, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019740032_choiyeongmi/3.pkl

../augmentation3_train_data/2017019740032_choiyeongmi/2.wav
1206 ../augmentation3_train_data/2017019740032_choiyeongmi/2.wav
Default SR : 8000, Audio Length(Default SR) : 219030, Audio Duration : 27.37875
Audio Duration : 27.37875, Resampling SR : 7304.935396977583, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740032_choiyeongmi/2.pkl

../augmentation3_train_data/2017019740032_choiyeongmi/10.wav
1207 ../augmentation3_train_data/2017019740032_choiyeongmi/10.wav
Default SR : 8000, Audio Length(Default SR) : 181325, Audio Duration : 22.665625
Audio Duration : 22.665625, Resampling SR : 8823.93492347994, Result Audio Le

1227 ../augmentation3_train_data/2017019740011_kimhyeryeong/10.wav
Default SR : 8000, Audio Length(Default SR) : 173243, Audio Duration : 21.655375
Audio Duration : 21.655375, Resampling SR : 9235.582390053278, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740011_kimhyeryeong/10.pkl

../augmentation3_train_data/2017019740011_kimhyeryeong/1.wav
1228 ../augmentation3_train_data/2017019740011_kimhyeryeong/1.wav
Default SR : 8000, Audio Length(Default SR) : 251584, Audio Duration : 31.448
Audio Duration : 31.448, Resampling SR : 6359.70490969219, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019740011_kimhyeryeong/1.pkl

../augmentation3_train_data/2017019740011_kimhyeryeong/9.wav
1229 ../augmentation3_train_data/2017019740011_kimhyeryeong/9.wav
Default SR : 8000, Audio Length(Default SR) : 176394, Audio Duration : 22.04925
Audio Duration : 22.04925, Resampling SR : 9070.603308502556, Result Audio 

1250 ../augmentation3_train_data/2017019880034_leesujung/5.wav
Default SR : 8000, Audio Length(Default SR) : 235040, Audio Duration : 29.38
Audio Duration : 29.38, Resampling SR : 6807.351940095303, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880034_leesujung/5.pkl

../augmentation3_train_data/2017019880034_leesujung/4.wav
1251 ../augmentation3_train_data/2017019880034_leesujung/4.wav
Default SR : 8000, Audio Length(Default SR) : 241448, Audio Duration : 30.181
Audio Duration : 30.181, Resampling SR : 6626.685663165567, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880034_leesujung/4.pkl

../augmentation3_train_data/2017019880034_leesujung/8.wav
1252 ../augmentation3_train_data/2017019880034_leesujung/8.wav
Default SR : 8000, Audio Length(Default SR) : 185231, Audio Duration : 23.153875
Audio Duration : 23.153875, Resampling SR : 8637.862992695607, Result Audio Length : 200000
Output File 

1273 ../augmentation3_train_data/2017019880003_kimminji/6.wav
Default SR : 8000, Audio Length(Default SR) : 171231, Audio Duration : 21.403875
Audio Duration : 21.403875, Resampling SR : 9344.102411362428, Result Audio Length : 200000
Output File Name: ../resampled_augmentation3_1_train_data/2017019880003_kimminji/6.pkl

../augmentation3_train_data/2017019880003_kimminji/7.wav
1274 ../augmentation3_train_data/2017019880003_kimminji/7.wav
Default SR : 8000, Audio Length(Default SR) : 169801, Audio Duration : 21.225125
Audio Duration : 21.225125, Resampling SR : 9422.794918757842, Result Audio Length : 200001
Output File Name: ../resampled_augmentation3_1_train_data/2017019880003_kimminji/7.pkl

../augmentation3_train_data/2017019880003_kimminji/3.wav
1275 ../augmentation3_train_data/2017019880003_kimminji/3.wav
Default SR : 8000, Audio Length(Default SR) : 234658, Audio Duration : 29.33225
Audio Duration : 29.33225, Resampling SR : 6818.433635333124, Result Audio Length : 200001
Output 

In [8]:
order = 0
list_dir.sort()
class_ids = {list_dir[i]: i for i in range(len(list_dir))}

def extract_class_id(wav_filename):
    return class_ids.get(wav_filename.split('/')[2])

def read_audio_from_filename(filename, target_sr):
    audio, _ = librosa.load(filename, sr=TARGET_SR, mono=True)
    audio = audio.reshape(-1, 1)
    return audio

def get_audio_buf(filename):
    audio_buf = None
    sample_rate = None
    
    with open(filename, 'rb') as f:
        audio_element = pickle.load(f)
        audio_buf = audio_element['resampled_audio']
        sample_rate = audio_element['resample_sr']
    
    return audio_buf, sample_rate

def convert_data():
    
    for i, wav_filename in enumerate(iglob(os.path.join(DATA_RESAMPLED_AUDIO_DIR, '**/**.pkl'), recursive=True)):
        class_id = extract_class_id(wav_filename)
#         audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)
        audio_buf,_ = get_audio_buf(wav_filename)
        print(type(audio_buf))
        
        # normalize mean 0, variance 1
        audio_buf = (audio_buf - np.mean(audio_buf)) / np.std(audio_buf)
        original_length = len(audio_buf)
        print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))
        
        voice_seg = []
        
        if original_length >= AUDIO_LENGTH + PAD_SIZE:
            audio_seg = audio_buf[PAD_SIZE : AUDIO_LENGTH + PAD_SIZE]
            voice_seg.append(audio_seg)
            print("Audio Segment Length : {}".format(len(audio_seg)))
        else:
            print('Drop Audio Segment, Audio length={}'.format(len(audio_buf)))
            
        output_folder = OUTPUT_DIR_RESAMPLED_TRAIN
        if wav_filename[-5:] == '5.pkl' or wav_filename[-5:] == '6.pkl':
            output_folder = OUTPUT_DIR_RESAMPLED_TEST
            
        print(str(wav_filename[24:-4]).replace('/', '_') )
        output_filename = os.path.join(output_folder ,wav_filename.split('/')[2]+"_"+str(wav_filename.replace('.','/').split('/')[5]) + '.pkl')
#         output_filename = os.path.join(output_folder, str(wav_filename[24:-4]).replace('/', '_') + '.pkl')

        out_segs = []
        for i_seg, audio_seg in enumerate(voice_seg) :
            out = {'class_id': class_id,
               'audio': audio_seg,
               'sr': TARGET_SR}
            
            with open(output_filename, 'wb') as w:
                pickle.dump(out, w)

            print("Output File Name : "+output_filename)
        
        print("-----------------")
      

### Convert Data

In [9]:
convert_data()

<class 'numpy.ndarray'>
0 ../resampled_augmentation3_1_train_data/2017019770040_kimdajeong/8.pkl 200000 -0.0 1.0000001
Audio Segment Length : 200000
n3_1_train_data_2017019770040_kimdajeong_8
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770040_kimdajeong_8.pkl
-----------------
<class 'numpy.ndarray'>
1 ../resampled_augmentation3_1_train_data/2017019770040_kimdajeong/5.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019770040_kimdajeong_5
Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019770040_kimdajeong_5.pkl
-----------------
<class 'numpy.ndarray'>
2 ../resampled_augmentation3_1_train_data/2017019770040_kimdajeong/7.pkl 200000 -0.0 0.99999994
Audio Segment Length : 200000
n3_1_train_data_2017019770040_kimdajeong_7
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770040_kimdajeong_7.pkl
-----------------
<class 'numpy.ndarray'>
3 ../resampled_augmentation3_1_train_data/2017019770040

Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880002_kimkihyeon_10.pkl
-----------------
<class 'numpy.ndarray'>
97 ../resampled_augmentation3_1_train_data/2017019880002_kimkihyeon/9.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019880002_kimkihyeon_9
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880002_kimkihyeon_9.pkl
-----------------
<class 'numpy.ndarray'>
98 ../resampled_augmentation3_1_train_data/2017019880002_kimkihyeon/3.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019880002_kimkihyeon_3
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880002_kimkihyeon_3.pkl
-----------------
<class 'numpy.ndarray'>
99 ../resampled_augmentation3_1_train_data/2017019880002_kimkihyeon/2.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019880002_kimkihyeon_2
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880002_kimki

Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740006_kangsubin_1.pkl
-----------------
<class 'numpy.ndarray'>
214 ../resampled_augmentation3_1_train_data/2017019740006_kangsubin/4.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740006_kangsubin_4
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740006_kangsubin_4.pkl
-----------------
<class 'numpy.ndarray'>
215 ../resampled_augmentation3_1_train_data/2017019740006_kangsubin/6.pkl 200000 0.0 0.9999999
Audio Segment Length : 200000
n3_1_train_data_2017019740006_kangsubin_6
Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019740006_kangsubin_6.pkl
-----------------
<class 'numpy.ndarray'>
216 ../resampled_augmentation3_1_train_data/2017019740006_kangsubin/10.pkl 200000 0.0 0.9999999
Audio Segment Length : 200000
n3_1_train_data_2017019740006_kangsubin_10
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740006_ka

334 ../resampled_augmentation3_1_train_data/2017019770016_parkjongae/4.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019770016_parkjongae_4
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770016_parkjongae_4.pkl
-----------------
<class 'numpy.ndarray'>
335 ../resampled_augmentation3_1_train_data/2017019770016_parkjongae/6.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019770016_parkjongae_6
Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019770016_parkjongae_6.pkl
-----------------
<class 'numpy.ndarray'>
336 ../resampled_augmentation3_1_train_data/2017019770016_parkjongae/10.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019770016_parkjongae_10
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770016_parkjongae_10.pkl
-----------------
<class 'numpy.ndarray'>
337 ../resampled_augmentation3_1_train_data/2017019770016_parkjongae/9.pkl 200000 0.0

Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019880042_namyeji_6.pkl
-----------------
<class 'numpy.ndarray'>
456 ../resampled_augmentation3_1_train_data/2017019880042_namyeji/10.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019880042_namyeji_10
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880042_namyeji_10.pkl
-----------------
<class 'numpy.ndarray'>
457 ../resampled_augmentation3_1_train_data/2017019880042_namyeji/9.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019880042_namyeji_9
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880042_namyeji_9.pkl
-----------------
<class 'numpy.ndarray'>
458 ../resampled_augmentation3_1_train_data/2017019880042_namyeji/3.pkl 200000 0.0 0.99999994
Audio Segment Length : 200000
n3_1_train_data_2017019880042_namyeji_3
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880042_namyeji_3.pkl
--------

Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019880035_songhyunjin_6.pkl
-----------------
<class 'numpy.ndarray'>
576 ../resampled_augmentation3_1_train_data/2017019880035_songhyunjin/10.pkl 200000 -0.0 1.0000001
Audio Segment Length : 200000
n3_1_train_data_2017019880035_songhyunjin_10
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880035_songhyunjin_10.pkl
-----------------
<class 'numpy.ndarray'>
577 ../resampled_augmentation3_1_train_data/2017019880035_songhyunjin/9.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019880035_songhyunjin_9
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019880035_songhyunjin_9.pkl
-----------------
<class 'numpy.ndarray'>
578 ../resampled_augmentation3_1_train_data/2017019880035_songhyunjin/3.pkl 200000 0.0 0.99999994
Audio Segment Length : 200000
n3_1_train_data_2017019880035_songhyunjin_3
Output File Name : ../output_una_re_augmentation3_1/resampled_

696 ../resampled_augmentation3_1_train_data/2017019770037_jeonghojun/10.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019770037_jeonghojun_10
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770037_jeonghojun_10.pkl
-----------------
<class 'numpy.ndarray'>
697 ../resampled_augmentation3_1_train_data/2017019770037_jeonghojun/9.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019770037_jeonghojun_9
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770037_jeonghojun_9.pkl
-----------------
<class 'numpy.ndarray'>
698 ../resampled_augmentation3_1_train_data/2017019770037_jeonghojun/3.pkl 200000 -0.0 0.9999999
Audio Segment Length : 200000
n3_1_train_data_2017019770037_jeonghojun_3
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019770037_jeonghojun_3.pkl
-----------------
<class 'numpy.ndarray'>
699 ../resampled_augmentation3_1_train_data/2017019770037_jeonghojun/2.pkl 2

<class 'numpy.ndarray'>
817 ../resampled_augmentation3_1_train_data/2017019740043_kimtaeri/9.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740043_kimtaeri_9
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740043_kimtaeri_9.pkl
-----------------
<class 'numpy.ndarray'>
818 ../resampled_augmentation3_1_train_data/2017019740043_kimtaeri/3.pkl 200000 -0.0 0.99999994
Audio Segment Length : 200000
n3_1_train_data_2017019740043_kimtaeri_3
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740043_kimtaeri_3.pkl
-----------------
<class 'numpy.ndarray'>
819 ../resampled_augmentation3_1_train_data/2017019740043_kimtaeri/2.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740043_kimtaeri_2
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740043_kimtaeri_2.pkl
-----------------
<class 'numpy.ndarray'>
820 ../resampled_augmentation3_1_train_data/2017019740020_kimseongje/8.pkl

Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740017_kwakmihyang_2.pkl
-----------------
<class 'numpy.ndarray'>
940 ../resampled_augmentation3_1_train_data/2017019740034_kimdongwook/8.pkl 200000 -0.0 1.0000001
Audio Segment Length : 200000
n3_1_train_data_2017019740034_kimdongwook_8
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740034_kimdongwook_8.pkl
-----------------
<class 'numpy.ndarray'>
941 ../resampled_augmentation3_1_train_data/2017019740034_kimdongwook/5.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740034_kimdongwook_5
Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019740034_kimdongwook_5.pkl
-----------------
<class 'numpy.ndarray'>
942 ../resampled_augmentation3_1_train_data/2017019740034_kimdongwook/7.pkl 200000 -0.0 1.0000001
Audio Segment Length : 200000
n3_1_train_data_2017019740034_kimdongwook_7
Output File Name : ../output_una_re_augmentation3_1/resampled_trai

Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740008_chochaeyeon_7.pkl
-----------------
<class 'numpy.ndarray'>
1063 ../resampled_augmentation3_1_train_data/2017019740008_chochaeyeon/1.pkl 200000 -0.0 0.9999999
Audio Segment Length : 200000
n3_1_train_data_2017019740008_chochaeyeon_1
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740008_chochaeyeon_1.pkl
-----------------
<class 'numpy.ndarray'>
1064 ../resampled_augmentation3_1_train_data/2017019740008_chochaeyeon/4.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740008_chochaeyeon_4
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740008_chochaeyeon_4.pkl
-----------------
<class 'numpy.ndarray'>
1065 ../resampled_augmentation3_1_train_data/2017019740008_chochaeyeon/6.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740008_chochaeyeon_6
Output File Name : ../output_una_re_augmentation3_1/resampled_test/2

<class 'numpy.ndarray'>
1184 ../resampled_augmentation3_1_train_data/2017019740014_leesumin/4.pkl 200000 0.0 1.0000001
Audio Segment Length : 200000
n3_1_train_data_2017019740014_leesumin_4
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740014_leesumin_4.pkl
-----------------
<class 'numpy.ndarray'>
1185 ../resampled_augmentation3_1_train_data/2017019740014_leesumin/6.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740014_leesumin_6
Output File Name : ../output_una_re_augmentation3_1/resampled_test/2017019740014_leesumin_6.pkl
-----------------
<class 'numpy.ndarray'>
1186 ../resampled_augmentation3_1_train_data/2017019740014_leesumin/10.pkl 200000 0.0 1.0
Audio Segment Length : 200000
n3_1_train_data_2017019740014_leesumin_10
Output File Name : ../output_una_re_augmentation3_1/resampled_train/2017019740014_leesumin_10.pkl
-----------------
<class 'numpy.ndarray'>
1187 ../resampled_augmentation3_1_train_data/2017019740014_leesumin/9.p

In [10]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Lambda, Bidirectional, LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow import convert_to_tensor
from tensorflow import expand_dims
import numpy as np
import pickle
import os
from glob import glob
import tensorflow

print(tensorflow.__version__)

2.1.0


In [11]:
list_dir = os.listdir(DATA_RESAMPLED_AUDIO_DIR)
list_dir.sort()

class_ids = {list_dir[i]: i for i in range(len(list_dir))}

In [12]:
def m5(num_classes):
    print("Class Num", num_classes)
    print('Using Model M5')
    m = Sequential()
    m.add(Conv1D(128,
                 input_shape=[AUDIO_LENGTH, 1],
                 kernel_size=80,
                 strides=4,
                 padding='same',
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=regularizers.l2(l=0.0001)))
    m.add(BatchNormalization())
    m.add(Activation('relu'))
    m.add(MaxPooling1D(pool_size=4, strides=None))
    m.add(Conv1D(128,
                 kernel_size=3,
                 strides=1,
                 padding='same',
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=regularizers.l2(l=0.0001)))
    m.add(BatchNormalization())
    m.add(Activation('relu'))
    m.add(MaxPooling1D(pool_size=4, strides=None))
    m.add(Conv1D(256,
                 kernel_size=3,
                 strides=1,
                 padding='same',
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=regularizers.l2(l=0.0001)))
    m.add(BatchNormalization())
    m.add(Activation('relu'))
    m.add(MaxPooling1D(pool_size=4, strides=None))
    m.add(Conv1D(512,
                 kernel_size=3,
                 strides=1,
                 padding='same',
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=regularizers.l2(l=0.0001)))
    m.add(BatchNormalization())
    m.add(Activation('relu'))
    m.add(MaxPooling1D(pool_size=4, strides=None))
    m.add(Lambda(lambda x: K.mean(x, axis=1)))  # Same as GAP for 1D Conv Layer
    m.add(Dense(num_classes, activation='softmax'))
    return m

In [13]:
def get_data(file_list):
    def load_into(_filename, _x, _y):
        with open(_filename, 'rb') as f:
            audio_element = pickle.load(f)
            _x.append(audio_element['audio'])
            _y.append(int(audio_element['class_id']))

    x, y = [], []
    for filename in file_list:
        load_into(filename, x, y)
    return np.array(x), np.array(y)

In [14]:
num_classes = len(list_dir)
model = m5(num_classes)

if model is None:
    exit('Something went wrong!!')

model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
print(model.summary())

Class Num 128
Using Model M5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 50000, 128)        10368     
_________________________________________________________________
batch_normalization (BatchNo (None, 50000, 128)        512       
_________________________________________________________________
activation (Activation)      (None, 50000, 128)        0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 12500, 128)        0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 12500, 128)        49280     
_________________________________________________________________
batch_normalization_1 (Batch (None, 12500, 128)        512       
_________________________________________________________________
activation_1 (Activation)  

In [15]:
train_files = glob(os.path.join(OUTPUT_DIR_RESAMPLED_TRAIN, '**.pkl'))
print(os.path.join(OUTPUT_DIR_RESAMPLED_TRAIN, '**.pkl'))
x_tr, y_tr = get_data(train_files)

y_tr = to_categorical(y_tr, num_classes=num_classes)
x_tr = x_tr.reshape(-1, x_tr.shape[1], 1)


test_files = glob(os.path.join(OUTPUT_DIR_RESAMPLED_TEST, '**.pkl'))
x_te, y_te = get_data(test_files)

y_te = to_categorical(y_te, num_classes=num_classes)
x_te = x_te.reshape(-1, x_te.shape[1], 1)


print('x_tr.shape =', x_tr.shape)
print('y_tr.shape =', y_tr.shape)
print('x_te.shape =', x_te.shape)
print('y_te.shape =', y_te.shape)

../output_una_re_augmentation3_1/resampled_train/**.pkl
x_tr.shape = (1024, 200000, 1)
y_tr.shape = (1024, 128)
x_te.shape = (256, 200000, 1)
y_te.shape = (256, 128)


In [16]:
# if the accuracy does not increase over 10 epochs, reduce the learning rate by half.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=10, min_lr=0.00005, verbose=1)
batch_size = 128
history = model.fit(x=x_tr, y=y_tr, batch_size=16, epochs=100, verbose=2, shuffle=True, validation_data=(x_te, y_te), callbacks=[reduce_lr])


Train on 1024 samples, validate on 256 samples
Epoch 1/100
1024/1024 - 14s - loss: 5.0321 - accuracy: 0.0254 - val_loss: 4.7272 - val_accuracy: 0.0234
Epoch 2/100
1024/1024 - 12s - loss: 4.0504 - accuracy: 0.0762 - val_loss: 4.7831 - val_accuracy: 0.0273
Epoch 3/100
1024/1024 - 12s - loss: 3.5338 - accuracy: 0.1475 - val_loss: 4.6556 - val_accuracy: 0.0352
Epoch 4/100
1024/1024 - 12s - loss: 3.0477 - accuracy: 0.2832 - val_loss: 3.5732 - val_accuracy: 0.1680
Epoch 5/100
1024/1024 - 13s - loss: 2.6463 - accuracy: 0.3418 - val_loss: 3.3376 - val_accuracy: 0.2109
Epoch 6/100
1024/1024 - 13s - loss: 2.2836 - accuracy: 0.4570 - val_loss: 3.4909 - val_accuracy: 0.1914
Epoch 7/100
1024/1024 - 13s - loss: 2.0498 - accuracy: 0.5117 - val_loss: 2.7633 - val_accuracy: 0.2930
Epoch 8/100
1024/1024 - 13s - loss: 1.8321 - accuracy: 0.5615 - val_loss: 3.0966 - val_accuracy: 0.2891
Epoch 9/100
1024/1024 - 13s - loss: 1.6707 - accuracy: 0.6162 - val_loss: 2.3218 - val_accuracy: 0.4531
Epoch 10/100
1024

Epoch 76/100
1024/1024 - 12s - loss: 0.1201 - accuracy: 0.9766 - val_loss: 0.6646 - val_accuracy: 0.8516
Epoch 77/100
1024/1024 - 12s - loss: 0.1199 - accuracy: 0.9814 - val_loss: 0.6942 - val_accuracy: 0.8477
Epoch 78/100
1024/1024 - 12s - loss: 0.1188 - accuracy: 0.9766 - val_loss: 0.6811 - val_accuracy: 0.8477
Epoch 79/100
1024/1024 - 12s - loss: 0.1177 - accuracy: 0.9834 - val_loss: 0.6592 - val_accuracy: 0.8633
Epoch 80/100
1024/1024 - 12s - loss: 0.1221 - accuracy: 0.9746 - val_loss: 0.6838 - val_accuracy: 0.8438
Epoch 81/100
1024/1024 - 12s - loss: 0.1195 - accuracy: 0.9805 - val_loss: 0.6720 - val_accuracy: 0.8594
Epoch 82/100
1024/1024 - 13s - loss: 0.1191 - accuracy: 0.9795 - val_loss: 0.6723 - val_accuracy: 0.8516
Epoch 83/100
1024/1024 - 12s - loss: 0.1195 - accuracy: 0.9805 - val_loss: 0.6897 - val_accuracy: 0.8398
Epoch 84/100
1024/1024 - 12s - loss: 0.1168 - accuracy: 0.9814 - val_loss: 0.7038 - val_accuracy: 0.8438
Epoch 85/100
1024/1024 - 12s - loss: 0.1156 - accuracy:

In [17]:
import matplotlib.pyplot as plt
plt.figure(figsize=(14, 6))
plt.rc('font', size=18)

plt.plot(history.history["accuracy"])
plt.plot(history.history["val_accuracy"])
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.legend(["train_accuracy", "val_accuracy"])
plt.show()

<Figure size 1400x600 with 1 Axes>

In [38]:
import pickle
from glob import iglob
import numpy as np
import librosa
from shutil import rmtree
from constants import *


OUTPUT_VAL_DIR = '../output_val_una_re_augmentation'
OUTPUT_DIR_VAL_TRAIN = os.path.join(OUTPUT_VAL_DIR, 'train')
OUTPUT_DIR_VAL_TEST = os.path.join(OUTPUT_VAL_DIR, 'test')

DATA_AUDIO_VAL_DIR = '../vad_test_audio'

OUTPUT_DIR_RESAMPLED_VAL_TRAIN = os.path.join(OUTPUT_VAL_DIR, 'resampled_train')
OUTPUT_DIR_RESAMPLED_VAL_TEST = os.path.join(OUTPUT_VAL_DIR, 'resampled_test')

DATA_RESAMPLED_AUDIO_VAL_DIR = '../resampled_augmentation_val_data'

In [39]:
del_folder(DATA_RESAMPLED_AUDIO_VAL_DIR)
mkdir_p(DATA_RESAMPLED_AUDIO_VAL_DIR)

In [40]:
del_folder(OUTPUT_DIR_VAL_TRAIN)
del_folder(OUTPUT_DIR_VAL_TEST)
mkdir_p(OUTPUT_DIR_VAL_TRAIN)
mkdir_p(OUTPUT_DIR_VAL_TEST)

In [41]:
del_folder(OUTPUT_DIR_RESAMPLED_VAL_TRAIN)
del_folder(OUTPUT_DIR_RESAMPLED_VAL_TEST)
mkdir_p(OUTPUT_DIR_RESAMPLED_VAL_TRAIN)
mkdir_p(OUTPUT_DIR_RESAMPLED_VAL_TEST)

In [42]:
def resampling_val_audio():
     for i, wav_filename in enumerate(iglob(os.path.join(DATA_AUDIO_VAL_DIR, '**/**.wav'), recursive=True)):
        y, sr = librosa.load(wav_filename, sr=TARGET_SR, mono=True, duration=MAX_AUDIO_LENGTH)
        audio_duration = len(y) / sr
        
        resample_sr = TARGET_LENGTH / audio_duration        
        resample = librosa.resample(y, sr, resample_sr)
        print(i, wav_filename)
        print("Default SR : {}, Audio Length(Default SR) : {}, Audio Duration : {}".format(TARGET_SR, len(y), audio_duration))
        print("Audio Duration : {}, Resampling SR : {}, Result Audio Length : {}".format(audio_duration, resample_sr, len(resample)))
        
        if len(resample) < TARGET_LENGTH:
            resample = np.concatenate((resample, np.zeros(shape=(TARGET_LENGTH - len(resample), 1))))
        resample = resample[:TARGET_LENGTH]
        
        print("Final Audio Length : {}".format(len(resample)))

        
        output_folder = DATA_RESAMPLED_AUDIO_VAL_DIR
        if not os.path.exists(os.path.join(output_folder, wav_filename.split('/')[2])):
                mkdir_p(os.path.join(output_folder, wav_filename.split('/')[2]))
                
        output_filename = os.path.join(output_folder ,wav_filename.split('/')[2],str(wav_filename.replace('.','/').split('/')[5]) + '.pkl')
#         output_filename = os.path.join(output_folder, wav_filename.split('/')[2], str(wav_filename.split('/')[3][0]+".pkl"))
        print("Output File Name: {}\n".format(output_filename))
        
        result = {'resampled_audio': resample,
               'resample_sr': resample_sr}

        with open(output_filename, 'wb') as w:
                  pickle.dump(result, w)

In [43]:
resampling_val_audio()

0 ../vad_test_audio/2017019880001_kimsubin/5.wav
Default SR : 16000, Audio Length(Default SR) : 329874, Audio Duration : 20.617125
Audio Duration : 20.617125, Resampling SR : 9700.673590522441, Result Audio Length : 200000
Final Audio Length : 200000
Output File Name: ../resampled_augmentation_val_data/2017019880001_kimsubin/5.pkl

1 ../vad_test_audio/2017019880001_kimsubin/4.wav
Default SR : 16000, Audio Length(Default SR) : 346389, Audio Duration : 21.6493125
Audio Duration : 21.6493125, Resampling SR : 9238.168648542534, Result Audio Length : 200000
Final Audio Length : 200000
Output File Name: ../resampled_augmentation_val_data/2017019880001_kimsubin/4.pkl

2 ../vad_test_audio/2017019880001_kimsubin/6.wav
Default SR : 16000, Audio Length(Default SR) : 282013, Audio Duration : 17.6258125
Audio Duration : 17.6258125, Resampling SR : 11346.99464209097, Result Audio Length : 200001
Final Audio Length : 200000
Output File Name: ../resampled_augmentation_val_data/2017019880001_kimsubin/6

In [44]:
PAD_SIZE = 0
TARGET_LENGTH = 200000
TARGET_SR = 8000
MAX_AUDIO_LENGTH = 200
AUDIO_LENGTH = TARGET_LENGTH
SEGMENT_NUM = 1

In [45]:
def convert_val_data():

    for i, wav_filename in enumerate(iglob(os.path.join(DATA_RESAMPLED_AUDIO_VAL_DIR, '**/**.pkl'), recursive=True)):
        class_id = extract_class_id(wav_filename)
#         audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)
        audio_buf,_ = get_audio_buf(wav_filename)
        print(type(audio_buf))
        
        # normalize mean 0, variance 1
        audio_buf = (audio_buf - np.mean(audio_buf)) / np.std(audio_buf)
        original_length = len(audio_buf)
        print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))
        
        voice_seg = []
        
        if original_length >= AUDIO_LENGTH + PAD_SIZE:
            audio_seg = audio_buf[PAD_SIZE : AUDIO_LENGTH + PAD_SIZE]
            voice_seg.append(audio_seg)
            print("Audio Segment Length : {}".format(len(audio_seg)))
        else:
            print('Drop Audio Segment, Audio length={}'.format(len(audio_buf)))
            
        output_folder = OUTPUT_DIR_RESAMPLED_VAL_TRAIN
            
        output_filename = os.path.join(output_folder ,wav_filename.split('/')[2]+"_"+str(wav_filename.replace('.','/').split('/')[5]) + '.pkl')
#         output_filename = os.path.join(output_folder, str(wav_filename[24:-4]).replace('/', '_') + '.pkl')

        out_segs = []
        for i_seg, audio_seg in enumerate(voice_seg) :
            out = {'class_id': class_id,
               'audio': audio_seg,
               'sr': TARGET_SR}
            
            with open(output_filename, 'wb') as w:
                pickle.dump(out, w)

            print("Output File Name : "+output_filename)
        
        print("-----------------")

if __name__ == '__main__':
    convert_val_data()

<class 'numpy.ndarray'>
0 ../resampled_augmentation_val_data/2017019880001_kimsubin/5.pkl 200000 0.0 0.99999994
Audio Segment Length : 200000
Output File Name : ../output_val_una_re_augmentation/resampled_train/2017019880001_kimsubin_5.pkl
-----------------
<class 'numpy.ndarray'>
1 ../resampled_augmentation_val_data/2017019880001_kimsubin/1.pkl 200000 -0.0 0.99999994
Audio Segment Length : 200000
Output File Name : ../output_val_una_re_augmentation/resampled_train/2017019880001_kimsubin_1.pkl
-----------------
<class 'numpy.ndarray'>
2 ../resampled_augmentation_val_data/2017019880001_kimsubin/4.pkl 200000 0.0 1.0
Audio Segment Length : 200000
Output File Name : ../output_val_una_re_augmentation/resampled_train/2017019880001_kimsubin_4.pkl
-----------------
<class 'numpy.ndarray'>
3 ../resampled_augmentation_val_data/2017019880001_kimsubin/6.pkl 200000 -0.0 1.0
Audio Segment Length : 200000
Output File Name : ../output_val_una_re_augmentation/resampled_train/2017019880001_kimsubin_6.pk

In [46]:
from sklearn.metrics import accuracy_score

val_files = glob(os.path.join(OUTPUT_DIR_RESAMPLED_VAL_TRAIN, '**.pkl'))
val_files.sort()
# print(val_files)
x_val, y_val = get_data(val_files)
x_val = x_val.reshape(-1, x_val.shape[1], 1)

print('y_val : ', y_val)
print(x_val.shape, y_val.shape)


y_val :  [ 0  0  0  0  0  0 13 13 13 13 13 13 28 43 43 43 43 43 43 83 83 83 83 83
 83]
(25, 200000, 1) (25,)


In [47]:
pred_out = model.predict(x_val)

real =[]
pred_out_idex=[]

list_dir = os.listdir(DATA_RESAMPLED_AUDIO_DIR)
list_dir.sort()

j = 0
for i,pred in enumerate(pred_out) :
    if np.argmax(pred) != y_val[i]:
        print('\033[31m'+"Predict :"+str(list_dir[np.argmax(pred)])+" , Real :"+str(list_dir[y_val[i]])+", Likelihood :"+str(np.max(pred)))
    else:
        print('\033[30m'+"Predict :"+str(list_dir[np.argmax(pred)])+" , Real :"+str(list_dir[y_val[i]])+", Likelihood :"+str(np.max(pred)))
    real.append(list_dir.index(list_dir[y_val[i]]))
    pred_out_idex.append(np.argmax(pred))
    
# get the accuracy
print ('\n\033[30m'+"Final Predict Likelihood : "+format(accuracy_score(real, pred_out_idex)*100, ".4f"))

[30mPredict :2017019740001_kwakjuheon , Real :2017019740001_kwakjuheon, Likelihood :0.99424756
[30mPredict :2017019740001_kwakjuheon , Real :2017019740001_kwakjuheon, Likelihood :0.99950683
[30mPredict :2017019740001_kwakjuheon , Real :2017019740001_kwakjuheon, Likelihood :0.9981945
[30mPredict :2017019740001_kwakjuheon , Real :2017019740001_kwakjuheon, Likelihood :0.97704405
[30mPredict :2017019740001_kwakjuheon , Real :2017019740001_kwakjuheon, Likelihood :0.9876264
[31mPredict :2017019740002_kimjiho , Real :2017019740001_kwakjuheon, Likelihood :0.9678687
[30mPredict :2017019740014_leesumin , Real :2017019740014_leesumin, Likelihood :0.7057984
[31mPredict :2017019740038_choihyeonseok , Real :2017019740014_leesumin, Likelihood :0.5008034
[31mPredict :2017019740038_choihyeonseok , Real :2017019740014_leesumin, Likelihood :0.57155854
[30mPredict :2017019740014_leesumin , Real :2017019740014_leesumin, Likelihood :0.62993175
[30mPredict :2017019740014_leesumin , Real :201701974