In [22]:
import os
import pickle
from glob import iglob
from shutil import rmtree

import librosa
import numpy as np

from constants import *

# Root folder of the training recordings; its entries are used as class names.
DATA_AUDIO_DIR = '../test_train_data'
# Sorted so that the position of each entry (its class index) is deterministic.
list_dir = sorted(os.listdir(DATA_AUDIO_DIR))

# Pickled segments are written below OUTPUT_DIR, split into train/ and test/.
OUTPUT_DIR = '../output'
OUTPUT_DIR_TRAIN, OUTPUT_DIR_TEST = (
    os.path.join(OUTPUT_DIR, split_name) for split_name in ('train', 'test')
)


PAD_SIZE = 10000               # samples skipped at the start of every recording
TARGET_SR = 22050              # sample rate requested from librosa when loading
AUDIO_LENGTH = TARGET_SR * 10  # samples per segment (10 seconds at TARGET_SR)
SEGMENT_NUM = 2                # number of segments cut from a long-enough file

In [26]:
def mkdir_p(path):
    """Create ``path`` and any missing parent directories (like ``mkdir -p``).

    Succeeds silently when ``path`` already exists as a directory.

    Raises:
        OSError: if ``path`` exists but is not a directory, or the directory
            cannot be created for any other reason.
    """
    # exist_ok=True gives the "already a directory is fine" semantics that the
    # manual errno.EEXIST / isdir check used to implement.
    os.makedirs(path, exist_ok=True)


def del_folder(path):
    """Remove the directory tree at ``path`` on a best-effort basis.

    A missing ``path`` is ignored silently. Any other OS-level failure is
    printed and swallowed so the caller keeps running. Unlike a bare
    ``except``, this does not suppress ``KeyboardInterrupt`` or ``SystemExit``.
    """
    try:
        rmtree(path)
    except FileNotFoundError:
        # Nothing to delete.
        pass
    except OSError as exc:
        # Stay best-effort, but make the failure visible: leftover files here
        # would be picked up again by later glob calls.
        print('Could not remove {}: {}'.format(path, exc))


# Start from empty output folders: remove whatever an earlier run left behind,
# then recreate both split directories.
for _split_dir in (OUTPUT_DIR_TRAIN, OUTPUT_DIR_TEST):
    del_folder(_split_dir)
for _split_dir in (OUTPUT_DIR_TRAIN, OUTPUT_DIR_TEST):
    mkdir_p(_split_dir)
del _split_dir

In [27]:
order = 0
# Map every entry of the sorted folder listing to its position (the class index).
class_ids = {class_name: class_index for class_index, class_name in enumerate(list_dir)}

def extract_class_id(wav_filename, class_map=None):
    """Return the class index for the recording at ``wav_filename``.

    The directory components of the path are searched from left to right, and
    the first one that is a key of ``class_map`` decides the class. This no
    longer depends on the class folder sitting at a fixed depth
    (``path.split('/')[2]``) or on ``/`` being the path separator.

    Args:
        wav_filename: path of a ``.wav`` file located below a class folder.
        class_map: optional mapping of folder name to class index; defaults to
            the module-level ``class_ids``.

    Returns:
        The class index, or ``None`` when no directory component is a known
        class name.
    """
    if class_map is None:
        class_map = class_ids

    # Only directories can name a class, so the file name itself is excluded.
    directories = os.path.normpath(wav_filename).split(os.sep)[:-1]
    for directory in directories:
        if directory in class_map:
            return class_map[directory]
    return None

def read_audio_from_filename(filename, target_sr):
    """Load ``filename`` as mono audio at ``target_sr`` Hz.

    Args:
        filename: path of the audio file handed to ``librosa.load``.
        target_sr: sample rate requested from ``librosa.load``. This argument
            was previously ignored in favour of the global ``TARGET_SR``.

    Returns:
        The samples reshaped to a column, i.e. an array of shape ``(n, 1)``.
    """
    audio, _ = librosa.load(filename, sr=target_sr, mono=True)
    audio = audio.reshape(-1, 1)
    return audio


def convert_data():
    """Cut every training recording into fixed-length segments and pickle them.

    For each ``.wav`` file found below ``DATA_AUDIO_DIR``:

    * the waveform is loaded at ``TARGET_SR`` and standardised to zero mean and
      unit variance;
    * after skipping the first ``PAD_SIZE`` samples, up to two segments of
      ``AUDIO_LENGTH`` samples are extracted (two, one or none, depending on
      the length of the recording);
    * each segment is written as a dict with keys ``class_id``, ``audio`` and
      ``sr`` to ``<class folder>_<file stem>.pkl_<segment index>``.

    Files whose name ends in ``5.wav`` go to ``OUTPUT_DIR_TEST``; all others go
    to ``OUTPUT_DIR_TRAIN``. Empty or constant recordings are skipped because
    they cannot be standardised. Progress is printed for every file.
    """
    pattern = os.path.join(DATA_AUDIO_DIR, '**/**.wav')
    for i, wav_filename in enumerate(iglob(pattern, recursive=True)):
        class_id = extract_class_id(wav_filename)
        audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)

        # A zero-length or constant signal has no variance; dividing by a zero
        # std would fill the segments with NaN/inf, so skip such files.
        std = np.std(audio_buf) if len(audio_buf) else 0
        if std == 0:
            print(i, wav_filename, 'skipped: empty or constant audio')
            print("-----------------")
            continue

        # normalize mean 0, variance 1
        audio_buf = (audio_buf - np.mean(audio_buf)) / std
        original_length = len(audio_buf)
        print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))

        voice_seg = []
        if original_length > AUDIO_LENGTH * SEGMENT_NUM + PAD_SIZE:
            first_seg = audio_buf[PAD_SIZE : AUDIO_LENGTH + PAD_SIZE]
            # The second window starts one sample after the first one ends, so
            # the sample at index AUDIO_LENGTH + PAD_SIZE is in neither segment.
            second_seg = audio_buf[AUDIO_LENGTH+PAD_SIZE+1 : PAD_SIZE+AUDIO_LENGTH*2+1]

            voice_seg.append(first_seg)
            voice_seg.append(second_seg)
            print(len(first_seg),len(second_seg))
        elif original_length > AUDIO_LENGTH + PAD_SIZE:
            first_seg = audio_buf[PAD_SIZE : AUDIO_LENGTH + PAD_SIZE]
            voice_seg.append(first_seg)
            print('Drop 1 Segment, Audio length={}'.format(len(audio_buf)))
        else:
            print('Drop 2 Segment, Audio length={}'.format(len(audio_buf)))

        # Any file whose name ends in "5.wav" is held out as test data.
        output_folder = OUTPUT_DIR_TRAIN
        if wav_filename[-5:] == '5.wav':
            output_folder = OUTPUT_DIR_TEST

        # Build "<class folder>_<file stem>" from the path relative to the data
        # root rather than a hard-coded character offset into the path.
        relative_stem = os.path.splitext(os.path.relpath(wav_filename, DATA_AUDIO_DIR))[0]
        output_filename = os.path.join(output_folder, relative_stem.replace(os.sep, '_') + '.pkl')
        for i_seg, audio_seg in enumerate(voice_seg):
            out = {'class_id': class_id,
                   'audio': audio_seg,
                   'sr': TARGET_SR}

            # The segment index is appended after ".pkl"; the loading code
            # globs for '**.pkl_*', so this naming must be kept.
            segment_path = output_filename + "_" + str(i_seg)
            with open(segment_path, 'wb') as w:
                pickle.dump(out, w)

            print(segment_path)

        print("-----------------")
        
# Run the training-data conversion when this cell/script is executed as the
# main program (not when the code is imported as a module).
if __name__ == '__main__':
    convert_data()

0 ../test_train_data/2017019770032_kimdayeong/5.wav 783687 0.0 1.0
220500 220500
../output/test/2017019770032_kimdayeong_5.pkl_0
../output/test/2017019770032_kimdayeong_5.pkl_1
-----------------
1 ../test_train_data/2017019770032_kimdayeong/1.wav 802503 0.0 1.0
220500 220500
../output/train/2017019770032_kimdayeong_1.pkl_0
../output/train/2017019770032_kimdayeong_1.pkl_1
-----------------
2 ../test_train_data/2017019770032_kimdayeong/3.wav 756404 0.0 1.0
220500 220500
../output/train/2017019770032_kimdayeong_3.pkl_0
../output/train/2017019770032_kimdayeong_3.pkl_1
-----------------
3 ../test_train_data/2017019770032_kimdayeong/2.wav 725357 -0.0 1.0000002
220500 220500
../output/train/2017019770032_kimdayeong_2.pkl_0
../output/train/2017019770032_kimdayeong_2.pkl_1
-----------------
4 ../test_train_data/2017019770032_kimdayeong/4.wav 745584 -0.0 1.0
220500 220500
../output/train/2017019770032_kimdayeong_4.pkl_0
../output/train/2017019770032_kimdayeong_4.pkl_1
-----------------
5 ../test

41 ../test_train_data/2017019770029_jueunhong/1.wav 715479 -0.0 1.0
220500 220500
../output/train/2017019770029_jueunhong_1.pkl_0
../output/train/2017019770029_jueunhong_1.pkl_1
-----------------
42 ../test_train_data/2017019770029_jueunhong/3.wav 682551 -0.0 1.0000001
220500 220500
../output/train/2017019770029_jueunhong_3.pkl_0
../output/train/2017019770029_jueunhong_3.pkl_1
-----------------
43 ../test_train_data/2017019770029_jueunhong/2.wav 666557 0.0 1.0
220500 220500
../output/train/2017019770029_jueunhong_2.pkl_0
../output/train/2017019770029_jueunhong_2.pkl_1
-----------------
44 ../test_train_data/2017019770029_jueunhong/4.wav 714068 0.0 0.99999994
220500 220500
../output/train/2017019770029_jueunhong_4.pkl_0
../output/train/2017019770029_jueunhong_4.pkl_1
-----------------
45 ../test_train_data/2017019770001_kwonyuna/5.wav 537168 0.0 0.99999994
220500 220500
../output/test/2017019770001_kwonyuna_5.pkl_0
../output/test/2017019770001_kwonyuna_5.pkl_1
-----------------
46 ../te

83 ../test_train_data/2017019880037_jusunghyun/2.wav 594944 0.0 0.99999976
220500 220500
../output/train/2017019880037_jusunghyun_2.pkl_0
../output/train/2017019880037_jusunghyun_2.pkl_1
-----------------
84 ../test_train_data/2017019880037_jusunghyun/4.wav 544768 0.0 0.9999998
220500 220500
../output/train/2017019880037_jusunghyun_4.pkl_0
../output/train/2017019880037_jusunghyun_4.pkl_1
-----------------
85 ../test_train_data/2017019740001_kwakjuheon/5.wav 611961 0.0 1.0
220500 220500
../output/test/2017019740001_kwakjuheon_5.pkl_0
../output/test/2017019740001_kwakjuheon_5.pkl_1
-----------------
86 ../test_train_data/2017019740001_kwakjuheon/1.wav 599731 0.0 0.99999994
220500 220500
../output/train/2017019740001_kwakjuheon_1.pkl_0
../output/train/2017019740001_kwakjuheon_1.pkl_1
-----------------
87 ../test_train_data/2017019740001_kwakjuheon/3.wav 595968 0.0 0.9999999
220500 220500
../output/train/2017019740001_kwakjuheon_3.pkl_0
../output/train/2017019740001_kwakjuheon_3.pkl_1
----

124 ../test_train_data/2017019880026_parkjongsang/4.wav 524967 0.0 1.0
220500 220500
../output/train/2017019880026_parkjongsang_4.pkl_0
../output/train/2017019880026_parkjongsang_4.pkl_1
-----------------
125 ../test_train_data/2017019740037_ohchaebin/5.wav 639274 -0.0 1.0000001
220500 220500
../output/test/2017019740037_ohchaebin_5.pkl_0
../output/test/2017019740037_ohchaebin_5.pkl_1
-----------------
126 ../test_train_data/2017019740037_ohchaebin/1.wav 675495 -0.0 0.99999994
220500 220500
../output/train/2017019740037_ohchaebin_1.pkl_0
../output/train/2017019740037_ohchaebin_1.pkl_1
-----------------
127 ../test_train_data/2017019740037_ohchaebin/3.wav 649152 -0.0 1.0000001
220500 220500
../output/train/2017019740037_ohchaebin_3.pkl_0
../output/train/2017019740037_ohchaebin_3.pkl_1
-----------------
128 ../test_train_data/2017019740037_ohchaebin/2.wav 651504 0.0 0.99999994
220500 220500
../output/train/2017019740037_ohchaebin_2.pkl_0
../output/train/2017019740037_ohchaebin_2.pkl_1
--

165 ../test_train_data/2017019740034_kimdongwook/5.wav 563981 0.0 0.99999994
220500 220500
../output/test/2017019740034_kimdongwook_5.pkl_0
../output/test/2017019740034_kimdongwook_5.pkl_1
-----------------
166 ../test_train_data/2017019740034_kimdongwook/1.wav 560217 0.0 0.9999999
220500 220500
../output/train/2017019740034_kimdongwook_1.pkl_0
../output/train/2017019740034_kimdongwook_1.pkl_1
-----------------
167 ../test_train_data/2017019740034_kimdongwook/3.wav 566803 -0.0 1.0
220500 220500
../output/train/2017019740034_kimdongwook_3.pkl_0
../output/train/2017019740034_kimdongwook_3.pkl_1
-----------------
168 ../test_train_data/2017019740034_kimdongwook/2.wav 619958 -0.0 1.0000001
220500 220500
../output/train/2017019740034_kimdongwook_2.pkl_0
../output/train/2017019740034_kimdongwook_2.pkl_1
-----------------
169 ../test_train_data/2017019740034_kimdongwook/4.wav 566333 0.0 1.0
220500 220500
../output/train/2017019740034_kimdongwook_4.pkl_0
../output/train/2017019740034_kimdongwo

205 ../test_train_data/2017019740021_kwakbokyeong/5.wav 912106 0.0 1.0
220500 220500
../output/test/2017019740021_kwakbokyeong_5.pkl_0
../output/test/2017019740021_kwakbokyeong_5.pkl_1
-----------------
206 ../test_train_data/2017019740021_kwakbokyeong/1.wav 867418 0.0 0.99999994
220500 220500
../output/train/2017019740021_kwakbokyeong_1.pkl_0
../output/train/2017019740021_kwakbokyeong_1.pkl_1
-----------------
207 ../test_train_data/2017019740021_kwakbokyeong/3.wav 854717 -0.0 0.9999998
220500 220500
../output/train/2017019740021_kwakbokyeong_3.pkl_0
../output/train/2017019740021_kwakbokyeong_3.pkl_1
-----------------
208 ../test_train_data/2017019740021_kwakbokyeong/2.wav 840605 0.0 1.0
220500 220500
../output/train/2017019740021_kwakbokyeong_2.pkl_0
../output/train/2017019740021_kwakbokyeong_2.pkl_1
-----------------
209 ../test_train_data/2017019740021_kwakbokyeong/4.wav 855658 -0.0 0.9999999
220500 220500
../output/train/2017019740021_kwakbokyeong_4.pkl_0
../output/train/201701974

246 ../test_train_data/2017019880016_parkjongkook/1.wav 651005 -0.0 1.0000001
220500 220500
../output/train/2017019880016_parkjongkook_1.pkl_0
../output/train/2017019880016_parkjongkook_1.pkl_1
-----------------
247 ../test_train_data/2017019880016_parkjongkook/3.wav 647241 0.0 1.0
220500 220500
../output/train/2017019880016_parkjongkook_3.pkl_0
../output/train/2017019880016_parkjongkook_3.pkl_1
-----------------
248 ../test_train_data/2017019880016_parkjongkook/2.wav 608669 -0.0 1.0
220500 220500
../output/train/2017019880016_parkjongkook_2.pkl_0
../output/train/2017019880016_parkjongkook_2.pkl_1
-----------------
249 ../test_train_data/2017019880016_parkjongkook/4.wav 687696 -0.0 1.0000001
220500 220500
../output/train/2017019880016_parkjongkook_4.pkl_0
../output/train/2017019880016_parkjongkook_4.pkl_1
-----------------
250 ../test_train_data/2017019880023_yoohaekyung/5.wav 650093 -0.0 0.99999994
220500 220500
../output/test/2017019880023_yoohaekyung_5.pkl_0
../output/test/201701988

287 ../test_train_data/2017019880019_wonsonghee/3.wav 729088 -0.0 1.0
220500 220500
../output/train/2017019880019_wonsonghee_3.pkl_0
../output/train/2017019880019_wonsonghee_3.pkl_1
-----------------
288 ../test_train_data/2017019880019_wonsonghee/2.wav 718336 -0.0 1.0
220500 220500
../output/train/2017019880019_wonsonghee_2.pkl_0
../output/train/2017019880019_wonsonghee_2.pkl_1
-----------------
289 ../test_train_data/2017019880019_wonsonghee/4.wav 748032 -0.0 0.99999976
220500 220500
../output/train/2017019880019_wonsonghee_4.pkl_0
../output/train/2017019880019_wonsonghee_4.pkl_1
-----------------
290 ../test_train_data/2017019880043_leegawon/5.wav 622310 0.0 1.0
220500 220500
../output/test/2017019880043_leegawon_5.pkl_0
../output/test/2017019880043_leegawon_5.pkl_1
-----------------
291 ../test_train_data/2017019880043_leegawon/1.wav 613843 0.0 1.0
220500 220500
../output/train/2017019880043_leegawon_1.pkl_0
../output/train/2017019880043_leegawon_1.pkl_1
-----------------
292 ../te

328 ../test_train_data/2017019880010_kimhyorin/2.wav 724992 0.0 1.0
220500 220500
../output/train/2017019880010_kimhyorin_2.pkl_0
../output/train/2017019880010_kimhyorin_2.pkl_1
-----------------
329 ../test_train_data/2017019880010_kimhyorin/4.wav 732672 0.0 1.0
220500 220500
../output/train/2017019880010_kimhyorin_4.pkl_0
../output/train/2017019880010_kimhyorin_4.pkl_1
-----------------
330 ../test_train_data/2017019740004_parksohui/5.wav 667028 0.0 0.99999994
220500 220500
../output/test/2017019740004_parksohui_5.pkl_0
../output/test/2017019740004_parksohui_5.pkl_1
-----------------
331 ../test_train_data/2017019740004_parksohui/1.wav 673613 0.0 1.0000001
220500 220500
../output/train/2017019740004_parksohui_1.pkl_0
../output/train/2017019740004_parksohui_1.pkl_1
-----------------
332 ../test_train_data/2017019740004_parksohui/3.wav 659501 0.0 1.0
220500 220500
../output/train/2017019740004_parksohui_3.pkl_0
../output/train/2017019740004_parksohui_3.pkl_1
-----------------
333 ../te

369 ../test_train_data/2017019770005_hyeonsanghyeok/4.wav 549898 -0.0 0.9999999
220500 220500
../output/train/2017019770005_hyeonsanghyeok_4.pkl_0
../output/train/2017019770005_hyeonsanghyeok_4.pkl_1
-----------------
370 ../test_train_data/2017019770040_kimdajeong/5.wav 570125 0.0 0.9999999
220500 220500
../output/test/2017019770040_kimdajeong_5.pkl_0
../output/test/2017019770040_kimdajeong_5.pkl_1
-----------------
371 ../test_train_data/2017019770040_kimdajeong/1.wav 651975 0.0 0.99999994
220500 220500
../output/train/2017019770040_kimdajeong_1.pkl_0
../output/train/2017019770040_kimdajeong_1.pkl_1
-----------------
372 ../test_train_data/2017019770040_kimdajeong/3.wav 587060 0.0 1.0
220500 220500
../output/train/2017019770040_kimdajeong_3.pkl_0
../output/train/2017019770040_kimdajeong_3.pkl_1
-----------------
373 ../test_train_data/2017019770040_kimdajeong/2.wav 612461 0.0 0.99999994
220500 220500
../output/train/2017019770040_kimdajeong_2.pkl_0
../output/train/2017019770040_kimda

410 ../test_train_data/2017019880036_kimjihye/5.wav 597504 -0.0 0.9999998
220500 220500
../output/test/2017019880036_kimjihye_5.pkl_0
../output/test/2017019880036_kimjihye_5.pkl_1
-----------------
411 ../test_train_data/2017019880036_kimjihye/1.wav 612352 0.0 1.0
220500 220500
../output/train/2017019880036_kimjihye_1.pkl_0
../output/train/2017019880036_kimjihye_1.pkl_1
-----------------
412 ../test_train_data/2017019880036_kimjihye/3.wav 584704 0.0 1.0
220500 220500
../output/train/2017019880036_kimjihye_3.pkl_0
../output/train/2017019880036_kimjihye_3.pkl_1
-----------------
413 ../test_train_data/2017019880036_kimjihye/2.wav 604672 -0.0 1.0000001
220500 220500
../output/train/2017019880036_kimjihye_2.pkl_0
../output/train/2017019880036_kimjihye_2.pkl_1
-----------------
414 ../test_train_data/2017019880036_kimjihye/4.wav 587264 -0.0 0.99999994
220500 220500
../output/train/2017019880036_kimjihye_4.pkl_0
../output/train/2017019880036_kimjihye_4.pkl_1
-----------------
415 ../test_tra

451 ../test_train_data/2017019880028_kimsunghan/1.wav 670320 0.0 1.0
220500 220500
../output/train/2017019880028_kimsunghan_1.pkl_0
../output/train/2017019880028_kimsunghan_1.pkl_1
-----------------
452 ../test_train_data/2017019880028_kimsunghan/3.wav 692900 -0.0 0.9999998
220500 220500
../output/train/2017019880028_kimsunghan_3.pkl_0
../output/train/2017019880028_kimsunghan_3.pkl_1
-----------------
453 ../test_train_data/2017019880028_kimsunghan/2.wav 673143 0.0 1.0000002
220500 220500
../output/train/2017019880028_kimsunghan_2.pkl_0
../output/train/2017019880028_kimsunghan_2.pkl_1
-----------------
454 ../test_train_data/2017019880028_kimsunghan/4.wav 671732 -0.0 1.0
220500 220500
../output/train/2017019880028_kimsunghan_4.pkl_0
../output/train/2017019880028_kimsunghan_4.pkl_1
-----------------
455 ../test_train_data/2017019880009_kimhongjoo/5.wav 766723 0.0 0.99999994
220500 220500
../output/test/2017019880009_kimhongjoo_5.pkl_0
../output/test/2017019880009_kimhongjoo_5.pkl_1
----

492 ../test_train_data/2017019770014_parkeunbi/3.wav 641024 0.0 0.99999994
220500 220500
../output/train/2017019770014_parkeunbi_3.pkl_0
../output/train/2017019770014_parkeunbi_3.pkl_1
-----------------
493 ../test_train_data/2017019770014_parkeunbi/2.wav 671232 0.0 0.99999994
220500 220500
../output/train/2017019770014_parkeunbi_2.pkl_0
../output/train/2017019770014_parkeunbi_2.pkl_1
-----------------
494 ../test_train_data/2017019770014_parkeunbi/4.wav 646144 0.0 1.0
220500 220500
../output/train/2017019770014_parkeunbi_4.pkl_0
../output/train/2017019770014_parkeunbi_4.pkl_1
-----------------
495 ../test_train_data/2017019880029_kimminji/5.wav 667648 0.0 0.9999999
220500 220500
../output/test/2017019880029_kimminji_5.pkl_0
../output/test/2017019880029_kimminji_5.pkl_1
-----------------
496 ../test_train_data/2017019880029_kimminji/1.wav 701952 0.0 1.0
220500 220500
../output/train/2017019880029_kimminji_1.pkl_0
../output/train/2017019880029_kimminji_1.pkl_1
-----------------
497 ../t

533 ../test_train_data/2017019770002_kwoneunkyung/2.wav 620032 -0.0 0.9999998
220500 220500
../output/train/2017019770002_kwoneunkyung_2.pkl_0
../output/train/2017019770002_kwoneunkyung_2.pkl_1
-----------------
534 ../test_train_data/2017019770002_kwoneunkyung/4.wav 571904 0.0 1.0
220500 220500
../output/train/2017019770002_kwoneunkyung_4.pkl_0
../output/train/2017019770002_kwoneunkyung_4.pkl_1
-----------------
535 ../test_train_data/2017019770034_kimhyeona/5.wav 571448 0.0 1.0
220500 220500
../output/test/2017019770034_kimhyeona_5.pkl_0
../output/test/2017019770034_kimhyeona_5.pkl_1
-----------------
536 ../test_train_data/2017019770034_kimhyeona/1.wav 588383 -0.0 0.99999994
220500 220500
../output/train/2017019770034_kimhyeona_1.pkl_0
../output/train/2017019770034_kimhyeona_1.pkl_1
-----------------
537 ../test_train_data/2017019770034_kimhyeona/3.wav 585560 -0.0 1.0
220500 220500
../output/train/2017019770034_kimhyeona_3.pkl_0
../output/train/2017019770034_kimhyeona_3.pkl_1
------

575 ../test_train_data/2017019740020_kimseongje/5.wav 601172 0.0 0.9999998
220500 220500
../output/test/2017019740020_kimseongje_5.pkl_0
../output/test/2017019740020_kimseongje_5.pkl_1
-----------------
576 ../test_train_data/2017019740020_kimseongje/1.wav 725357 0.0 1.0000001
220500 220500
../output/train/2017019740020_kimseongje_1.pkl_0
../output/train/2017019740020_kimseongje_1.pkl_1
-----------------
577 ../test_train_data/2017019740020_kimseongje/3.wav 629396 0.0 1.0000001
220500 220500
../output/train/2017019740020_kimseongje_3.pkl_0
../output/train/2017019740020_kimseongje_3.pkl_1
-----------------
578 ../test_train_data/2017019740020_kimseongje/2.wav 660442 0.0 0.9999999
220500 220500
../output/train/2017019740020_kimseongje_2.pkl_0
../output/train/2017019740020_kimseongje_2.pkl_1
-----------------
579 ../test_train_data/2017019740020_kimseongje/4.wav 612461 -0.0 1.0
220500 220500
../output/train/2017019740020_kimseongje_4.pkl_0
../output/train/2017019740020_kimseongje_4.pkl_1


616 ../test_train_data/2017019740013_kwakyiheon/1.wav 698045 -0.0 1.0000001
220500 220500
../output/train/2017019740013_kwakyiheon_1.pkl_0
../output/train/2017019740013_kwakyiheon_1.pkl_1
-----------------
617 ../test_train_data/2017019740013_kwakyiheon/3.wav 691459 0.0 0.99999994
220500 220500
../output/train/2017019740013_kwakyiheon_3.pkl_0
../output/train/2017019740013_kwakyiheon_3.pkl_1
-----------------
618 ../test_train_data/2017019740013_kwakyiheon/2.wav 662765 -0.0 1.0
220500 220500
../output/train/2017019740013_kwakyiheon_2.pkl_0
../output/train/2017019740013_kwakyiheon_2.pkl_1
-----------------
619 ../test_train_data/2017019740013_kwakyiheon/4.wav 719213 0.0 1.0000001
220500 220500
../output/train/2017019740013_kwakyiheon_4.pkl_0
../output/train/2017019740013_kwakyiheon_4.pkl_1
-----------------
620 ../test_train_data/2017019880011_kimjihyun/5.wav 592384 -0.0 0.9999999
220500 220500
../output/test/2017019880011_kimjihyun_5.pkl_0
../output/test/2017019880011_kimjihyun_5.pkl_1


In [28]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Lambda, Bidirectional, LSTM
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow import convert_to_tensor
from tensorflow import expand_dims
import numpy as np
import pickle
import os
from glob import glob

In [29]:
# Rebuild the class-name -> index mapping from the training folder listing.
DATA_AUDIO_DIR = '../test_train_data'
# Sorted so the index assigned to each entry is deterministic.
list_dir = sorted(os.listdir(DATA_AUDIO_DIR))

class_ids = {class_name: class_index for class_index, class_name in enumerate(list_dir)}

In [34]:
def m5(num_classes):
    """Build the 1-D convolutional classifier that the log output calls "M5".

    The network takes inputs of shape ``[AUDIO_LENGTH, 1]`` and stacks four
    stages of Conv1D -> BatchNormalization -> ReLU -> MaxPooling1D(4). The
    result is averaged over the time axis and fed to a softmax ``Dense`` layer
    with ``num_classes`` units.

    Args:
        num_classes: number of output classes (units of the final layer).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    print("!!!!!!!!", num_classes)
    print('Using Model M5')

    # (filters, kernel_size, strides) of each convolution stage, in order.
    stage_specs = [
        (128, 80, 4),
        (128, 3, 1),
        (256, 3, 1),
        (512, 3, 1),
    ]

    m = Sequential()
    for stage_index, (filters, kernel_size, strides) in enumerate(stage_specs):
        conv_options = dict(kernel_size=kernel_size,
                            strides=strides,
                            padding='same',
                            kernel_initializer='glorot_uniform',
                            kernel_regularizer=regularizers.l2(l=0.0001))
        if stage_index == 0:
            # Only the first layer declares the input shape.
            conv_options['input_shape'] = [AUDIO_LENGTH, 1]
        m.add(Conv1D(filters, **conv_options))
        m.add(BatchNormalization())
        m.add(Activation('relu'))
        m.add(MaxPooling1D(pool_size=4, strides=None))

    # Mean over axis 1 (time): same as global average pooling for 1D conv output.
    m.add(Lambda(lambda x: K.mean(x, axis=1)))
    m.add(Dense(num_classes, activation='softmax'))
    return m


def get_data(file_list):
    """Load pickled audio segments into feature and label arrays.

    Each file in ``file_list`` must unpickle to a mapping with an ``'audio'``
    entry and a ``'class_id'`` entry; the class id is converted with ``int``.

    Args:
        file_list: iterable of paths to pickle files.

    Returns:
        ``(x, y)``: ``np.array`` of the audio entries and ``np.array`` of the
        integer class ids, both in ``file_list`` order.
    """
    samples = []
    labels = []
    for path in file_list:
        with open(path, 'rb') as handle:
            record = pickle.load(handle)
        samples.append(record['audio'])
        labels.append(int(record['class_id']))
    return np.array(samples), np.array(labels)

In [35]:
# One output unit per entry of the training folder listing.
num_classes = len(list_dir)
model = m5(num_classes)

if model is None:
    # Raise instead of calling exit(), which would shut the notebook kernel down.
    raise RuntimeError('Something went wrong!!')

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Segment files are named "<name>.pkl_<index>", hence the '.pkl_*' pattern.
train_files = glob(os.path.join(OUTPUT_DIR_TRAIN, '**.pkl_*'))
print(os.path.join(OUTPUT_DIR_TRAIN, '**.pkl_*'))
if not train_files:
    raise FileNotFoundError('No training segments found in {}'.format(OUTPUT_DIR_TRAIN))
x_tr, y_tr = get_data(train_files)
y_tr = to_categorical(y_tr, num_classes=num_classes)

test_files = glob(os.path.join(OUTPUT_DIR_TEST, '**.pkl_*'))
if not test_files:
    raise FileNotFoundError('No test segments found in {}'.format(OUTPUT_DIR_TEST))
x_te, y_te = get_data(test_files)
y_te = to_categorical(y_te, num_classes=num_classes)

print('x_tr.shape =', x_tr.shape)
print('y_tr.shape =', y_tr.shape)
print('x_te.shape =', x_te.shape)
print('y_te.shape =', y_te.shape)
print(type(x_te))

# if the accuracy does not increase over 10 epochs, reduce the learning rate by half.
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=10, min_lr=0.00005, verbose=1)
# This is the batch size that fit() was actually given; the old assignment
# (128) was never passed on.
batch_size = 16
# NOTE(review): the test split doubles as the validation set here, so it also
# drives the learning-rate schedule - confirm this is intended.
history = model.fit(x=x_tr, y=y_tr, batch_size=batch_size, epochs=100, verbose=2, shuffle=True, validation_data=(x_te, y_te), callbacks=[reduce_lr])


# epoch 500 audio_length 10000 min_lr 0.0001 sr 8000 batch_size 16 : val_acc 0.73
# epoch 500 audio_length 10000 min_lr 0.00005 sr 8000 batch_size 16 : val_acc 0.77(0.83)
# epoch 50 audio_length 20000 min_lr 0.00005 sr 8000 batch_size 16 : val_acc 0.90
# epoch 100 audio_length 20000 min_lr 0.00005 sr 8000 batch_size 16 : val_acc 0.93
# epoch 100 audio_length 20000 min_lr 0.00005 sr 8000 batch_size 32 : val_acc 0.89(0.90) predict 0.75
# epoch 150 audio_length 20000 min_lr 0.00005 sr 8000 batch_size 16 : val_acc 0.91 predict 0.75

!!!!!!!! 128
Using Model M5
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_12 (Conv1D)           (None, 55125, 128)        10368     
_________________________________________________________________
batch_normalization_12 (Batc (None, 55125, 128)        512       
_________________________________________________________________
activation_12 (Activation)   (None, 55125, 128)        0         
_________________________________________________________________
max_pooling1d_12 (MaxPooling (None, 13781, 128)        0         
_________________________________________________________________
conv1d_13 (Conv1D)           (None, 13781, 128)        49280     
_________________________________________________________________
batch_normalization_13 (Batc (None, 13781, 128)        512       
_________________________________________________________________
activation_13 (Activation)

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(14, 6))
plt.rc('font', size=18)

# Draw the per-epoch training and validation accuracy recorded by model.fit.
for metric_name in ("accuracy", "val_accuracy"):
    plt.plot(history.history[metric_name])
plt.ylabel("accuracy")
plt.xlabel("epoch")
plt.legend(["train_accuracy", "val_accuracy"])
plt.show()

In [None]:
import pickle
from glob import iglob
import numpy as np
import librosa
from shutil import rmtree
from constants import *

# Source recordings and output folders for the validation set.
DATA_AUDIO_VAL_DIR = '../test_val_data'
OUTPUT_DIR_VAL_TRAIN = '../output_val/train'
OUTPUT_DIR_VAL_TEST = '../output_val/test'

# Class indices still come from the *training* folder listing.
DATA_AUDIO_DIR = '../test_train_data'
list_dir = sorted(os.listdir(DATA_AUDIO_DIR))


# Remove whatever an earlier run left behind, then recreate both folders.
for _val_dir in (OUTPUT_DIR_VAL_TRAIN, OUTPUT_DIR_VAL_TEST):
    del_folder(_val_dir)
for _val_dir in (OUTPUT_DIR_VAL_TRAIN, OUTPUT_DIR_VAL_TEST):
    mkdir_p(_val_dir)
del _val_dir

class_ids = {class_name: class_index for class_index, class_name in enumerate(list_dir)}

In [None]:
def convert_val_data():
    """Cut every validation recording into fixed-length segments and pickle them.

    Applies the same processing as ``convert_data`` to the ``.wav`` files below
    ``DATA_AUDIO_VAL_DIR``: load at ``TARGET_SR``, standardise to zero mean and
    unit variance, skip the first ``PAD_SIZE`` samples, and extract up to two
    segments of ``AUDIO_LENGTH`` samples. Each segment is written as a dict
    with keys ``class_id``, ``audio`` and ``sr`` to
    ``<class folder>_<file stem>.pkl_<segment index>``.

    Files whose name ends in ``5.wav`` go to ``OUTPUT_DIR_VAL_TEST``; all
    others go to ``OUTPUT_DIR_VAL_TRAIN``.

    Labels come from ``extract_class_id``. A recording for which it returns
    ``None`` is skipped, because ``get_data`` calls ``int()`` on the stored
    class id and would fail on ``None``. Empty or constant recordings are
    skipped as well.
    """
    pattern = os.path.join(DATA_AUDIO_VAL_DIR, '**/**.wav')
    for i, wav_filename in enumerate(iglob(pattern, recursive=True)):
        class_id = extract_class_id(wav_filename)
        if class_id is None:
            print(i, wav_filename, 'skipped: folder is not a known class')
            print("-----------------")
            continue

        audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)

        # A zero-length or constant signal has no variance; dividing by a zero
        # std would fill the segments with NaN/inf, so skip such files.
        std = np.std(audio_buf) if len(audio_buf) else 0
        if std == 0:
            print(i, wav_filename, 'skipped: empty or constant audio')
            print("-----------------")
            continue

        # normalize mean 0, variance 1
        audio_buf = (audio_buf - np.mean(audio_buf)) / std
        original_length = len(audio_buf)
        print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))

        voice_seg = []
        if original_length > AUDIO_LENGTH * SEGMENT_NUM + PAD_SIZE:
            first_seg = audio_buf[PAD_SIZE : AUDIO_LENGTH + PAD_SIZE]
            # The second window starts one sample after the first one ends, so
            # the sample at index AUDIO_LENGTH + PAD_SIZE is in neither segment.
            second_seg = audio_buf[AUDIO_LENGTH+PAD_SIZE+1 : PAD_SIZE+AUDIO_LENGTH*2+1]

            voice_seg.append(first_seg)
            voice_seg.append(second_seg)
            print(len(first_seg),len(second_seg))
        elif original_length > AUDIO_LENGTH + PAD_SIZE:
            first_seg = audio_buf[PAD_SIZE : AUDIO_LENGTH + PAD_SIZE]
            voice_seg.append(first_seg)
            print('Drop 1 Segment, Audio length={}'.format(len(audio_buf)))
        else:
            print('Drop 2 Segment, Audio length={}'.format(len(audio_buf)))

        # Any file whose name ends in "5.wav" goes to the validation "test" folder.
        output_folder = OUTPUT_DIR_VAL_TRAIN
        if wav_filename[-5:] == '5.wav':
            output_folder = OUTPUT_DIR_VAL_TEST

        # Build "<class folder>_<file stem>" from the path relative to the
        # validation root. The old slice [19:-4] was sized for the longer
        # training prefix and cut two characters off each folder name.
        relative_stem = os.path.splitext(os.path.relpath(wav_filename, DATA_AUDIO_VAL_DIR))[0]
        output_filename = os.path.join(output_folder, relative_stem.replace(os.sep, '_') + '.pkl')
        for i_seg, audio_seg in enumerate(voice_seg):
            out = {'class_id': class_id,
                   'audio': audio_seg,
                   'sr': TARGET_SR}

            # The segment index is appended after ".pkl"; the loading code
            # globs for '**.pkl_*', so this naming must be kept.
            segment_path = output_filename + "_" + str(i_seg)
            with open(segment_path, 'wb') as w:
                pickle.dump(out, w)

            print(segment_path)

        print("-----------------")

# Run the validation-data conversion when this cell/script is executed as the
# main program (not when the code is imported as a module).
if __name__ == '__main__':
    convert_val_data()

In [None]:
from sklearn.metrics import accuracy_score

# NOTE(review): only the segments in OUTPUT_DIR_VAL_TRAIN are scored; files
# routed to OUTPUT_DIR_VAL_TEST (names ending in "5.wav") are not evaluated -
# confirm this is intended.
val_files = glob(os.path.join(OUTPUT_DIR_VAL_TRAIN, '**.pkl_*'))
val_files.sort()

x_val, y_val = get_data(val_files)
print('y_val : ', y_val)

# One-hot labels; not used by the accuracy computation below.
y_val_encoding = to_categorical(y_val, num_classes = num_classes)

pred_out = model.predict(x_val)

# Class names in index order, taken from the training folder listing.
list_dir = os.listdir(DATA_AUDIO_DIR)
list_dir.sort()

# y_val already holds class indices, so it is used directly instead of going
# through list_dir.index(list_dir[...]), which returned the same index.
real = [int(label) for label in y_val]
pred_out_idex = [int(np.argmax(pred)) for pred in pred_out]

for pred, pred_index, real_index in zip(pred_out, pred_out_idex, real):
    print("Predict :",list_dir[pred_index], ", Real :", list_dir[real_index], ", Likelihood :", np.max(pred))

# get the accuracy
print (accuracy_score(real, pred_out_idex))