# WavCeption V1: a 1-D Inception approach
https://www.kaggle.com/ivallesp/wavception-v1-a-1-d-inception-approach-lb-0-76

## Load modules and libraries


In [1]:
%matplotlib inline
import numpy as np 
import pandas as pd 
import os
import shutil
import glob
import random
from tqdm import tqdm
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import IPython
from numpy.fft import rfft, irfft
import numpy as np
import random
import itertools

from scipy.io import wavfile
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy as sp
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() 

Instructions for updating:
non-resource variables are not supported in the long term


## Noise generation functions

In [2]:
def ms(x):
    """시그널 x의 제곱의 평균 값.
    x: Dynamic quantity.
    returns: x의 제곱 평균
    """
    return (np.abs(x)**2.0).mean()

def normalize(y, x=None):
    """y를 (표준 정규) 백색 노이즈 신호로 정규화.
    선택적으로 시그널 x의 파워로 정규화
    """
    if x is not None:
        x = ms(x)
    else:
        x = 1.0
    return y * np.sqrt( x / ms(y) )

def white_noise(N, state=None):
    state = np.random.RandomState() if state is None else state
    return state.randn(N)

def pink_noise(N, state=None):

    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X))+1.) # +1 to avoid divide by zero
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def blue_noise(N, state=None):
    """
    청색 노이즈
    N: 샘플 수
    state: PRNG 상태
    type state: :class:`np.random.RandomState`
    
    파워는 옥타브당 6dB 증가
    파워 밀도는 옥타브당 3dB 증가

    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def brown_noise(N, state=None):
    """
    갈색 노이즈
    N: 샘플 수
    state: PRNG 상태
    type state: :class:`np.random.RandomState`
    
    파워는 옥타브당 3dB 감소
    파워 밀도는 옥타브당 6dB 증가

    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X))+1)# Filter
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def violet_noise(N, state=None):
    """
    보라 노이즈
    N: 샘플 수
    state: PRNG 상태
    type state: :class:`np.random.RandomState`
    
    파워는 옥타브당 3dB 감소
    파워 밀도는 옥타브당 6dB 감소

    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

## Tensorflow utilies
텐서플로우의 공통 작업을 모듈화합니다.

In [3]:
def get_tensorflow_configuration(device='0', memory_fraction=1):
    """
    사용할 GPU 및 프로세스에서 사용할 수 있는 메모리 양을 선택하는 기능입니다.
    device: 사용할 디바이스(str)
    memory_complet: 할당해야하는 메모리 비율(comparent)
    return: 세션에 전달할 config(tf 개체)
    """
    device = str(device)
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    config.gpu_options.visible_device_list = device
    return(config)

def start_tensorflow_session(device="0", memory_fraction=1):
    """
    사용할 GPU 장치를 관리하는 텐서 플로우 세션을 시작합니다. 즉, 사전 할당될 메모리의 비율입니다.
    device: 사용할 디바이스(str): 디바이스 번호(str)가 있는 문자열입니다.
    memory_fraction: 지정된 메모리(float[0,1)에서 사전 할당될 메모리의 일부입니다.
    return: 세션에 전달할 config(tf 개체)
    """
    return(tf.Session(config=get_tensorflow_configuration(device=device, memory_fraction=memory_fraction)))

def get_summary_writer(session, logs_path, project_id, version_id):
    """
    텐서보드 리포팅
    session: 열린 텐서 플로우 세션
    logs_path: 텐서보드가 로그를 찾는 경로(str)
    project_id: 보고용 프로젝트 이름(str)
    version_id: 보고용 버전 이름(str)
    return summary_writer: 텐서보드 writer
    """
    path = os.path.join(logs_path,"{}_{}".format(project_id, version_id)) 
    if os.path.exists(path):
        shutil.rmtree(path)
    summary_writer = tf.summary.FileWriter(path, graph_def=session.graph_def)
    return(summary_writer)

## Paths management module
경로를 다루는 모듈입니다.

In [4]:
# Common paths
def _norm_path(path):
    """
    경로 검색 기능의 출력을 정규화하는데 사용합니다. 
    슬래시나 역슬래시 케이스를 고치는데 유용합니다.
    """
    def normalize_path(*args, **kwargs):
        return os.path.normpath(path(*args, **kwargs))
    return normalize_path

def _assure_path_exists(path):
    """
    경로 검색 기능의 출력 유무를 확인하기 위한 함수입니다.
    """
    def assure_exists(*args, **kwargs):
        p = path(*args, **kwargs)
        assert os.path.exists(p), "다음 경로가 존재하지 않습니다: '{}'".format(p)
        return p
    return assure_exists

def _is_output_path(path):
    """
    경로 검색 함수의 출력에 적용되는 함수를 그룹화하기 위한 함수입니다.
    """
    @_norm_path
    @_assure_path_exists
    def check_existence_or_create_it(*args, **kwargs):
        if not os.path.exists(path(*args, **kwargs)):
            "경로가 존재하지 않습니다.. 생성: {}".format(path(*args, **kwargs))
            os.makedirs(path(*args, **kwargs))
        return path(*args, **kwargs)
    return check_existence_or_create_it

def _is_input_path(path):
    """
    입력 경로 검색 함수의 출력에 적용되는 함수를 그룹화하기 위한 데코레이터 함수입니다.
    """
    @_norm_path
    @_assure_path_exists
    def check_existence(*args, **kwargs):
        return path(*args, **kwargs)
    return check_existence

@_is_input_path
def get_train_path():
    path = '../input/train'
    return path

@_is_input_path
def get_test_path():
    path = '../input/test'
    return path

@_is_input_path
def get_train_audio_path():
    path = os.path.join(get_train_path(), 'audio')
    return path

@_is_input_path
def get_scoring_audio_path():
    path = os.path.join(get_test_path(), "audio")
    return path

@_is_output_path
def get_submissions_path():
    path = '../working/output'
    return path

@_is_output_path
def get_silence_path():
    path = '../working/silence'
    return path

## Utilities
범용 유틸리티 입니다.

In [5]:
flatten = lambda l: [item for sublist in l for item in sublist]

def batching(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]

## Data Tools
데이터 핸들링 도구 입니다.

In [6]:
def read_wav(filepath, pad=True):
    """
    wav 파일의 경로를 지정하면 이 함수가 파일을 읽고 정규화하여 16k 샘플이 있는지     확인합니다.
    filepath: wav 파일의 기존 파일 경로(str)입니다.
    pad: 패딩 여부(bool)
    returns: 샘플과 타겟 변수(tuple (np.array, str)를 반환합니다.
    """
    sample_rate, x = wavfile.read(filepath)
    target = os.path.split(os.path.split(filepath)[0])[1]
    assert sample_rate==16000
    if pad:
        return np.pad(x, (0, 16000-len(x)), mode="constant")/32768, target
    else:
        return x/32768, target
    
def get_batcher(list_of_paths, batch_size, label_encoder=None, scoring=False):
    """
    배치 목록이 지정된 배치 생성기를 빌드합니다.
    list_of_class: (파일 경로, 대상)이 있는 튜플 리스트입니다.(list)
    batch_size: 배치의 크기(int)입니다.
    label_encoder: 라벨 인코더입니다.
    scoring: 타겟 고려 여부(bool)
    returns: 배치 생성기
    """
    for filepaths in batching(list_of_paths, batch_size):
        wavs, targets = zip(*list(map(read_wav, filepaths)))
        if scoring:
            yield np.expand_dims(np.row_stack(wavs), 2), filepaths
        else:
            if label_encoder is None:
                yield np.expand_dims(np.row_stack(wavs), 2), np.row_stack(targets)
            else:
                yield np.expand_dims(np.row_stack(wavs), 2), np.expand_dims(label_encoder.transform(np.squeeze(targets)),1)

## Architecture building blocks
Inception-1D(일명 wavception)는 일반 컨볼루션 신경망의 성능을 크게 향상시킵니다.

In [7]:
class BatchNorm(object):
    def __init__(self, epsilon=1e-5, momentum=0.999, name="batch_norm"):
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x, train=True):
        return tf.layers.batch_normalization(x,
                                            momentum=self.momentum,
                                            epsilon=self.epsilon,
                                            scale=True,
                                            training=train,
                                            name=self.name)
    
def inception_1d(x, is_train, depth, norm_function, activ_function, name):
    """
    Inception 1d 모듈 구현입니다.
    x: 현재 모듈에 대한 입력(채널-마지막을 포함한 4D 텐서)
    is_train: 배치정규화 동작을 제어
    depth: 네트워크의 깊이를 선형적으로 제어(int)
    norm_function: 정규화 클래스(위의 BatchNorm 클래스와 동일한 형식)
    activ_function: 활성화 함수
    name: 변수 범위(str)의 이름
    """
    with tf.variable_scope(name):
        x_norm = norm_function(name="norm_input")(x, train=is_train)

        # Branch 1: 64 x conv 1x1 
        branch_conv_1_1 = tf.layers.conv1d(inputs=x_norm, filters=16*depth, kernel_size=1,
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_1_1")
        branch_conv_1_1 = norm_function(name="norm_conv_1_1")(branch_conv_1_1, train=is_train)
        branch_conv_1_1 = activ_function(branch_conv_1_1, "activation_1_1")

        # Branch 2: 128 x conv 3x3 
        branch_conv_3_3 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_3_3_1")
        branch_conv_3_3 = norm_function(name="norm_conv_3_3_1")(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, "activation_3_3_1")

        branch_conv_3_3 = tf.layers.conv1d(inputs=branch_conv_3_3, filters=32*depth, kernel_size=3, 
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_3_3_2")
        branch_conv_3_3 = norm_function(name="norm_conv_3_3_2")(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, "activation_3_3_2")

        # Branch 3: 128 x conv 5x5 
        branch_conv_5_5 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_5_5_1")
        branch_conv_5_5 = norm_function(name="norm_conv_5_5_1")(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, "activation_5_5_1")

        branch_conv_5_5 = tf.layers.conv1d(inputs=branch_conv_5_5, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_5_5_2")
        branch_conv_5_5 = norm_function(name="norm_conv_5_5_2")(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, "activation_5_5_2")

        # Branch 4: 128 x conv 7x7
        branch_conv_7_7 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_7_7_1")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_1")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_1")

        branch_conv_7_7 = tf.layers.conv1d(inputs=branch_conv_7_7, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           padding="same", name="conv_7_7_2")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_2")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_2")

        # Branch 5: 16 x (max_pool 3x3 + conv 1x1)
        branch_maxpool_3_3 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="maxpool_3")
        branch_maxpool_3_3 = norm_function(name="norm_maxpool_3_3")(branch_maxpool_3_3, train=is_train)
        branch_maxpool_3_3 = tf.layers.conv1d(inputs=branch_maxpool_3_3, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                              padding="same", name="conv_maxpool_3")

        # Branch 6: 16 x (max_pool 5x5 + conv 1x1)
        branch_maxpool_5_5 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="maxpool_5")
        branch_maxpool_5_5 = norm_function(name="norm_maxpool_5_5")(branch_maxpool_5_5, train=is_train)
        branch_maxpool_5_5 = tf.layers.conv1d(inputs=branch_maxpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                              padding="same", name="conv_maxpool_5")

        # Branch 7: 16 x (avg_pool 3x3 + conv 1x1)
        branch_avgpool_3_3 = tf.layers.average_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="avgpool_3")
        branch_avgpool_3_3 = norm_function(name="norm_avgpool_3_3")(branch_avgpool_3_3, train=is_train)
        branch_avgpool_3_3 = tf.layers.conv1d(inputs=branch_avgpool_3_3, filters=16, kernel_size=1,
                                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                              padding="same", name="conv_avgpool_3")

        # Branch 8: 16 x (avg_pool 5x5 + conv 1x1)
        branch_avgpool_5_5 = tf.layers.average_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="avgpool_5")
        branch_avgpool_5_5 = norm_function(name="norm_avgpool_5_5")(branch_avgpool_5_5, train=is_train)
        branch_avgpool_5_5 = tf.layers.conv1d(inputs=branch_avgpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                              padding="same", name="conv_avgpool_5")

        # Concatenate
        output = tf.concat([branch_conv_1_1, branch_conv_3_3, branch_conv_5_5, branch_conv_7_7, branch_maxpool_3_3, 
                           branch_maxpool_5_5, branch_avgpool_3_3, branch_avgpool_5_5], axis=-1)
        return output

## Load and prepare data
합성 및 제공된 노이즈 추가

In [8]:
filepaths_noise = glob.glob(os.path.join(get_train_audio_path(), "_background_noise_", "*.wav"))

noise = np.concatenate(list(map(lambda x: read_wav(x, False)[0], filepaths_noise)))
noise = np.concatenate([noise, noise[::-1]])
synthetic_noise = np.concatenate([white_noise(N=16000*30, state=np.random.RandomState(655321)), 
                                  blue_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  pink_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  brown_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  violet_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  np.zeros(16000*60)])
synthetic_noise /= np.max(np.abs(synthetic_noise))
synthetic_noise = np.concatenate([synthetic_noise, (synthetic_noise+synthetic_noise[::-1])/2])
all_noise = np.concatenate([noise, synthetic_noise])

  


In [9]:
np.random.seed(655321)
random.seed(655321)

path = get_silence_path()

if not os.path.exists(path):
    os.makedirs(path) # It fails in kaggle kernel due to the read-only filesystem

for noise_clip_no in tqdm(range(8000)):
    if noise_clip_no<=4000:
        idx = np.random.randint(0, len(noise)-16000)
        clip = noise[idx:(idx+16000)]
    else:
        idx = np.random.randint(0, len(synthetic_noise)-16000)
        clip = synthetic_noise[idx:(idx+16000)]
    wavfile.write(os.path.join(path, "{0:04d}.wav".format(noise_clip_no)), 16000, 
                               ((32767*clip/np.max(np.abs(clip))).astype(np.int16)))

    

100%|████████████████████████████████████████████████████████████████████████████| 8000/8000 [00:03<00:00, 2473.72it/s]


In [10]:
filepaths = glob.glob(os.path.join(get_train_audio_path(), "**/*.wav"), recursive=True)
filepaths += glob.glob(os.path.join(get_silence_path(), "**/*.wav"), recursive=True)
filepaths = list(filter(lambda fp: "_background_noise_" not in fp, filepaths))
validation_list = open(os.path.join(get_train_path(), "validation_list.txt")).readlines()
test_list = open(os.path.join(get_train_path(), "testing_list.txt")).readlines()
validation_list = list(map(lambda fn: os.path.join(get_train_audio_path(), fn.strip()), validation_list))
testing_list = list(map(lambda fn: os.path.join(get_train_audio_path(), fn.strip()), test_list))
training_list = np.setdiff1d(filepaths, validation_list+testing_list).tolist()

In [11]:
random.seed(655321)
random.shuffle(filepaths)
random.shuffle(validation_list)
random.shuffle(testing_list)
random.shuffle(training_list)

In [14]:
# 빠른 유닛 테스트
# 파일의 수와 일관성 테스트
assert all(map(lambda fp: os.path.splitext(fp)[1]==".wav", filepaths))
assert len(filepaths)==64727 - 6 + 8000
#assert len(training_list) == len(filepaths) - 6798 - 6835 
assert len(validation_list) == 6798
assert len(testing_list) == 6835

# 존재 테스트 파일
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), validation_list))
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), testing_list))
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), training_list))
#assert set(validation_list + testing_list + training_list) == set(filepaths)

# 세트 간 중복 여부 테스트
assert len(np.intersect1d(validation_list, testing_list))==0
assert len(np.intersect1d(training_list, testing_list))==0
assert len(np.intersect1d(training_list, validation_list))==0

In [15]:
# Classes processing
cardinal_classes =list(set(map(lambda fp:os.path.split(os.path.split(fp)[0])[1], filepaths)))
le_classes = LabelEncoder().fit(cardinal_classes)
Counter(map(lambda fp:os.path.split(os.path.split(fp)[0])[1], filepaths))

Counter({'no': 2375,
         'yes': 2377,
         'stop': 2380,
         'nine': 2364,
         'left': 2353,
         'dog': 1746,
         'wow': 1745,
         'up': 2375,
         'one': 2370,
         'six': 2369,
         'zero': 2376,
         'two': 2373,
         'sheila': 1734,
         'tree': 1733,
         'silence': 8000,
         'four': 2372,
         'marvin': 1746,
         'bed': 1713,
         'right': 2367,
         'seven': 2377,
         'cat': 1733,
         'eight': 2352,
         'five': 2357,
         'on': 2367,
         'happy': 1742,
         'off': 2357,
         'three': 2356,
         'go': 2372,
         'down': 2359,
         'bird': 1731,
         'house': 1750})

In [16]:
# 빠른 유닛 테스트
# 테스트 데이터 준비
_gen_test = get_batcher(filepaths, 1000)
batch_a_wav, batch_a_target = next(_gen_test)
batch_b_wav, batch_b_target = next(_gen_test)
_gen_test_le = get_batcher(filepaths, 1000, label_encoder=le_classes)
batch_le_wav, batch_le_target = next(_gen_test_le)

# 테스트 배치 행렬 좌표 검정
assert batch_a_wav.shape == (1000, 16000, 1)
assert batch_le_wav.shape == (1000, 16000, 1)
assert batch_a_wav.shape == batch_b_wav.shape == batch_le_wav.shape

# 테스트 배치 재생산성
assert np.sum(np.abs(batch_a_wav - batch_b_wav)) != 0
assert len(batch_a_target) == len(batch_b_target) == len(batch_le_target)
assert any(batch_a_target != batch_b_target)

# 테스트 클래스 라벨 인코더
assert all(batch_le_target == np.expand_dims(le_classes.transform(np.squeeze(batch_a_target)), 1))

## Architecture design
WavCeption 디자인

In [32]:
class NameSpacer:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
        
class Architecture:
    def __init__(self, class_cardinality, seq_len=16000, name='architecture'):
        self.seq_len = seq_len
        self.class_cardinality = class_cardinality
        self.optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.name = name
        self.define_computation_graph()
        
        # 별칭
        self.ph = self.placeholders
        self.op = self.optimizers
        self.summ = self.summaries
        
    def define_computation_graph(self):
        # 그래프 초기화
        tf.reset_default_graph()
        self.placeholders = NameSpacer(**self.define_placeholders())
        self.core_model = NameSpacer(**self.define_core_model())
        self.losses = NameSpacer(**self.define_losses())
        self.optimizers = NameSpacer(**self.define_optimizers())
        self.summaries = NameSpacer(**self.define_summaries())
        
    def define_placeholders(self):
        with tf.variable_scope("Placeholders"):
            wav_in = tf.placeholder(dtype=tf.float32, shape=(None, self.seq_len, 1), name='wav_in')
            is_train = tf.placeholder(dtype=tf.bool, shape=None, name='is_train')
            target = tf.placeholder(dtype=tf.int32, shape=(None, 1), name='target')
            acc_dev = tf.placeholder(dtype=tf.float32, shape=None, name='acc_dev')
            loss_dev = tf.placeholder(dtype=tf.float32, shape=None, name='loss_dev')
            return ({'wav_in': wav_in, 'target': target, 'is_train': is_train, 'acc_dev': acc_dev, 'loss_dev': loss_dev})
        
    def define_core_model(self):
        with tf.variable_scope("Core_Model"):
            x = inception_1d(x=self.placeholders.wav_in, is_train=self.placeholders.is_train, 
                             norm_function=BatchNorm, activ_function=tf.nn.relu, depth=1,
                             name="Inception_1_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_1_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_2_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_2_3")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_2")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_3_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_3_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_3")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_4_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_4_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_4")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_5_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_5_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_5")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_6_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_6_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_6")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_7_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_7_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_7")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_8_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_8_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_8")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_9_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_9_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_9")
            x = tf.layers.flatten(x)
            x = tf.layers.dense(BatchNorm(name="bn_dense_1")(x,train=self.placeholders.is_train),
                                128, activation=tf.nn.relu, kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                name="dense_1")
            output = tf.layers.dense(BatchNorm(name="bn_dense_2")(x,train=self.placeholders.is_train),
                                self.class_cardinality, activation=None, kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                name="output")
            return ({'output': output})
    
    def define_losses(self):
        with tf.variable_scope('Losses'):
            softmax_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.squeeze(self.placeholders.target), logits=self.core_model.output, name='softmax')
            return({'softmax': softmax_ce})
    
    def define_optimizers(self):
        with tf.variable_scope("Optimization"):
            op = self.optimizer.minimize(self.losses.softmax)
            return ({'op': op})
    
    def define_summaries(self):
        with tf.variable_scope("Summaries"):
            ind_max = tf.squeeze(tf.cast(tf.argmax(self.core_model.output, axis=1), tf.int32))
            target = tf.squeeze(self.placeholders.target)
            acc = tf.reduce_mean(tf.cast(tf.equal(ind_max, target), tf.float32))
            loss = tf.reduce_mean(self.losses.softmax)
            train_scalar_probes = {'accuracy': acc, 'loss': loss}
            train_performance_scalar = [tf.summary.scalar(k, tf.reduce_mean(v), family=self.name) for k, v in train_scalar_probes.items()]
            train_performance_scalar = tf.summary.merge(train_performance_scalar)
            
            dev_scalar_probes = {'acc_dev': self.placeholders.acc_dev, 'loss_dev': self.placeholders.loss_dev}
            dev_performance_scalar = [tf.summary.scalar(k, v, family=self.name) for k, v in dev_scalar_probes.items()]
            dev_performance_scalar = tf.summary.merge(dev_performance_scalar)
            return ({"accuracy": acc, 'loss': loss, 's_tr': train_performance_scalar, 's_de': dev_performance_scalar})

## Run model


In [33]:
net = Architecture(class_cardinality=len(cardinal_classes), name='wavception')

In [34]:
sess = start_tensorflow_session(device='0')
sw = get_summary_writer(sess, '~/.logs_tensorboard/', 'wavception', 'V1')
c = 0



In [35]:
sess.run(tf.initialize_all_variables())

In [36]:
np.random.seed(655321)
random.seed(655321)

In [38]:
for epoch in range(50000):
    random.shuffle(training_list)
    batcher = get_batcher(training_list, 8, le_classes)
    for i, (batch_x, batch_y) in enumerate(batcher):
        _, loss, acc, s = sess.run([net.op.op, net.losses.softmax, net.summ.accuracy, net.summ.s_tr], feed_dict={net.ph.wav_in: batch_x, net.ph.target: batch_y, net.ph.is_train: True})
        print('[{0:04d}|{1:04d}] Accuracy train: {2:.2f}%'.format(epoch, i, acc*100))
        sw.add_summary(s, c)
        if c%1000 == 0: # Validation
            accuracies_dev = []
            losses_dev = []
            batcher = get_batcher(validation_list, 8, le_classes)
            for i, (batch_x, batch_y) in enumerate(batcher):
                acc, loss = sess.run([net.summ.accuracy, net.summ.loss], feed_dict={net.ph.wav_in: batch_x, net.ph.target: batch_y, net.ph.is_train: False})
                accuracies_dev.append(acc)
                losses_dev.append(loss)
            s = sess.run(net.summ.s_de, feed_dict={net.ph.acc_dev: np.mean(accuracies_dev), net.ph.loss_dev: np.mean(losses_dev)})
            sw.add_summary(s, c)
        c += 1

ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted: OOM when allocating tensor with shape[8,400,1000,1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node Core_Model/Inception_5_2/maxpool_5/MaxPool (defined at C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[Losses/softmax/softmax/_3489]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted: OOM when allocating tensor with shape[8,400,1000,1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node Core_Model/Inception_5_2/maxpool_5/MaxPool (defined at C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\framework\ops.py:1751) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored.

Original stack trace for 'Core_Model/Inception_5_2/maxpool_5/MaxPool':
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\traitlets\config\application.py", line 845, in launch_instance
    app.start()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
    self.io_loop.start()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\asyncio\base_events.py", line 541, in run_forever
    self._run_once()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\asyncio\base_events.py", line 1786, in _run_once
    handle._run()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\ioloop.py", line 688, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\ioloop.py", line 741, in _run_callback
    ret = callback()
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\gen.py", line 814, in inner
    self.ctx_run(self.run)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\gen.py", line 775, in run
    yielded = self.gen.send(value)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel\kernelbase.py", line 545, in execute_request
    user_expressions, allow_stdin,
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tornado\gen.py", line 234, in wrapper
    yielded = ctx_run(next, result)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel\ipkernel.py", line 306, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\IPython\core\interactiveshell.py", line 2887, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\IPython\core\interactiveshell.py", line 2932, in _run_cell
    return runner(coro)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\IPython\core\interactiveshell.py", line 3156, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\IPython\core\interactiveshell.py", line 3347, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\IPython\core\interactiveshell.py", line 3427, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-33-b1bb9254ff16>", line 1, in <module>
    net = Architecture(class_cardinality=len(cardinal_classes), name='wavception')
  File "<ipython-input-32-4f9cb5dc6f00>", line 11, in __init__
    self.define_computation_graph()
  File "<ipython-input-32-4f9cb5dc6f00>", line 22, in define_computation_graph
    self.core_model = NameSpacer(**self.define_core_model())
  File "<ipython-input-32-4f9cb5dc6f00>", line 62, in define_core_model
    activ_function=tf.nn.relu, depth=3, name="Inception_5_2")
  File "<ipython-input-7-3ba187b3d59a>", line 83, in inception_1d
    branch_maxpool_5_5 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="maxpool_5")
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 324, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\layers\pooling.py", line 165, in max_pooling1d
    return layer.apply(inputs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 324, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py", line 1695, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\layers\base.py", line 548, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py", line 847, in __call__
    outputs = call_fn(cast_inputs, *args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\autograph\impl\api.py", line 234, in wrapper
    return converted_call(f, options, args, kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\autograph\impl\api.py", line 439, in converted_call
    return _call_unconverted(f, args, kwargs, options)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\autograph\impl\api.py", line 330, in _call_unconverted
    return f(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\keras\layers\pooling.py", line 80, in call
    data_format=self.data_format)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\keras\backend.py", line 5262, in pool2d
    x, pool_size, strides, padding=padding, data_format=tf_data_format)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\ops\nn_ops.py", line 3815, in max_pool
    name=name)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py", line 5675, in max_pool
    data_format=data_format, name=name)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 793, in _apply_op_helper
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3360, in create_op
    attrs, op_def, compute_device)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3429, in _create_op_internal
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\envs\tens_2g\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1751, in __init__
    self._traceback = tf_stack.extract_stack()


### Test Accuracy

In [None]:
accuracies = []
batcher = get_batcher(testing_list, 64, le_classes)
for i, (batch_x, batch_y) in tqdm(enumerate(batcher)):
    acc = sess.run(net.summ.accuracy, feed_dict={net.ph.wav_in: batch_x, net.ph.target: batch_y, net.ph.is_train: False})
    accuracies.append(acc)

## Prediction and submission building

In [None]:
scoring_list = glob.glob(os.path.join(get_scoring_audio_path(), '*.wav'), recursive=True)

In [None]:
batcher = get_batcher(scoring_list, 80, le_classes, scoring=True)

In [None]:
fns = []
prds = []
for i, (batch_x, filepaths) in tqdm(enumerate(batcher)):
    pred = sess.run(net.core_model.output, feed_dict={net.ph.wav_in: batch_x, net.ph.is_train: False})
    fns.extend(map(lambda f:os.path.split(f)[1], filepaths))
    prds.extend(map(lambda f:np.argmax(pred, axis=1).tolist(), pred))

In [None]:
# Submission storage
df=pd.DataFrame({"fname":fns, "label": prds})
df.label = le_classes.inverse_transform(df.label)
df.loc[~df.label.isin(["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go", "silence"]), "label"]="unknown"
df.to_csv(os.path.join(get_submissions_path(), "submission.csv"), index=False)