# Kaggle Study Day 26

# WavCeption V1 : a 1-D Inception approach

https://www.kaggle.com/ivallesp/wavception-v1-a-1-d-inception-approach-lb-0-76

WaveCeption V1 네트워크는 일반 컨볼루션 신경망에 비해 인상적인 결과를 내는 것처럼 보이지만, 이 대회에선 전처리와 알려지지 않은 트랙 관리에서 힘든 작업이 있는 것 같아보인다. 이는 같은 아이디어인 구글의 inception 네트워크를 기반으로 한다.

## Load modules and libraries

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import os
import shutil
import glob
import random
from tqdm import tqdm
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import IPython
from numpy.fft import rfft, irfft
import itertools

from scipy.io import wavfile
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy as sp
import tensorflow as tf

## Noise generation functions

In [14]:
def ms(x):
    # 신호 x 제곱의 평균값
    # param x : 상수 (dynamic quantity)
    # returns : x의 평균 제곱
    return (np.abs(x)**2.0).mean()


def normalize(y, x=None):
    # y를 (standard normal인) 백색 소음 신호로 정규화
    # 신호 x를 선택적으로 정규화
    # \\mu=0과 \\signal=1인 가우시안의 평균은 1.
    
    # y * np.sqrt((np.abs(x)**2.0).mean() / (np.abs(y)**2.0).mean()) 반환
    if x is not None:
        x = ms(x)
    else :
        x = 1.0
    return y* np.sqrt(x/ms(y))  # np.sqrt(1.0 / (np.abs(y)**2.0).mean()) 반환


def white_noise(N, state=None):
    state = np.random.RandomState() if state is None else state
    return state.randn(N)


def pink_noise(N, state=None):
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j*state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X))+1.)  # 0으로 나뉘는 것을 방지하기 위해 +1
    y = (irfft(X/S)).real
    if uneven :
        y = y[:-1]
    return normalize(y)


def blue_noise(N, state=None):
    # Blue noise
    # param N : 샘플 수
    # param state : PRNG의 state
    # type state : class:'np.random.RandomState'
    # 전력은 옥타브당 6dB씩 증가
    # 전력 밀도는 옥타브당 3dB씩 증가
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j*state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X)))  # filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)


def brown_noise(N, state=None):
    # Violet noise
    # param N : 샘플 수 
    # param sate : PRNG의 state
    # type state : class:'np.random.RandomState'
    # 전력은 옥타브당 -3dB씩 감소
    # 전력 밀도는 옥타브당 6dB씩 감소
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X))+1)  # Filter
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)


def violet_noise(N, state=None):
    # Violet noise. 옥타브당 전력이 6dB씩 증가
    # param N : 샘플 수 
    # param sate : PRNG의 state
    # type state : class:'np.random.RandomState'
    # 전력은 옥타브당 +9dB씩 증가
    # 전력 밀도는 옥타브당 +6dB씩 증가
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

## Tensorflow utilities

tensorflow의 공통 작업을 모듈화하는 유틸리티

In [4]:
# Tf Utils
def get_tensorflow_configuration(device='0', memory_fraction=1):
    # 사용할 GPU 및 프로세스에서 사용할 수 있는 메모리 양을 선택하는 함수
    # param device : 사용될 장치 (str)
    # param memory_fraction : 할당해야하는 메모리 비율 (float)
    # return : 세션에 전달할 config (tf 객체)
    device = str(device)
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    config.gpu_options.visible_device_list = device
    return (config)


def start_tensorflow_session(device='0', memory_fraction=1):
    # 사용할 GPU 장치와 사전 할당될 메모리 부분을 관리하는 tensorflow 세션을 시작
    # device : 장치 번호가 있는 문자열(str)
    # memory_fraction : 지정된 메모리에서 사전 할당될 메모리 일부 (float[0.1])
    # return : 구성(configure)된 tf.Session 
    return (tf.Session(config=get_tensorflow_configuration(device=device, memory_fraction=memory_fraction)))


def get_summary_writer(session, logs_path, project_id, version_id):
    # For tensorboarded reportin
    # session : 열린 tensorflow 세션 (tf.Session)
    # logs_path : tensorboard가 logs를 찾는 경로 (str)
    # project_id : 목적을 보고하기 위한 프로젝트 이름
    # version_id : 목적을 보고하기 위한 버전 이름
    # return summary_writer : tesorboard writer
    path = os.path.join(logs_path, '{}_{}'.format(project_id, version_id))
    if os.path.exists(path):
        shutil.rmtree(path)
    summary_writer = tf.suumary.FilterWriter(path, graph_def=session.graph_def)
    return summary_writer

## paths management module

In [6]:
# common paths
def _norm_path(path):
    # 경로 검색 기능의 출력을 정규화하는 데 사용하기 위한 decorator 함수. 슬랙시/백슬래시 윈도우 케이스를 고정하는 데 유용
    def normalize_path(*args, **kwargs):
        return os.path.normpath(path(*args, **kwargs))
    return normalize_path

def _assure_path_exists(path):
    # 경로 검색 기능의 출력 존재 여부를 확인하기 위한 decorator 함수. 슬래시/백슬래시 윈도우 케이스를 고정하는 데 유용
    def assure_exists(*args, **kwargs):
        p = path(*args, **kwargs)
        assert os.path.exists(p), "the following path does not exist: '{}'".format(p)
        return p
    return assure_exists

def _is_output_path(path):
    # 출력 경로 검색 함수의 출력에 적용되는 함수를 그룹화하는 데 사용하는 decorator 함수
    @_norm_path
    @_assure_path_exists
    def check_existence_or_create_it(*args, **kwargs):
        if not os.path.exists(path(*args, **kwargs)):
            "Path does not exist... creating it: {}".format(path(*args, **kwargs))
            os.makedirs(path(*args, **kwargs))
        return path(*args, **kwargs)
    return check_existence_or_create_it

def _is_input_path(path):
    # 입력 경로 검색 함수의 출력에 적용되는 함수를 그룹화하는 데 사용되는 decorator 함수
    @_norm_path
    @_assure_path_exists
    def check_existence(*args, **kwargs):
        return path(*args, **kwargs)
    return check_existence


@_is_input_path
def get_train_path():
    path = "../input/train"
    return path

@_is_input_path
def get_test_path():
    path = "../input/test"
    return path

@_is_input_path
def get_train_audio_path():
    path = os.path.join(get_train_path(), "audio")
    return path

@_is_input_path
def get_scoring_audio_path():
    path = os.path.join(get_test_path(), "audio")
    return path

@_is_output_path
def get_submissions_path():
    path = "../working/output"
    return path

@_is_output_path
def get_silence_path():
    path = "../working/silence"
    return path

## Utilities

공통적인 일반 목적의 유틸리티

In [7]:
# Utilities
flatten = lambda l: [item for sublist in l for item in sublist]

def batching(iterable, n=1):
    l = len(iterable)
    for ndx in range(0,1,n):
        yield iterable[ndx:min(ndx+n, 1)]

## Data tools

데이터 처리 툴

In [12]:
def read_wav(filepath, pad=True):
    # wav file의 filepath를 지정하면 이 함수가 파일을 읽어 정규화하고 16k 샘플이 있는지 확인하기 위해 패딩한다.
    # filpath : wav file 의 기존 파일 경로 (str)
    # pad : 패딩이 필요한지 여부 (bool)
    # returns : 샘플과 target 변수 ((np.array,str)의 튜플)
    sample_rate, x = wavfile.read(filepath)
    target = os.path.split(os.path.split(filepath)[0])[1]
    assert sample_rate == 16000
    if pad:
        return np.pad(x, (0, 16000-len(x)), mode='constant')/32768, target
    else:
        return x/32769, target
    
def get_batcher(list_of_paths, batch_size, label_encoder=None, scoring=False):
    # batches 리스트가 주어진 batch 생성기를 생성
    # list_of_paths : 포맷 요소가 있는 튜플 리스트 (filepath, target) (list)
    # batch_size : 배치 사이즈 (int)
    # label_encoder : 적합된 LabelEncoder (sklearn.LabelEncoder|optional)
    # scoring : target이 고려되어야하는지 여부 (bool)
    # returns : 배치 생성기
    for filepaths in batching(list_of_paths, batch_size):
        wavs, targets = zip(*list(map(read_wav, filepaths)))
        if scoring:
            yield np.expand_dims(np.row_stack(wavs), 2), filepaths
        else:
            if label_encoder is None:
                yield np.expand_dims(np.row_stack(wavs), 2), np.row_stack(targets)
            else:
                yield np.expand_dims(np.row_stack(wavs), 2), np.expand_dims(label_encoder.transform(np.squeeze(targets)),1)

## Architecture building blocks

Inception-1D(wavception)은 이 문제를 위해 설계한 모델이다. 일반 컨볼루션 신경망의 성능을 크게 향상시킨다.

In [134]:
class BatchNorm(object):
    def __init__(self, epsilon=1e-5, momentum=0.999, name='batch_normalization'):
        with tf.compat.v1.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name
            
    def __call__(self, x, train=True):
        return tf.compat.v1.layers.batch_normalization(x, momentum=self.momentum,
                                           epsilon=self.epsilon, scale=True, training=train, name=self.name)
    
def inception_1d(x, is_train, depth, norm_function, activ_function, name):
    # Inception 1D 모듈 구현
    # x : 현재 모듈에 대한 입력 (channels-last가 있는 4D tensor)
    # is_train : BatchNormalization 동작을 제어하기 위한 boolean placeholder가 되도록 고안 (0D tensor)
    # depth : 네트워크의 깊이를 선형적으로 제어 (int)
    # norm_function : 정규화 클래스 (위의 BatchNorm 클래스와 동일한 포맷)
    # activ_function : tensorclow 활성화함수 (예: tf.nn.relu)
    # name : 변수 범위의 이름 (str)
    with tf.compat.v1.variable_scope(name):
        x_norm = norm_function(name="norm_input")(x, train=is_train)

        # Branch 1: 64 x conv 1x1 
        branch_conv_1_1 = tf.compat.v1.layers.conv1d(inputs=x_norm, filters=16*depth, kernel_size=1,
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_1_1")
        branch_conv_1_1 = norm_function(name="norm_conv_1_1")(branch_conv_1_1, train=is_train)
        branch_conv_1_1 = activ_function(branch_conv_1_1, "activation_1_1")

        # Branch 2: 128 x conv 3x3 
        branch_conv_3_3 = tf.compat.v1.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_3_3_1")
        branch_conv_3_3 = norm_function(name="norm_conv_3_3_1")(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, "activation_3_3_1")

        branch_conv_3_3 = tf.compat.v1.layers.conv1d(inputs=branch_conv_3_3, filters=32*depth, kernel_size=3, 
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_3_3_2")
        branch_conv_3_3 = norm_function(name="norm_conv_3_3_2")(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, "activation_3_3_2")

        # Branch 3: 128 x conv 5x5 
        branch_conv_5_5 = tf.compat.v1.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_5_5_1")
        branch_conv_5_5 = norm_function(name="norm_conv_5_5_1")(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, "activation_5_5_1")

        branch_conv_5_5 = tf.compat.v1.layers.conv1d(inputs=branch_conv_5_5, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_5_5_2")
        branch_conv_5_5 = norm_function(name="norm_conv_5_5_2")(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, "activation_5_5_2")

        # Branch 4: 128 x conv 7x7
        branch_conv_7_7 = tf.compat.v1.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_7_7_1")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_1")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_1")

        branch_conv_7_7 = tf.compat.v1.layers.conv1d(inputs=branch_conv_7_7, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.initializers.GlorotUniform(),
                                           padding="same", name="conv_7_7_2")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_2")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_2")

        # Branch 5: 16 x (max_pool 3x3 + conv 1x1)
        branch_maxpool_3_3 = tf.compat.v1.layers.max_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="maxpool_3")
        branch_maxpool_3_3 = norm_function(name="norm_maxpool_3_3")(branch_maxpool_3_3, train=is_train)
        branch_maxpool_3_3 = tf.compat.v1.layers.conv1d(inputs=branch_maxpool_3_3, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.initializers.GlorotUniform(),
                                              padding="same", name="conv_maxpool_3")

        # Branch 6: 16 x (max_pool 5x5 + conv 1x1)
        branch_maxpool_5_5 = tf.compat.v1.layers.max_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="maxpool_5")
        branch_maxpool_5_5 = norm_function(name="norm_maxpool_5_5")(branch_maxpool_5_5, train=is_train)
        branch_maxpool_5_5 = tf.compat.v1.layers.conv1d(inputs=branch_maxpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.initializers.GlorotUniform(),
                                              padding="same", name="conv_maxpool_5")

        # Branch 7: 16 x (avg_pool 3x3 + conv 1x1)
        branch_avgpool_3_3 = tf.compat.v1.layers.average_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="avgpool_3")
        branch_avgpool_3_3 = norm_function(name="norm_avgpool_3_3")(branch_avgpool_3_3, train=is_train)
        branch_avgpool_3_3 = tf.compat.v1.layers.conv1d(inputs=branch_avgpool_3_3, filters=16, kernel_size=1,
                                              kernel_initializer=tf.initializers.GlorotUniform(),
                                              padding="same", name="conv_avgpool_3")

        # Branch 8: 16 x (avg_pool 5x5 + conv 1x1)
        branch_avgpool_5_5 = tf.compat.v1.layers.average_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="avgpool_5")
        branch_avgpool_5_5 = norm_function(name="norm_avgpool_5_5")(branch_avgpool_5_5, train=is_train)
        branch_avgpool_5_5 = tf.compat.v1.layers.conv1d(inputs=branch_avgpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.initializers.GlorotUniform(),
                                              padding="same", name="conv_avgpool_5")

        # Concatenate
        output = tf.concat([branch_conv_1_1, branch_conv_3_3, branch_conv_5_5, branch_conv_7_7, branch_maxpool_3_3, 
                           branch_maxpool_5_5, branch_avgpool_3_3, branch_avgpool_5_5], axis=-1)
        return output

## Load and prepare Data

In [25]:
# 합성되고 제공된 노이즈 추가
filepaths_noise = glob.glob(os.path.join(get_train_audio_path(), "_background_noise_", "*.wav"))

noise = np.concatenate(list(map(lambda x: read_wav(x, False)[0], filepaths_noise)))
noise = np.concatenate([noise, noise[::-1]])
synthetic_noise = np.concatenate([white_noise(N=16000*30, state=np.random.RandomState(655321)), 
                                  blue_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  pink_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  brown_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  violet_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  np.zeros(16000*60)])
synthetic_noise /= np.max(np.abs(synthetic_noise))
synthetic_noise = np.concatenate([synthetic_noise, (synthetic_noise+synthetic_noise[::-1])/2])
all_noise = np.concatenate([noise, synthetic_noise])

  sample_rate, x = wavfile.read(filepath)


In [26]:
np.random.seed(655321)
random.seed(655321)
path = get_silence_path()

if not os.path.exists(path):
    os.makedirs(path)

for noise_clip_no in tqdm(range(8000)):
    if noise_clip_no <= 4000:
        idx = np.random.randint(0, len(noise)-16000)
        clip = noise[idx:(idx+16000)]
    else:
        idx = np.random.randint(0, len(synthetic_noise)-16000)
        clip = synthetic_noise[idx:(idx+16000)]
    wavfile.write(os.path.join(path, "{0:04d}.wav".format(noise_clip_no)), 16000, 
                 ((32767*clip/np.max(np.abs(clip))).astype(np.int16)))

  ((32767*clip/np.max(np.abs(clip))).astype(np.int16)))
100%|█████████████████████████████████████████████████████████████████████████████| 8000/8000 [00:08<00:00, 961.56it/s]


In [50]:
filepaths = glob.glob(os.path.join(get_train_audio_path(), "**/*.wav"), recursive=True)
filepaths += glob.glob(os.path.join(get_silence_path(), "**/*.wav"), recursive=True)
filepaths = list(filter(lambda fp: "_background_noise_" not in fp, filepaths))
validation_list = open(os.path.join(get_train_path(), "validation_list.txt")).readlines()
test_list = open(os.path.join(get_train_path(), "testing_list.txt")).readlines()
validation_list = list(map(lambda fn: os.path.join(get_train_audio_path(), fn.strip()), validation_list))
testing_list = list(map(lambda fn: os.path.join(get_train_audio_path(), fn.strip()), test_list))
#training_list = np.setdiff1d(filepaths, validation_list+testing_list).tolist()  ### setdif1d : 첫배열에서 두번째 배열 뺀 차집합 반환

In [28]:
random.seed(655321)
random.shuffle(filepaths)
random.shuffle(validation_list)
random.shuffle(testing_list)
random.shuffle(training_list)

In [29]:
# Quick Unit-Tests
# 파일 수와 일관성 확인
assert all(map(lambda fp: os.path.splitext(fp)[1]=='.wav', filepaths)) 
assert len(filepaths)==64727-6+8000
assert len(training_list) == len(filepaths)-6798-6835
assert len(validation_list) == 6798
assert len(testing_list) == 6835

# 파일 존재 확인
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), validation_list))
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), testing_list))
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), training_list))
assert set(validation_list + testing_list + training_list) == set(filepaths)

# 세트에 중복데이터 없는지 확인
assert len(np.intersect1d(validation_list, testing_list)) == 0  ### intersect1d : 두 배열의 교집합 정렬해 반환
assert len(np.intersect1d(training_list, testing_list)) == 0
assert len(np.intersect1d(training_list, validation_list)) == 0

AssertionError: 

In [60]:
# Classes processing
cardinal_classes = list(set(map(lambda fp:os.path.split(os.path.split(fp)[0])[1], filepaths)))
le_classes = LabelEncoder().fit(cardinal_classes)
Counter(map(lambda fp:os.path.split(os.path.split(fp)[0])[1], filepaths))

Counter({'bed': 1713,
         'bird': 1731,
         'cat': 1733,
         'dog': 1746,
         'down': 2359,
         'eight': 2352,
         'five': 2357,
         'four': 2372,
         'go': 2372,
         'happy': 1742,
         'house': 1750,
         'left': 2353,
         'marvin': 1746,
         'nine': 2364,
         'no': 2375,
         'off': 2357,
         'on': 2367,
         'one': 2370,
         'right': 2367,
         'seven': 2377,
         'sheila': 1734,
         'six': 2369,
         'stop': 2380,
         'three': 2356,
         'tree': 1733,
         'two': 2373,
         'up': 2375,
         'wow': 1745,
         'yes': 2377,
         'zero': 2376,
         'silence': 8000})

In [61]:
# Quick Unit-Tests
# 데이터 사전준비 확인
_gen_test = get_batcher(filepaths, 1000)
batch_a_wav, batch_a_target = next(_gen_test)
batch_b_wav, batch_b_target = next(_gen_test)
_gen_test_le = get_batcher(filepaths, 1000, label_encoder=le_classes)
batch_le_wav, batch_le_target = next(_gen_test_le)

# batch matrix 형태 상관관계 확인
assert batch_a_wav.shape == (1000, 16000, 1)
assert batch_le_wav.shape == (1000, 16000, 1)
assert batch_a_wav.shape == batch_b_wav.shape == batch_le_wav.shape

# batch 재현성 확인
assert np.sum(np.abs(batch_a_wav-batch_b_wav)) != 0
assert len(batch_a_target) == len(batch_b_target) == len(batch_le_target)
assert any(batch_a_target != batch_b_target)

# 클래스 라벨 인코더 확인
assert all(batch_le_target == np.expand_dims(le_classes.transform(np.squeeze(batch_a_target)),1))

StopIteration: 

## Architecture design

이제 WavCeption을 디자인 해보자.

In [140]:
class NameSpacer:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
        
class Architecture:
    def __init__(self, class_cardinality, seq_len=16000, name='architecture'):
        self.seq_len = seq_len
        self.class_cardinality = class_cardinality
        self.optimizer = tf.optimizers.Adam(learning_rate=0.0001)
        self.name = name
        self.define_computation_graph()
        
        # Aliases
        self.ph = self.placeholders
        self.op = self.optimizers
        self.summ = self.summaries
        
    def define_computation_graph(self):
        # reset graph
        tf.compat.v1.reset_default_graph()
        self.placeholders = NameSpacer(**self.define_placeholders())
        self.core_model = NameSpacer(**self.define_core_model())
        self.losses = NameSpacer(**self.define_losses())
        self.optimizers = NameSpacer(**self.define_optimizers())
        self.summaries = NameSpacer(**self.define_summaries())
        
    def define_placeholders(self):
        import tensorflow.compat.v1 as tf
        with tf.compat.v1.variable_scope('Placeholders'):
            wav_in = tf.placeholder(dtype=tf.float32, shape=(None, self.seq_len, 1), name='wav_in')
            is_train = tf.placeholder(dtype=tf.bool, shape=None, name='is_train')
            target = tf.placeholder(dtype=tf.int32, shape=None, name='acc_dev')
            acc_dev = tf.placeholder(dtype=tf.float32, shape=None, name="acc_dev")
            loss_dev = tf.placeholder(dtype=tf.float32, shape=None, name="loss_dev")
            return ({'wav_in':wav_in, 'target':target, 'is_train':is_train, 'acc_dev':acc_dev, 'loss_dev':loss_dev})
        
    def define_core_model(self):
        with tf.compat.v1.variable_scope('Core_Model'):
            x = inception_1d(x=self.placeholders.wav_in, is_train=self.placeholders.is_train, norm_function=BatchNorm,
                            activ_function=tf.nn.relu, depth=1, name='Inception_1_1')
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm,
                            activ_function=tf.nn.relu, depth=1, name='Inception_1_2')
            x = tf.compat.v1.layers.max_pooling1d(x,2,2, name='maxpool_1')
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_2_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_2_3")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_2")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_3_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_3_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_3")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_4_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_4_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_4")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_5_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_5_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_5")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_6_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_6_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_6")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_7_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_7_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_7")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_8_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_8_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_8")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_9_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_9_2")
            x = tf.compat.v1.layers.max_pooling1d(x, 2, 2, name="maxpool_9")
            
            x = tf.compat.v1.layers.flatten(x)
            x = tf.compat.v1.layers.dense(BatchNorm(name='bn_dense_1')(x, train=self.placeholders.is_train),
                               128, activation=tf.nn.relu, kernel_initializer=tf.initializers.GlorotUniform(), name='dense_1')
            output = tf.compat.v1.layers.dense(BatchNorm(name='bn_dense_2')(x,train=self.placeholders.is_train),
                                    self.class_cardinality, activation=None, kernel_initializer=tf.initializers.GlorotUniform(), name='output')
            
            return ({'output':output})
        
        
    def define_losses(self):
        with tf.compat.v1.variable_scope('Losses'):
            softmax_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.squeeze(self.placeholders.target),
                                                                        logits=self.core_model.output, name='softmax')
        return ({'softmax':softmax_ce})
            
    def define_optimizers(self):
        with tf.compat.v1.variable_scope('Optimization'):
            op = self.optimizer.minimize(self.losses.softmax, var_list=None)
            return ({'op':op})
            
    def define_summaries(self):
        with tf.compat.v1.variable_scope('Summaries'):
            ind_max = tf.squeeze(tf.cast(tf.argmax(self.core_model.output, axis=1), tf.int32))
            target = tf.squeeze(self.placeholders.target)
            acc= tf.reduce_mean(tf.cast(tf.equal(ind_max, target), tf.float32))
        loss = tf.reduce_mean(self.losses.softmax)
        train_scalar_probes = {"accuracy": acc, 
                                "loss": loss}
        train_performance_scalar = [tf.summary.scalar(k, tf.reduce_mean(v), family=self.name) 
                                    for k, v in train_scalar_probes.items()]
        train_performance_scalar = tf.summary.merge(train_performance_scalar)

        dev_scalar_probes = {"acc_dev": self.placeholders.acc_dev, 
                                "loss_dev": self.placeholders.loss_dev}
        dev_performance_scalar = [tf.summary.scalar(k, v, family=self.name) for k, v in dev_scalar_probes.items()]
        dev_performance_scalar = tf.summary.merge(dev_performance_scalar)
            
        return({"accuracy": acc, "loss": loss, "s_tr": train_performance_scalar, "s_de": dev_performance_scalar})

## Run model

모델 실행이 영원히 걸리지 않으려면 GPU를 사용해 실행해야한다. 또한 예측하기 위해 네트워크를 중지할 시기를 결정해야한다.

In [142]:
net = Architecture(class_cardinality=len(cardinal_classes), name='Wavception')

ValueError: `tape` is required when a `Tensor` loss is passed.