# WavCeption V1: a 1-D Inception approach
https://www.kaggle.com/ivallesp/wavception-v1-a-1-d-inception-approach-lb-0-76

## Load modules and libraries


In [2]:
%matplotlib inline
import numpy as np 
import pandas as pd 
import os
import shutil
import glob
import random
from tqdm import tqdm
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import IPython
from numpy.fft import rfft, irfft
import numpy as np
import random
import itertools

from scipy.io import wavfile
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy as sp
import tensorflow as tf

## Noise generation functions

In [3]:
def ms(x):
    """시그널 x의 제곱의 평균 값.
    x: Dynamic quantity.
    returns: x의 제곱 평균
    """
    return (np.abs(x)**2.0).mean()

def normalize(y, x=None):
    """y를 (표준 정규) 백색 노이즈 신호로 정규화.
    선택적으로 시그널 x의 파워로 정규화
    """
    if x is not None:
        x = ms(x)
    else:
        x = 1.0
    return y * np.sqrt( x / ms(y) )

def white_noise(N, state=None):
    state = np.random.RandomState() if state is None else state
    return state.randn(N)

def pink_noise(N, state=None):

    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X))+1.) # +1 to avoid divide by zero
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def blue_noise(N, state=None):
    """
    청색 노이즈
    N: 샘플 수
    state: PRNG 상태
    type state: :class:`np.random.RandomState`
    
    파워는 옥타브당 6dB 증가
    파워 밀도는 옥타브당 3dB 증가

    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def brown_noise(N, state=None):
    """
    갈색 노이즈
    N: 샘플 수
    state: PRNG 상태
    type state: :class:`np.random.RandomState`
    
    파워는 옥타브당 3dB 감소
    파워 밀도는 옥타브당 6dB 증가

    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X))+1)# Filter
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def violet_noise(N, state=None):
    """
    보라 노이즈
    N: 샘플 수
    state: PRNG 상태
    type state: :class:`np.random.RandomState`
    
    파워는 옥타브당 3dB 감소
    파워 밀도는 옥타브당 6dB 감소

    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

## Tensorflow utilies
텐서플로우의 공통 작업을 모듈화합니다.

In [6]:
def get_tensorflow_configuration(device='0', memory_fraction=1):
    """
    사용할 GPU 및 프로세스에서 사용할 수 있는 메모리 양을 선택하는 기능입니다.
    device: 사용할 디바이스(str)
    memory_complet: 할당해야하는 메모리 비율(comparent)
    return: 세션에 전달할 config(tf 개체)
    """
    device = str(device)
    config = tf.ConfigPorto()
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fractioon = memory_fraction
    config.gpu_options.visible_device_list = device
    return(config)

def start_tensorflow_session(device='0', memory_fraction=1):
    """
    사용할 GPU 장치를 관리하는 텐서 플로우 세션을 시작합니다. 즉, 사전 할당될 메모리의 비율입니다.
    device: 사용할 디바이스(str): 디바이스 번호(str)가 있는 문자열입니다.
    memory_fraction: 지정된 메모리(float[0,1)에서 사전 할당될 메모리의 일부입니다.
    return: 세션에 전달할 config(tf 개체)
    """
    return(tf.Session(config=get_tensorflow_configuration(device=device, memory_fraction=memory_fraction)))

def get_summary_writer(session, logs_path, project_id, version_id):
    """
    텐서보드 리포팅
    session: 열린 텐서 플로우 세션
    logs_path: 텐서보드가 로그를 찾는 경로(str)
    project_id: 보고용 프로젝트 이름(str)
    version_id: 보고용 버전 이름(str)
    return summary_writer: 텐서보드 writer
    """
    path = os.path.join(logs_path, "{}_{}".format(project_id, version_id))
    if os.path.exists(path):
        shutil.rmtree(path)
        summary_writer = tf.summary.FileWriter(path, graph_def=session.graph_def)
        return(summary_writer)

## Paths management module
경로를 다루는 모듈입니다.

In [8]:
# Common paths
def _norm_path(path):
    """
    경로 검색 기능의 출력을 정규화하는데 사용합니다. 
    슬래시나 역슬래시 케이스를 고치는데 유용합니다.
    """
    def normalize_path(*args, **kwargs):
        return os.path.normpath(path(*args, **kwargs))
    return normalize_path

def _assure_path_exists(path):
    """
    경로 검색 기능의 출력 유무를 확인하기 위한 함수입니다.
    """
    def assure_exists(*args, **kwargs):
        p = path(*args, **kwargs)
        assert os.path.exists(p), "다음 경로가 존재하지 않습니다: '{}'".format(p)
        return p
    return assure_exists

def _is_output_path(path):
    """
    경로 검색 함수의 출력에 적용되는 함수를 그룹화하기 위한 함수입니다.
    """
    @_norm_path
    @_assure_path_exists
    def check_existence_or_create_it(*args, **kwargs):
        if not os.path.exists(path(*args, **kwargs)):
            "경로가 존재하지 않습니다.. 생성: {}".format(path(*args, **kwargs))
            os.makedirs(path(*args, **kwargs))
        return path(*args, **kwargs)
    return check_existence_or_create_it

def _is_input_path(path):
    """
    입력 경로 검색 함수의 출력에 적용되는 함수를 그룹화하기 위한 데코레이터 함수입니다.
    """
    @_norm_path
    @_assure_path_exists
    def check_existence(*args, **kwargs):
        return path(*args, **kwargs)
    return check_existence

@_is_input_path
def get_train_path():
    path = './input/train'
    return path

@_is_input_path
def get_test_path():
    path = './input/test'
    return path

@_is_input_path
def get_train_audio_path():
    path = os.path.join(get_train_path(), 'audio')
    return path

@_is_input_path
def get_scoring_audio_path():
    path = os.path.join(get_test_path(), 'audio')
    return path

@_is_output_path
def get_submissions_path():
    path = './working/output'
    return path

@_is_output_path
def get_silence_path():
    path = './working/silence'
    return path

## Utilities
범용 유틸리티 입니다.

In [9]:
flatten = lambda l: [item for sublist in l for item in sublist]

def batching(iterable, n=1):
    l = len(iterable)
    for ndx in range(0, 1, n):
        yield iterable[ndx:min(ndx + n, l)]

## Data Tools
데이터 핸들링 도구 입니다.

In [11]:
def read_wav(filepath, pad=True):
    """
    wav 파일의 경로를 지정하면 이 함수가 파일을 읽고 정규화하여 16k 샘플이 있는지     확인합니다.
    filepath: wav 파일의 기존 파일 경로(str)입니다.
    pad: 패딩 여부(bool)
    returns: 샘플과 타겟 변수(tuple (np.array, str)를 반환합니다.
    """
    sample_rate, x = wavfile.read(filepath)
    target = os.path.split(os.path.split(filepath)[0])[1]
    assert sample_rate==16000
    if pad:
        return np.pad(x, (0, 16000-len(x)), mode='constant')/32768, target
    else:
        return x/32768, target
    
def get_batcher(list_of_paths, batch_size, label_encoder=None, scoring=False):
    """
    배치 목록이 지정된 배치 생성기를 빌드합니다.
    list_of_class: (파일 경로, 대상)이 있는 튜플 리스트입니다.(list)
    batch_size: 배치의 크기(int)입니다.
    label_encoder: 라벨 인코더입니다.
    scoring: 타겟 고려 여부(bool)
    returns: 배치 생성기
    """
    for filepaths in batching(list_of_paths, batch_size):
        wavs, targets = zip(*list(map(read_wav, filepaths)))
        if scoring:
            yield np.expand_dims(np.row_stack(wavs), 2), filepaths
        else:
            if label_encoder is None:
                yield np.expand_dims(np.row_stack(wavs), 2), np.row_stack(targets)
            else:
                yield np.expand_dims(np.row_stack(wavs), 2), np.expand_dims(label_encoder.transform(np.squeeze(targets)), 1)

## Architecture building blocks
Inception-1D(일명 wavception)는 일반 컨볼루션 신경망의 성능을 크게 향상시킵니다.

In [12]:
class BatchNorm(object):
    def __init__(self, epsilon=1e-5, momentum=0.999, name='batch_norm'):
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name
            
    def __call__(self, x, train=True):
        return tf.contrib.layers.batch_norm(x,
                                           decay=self.momentum,
                                           updates_collections=None,
                                           epsilon=self.epsilon,
                                           scale=True,
                                           is_training=train,
                                           scope=self.name)
    
def inception_1d(x, is_train, depth, norm_function, activ_function, name):
    """
    Inception 1d 모듈 구현입니다.
    x: 현재 모듈에 대한 입력(채널-마지막을 포함한 4D 텐서)
    is_train: 배치정규화 동작을 제어
    depth: 네트워크의 깊이를 선형적으로 제어(int)
    norm_function: 정규화 클래스(위의 BatchNorm 클래스와 동일한 형식)
    activ_function: 활성화 함수
    name: 변수 범위(str)의 이름
    """
    with tf.variable_scope(name):
        x_norm = norm_function(name='norm_input')(x, train=is_train)
        
        # Branch 1: 64 x conv 1x1
        branch_conv_1_1 = tf.layers.conv1d(inputs=x_norm, filters=16*depth, kernel_size=1, kernel_initializer=tf.contrib.layers.xavier_initializer(), padding='same', name='conv_1_1')
        branch_conv_1_1 = norm_function(name='norm_conv_1_1')(branch_conv_1_1, train=is_train)
        branch_conv_1_1 = activ_function(branch_conv_1_1, 'activation_1_1')
        
        # Branch 2: 128 x conv 3x3
        branch_conv_3_3 = tf.layers.conv1d(inputs=x_norm, filters=16*depth, kernel_size=1, kernel_initializer=tf.contrib.layers.xavier_initializer(), padding='same', name='conv_3_3_1')
        branch_conv_3_3 = norm_function(name='norm_conv_3_3_1')(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, 'activation_3_3_1')
        
        branch_conv_3_3 = tf.layers.conv1d(inputs=branch_conv_3_3, filters=32*depth, kernel_size=3, kernel_initializer=tf.contrib.layers.xavier_initializer(), padding='same', name='conv_3_3_2')
        branch_conv_3_3 = norm_function(name='norm_conv_3_3_2')(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, 'activation_3_3_2')
        
        # Branch 3: 128 x conv 5x5
        branch_conv_5_5 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, kernel_initializer=tf.contrib.layers.xavier_initializer(), padding='same', name='conv_5_5_1')
        branch_conv_5_5 = norm_function(name='norm_conv_5_5_1')(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, 'activation_5_5_1')
        
        branch_conv_5_5 = tf.layers.conv1d(inputs=branch_conv_5_5, filters=32*depth, kernel_size=5, kernel_initializer=tf.contrib.layers.xavier_initializer(), padding='same', name='conv_5_5_2')
        branch_conv_5_5 = norm_function(name='norm_conv_5_5_2')(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, 'activation_5_5_2')
        
        # Branch 4: 128 x conv 7x7
        branch_conv_7_7 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_7_7_1")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_1")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_1")

        branch_conv_7_7 = tf.layers.conv1d(inputs=branch_conv_7_7, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_7_7_2")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_2")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_2")        
        # Branch 5: 16 x (max_pool 3x3 + conv 1x1)
        branch_maxpool_3_3 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="maxpool_3")
        branch_maxpool_3_3 = norm_function(name="norm_maxpool_3_3")(branch_maxpool_3_3, train=is_train)
        branch_maxpool_3_3 = tf.layers.conv1d(inputs=branch_maxpool_3_3, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_maxpool_3")
        
        # Branch 6: 16 x(max_pool 5x5 + conv 1x1)
        branch_maxpool_5_5 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding='same', name='maxpool_5')
        branch_maxpool_5_5 = norm_function(name='norm_maxpool_5_5')(branch_maxpool_5_5, train=is_train)
        branch_maxpool_5_5 = tf.layers.conv1d(inputs=branch_maxpool_5_5, filters=16, kernel_size=1, kernel_initializer=tf.contrib.layers.xavier_initializer(), padding='same', name='conv_maxpool_5')
        
        # Branch 7: 16 x (avg_pool 3x3 + conv 1x1)
        branch_avgpool_3_3 = tf.layers.average_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding='same', name='avgpool_3')
        branch_avgpool_3_3 = norm_function(name='norm_avgpool_3_3')(branch_avgpool_3_3, train=is_train)
        branch_avgpool_3_3 = tf.layers.conv1d(inputs=branch_avgpool_3_3, filters=16, kernel_size=1,
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_avgpool_3")

        # Branch 8: 16 x (avg_pool 5x5 + conv 1x1)
        branch_avgpool_5_5 = tf.layers.average_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="avgpool_5")
        branch_avgpool_5_5 = norm_function(name="norm_avgpool_5_5")(branch_avgpool_5_5, train=is_train)
        branch_avgpool_5_5 = tf.layers.conv1d(inputs=branch_avgpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_avgpool_5")
        
        # Concatnate
        output = tf.concat([branch_conv_1_1, branch_conv_3_3, branch_conv_5_5, branch_conv_7_7, branch_maxpool_3_3, branch_maxpool_5_5, branch_avgpool_3_3, branch_avgpool_5_5], axis=-1)
        return output

## Load and prepare data
합성 및 제공된 노이즈 추가

In [15]:
filepath_noise = glob.glob(os.path.join(get_train_audio_path(), '_background_noise_', '*.wav'))

noise = np.concatenate(list(map(lambda x: read_wav(x, False)[0], filepath_noise)))
noise = np.concatenate([noise, noise[::-1]])
synthetic_noise = np.concatenate([white_noise(N=16000*30, state=np.random.RandomState(655321)),
                                 blue_noise(N=16000*30, state=np.random.RandomState(655321)),
                                 pink_noise(N=16000*30, state=np.random.RandomState(655321)),
                                 brown_noise(N=16000*30, state=np.random.RandomState(655321)),
                                 violet_noise(N=16000*30, state=np.random.RandomState(655321)),
                                 np.zeros(16000*60)])
synthetic_noise /= np.max(np.abs(synthetic_noise))
synthetic_noise = np.concatenate([synthetic_noise, (synthetic_noise+synthetic_noise[::-1])/2])
all_noise = np.concatenate([noise, synthetic_noise])

  sample_rate, x = wavfile.read(filepath)


In [17]:
np.random.seed(655321)
random.seed(655321)

path = get_silence_path()

if not os.path.exists(path):
    os.makedirs(path)
    
for noise_clip_no in tqdm(range(8000)):
    if noise_clip_no <= 4000:
        idx = np.random.randint(0, len(noise)-16000)
        clip = noise[idx:(idx+16000)]
    else:
        idx = np.random.randint(0, len(noise)-16000)
        clip = synthetic_noise[idx:(idx+16000)]
    wavfile.write(os.path.join(path, '{0:04d}.wav'.format(noise_clip_no)), 16000, ((32767*clip/np.max(np.abs(clip))).astype(np.int16)))

  wavfile.write(os.path.join(path, '{0:04d}.wav'.format(noise_clip_no)), 16000, ((32767*clip/np.max(np.abs(clip))).astype(np.int16)))
 50%|██████████████████████████████████████▌                                      | 4004/8000 [00:07<00:07, 502.79it/s]


ValueError: zero-size array to reduction operation maximum which has no identity