# kaggle study 26일차(tensorflow recognition)

코드출처 :https://www.kaggle.com/ivallesp/wavception-v1-a-1-d-inception-approach-lb-0-76

## WavCeption V1: just a 1-D Inception approach
저는 제가 가지고 놀던 작은 장난감을 공유하고 싶었을 뿐이고, 놀라운 결과를 주었습니다. 저는 현재 시간이 없기 때문에, 사람들이 어떻게 그것을 가지고 노는지를 보기 위해 그것을 공유하고 싶습니다:-D. WaveCeption V1 네트워크는 일반 컨볼루션 신경망에 비해 인상적인 결과를 내는 것처럼 보이지만, 이번 대회에서는 전처리 및 알려지지 않은 트랙 관리에 대한 힘든 작업이 진행 중인 것으로 보입니다. 구글의 인셉션 네트워크에 기반을 두고 있습니다. 같은 생각이죠.

저는 몇 주 전에 이 모듈들을 캐스케이드(아래 참조)로 연결하여 쉽게 1D 인셉션 네트워크를 구축할 수 있도록 이 모듈을 구현하는 모듈을 작성했습니다.

불행히도 여러 가지 Kaggle 제약으로 인해 커널 시스템에서 실행되지 않으므로, 직접 다운로드하여 실행하는 것이 좋습니다.

너무 무리하지 않고 12시간 동안 모델을 실행함으로써 리더보드에서 0.76을 달성했습니다(현지 테스트에서는 0.84). 같은 계열의 다른 실험에서는 0.89점을 주기도 했습니다. 그래서 미지의 클립을 다루는 방법이 크게 개선되었습니다:-D.

### Load modules and libraries

In [32]:
%matplotlib inline
import numpy as np 
import pandas as pd 
import os
import shutil
import glob
import random
from tqdm import tqdm
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import IPython
from numpy.fft import rfft, irfft
import numpy as np
import random
import itertools
from tensorflow.python.framework import ops
ops.reset_default_graph()

from scipy.io import wavfile
import IPython.display as ipd
import matplotlib.pyplot as plt
import scipy as sp
import tensorflow as tf

### Noise generation functions
이 섹션의 코드는 다음 링크에서 차용 및 개조되었습니다. : https://github.com/python-acoustics/python-acoustics/blob/master/acoustics/generator.py

In [2]:
def ms(x):
    """signal x의 제곱의 평균 값입니다.
    :param_ x: 동적인 값
    :return: "x"제곱의 평균 
    """
    return (np.abs(x)**2.0).mean()

def normalize(y, x=None):
    """는 (표준 정상) 흰색 노이즈 신호에 y의 전원을 공급합니다.
    선택적으로 신호 'x'의 전원으로 정규화합니다.
    수학에서 가우스 평균은 'mu=0,sigma=1 입니다.
    """
    #return y * np.sqrt( (np.abs(x)**2.0).mean() / (np.abs(y)**2.0).mean() )
    if x is not None:
        x = ms(x)
    else:
        x = 1.0
    return y * np.sqrt( x / ms(y) )
    #return y * np.sqrt( 1.0 / (np.abs(y)**2.0).mean() )

def white_noise(N, state=None):
    state = np.random.RandomState() if state is None else state
    return state.randn(N)

def pink_noise(N, state=None):

    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X))+1.) # +1 to avoid divide by zero
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def blue_noise(N, state=None):
    """
   
    
    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = np.sqrt(np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def brown_noise(N, state=None):
    """
    Violet noise.
    
    :param N: 샘플의 양입니다.
    :param state: PRNG의 상태입니다.
    :type state: :class:'np.random.RandomState

    전력은 옥타브당 6dB씩 증가합니다.
    전력 밀도는 옥타브당 3dB로 증가합니다.
    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X))+1)# Filter
    y = (irfft(X/S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

def violet_noise(N, state=None):
    """
    Violet noise. Power increases with 6 dB per octave. 
    
    :param N: 샘플의 양
    :param state:  PRNG의 상태
    :type state: :class:'np.random.RandomState
    
    전력은 옥타브당 9dB씩 증가합니다.
    전력 밀도는 옥타브당 6dB로 증가합니다.
    
    """
    state = np.random.RandomState() if state is None else state
    uneven = N%2
    X = state.randn(N//2+1+uneven) + 1j * state.randn(N//2+1+uneven)
    S = (np.arange(len(X)))# Filter
    y = (irfft(X*S)).real
    if uneven:
        y = y[:-1]
    return normalize(y)

### Tensorflow utilities
텐서 흐름 공통 작업을 모듈화하는 유틸리티입니다.

In [3]:
# Tf Utils
def get_tensorflow_configuration(device="0", memory_fraction=1):
    """
    사용할 GPU 및 프로세스에서 사용할 수 있는 메모리 양을 선택하는 기능입니다.
    :param device: 사용할 디바이스(str)입니다.
    :param memory_complet: 할당해야 하는 메모리 비율(comparent)입니다.
    :return: 세션에 전달할 구성(tf 개체)입니다.
    """
    device = str(device)
    config = tf.ConfigProto()
    config.allow_soft_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    config.gpu_options.visible_device_list = device
    return(config)


def start_tensorflow_session(device="0", memory_fraction=1):
    """
    사용할 GPU 장치를 관리하는 텐서 플로우 세션을 시작합니다.
    즉, 사전 할당될 메모리의 비율입니다.
    :device: 디바이스 번호(str)가 있는 문자열입니다.
    :memory_backet: 지정된 메모리에서 사전 할당될 메모리의 일부입니다.
    장치(예: [0, 1])입니다.
    :return: tf.Session을 구성함 
    """
    return(tf.Session(config=get_tensorflow_configuration(device=device, memory_fraction=memory_fraction)))


def get_summary_writer(session, logs_path, project_id, version_id):
    """
    텐서보드 보고용
    :param session: 열린 텐서 흐름 세션(tf)입니다.(tf.Session)
    :param logs_path: 텐서보드가 로그를 찾는 경로(str)입니다.
    :param project_id: 보고용 프로젝트 이름입니다(str).
    :param version_id: 보고용 버전 이름입니다(str).
    :return summary_writer :summary_writer를 반환합니다.
    """
    path = os.path.join(logs_path,"{}_{}".format(project_id, version_id)) 
    if os.path.exists(path):
        shutil.rmtree(path)
    summary_writer = tf.summary.FileWriter(path, graph_def=session.graph_def)
    return(summary_writer)

### Paths management module
경로를 처리하는 모듈입니다.

In [4]:
# Common paths
def _norm_path(path):
    """
    경로 검색 기능의 출력 유무를 확인하기 위한 Decorator 함수입니다. 
    슬래시/백슬래시 창 케이스를 고정하는 데 유용합니다.
    """
    def normalize_path(*args, **kwargs):
        return os.path.normpath(path(*args, **kwargs))
    return normalize_path


def _assure_path_exists(path):
    """
    경로 검색 기능의 출력 유무를 확인하기 위한 Decorator 함수입니다. 
    슬래시/백슬래시 창 케이스를 고정하는 데 유용합니다.
    """
    def assure_exists(*args, **kwargs):
        p=path(*args, **kwargs)
        assert os.path.exists(p), "the following path does not exist: '{}'".format(p)
        return p
    return assure_exists


def _is_output_path(path):
    """
    Decorator 함수는 출력 경로 검색 함수의 출력에 적용되는 함수를 그룹화하기 위한 것입니다.
    """
    @_norm_path
    @_assure_path_exists
    def check_existence_or_create_it(*args, **kwargs):
        if not os.path.exists(path(*args, **kwargs)):
            "Path does not exist... creating it: {}".format(path(*args, **kwargs))
            os.makedirs(path(*args, **kwargs))
        return path(*args, **kwargs)
    return check_existence_or_create_it


def _is_input_path(path):
    """
    Decorator function intended for grouping the functions which are applied over the output of an input path retrieval
    function
    """
    @_norm_path
    @_assure_path_exists
    def check_existence(*args, **kwargs):
        return path(*args, **kwargs)
    return check_existence

@_is_input_path
def get_train_path():
    path = "C:/Users/이동훈/Desktop/github/kaggle/kagglestudy/Data/tensorflow/train"
    return path

@_is_input_path
def get_test_path():
    path = "C:/Users/이동훈/Desktop/github/kaggle/kagglestudy/Data/tensorflow/test"
    return path

@_is_input_path
def get_train_audio_path():
    path = os.path.join(get_train_path(), "audio")
    return path

@_is_input_path
def get_scoring_audio_path():
    path = os.path.join(get_test_path(), "audio")
    return path

@_is_output_path
def get_submissions_path():
    path = "C:/Users/이동훈/Desktop/github/kaggle/kagglestudy/Data/tensorflow/working/output"
    return path

@_is_output_path
def get_silence_path():
    path = "C:/Users/이동훈/Desktop/github/kaggle/kagglestudy/Data/tensorflow/working/silence"
    return path

### Utilities
범용 유틸리티입니다.

In [5]:
flatten = lambda l: [item for sublist in l for item in sublist]

def batching(iterable,n=1):
    l = len(iterable)
    for ndx in range(0,l,n):
        yield iterable[ndx:min(ndx+n,l)]

### Data Tools
데이터 처리 도구입니다.

In [6]:
def read_wav(filepath, pad=True):
    """
    웨이브 파일의 파일 경로를 지정하면 이 함수가 파일을 읽고 표준화하며 패드를 만듭니다.
    16K 샘플을 가지고 있다는 것을 확실히 하기 위해서요.
    :param filepath: wav 파일의 기존 파일 경로(str)
    :param pad: 패딩이 필요합니까? (bool)
    
    :return: 샘플과 대상 변수(tuple of (np.array, str)
    """
    sample_rate, x = wavfile.read(filepath)
    target = os.path.split(os.path.split(filepath)[0])[1]
    assert sample_rate==16000
    if pad:
        return np.pad(x, (0, 16000-len(x)), mode="constant")/32768, target
    else:
        return x/32768, target

def get_batcher(list_of_paths, batch_size, label_encoder=None, scoring=False):
    """
    배치 목록이 지정된 배치 생성기를 빌드합니다.
    :param list_of_class: 형식 요소(파일 경로, 대상)가 있는 튜플 목록입니다(list).
    :param batch_size: 배치의 크기(int)
    :param label_encoder: 적합된 LabelEncoder.LabelEncoder(sklearn.LabelEncoder|optional)
    :param scoring: 목표를 고려해야 합니까? (bool)
    
    :return: 배치 생성기( batch generator)를 반환
    """
    for filepaths in batching(list_of_paths, batch_size):
        wavs, targets = zip(*list(map(read_wav, filepaths)))
        if scoring:
            yield np.expand_dims(np.row_stack(wavs), 2), filepaths
        else:
            if label_encoder is None:
                yield np.expand_dims(np.row_stack(wavs), 2), np.row_stack(targets)
            else:
                yield np.expand_dims(np.row_stack(wavs), 2), np.expand_dims(label_encoder.transform(np.squeeze(targets)),1)

### Architecture building blocks
Inception-1D(일명 웨이브 감지)는 이 문제를 위해 몇 주 전에 디자인한 모듈입니다. 일반 컨볼루션 신경망의 성능을 크게 향상시킵니다.

In [7]:
class BatchNorm(object):
    def __init__(self, epsilon=1e-5, momentum=0.999, name="batch_norm"):
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x, train=True):
        return tf.contrib.layers.batch_norm(x,
                                            decay=self.momentum,
                                            updates_collections=None,
                                            epsilon=self.epsilon,
                                            scale=True,
                                            is_training=train,
                                            scope=self.name)
    
    

def inception_1d(x, is_train, depth, norm_function, activ_function, name):
    """
    Inception 1D 모듈 구현입니다.
    :paramx: 현재 모듈에 대한 입력(4D 텐서(채널-마지막) 포함)입니다.
    :param is_train: BatchNormalization 동작을 제어하기 위한 부울 자리 표시자(0D 텐서)를 의도합니다.
    :param depth: 네트워크의 깊이를 선형적으로 제어합니다(int).
    :param norm_function: 정규화 클래스(위의 BatchNorm 클래스와 동일한 형식)입니다.
    :param active_function: 텐서 흐름 활성화 함수(예: tf.nn.relu)
    :param name: 변수 범위(str)의 이름입니다.
    """
    with tf.variable_scope(name):
        x_norm = norm_function(name="norm_input")(x, train=is_train)

        # Branch 1: 64 x conv 1x1 
        branch_conv_1_1 = tf.layers.conv1d(inputs=x_norm, filters=16*depth, kernel_size=1,
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_1_1")
        branch_conv_1_1 = norm_function(name="norm_conv_1_1")(branch_conv_1_1, train=is_train)
        branch_conv_1_1 = activ_function(branch_conv_1_1, "activation_1_1")

        # Branch 2: 128 x conv 3x3 
        branch_conv_3_3 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_3_3_1")
        branch_conv_3_3 = norm_function(name="norm_conv_3_3_1")(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, "activation_3_3_1")

        branch_conv_3_3 = tf.layers.conv1d(inputs=branch_conv_3_3, filters=32*depth, kernel_size=3, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_3_3_2")
        branch_conv_3_3 = norm_function(name="norm_conv_3_3_2")(branch_conv_3_3, train=is_train)
        branch_conv_3_3 = activ_function(branch_conv_3_3, "activation_3_3_2")

        # Branch 3: 128 x conv 5x5 
        branch_conv_5_5 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_5_5_1")
        branch_conv_5_5 = norm_function(name="norm_conv_5_5_1")(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, "activation_5_5_1")

        branch_conv_5_5 = tf.layers.conv1d(inputs=branch_conv_5_5, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_5_5_2")
        branch_conv_5_5 = norm_function(name="norm_conv_5_5_2")(branch_conv_5_5, train=is_train)
        branch_conv_5_5 = activ_function(branch_conv_5_5, "activation_5_5_2")

        # Branch 4: 128 x conv 7x7
        branch_conv_7_7 = tf.layers.conv1d(inputs=x_norm, filters=16, kernel_size=1, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_7_7_1")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_1")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_1")

        branch_conv_7_7 = tf.layers.conv1d(inputs=branch_conv_7_7, filters=32*depth, kernel_size=5, 
                                           kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                           padding="same", name="conv_7_7_2")
        branch_conv_7_7 = norm_function(name="norm_conv_7_7_2")(branch_conv_7_7, train=is_train)
        branch_conv_7_7 = activ_function(branch_conv_7_7, "activation_7_7_2")

        # Branch 5: 16 x (max_pool 3x3 + conv 1x1)
        branch_maxpool_3_3 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="maxpool_3")
        branch_maxpool_3_3 = norm_function(name="norm_maxpool_3_3")(branch_maxpool_3_3, train=is_train)
        branch_maxpool_3_3 = tf.layers.conv1d(inputs=branch_maxpool_3_3, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_maxpool_3")

        # Branch 6: 16 x (max_pool 5x5 + conv 1x1)
        branch_maxpool_5_5 = tf.layers.max_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="maxpool_5")
        branch_maxpool_5_5 = norm_function(name="norm_maxpool_5_5")(branch_maxpool_5_5, train=is_train)
        branch_maxpool_5_5 = tf.layers.conv1d(inputs=branch_maxpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_maxpool_5")

        # Branch 7: 16 x (avg_pool 3x3 + conv 1x1)
        branch_avgpool_3_3 = tf.layers.average_pooling1d(inputs=x_norm, pool_size=3, strides=1, padding="same", name="avgpool_3")
        branch_avgpool_3_3 = norm_function(name="norm_avgpool_3_3")(branch_avgpool_3_3, train=is_train)
        branch_avgpool_3_3 = tf.layers.conv1d(inputs=branch_avgpool_3_3, filters=16, kernel_size=1,
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_avgpool_3")

        # Branch 8: 16 x (avg_pool 5x5 + conv 1x1)
        branch_avgpool_5_5 = tf.layers.average_pooling1d(inputs=x_norm, pool_size=5, strides=1, padding="same", name="avgpool_5")
        branch_avgpool_5_5 = norm_function(name="norm_avgpool_5_5")(branch_avgpool_5_5, train=is_train)
        branch_avgpool_5_5 = tf.layers.conv1d(inputs=branch_avgpool_5_5, filters=16, kernel_size=1, 
                                              kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                              padding="same", name="conv_avgpool_5")

        # Concatenate
        output = tf.concat([branch_conv_1_1, branch_conv_3_3, branch_conv_5_5, branch_conv_7_7, branch_maxpool_3_3, 
                           branch_maxpool_5_5, branch_avgpool_3_3, branch_avgpool_5_5], axis=-1)
        return output

### Load and prepare Data

In [8]:
filepaths_noise = glob.glob(os.path.join(get_train_audio_path(), "_background_noise_", "*.wav"))

noise = np.concatenate(list(map(lambda x: read_wav(x, False)[0], filepaths_noise)))
noise = np.concatenate([noise, noise[::-1]])
synthetic_noise = np.concatenate([white_noise(N=16000*30, state=np.random.RandomState(655321)), 
                                  blue_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  pink_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  brown_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  violet_noise(N=16000*30, state=np.random.RandomState(655321)),
                                  np.zeros(16000*60)])
synthetic_noise /= np.max(np.abs(synthetic_noise))
synthetic_noise = np.concatenate([synthetic_noise, (synthetic_noise+synthetic_noise[::-1])/2])
all_noise = np.concatenate([noise, synthetic_noise])

  # Remove the CWD from sys.path while we load stuff.


In [9]:
np.random.seed(655321)
random.seed(655321)

path = get_silence_path()

if not os.path.exists(path):
    os.makedirs(path) # It fails in kaggle kernel due to the read-only filesystem

for noise_clip_no in tqdm(range(8000)):
    if noise_clip_no<=4000:
        idx = np.random.randint(0, len(noise)-16000)
        clip = noise[idx:(idx+16000)]
    else:
        idx = np.random.randint(0, len(synthetic_noise)-16000)
        clip = synthetic_noise[idx:(idx+16000)]
    wavfile.write(os.path.join(path, "{0:04d}.wav".format(noise_clip_no)), 16000, 
                               ((32767*clip/np.max(np.abs(clip))).astype(np.int16)))
    

100%|█████████████████████████████████████████████████████████████████████████████| 8000/8000 [00:34<00:00, 231.09it/s]


In [10]:
filepaths = glob.glob(os.path.join(get_train_audio_path(), "**/*.wav"), recursive=True)
filepaths += glob.glob(os.path.join(get_silence_path(), "**/*.wav"), recursive=True)
filepaths = list(filter(lambda fp: "_background_noise_" not in fp, filepaths))
validation_list = open(os.path.join(get_train_path(), "validation_list.txt")).readlines()
test_list = open(os.path.join(get_train_path(), "testing_list.txt")).readlines()
validation_list = list(map(lambda fn: os.path.join(get_train_audio_path(), fn.strip()), validation_list))
testing_list = list(map(lambda fn: os.path.join(get_train_audio_path(), fn.strip()), test_list))
training_list = np.setdiff1d(filepaths, validation_list+testing_list).tolist()

In [11]:
random.seed(655321)
random.shuffle(filepaths)
random.shuffle(validation_list)
random.shuffle(testing_list)
random.shuffle(training_list)

In [12]:
assert all(map(lambda fp: os.path.splitext(fp)[1]==".wav", filepaths))
assert len(filepaths)==64727 - 6 + 8000
assert len(training_list) == len(filepaths) - 6798 - 6835 
assert len(validation_list) == 6798
assert len(testing_list) == 6835

# Test file existence
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), validation_list))
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), testing_list))
assert all(map(lambda fn: os.path.exists(os.path.join(fn)), training_list))
assert set(validation_list + testing_list + training_list) == set(filepaths)

# Test non-overlap among sets
assert len(np.intersect1d(validation_list, testing_list))==0
assert len(np.intersect1d(training_list, testing_list))==0
assert len(np.intersect1d(training_list, validation_list))==0

AssertionError: 

In [13]:
cardinal_classes = list(set(map(lambda fp:os.path.split(os.path.split(fp)[0])[1], filepaths)))
le_classes = LabelEncoder().fit(cardinal_classes)
Counter(map(
    lambda fp:os.path.split(os.path.split(fp)[0])[1], filepaths))

Counter({'no': 2375,
         'yes': 2377,
         'stop': 2380,
         'nine': 2364,
         'left': 2353,
         'dog': 1746,
         'wow': 1745,
         'up': 2375,
         'one': 2370,
         'six': 2369,
         'zero': 2376,
         'two': 2373,
         'sheila': 1734,
         'tree': 1733,
         'silence': 8000,
         'four': 2372,
         'marvin': 1746,
         'bed': 1713,
         'right': 2367,
         'seven': 2377,
         'cat': 1733,
         'eight': 2352,
         'five': 2357,
         'on': 2367,
         'happy': 1742,
         'off': 2357,
         'three': 2356,
         'go': 2372,
         'down': 2359,
         'bird': 1731,
         'house': 1750})

In [14]:
_gen_test = get_batcher(filepaths, 1000)
batch_a_wav, batch_a_target = next(_gen_test)
batch_b_wav, batch_b_target = next(_gen_test)
_gen_test_le = get_batcher(filepaths, 1000, label_encoder=le_classes)
batch_le_wav, batch_le_target = next(_gen_test_le)

# 배치 행렬 형태 상관 관계를 검정합니다.
assert batch_a_wav.shape == (1000, 16000, 1)
assert batch_le_wav.shape == (1000, 16000, 1)
assert batch_a_wav.shape == batch_b_wav.shape == batch_le_wav.shape

# 배치 재현성을 테스트
assert np.sum(np.abs(batch_a_wav-batch_b_wav)) != 0
assert len(batch_a_target) == len(batch_b_target) == len(batch_le_target)
assert any(batch_a_target != batch_b_target)

# 클래스 레이블 인코더를 테스트
assert all(batch_le_target == np.expand_dims(le_classes.transform(np.squeeze(batch_a_target)),1))

### Architecture design

In [33]:

class NameSpacer:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

class Architecture:
    def __init__(self, class_cardinality, seq_len=16000, name="architecture"):
        self.seq_len = seq_len
        self.class_cardinality = class_cardinality
        self.optimizer = tf.optimizers.Adam(learning_rate=0.0001)
        self.name=name
        self.define_computation_graph()
        
        #Aliases
        self.ph = self.placeholders
        self.op = self.optimizers
        self.summ = self.summaries

    def define_computation_graph(self):
        # Reset graph
        tf.compat.v1.reset_default_graph()
        self.placeholders = NameSpacer(**self.define_placeholders())
        self.core_model = NameSpacer(**self.define_core_model())
        self.losses = NameSpacer(**self.define_losses())
        self.optimizers = NameSpacer(**self.define_optimizers())
        self.summaries = NameSpacer(**self.define_summaries())

    def define_placeholders(self):
        with tf.compat.v1.variable_scope("Placeholders"):
            wav_in = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, self.seq_len, 1), name="wav_in")
            is_train = tf.compat.v1.placeholder(dtype=tf.bool, shape=None, name="is_train")
            target = tf.compat.v1.placeholder(dtype=tf.int32, shape=(None, 1), name="target")
            acc_dev = tf.compat.v1.placeholder(dtype=tf.float32, shape=None, name="acc_dev")
            loss_dev = tf.compat.v1.placeholder(dtype=tf.float32, shape=None, name="loss_dev")
            return({"wav_in": wav_in, "target": target, "is_train": is_train, "acc_dev": 
                    acc_dev, "loss_dev": loss_dev})
        
    def define_core_model(self):
        with tf.compat.v1.variable_scope("Core_Model"):
            x = inception_1d(x=self.placeholders.wav_in, is_train=self.placeholders.is_train, 
                             norm_function=BatchNorm, activ_function=tf.nn.relu, depth=1,
                             name="Inception_1_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_1_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_2_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=1, name="Inception_2_3")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_2")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_3_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_3_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_3")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_4_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=2, name="Inception_4_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_4")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_5_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_5_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_5")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_6_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=3, name="Inception_6_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_6")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_7_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_7_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_7")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_8_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_8_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_8")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_9_1")
            x = inception_1d(x=x, is_train=self.placeholders.is_train, norm_function=BatchNorm, 
                             activ_function=tf.nn.relu, depth=4, name="Inception_9_2")
            x = tf.layers.max_pooling1d(x, 2, 2, name="maxpool_9")
            x = tf.contrib.layers.flatten(x)
            x = tf.layers.dense(BatchNorm(name="bn_dense_1")(x,train=self.placeholders.is_train),
                                128, activation=tf.nn.relu, kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                name="dense_1")
            output = tf.layers.dense(BatchNorm(name="bn_dense_2")(x,train=self.placeholders.is_train),
                                self.class_cardinality, activation=None, kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                name="output")
            return({"output": output})
        
    def define_losses(self):
        with tf.compat.v1.variable_scope("Losses"):
            softmax_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.squeeze(self.placeholders.target), 
                                                                        logits=self.core_model.output,
                                                                        name="softmax")
            return({"softmax": softmax_ce})

    def define_optimizers(self):
        with tf.compat.v1.variable_scope("Optimization"):
            op = self.optimizer.minimize(self.losses.softmax)
            return({"op": op})

    def define_summaries(self):
        with tf.compat.v1.variable_scope("Summaries"):
            ind_max = tf.squeeze(tf.cast(tf.argmax(self.core_model.output, axis=1), tf.int32))
            target = tf.squeeze(self.placeholders.target)
            acc= tf.reduce_mean(tf.cast(tf.equal(ind_max, target), tf.float32))
            loss = tf.reduce_mean(self.losses.softmax)
            train_scalar_probes = {"accuracy": acc, 
                                   "loss": loss}
            train_performance_scalar = [tf.summary.scalar(k, tf.reduce_mean(v), family=self.name) 
                                        for k, v in train_scalar_probes.items()]
            train_performance_scalar = tf.summary.merge(train_performance_scalar)

            dev_scalar_probes = {"acc_dev": self.placeholders.acc_dev, 
                                 "loss_dev": self.placeholders.loss_dev}
            dev_performance_scalar = [tf.summary.scalar(k, v, family=self.name) for k, v in dev_scalar_probes.items()]
            dev_performance_scalar = tf.summary.merge(dev_performance_scalar)
            return({"accuracy": acc, "loss": loss, "s_tr": train_performance_scalar, "s_de": dev_performance_scalar})

### Run model
모델이 영원히 걸리지 않으려면 GPU를 사용하여 실행해야 합니다. 또한 예측하기 위해 네트워크를 중지할 시기를 결정해야 합니다. 타이탄 X 파스칼에서 12시간이나 걸렸어요

In [34]:
net = Architecture(class_cardinality=len(cardinal_classes), name="wavception")

RuntimeError: tf.placeholder() is not compatible with eager execution.

In [24]:
sess = start_tensorflow_session(device="1")
sw = get_summary_writer(sess, "~/.logs_tensorboard/", "wavception", "V1") # Adjust your tensorboard logs path here
c=0

AttributeError: module 'tensorflow' has no attribute 'Session'

In [25]:
sess.run(tf.global_variables_initializer())

NameError: name 'sess' is not defined

In [26]:
np.random.seed(655321)
random.seed(655321)

In [27]:
for epoch in range(50000):
    random.shuffle(training_list)
    batcher = get_batcher(training_list, 16, le_classes)
    for i, (batch_x, batch_y) in enumerate(batcher):
        _, loss, acc, s = sess.run([net.op.op, net.losses.softmax, net.summ.accuracy, net.summ.s_tr],
                                 feed_dict={net.ph.wav_in: batch_x, net.ph.target: batch_y, 
                                            net.ph.is_train: True})
        print("[{0:04d}|{1:04d}] Accuracy train: {2:.2f}%".format(epoch, i, acc*100))
        sw.add_summary(s, c)
        
        if c%1000==0: # Validation
            accuracies_dev=[]
            losses_dev=[]
            batcher = get_batcher(validation_list, 16, le_classes)
            for i, (batch_x, batch_y) in enumerate(batcher):
                acc, loss= sess.run([net.summ.accuracy, net.summ.loss], 
                               feed_dict={net.ph.wav_in: batch_x, net.ph.target: batch_y, 
                                          net.ph.is_train: False})
                accuracies_dev.append(acc)
                losses_dev.append(loss)
            s = sess.run(net.summ.s_de, feed_dict={net.ph.acc_dev: np.mean(accuracies_dev),
                                                        net.ph.loss_dev: np.mean(losses_dev)})
            sw.add_summary(s, c)
        c += 1

NameError: name 'sess' is not defined

정확도를 테스트

In [None]:
accuracies=[]
batcher = get_batcher(testing_list, 64, le_classes)
for i, (batch_x, batch_y) in tqdm(enumerate(batcher)):
    acc= sess.run(net.summ.accuracy, feed_dict={net.ph.wav_in: batch_x, net.ph.target: batch_y, 
                                                     net.ph.is_train: False})
    accuracies.append(acc)
        

### Prediction and submission building

In [None]:
scoring_list = glob.glob(os.path.join(get_scoring_audio_path(), "*.wav"), recursive=True)

In [None]:
batcher = get_batcher(scoring_list, 80, le_classes, scoring=True)

In [None]:
fns = []
prds = []
for i, (batch_x, filepaths) in tqdm(enumerate(batcher)):
    pred = sess.run(net.core_model.output, feed_dict={net.ph.wav_in: batch_x, net.ph.is_train: False})
    fns.extend(map(lambda f:os.path.split(f)[1], filepaths))
    prds.extend(map(lambda f:np.argmax(pred, axis=1).tolist(), pred))

참고: 알 수 없는 클립 문제를 빠르고 더러운 방법으로 해결할 수 있는 방법을 여기에 구현했습니다. 성능은 여전히 우수하지만(약 76LB) 훨씬 더 현명한 방법이 있습니다;-).

In [None]:
df=pd.DataFrame({"fname":fns, "label": prds})
df.label = le_classes.inverse_transform(df.label)
df.loc[~df.label.isin(["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go", "silence"]), "label"]="unknown"
df.to_csv(os.path.join(get_submissions_path(), "submission.csv"), index=False)