# 월간 데이콘 5, 생체 광학 데이터 분석 AI 경진대회

## Public 3rd, Private 3rd / 85 (Top 3.5%)

- Competition link : https://dacon.io/competitions/official/235616/overview/description/
- Data : https://dacon.io/competitions/official/235616/data/
- This Solution is also uploaded to Dacon Codeshare : https://dacon.io/competitions/official/235616/codeshare/1571?page=1&dtype=recent


In [1]:
# Root folder of the competition data. Paths below are built by plain string
# concatenation onto this value, so the trailing slash is required.
data_dir = './data/'

In [2]:
import os
import pandas as pd
import numpy as np
import scipy
from tqdm import tqdm
from glob import glob
from scipy.io import wavfile
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import time

# Apply seaborn's 'whitegrid' style to plots drawn after this point.
sns.set_style('whitegrid')

# Suppress every Python warning for the rest of the session.
import warnings ; warnings.filterwarnings('ignore')

# Load Data

In [3]:
import os
import pandas as pd
import numpy as np
import scipy
from tqdm import tqdm
from glob import glob
from scipy.io import wavfile
import librosa

def data_loader(files):
    """Decode every audio file in *files* and return the waveforms as one array.

    Each path is read with ``librosa.load(path, sr=None)``, i.e. without
    resampling; the sampling rate that librosa returns is discarded.

    Args:
        files: Iterable of audio file paths. A tqdm progress bar is shown
            while iterating over it.

    Returns:
        ``np.array`` built from the list of decoded waveforms, in the same
        order as *files*.
    """
    waveforms = [librosa.load(path, sr=None)[0] for path in tqdm(files)]
    return np.array(waveforms)

# glob() returns paths in whatever order the filesystem lists them, which is
# not guaranteed to be stable. The waveforms are matched to the rows of
# train_answer.csv / submission.csv purely by position further down, so the
# paths are sorted to make that order deterministic.
# NOTE(review): this assumes the sorted file names follow the same order as
# the `id` index of the CSV files (e.g. zero-padded ids) - confirm.
Xtrain = sorted(glob(data_dir + 'train/*.wav'))
Xtrain = data_loader(Xtrain)

# Label table and submission template, both indexed by sample id.
Ytrain = pd.read_csv(data_dir + 'train_answer.csv', index_col='id')
submission = pd.read_csv(data_dir + 'submission.csv', index_col='id')

print(Xtrain.shape, Ytrain.shape)
# Short pause before the next progress bar starts
# (presumably so the print above is not interleaved with tqdm output).
time.sleep(1)

Xtest = sorted(glob(data_dir + 'test/*.wav'))
Xtest = data_loader(Xtest)

# Cast both waveform arrays to float32.
Xtrain = Xtrain.astype('float32')
Xtest = Xtest.astype('float32')

print(Xtrain.shape, Ytrain.shape, Xtest.shape, submission.shape)

100%|████████████████████████████████████████████████████████████████████████| 100000/100000 [00:20<00:00, 4996.13it/s]


(100000, 16000) (100000, 30)


100%|██████████████████████████████████████████████████████████████████████████| 10000/10000 [00:01<00:00, 5039.78it/s]


(100000, 16000) (100000, 30) (10000, 16000) (10000, 30)


## dB Mel Spectrogram

In [4]:
def get_melspectrogram(data, n_fft, win_length, hop_length, n_mels, sr=16000, save=False, to_db=True, normalize=False):
    """Compute a mel spectrogram for every waveform in *data*.

    Args:
        data: Sequence of waveforms; each item is passed to
            ``librosa.feature.melspectrogram`` as ``y``.
        n_fft: FFT size forwarded to librosa.
        win_length: Window length forwarded to librosa.
        hop_length: Hop length forwarded to librosa.
        n_mels: Number of mel bands forwarded to librosa.
        sr: Sampling rate forwarded to librosa.
        save: If true, write the result to
            ``{data_dir}mel_spectrogram(n_fft,win_length,hop_length,n_mels).npy``
            (``data_dir`` is a module-level variable).
        to_db: If true, convert the stacked spectrograms with
            ``librosa.power_to_db(..., ref=np.max)``. The conversion runs on
            the whole stacked array, so the reference is the maximum over all
            samples, not a per-sample maximum.
        normalize: If true, standardise with the mean and standard deviation
            taken over the whole array.

    Returns:
        Array holding one spectrogram per input waveform, after the optional
        dB conversion and normalisation.
    """
    array = []
    for signal in tqdm(data):
        # The waveform is passed by keyword: recent librosa releases made the
        # arguments of melspectrogram keyword-only, and older releases accept
        # `y=` as well.
        melspec = librosa.feature.melspectrogram(y=signal, sr=sr, n_fft=n_fft, win_length=win_length,
                                                 hop_length=hop_length, n_mels=n_mels)
        array.append(melspec)
    array = np.array(array)
    if to_db:
        array = librosa.power_to_db(array, ref=np.max)
    if normalize:
        mean = array.mean()
        std = array.std()
        array = (array - mean) / std
    if save:
        np.save(f"{data_dir}mel_spectrogram({n_fft},{win_length},{hop_length},{n_mels}).npy", array)
    return array

def gen_4_mels(data, normalize=True):
    """Stack four dB mel spectrograms of *data* that differ only in window size.

    Every spectrogram uses ``hop_length=160`` and ``n_mels=64``; the
    ``(n_fft, win_length)`` pair doubles from one spectrogram to the next.

    Args:
        data: Waveforms forwarded unchanged to ``get_melspectrogram``.
        normalize: Forwarded to ``get_melspectrogram`` for each spectrogram.

    Returns:
        The four spectrogram arrays joined with ``np.stack`` along a new
        last axis, ordered from the shortest window to the longest.
    """
    window_configs = ((256, 200), (512, 400), (1024, 800), (2048, 1600))
    channels = [
        get_melspectrogram(data, n_fft=n_fft, win_length=win_length, hop_length=160,
                           n_mels=64, save=False, to_db=True, normalize=normalize)
        for n_fft, win_length in window_configs
    ]
    return np.stack(channels, axis=-1)

Junho Sun 음성 신호 기본 정보 CodeShare : https://dacon.io/competitions/official/235616/codeshare/1305?page=1&dtype=recent&ptype=pub

글 내용 중에서 마지막 부분에 mel spectrogram 의 win_length 를 설명해주시는 부분이 있습니다.  
'마지막으로 spectrogram과 melspectrogram의 해상력에 대해 설명하겠습니다. win_length가 커질수록 주파수 성분에 대한 해상력은 높아지지만, 즉 더 정밀해지지만, 시간 성분에 대한 해상력은 낮아지게 됩니다. 즉, 더 정밀한 주파수 분포를 얻을 수 있으나 시간에 따른 주파수 변화를 관찰하기가 어려워집니다. 반대로 win_length가 작은 경우에는 주파수 성분에 대한 해상력은 낮아지지만, 시간 성분에 대한 해상력은 높아지게 됩니다. 따라서 적절한 값을 찾는 것이 중요합니다.'  
음성 신호의 시간 성분과 주파수 성분을 어떻게 둘 다 놓치지 않고 잡아낼 수 있을까 고민하다가, 서로 다른 win_length 를 가진 4개의 mel spectrogram 을 만들어 채널 방향으로 겹쳤습니다.

In [5]:
# Train and test waveforms are featurised in one pass, so the normalisation
# inside gen_4_mels uses statistics computed over both sets together.
all_data = np.concatenate((Xtrain, Xtest), axis=0)
print(all_data.shape)
# Short pause before the progress bars start
# (presumably so the print above is not interleaved with tqdm output).
time.sleep(1)
all_dbmel = gen_4_mels(all_data, normalize=True)

# The first len(Ytrain) rows are the training samples, the rest the test samples.
Xtrain_dbmel, Xtest_dbmel = all_dbmel[:len(Ytrain)], all_dbmel[len(Ytrain):]
print(Xtrain_dbmel.shape, Ytrain.shape, Xtest_dbmel.shape)

(110000, 16000)


100%|█████████████████████████████████████████████████████████████████████████| 110000/110000 [01:54<00:00, 961.62it/s]
100%|█████████████████████████████████████████████████████████████████████████| 110000/110000 [02:28<00:00, 738.82it/s]
100%|█████████████████████████████████████████████████████████████████████████| 110000/110000 [03:38<00:00, 503.89it/s]
100%|█████████████████████████████████████████████████████████████████████████| 110000/110000 [05:59<00:00, 305.94it/s]


(100000, 64, 101, 4) (100000, 30) (10000, 64, 101, 4)


# Build Model

In [6]:
import keras
import keras.backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Convolution2D, BatchNormalization, Activation, Flatten, Dropout, Dense, Add, AveragePooling2D
from keras.callbacks import EarlyStopping
from keras.losses import KLDivergence
from sklearn.model_selection import train_test_split
from keras.optimizers import Nadam

def mish(x):
    """Mish activation, ``x * tanh(softplus(x))``, built from Keras backend ops."""
    gate = K.tanh(K.softplus(x))
    return x * gate

def eval_kldiv(y_true, y_pred):
    """Return the Keras ``KLDivergence`` loss between *y_true* and *y_pred*.

    Both arguments are converted to float32 numpy arrays before the loss is
    evaluated; the resulting tensor is returned via ``.numpy()``.
    """
    targets = np.array(y_true).astype('float32')
    predictions = np.array(y_pred).astype('float32')
    loss_fn = KLDivergence()
    return loss_fn(targets, predictions).numpy()

In [7]:
def build_fn():
    """Build and compile the residual CNN used for every ensemble member.

    The input shape is taken from the module-level ``Xtrain_dbmel`` array
    (all axes except the first). The network consists of five residual
    convolution stages, a flatten, a two-layer dense residual head and a
    30-unit softmax output. It is compiled with the ``KLDivergence`` loss and
    the ``Nadam`` optimizer at a learning rate of 0.002.

    Returns:
        The compiled ``keras.models.Model``.
    """
    dropout_rate = 0.5

    def conv_unit(tensor, conv_specs):
        """Apply each (filters, kernel_size) convolution in order, then one BatchNormalization."""
        for filters, kernel_size in conv_specs:
            tensor = Convolution2D(filters, kernel_size, padding='same',
                                   kernel_initializer='he_normal')(tensor)
        return BatchNormalization()(tensor)

    def residual_stage(tensor, conv_specs):
        """Three conv units with a skip from the first to the third, then pool and dropout."""
        tensor = conv_unit(tensor, conv_specs)
        shortcut = tensor  # taken after batch norm, before the activation
        tensor = Activation(mish)(tensor)
        tensor = conv_unit(tensor, conv_specs)
        tensor = Activation(mish)(tensor)
        tensor = conv_unit(tensor, conv_specs)
        tensor = Add()([tensor, shortcut])
        tensor = Activation(mish)(tensor)
        tensor = AveragePooling2D()(tensor)
        return Dropout(rate=dropout_rate)(tensor)

    # One entry per stage. The first three stages use a single 3x3 convolution
    # per unit; the last two use a 1x1 -> 3x3 -> 1x1 stack with no activation
    # between the three convolutions.
    stage_specs = (
        [(32, 3)],
        [(64, 3)],
        [(128, 3)],
        [(64, 1), (64, 3), (256, 1)],
        [(128, 1), (128, 3), (512, 1)],
    )

    model_in = Input(shape=(Xtrain_dbmel.shape[1:]))
    features = model_in
    for conv_specs in stage_specs:
        features = residual_stage(features, conv_specs)

    features = Flatten()(features)

    # Dense residual head: the skip is taken after the first batch norm.
    features = Dense(units=128, kernel_initializer='he_normal')(features)
    features = BatchNormalization()(features)
    head_shortcut = features
    features = Activation(mish)(features)
    features = Dropout(rate=dropout_rate)(features)

    features = Dense(units=128, kernel_initializer='he_normal')(features)
    features = BatchNormalization()(features)
    features = Add()([head_shortcut, features])
    features = Activation(mish)(features)
    features = Dropout(rate=dropout_rate)(features)

    model_out = Dense(units=30, activation='softmax')(features)
    model = Model(model_in, model_out)
    model.compile(loss=KLDivergence(), optimizer=Nadam(learning_rate=0.002))
    return model
build_fn().summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64, 101, 4)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 101, 32)  1184        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 64, 101, 32)  128         conv2d[0][0]                     
__________________________________________________________________________________________________
activation (Activation)         (None, 64, 101, 32)  0           batch_normalization[0][0]        
______________________________________________________________________________________________

## Train 15 Models

In [None]:
num_models = 15
model_list = []

# Each iteration builds a new network, fits it on the whole training set
# (no validation data is passed to fit) and writes it to model_<i>.h5.
for i in tqdm(range(num_models)):
    model = build_fn()
    model.fit(x=Xtrain_dbmel, y=Ytrain, batch_size=16, epochs=187)
    model_list += [model]
    model.save(f"model_{i}.h5")

학습을 하고, 모델 저장. 모델 저장 후 모두 불러와서 단순 평균 앙상블 진행. 

In [9]:
# The custom `mish` activation is handed to load_model through custom_objects
# so the saved models can be rebuilt.
custom_objects = {'mish' : mish}

models = []
for i in tqdm(range(15)):
    models.append(keras.models.load_model(f"model_{i}.h5", custom_objects=custom_objects))
print(f"{len(models)} models reloaded")

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:14<00:00,  1.02it/s]

15 models reloaded





In [10]:
# Running sums of the per-model predictions; divided by the model count below.
preds = np.zeros(shape=submission.shape)
train_preds = np.zeros(shape=Ytrain.shape)

# Per-model outputs and scores, kept alongside the running sums.
train_preds_list = []
test_preds_list = []
score_list = []

for i, model in enumerate(models):
    a = model.predict(Xtrain_dbmel)  # predictions on the training features
    b = model.predict(Xtest_dbmel)   # predictions on the test features
    eval_score = eval_kldiv(Ytrain, a)

    print(f"Model {i+1} Evaluation Score : {eval_score}")
    train_preds += a
    preds += b

    train_preds_list.append(a)
    test_preds_list.append(b)
    score_list.append(eval_score)

# Simple (unweighted) average over all models.
train_preds /= len(models)
preds /= len(models)
print(f"\nMean Predictions Evaluation Score : {eval_kldiv(Ytrain, train_preds)}")

# Reuse the submission template's index and columns for the output file.
simple_average = pd.DataFrame(preds, index=submission.index, columns=submission.columns)
simple_average.to_csv('15 Average Ensemble model.csv')
simple_average.head(10)

Model 1 Evaluation Score : 0.24118183553218842
Model 2 Evaluation Score : 0.23312652111053467
Model 3 Evaluation Score : 0.24818634986877441
Model 4 Evaluation Score : 0.23722496628761292
Model 5 Evaluation Score : 0.2390882819890976
Model 6 Evaluation Score : 0.24100100994110107
Model 7 Evaluation Score : 0.2496347874403
Model 8 Evaluation Score : 0.24356740713119507
Model 9 Evaluation Score : 0.24107488989830017
Model 10 Evaluation Score : 0.2274245172739029
Model 11 Evaluation Score : 0.23607350885868073
Model 12 Evaluation Score : 0.23797260224819183
Model 13 Evaluation Score : 0.2538020610809326
Model 14 Evaluation Score : 0.23577089607715607
Model 15 Evaluation Score : 0.2238369584083557

Mean Predictions Evaluation Score : 0.18654921650886536


Unnamed: 0_level_0,bed,bird,cat,dog,down,eight,five,four,go,happy,...,sheila,six,stop,three,tree,two,up,wow,yes,zero
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.024768,0.00282,0.000914,0.003068,0.003746,0.255225,0.002023,0.002626,0.083618,0.000695,...,0.001235,0.001561,0.001605,0.248279,0.009439,0.008524,0.002312,0.000764,0.000609,0.278987
1,0.147262,0.000757,0.000718,0.000272,0.000212,0.000788,0.073441,0.000204,0.000677,0.000331,...,0.000386,0.000305,0.000819,0.003424,0.256637,0.221761,0.013797,0.000204,0.000234,0.000314
2,0.000841,0.000384,0.000923,0.000784,0.001234,0.027174,0.000816,0.000743,0.001806,0.000982,...,0.000432,0.257953,0.326719,0.003251,0.001538,0.284989,0.004832,0.000346,0.004637,0.002297
3,0.000871,0.00105,0.000946,0.00038,0.000642,0.000768,0.290618,0.282772,0.000797,0.032753,...,0.000579,0.000348,0.000802,0.00413,0.266848,0.001172,0.018497,0.001371,0.000299,0.000682
4,0.00212,0.000584,0.000557,0.133227,0.011718,0.000677,0.013705,0.000852,0.00196,0.000489,...,0.000196,0.000568,0.003437,0.000297,0.000145,0.000149,0.002382,0.021904,0.000615,0.000975
5,0.013062,0.001301,0.001726,0.123044,0.007379,0.000795,0.24756,0.003694,0.040355,0.000491,...,0.000189,0.000313,0.00126,0.001081,0.000391,0.000619,0.002737,0.258006,0.000419,0.000967
6,0.003452,0.000936,0.000672,0.006914,0.001613,0.000342,0.000566,0.002741,0.287367,0.000375,...,0.000485,0.000763,0.332798,0.000449,0.00019,0.001652,0.002893,0.00072,0.000614,0.014695
7,0.000968,0.246415,0.000587,0.000717,0.002379,0.00092,0.303059,0.016404,0.012524,0.000826,...,0.015037,0.000395,0.001994,0.002466,0.000954,0.034002,0.011572,0.003346,0.000413,0.294591
8,0.006805,0.001124,0.000614,0.039188,0.006068,0.000433,0.402306,0.000824,0.001989,0.001361,...,9.7e-05,0.00019,0.00072,0.000534,0.000143,0.000185,0.005782,0.006733,0.00039,0.00016
9,0.001426,0.0009,0.000603,0.002688,0.17299,0.000954,0.00599,0.0191,0.116582,0.000774,...,0.000444,0.003889,0.009046,0.000981,0.00026,0.000621,0.004641,0.018064,0.000582,0.01586


Simple Average of 15 Predictions  
- Public LB : 0.399484
- Private LB : 0.39202