In [1]:
import pandas as pd
import numpy as np
import os
import shutil
from tqdm import tqdm
from glob import glob
import librosa
import warnings

warnings.filterwarnings("ignore")

### 데이터 불러오기

In [2]:
# Sample submission file; it is used again later when the final result is assembled.
sample_submission = pd.read_csv("./open/sample_submission.csv")

# Collect the training .wav paths, one glob per training sub-folder.
# The tuple target binds each per-folder list to its own name, and path_list
# keeps the same six list objects in the same order.
(africa_train_paths, australia_train_paths, canada_train_paths,
 england_train_paths, hongkong_train_paths, us_train_paths) = path_list = [
    glob(pattern)
    for pattern in (
        "./open/train/africa/*.wav",
        "./open/train/australia/*.wav",
        "./open/train/canada/*.wav",
        "./open/train/england/*.wav",
        "./open/train/hongkong/*.wav",
        "./open/train/us/*.wav",
    )
]

In [3]:
# glob does not guarantee that the test paths come back in numeric order.
# A test_ DataFrame is therefore prepared so the predictions can later be merged
# with sample_submission on "id".

def get_id(data):
    """Return the integer id encoded in a test file path.

    The id is the file name up to its first dot, so a path ending in
    ``1000.wav`` yields ``1000``.

    Parameters
    ----------
    data : str
        Path to a test file. Both forward slashes and backslashes are
        accepted as directory separators, so paths produced by glob on
        Windows and on POSIX systems are handled alike.

    Returns
    -------
    int
        The numeric id (a built-in ``int``; ``np.int`` was removed in
        NumPy 1.24).

    Raises
    ------
    ValueError
        If the file name stem is not an integer.
    """
    # Normalise separators first, then keep only the last path component.
    filename = data.replace("\\", "/").rsplit("/", 1)[-1]
    return int(filename.split(".")[0])

# Build the frame directly from the glob result so the row count follows the
# number of .wav files actually found. A pre-sized index of 6100 rows raises
# ValueError on column assignment whenever the file count differs.
test_ = pd.DataFrame({"path": glob("./open/test/*.wav")})
test_["id"] = test_["path"].apply(get_id)

test_.head()

Unnamed: 0,path,id
0,./open/test\1.wav,1
1,./open/test\10.wav,10
2,./open/test\100.wav,100
3,./open/test\1000.wav,1000
4,./open/test\1001.wav,1001


### 데이터 전처리

baseline 코드에서는 librosa 라이브러리를 사용하여 wav 파일을 전처리합니다.

In [4]:
def load_data(paths):
    """Load every audio file in ``paths`` and return the waveforms as one array.

    Parameters
    ----------
    paths : iterable of str
        Audio file paths. Each file is read with ``librosa.load`` at a
        sampling rate of 16000.

    Returns
    -------
    numpy.ndarray
        A regular array when every clip has the same number of samples.
        When the clip lengths differ, a 1-D ``dtype=object`` array holding
        one waveform array per element.
    """
    clips = []
    for path in tqdm(paths):
        # sr = 16000 means the audio is sampled at 16000 data points per second.
        data, _ = librosa.load(path, sr = 16000)
        clips.append(data)

    # If memory is tight, change the data type,
    # e.g. np.array(data, dtype = np.float32).
    if len({len(clip) for clip in clips}) <= 1:
        return np.array(clips)

    # Clips have different lengths. np.array() on a ragged list raises
    # ValueError on NumPy >= 1.24, so the object array is filled explicitly.
    result = np.empty(len(clips), dtype = object)
    for idx, clip in enumerate(clips):
        result[idx] = clip

    return result

In [5]:
# Loading the training data takes a long time, so the extracted waveforms
# are saved as .npy files and can be reloaded whenever they are needed.

# exist_ok=True keeps this cell re-runnable: os.mkdir raises FileExistsError
# once the directory has been created by an earlier run.
os.makedirs("./npy_data", exist_ok = True)

africa_train_data = load_data(africa_train_paths)
np.save("./npy_data/africa_npy", africa_train_data)

australia_train_data = load_data(australia_train_paths)
np.save("./npy_data/australia_npy", australia_train_data)

canada_train_data = load_data(canada_train_paths)
np.save("./npy_data/canada_npy", canada_train_data)

england_train_data = load_data(england_train_paths)
np.save("./npy_data/england_npy", england_train_data)

hongkong_train_data = load_data(hongkong_train_paths)
np.save("./npy_data/hongkong_npy", hongkong_train_data)

us_train_data = load_data(us_train_paths)
np.save("./npy_data/us_npy", us_train_data)

test_data = load_data(test_["path"])
np.save("./npy_data/test_npy", test_data)

100%|██████████| 2500/2500 [06:40<00:00,  6.24it/s]
100%|██████████| 1000/1000 [02:36<00:00,  6.40it/s]
100%|██████████| 1000/1000 [02:36<00:00,  6.40it/s]
100%|██████████| 10000/10000 [26:31<00:00,  6.28it/s] 
100%|██████████| 1020/1020 [02:51<00:00,  5.93it/s]
100%|██████████| 10000/10000 [26:31<00:00,  6.28it/s] 
100%|██████████| 6100/6100 [16:17<00:00,  6.24it/s] 


In [6]:
# Reload the waveforms that were saved as .npy files.
# allow_pickle=True is required when the saved arrays are object arrays;
# only load .npy files that this notebook produced itself.
train_data_list = [
    np.load(npy_path, allow_pickle = True)
    for npy_path in (
        "./npy_data/africa_npy.npy",
        "./npy_data/australia_npy.npy",
        "./npy_data/canada_npy.npy",
        "./npy_data/england_npy.npy",
        "./npy_data/hongkong_npy.npy",
        "./npy_data/us_npy.npy",
    )
]

# Bind each loaded array to its own name as well (same objects, same order).
(africa_train_data, australia_train_data, canada_train_data,
 england_train_data, hongkong_train_data, us_train_data) = train_data_list

test_data = np.load("./npy_data/test_npy.npy", allow_pickle = True)

In [7]:
# The clips in this competition all have different lengths.
# The baseline trims every clip to the length of the shortest one.

def get_mini(data):
    """Return the smallest ``len()`` among the items of ``data``.

    The result is capped at 9999999, which is also what is returned
    when ``data`` is empty.
    """
    return min([9999999] + [len(clip) for clip in data])

# Bring every clip to the same length.

def set_length(data, d_mini):
    """Truncate each item of ``data`` to its first ``d_mini`` elements.

    Returns the truncated items packed into a single NumPy array.
    """
    return np.array([clip[:d_mini] for clip in data])

# Create the features.

def get_feature(data, sr = 16000, n_fft = 256, win_length = 200, hop_length = 160, n_mels = 64):
    """Convert waveforms to standardised log-mel spectrograms.

    Parameters
    ----------
    data : iterable of 1-D arrays
        Waveforms. They are stacked with ``np.array``, so they are expected
        to have been trimmed to a common length beforehand.
    sr : int
        Sampling rate passed to librosa.
    n_fft : int
        FFT size passed to librosa.
    win_length : int
        Size of each small window the audio is cut into.
    hop_length : int
        Step between consecutive windows.
    n_mels : int
        Number of mel filters to apply.

    Returns
    -------
    numpy.ndarray
        One mel spectrogram per input clip, converted to dB relative to the
        maximum over the whole batch, then shifted and scaled by the mean and
        standard deviation of the whole batch.

    Notes
    -----
    The mean and standard deviation are computed from the ``data`` passed in
    on each call, so separate calls (for example train and test) are
    standardised with their own statistics.
    """
    mel = []
    for i in data:
        # The audio must be passed as y=...; librosa >= 0.10 made the
        # arguments of melspectrogram keyword-only, so a positional waveform
        # raises TypeError there.
        mel_ = librosa.feature.melspectrogram(y = i, sr = sr, n_fft = n_fft, win_length = win_length, hop_length = hop_length, n_mels = n_mels)
        mel.append(mel_)
    mel = np.array(mel)
    mel = librosa.power_to_db(mel, ref = np.max)

    mel_mean = mel.mean()
    mel_std = mel.std()
    mel = (mel - mel_mean) / mel_std

    return mel

In [8]:
# Stack the per-region training arrays into a single array.
train_x = np.concatenate(train_data_list, axis = 0)
test_x = np.array(test_data)

# Find the shortest clip length across both the training and the test data.
train_mini, test_mini = get_mini(train_x), get_mini(test_x)
mini = np.min([train_mini, test_mini])

# Trim every clip to that shortest length.
train_x = set_length(train_x, mini)
test_x = set_length(test_x, mini)

# Extract the features with librosa.
train_x = get_feature(data = train_x)
test_x = get_feature(data = test_x)

# Append a trailing axis of size 1 to each feature array.
train_x = np.expand_dims(train_x, axis = -1)
test_x = np.expand_dims(test_x, axis = -1)

In [9]:
# Create the labels for the training data: one integer class per region,
# numbered in the order listed below (africa = 0 ... us = 5).
# dtype = int replaces np.int, which was removed in NumPy 1.24 and raises
# AttributeError there.

train_y = np.concatenate([
    np.full(len(region_data), label, dtype = int)
    for label, region_data in enumerate((
        africa_train_data,
        australia_train_data,
        canada_train_data,
        england_train_data,
        hongkong_train_data,
        us_train_data,
    ))
], axis = 0)

In [10]:
# Display the shapes of the training features, training labels, and test features.
train_x.shape, train_y.shape, test_x.shape

((25520, 64, 501, 1), (25520,), (6100, 64, 501, 1))

### 분석 모델

 분석 모델은 월간데이콘_6 음성 중첩 데이터 분류 AI 경진대회 3위를 달성하신 Jamm님의 코드를 바탕으로 만들어졌습니다.  
 https://www.dacon.io/competitions/official/235616/codeshare/1571?page=1&dtype=recent

In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Convolution2D, BatchNormalization, Flatten,
                                     Dropout, Dense, AveragePooling2D, Add)
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

In [12]:
def block(input_, units = 32, dropout_rate = 0.5):
    """Residual block of three 3x3 conv + batch-norm stages.

    The output of the first stage is added back to the output of the third
    stage; the sum then goes through average pooling and dropout.
    """

    def conv_bn(tensor):
        # One 3x3 "same"-padded ReLU convolution followed by batch normalisation.
        tensor = Convolution2D(units, 3, padding = "same", activation = "relu")(tensor)
        return BatchNormalization()(tensor)

    shortcut = conv_bn(input_)
    out = conv_bn(conv_bn(shortcut))
    out = Add()([out, shortcut])
    out = AveragePooling2D()(out)

    return Dropout(rate = dropout_rate)(out)

def second_block(input_, units = 64, dropout_rate = 0.5):
    """Residual block of three 1x1 -> 3x3 -> 1x1 convolution groups.

    Each group ends with ``units * 4`` filters and a batch normalisation.
    The output of the first group is added back to the output of the third
    group; the sum then goes through average pooling and dropout.
    """

    def bottleneck(tensor):
        # (filters, kernel size) for the three convolutions of one group.
        for filters, kernel in ((units, 1), (units, 3), (units * 4, 1)):
            tensor = Convolution2D(filters, kernel, padding = "same", activation = "relu")(tensor)
        return BatchNormalization()(tensor)

    shortcut = bottleneck(input_)
    out = bottleneck(bottleneck(shortcut))
    out = Add()([out, shortcut])
    out = AveragePooling2D()(out)

    return Dropout(rate = dropout_rate)(out)

In [13]:
def build_fn():
    """Build the (uncompiled) classification model.

    The input shape is taken from the module-level ``train_x``. Three
    ``block`` stages and two ``second_block`` stages feed a flattened dense
    head with one residual connection, ending in a 6-way softmax.
    """
    dropout_rate = 0.3

    in_ = Input(shape = (train_x.shape[1:]))

    # Convolutional trunk.
    features = in_
    for units in (32, 64, 128):
        features = block(features, units = units, dropout_rate = dropout_rate)
    for units in (64, 128):
        features = second_block(features, units = units, dropout_rate = dropout_rate)

    flat = Flatten()(features)

    # First dense stage; its normalised output is kept for the skip connection.
    dense_res = BatchNormalization()(Dense(units = 128, activation = "relu")(flat))
    hidden = Dropout(rate = dropout_rate)(dense_res)

    # Second dense stage, added to the saved output of the first.
    hidden = Dense(units = 128, activation = "relu")(hidden)
    hidden = BatchNormalization()(hidden)
    hidden = Add()([dense_res, hidden])
    hidden = Dropout(rate = dropout_rate)(hidden)

    model_out = Dense(units = 6, activation = 'softmax')(hidden)
    return Model(in_, model_out)

### 모델 학습

In [14]:
# 5-fold stratified cross-validation: a fresh model is trained on each fold
# and its predictions on the test set are collected.
split = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)

pred = []   # per-fold class probabilities for the test set
pred_ = []  # per-fold predicted class indices for the test set

for train_idx, val_idx in split.split(train_x, train_y):
    x_train, y_train = train_x[train_idx], train_y[train_idx]
    x_val, y_val = train_x[val_idx], train_y[val_idx]

    model = build_fn()
    model.compile(optimizer = keras.optimizers.Adam(0.002),
                 loss = keras.losses.SparseCategoricalCrossentropy(),
                 metrics = ['acc'])

    history = model.fit(x = x_train, y = y_train, validation_data = (x_val, y_val), epochs = 8)
    print("*******************************************************************")
    # Run inference on the test set once per fold and reuse the result for
    # both lists, instead of calling model.predict twice on the same data.
    fold_proba = model.predict(test_x)
    pred.append(fold_proba)
    pred_.append(np.argmax(fold_proba, axis = 1))
    print("*******************************************************************")

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
*******************************************************************
*******************************************************************
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
*******************************************************************
*******************************************************************
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
*******************************************************************
*******************************************************************
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
*******************************************************************
*******************************************************************
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
********************************************************

### 예측하기

In [15]:
def cov_type(data):
    """Convert ``data`` to a built-in ``int``.

    Uses ``int`` directly: ``np.int`` was only an alias for it and was
    removed in NumPy 1.24, where it raises AttributeError.
    """
    return int(data)

# As seen at the start, the test paths returned by glob are not ordered
# 1, 2, 3, 4, 5, ... like the ids in sample_submission.
# The test_ DataFrame prepared earlier is used to line the predictions up
# with the sample_submission ids.

# Average the per-fold probabilities, attach them to test_ column-wise,
# and drop the first column.
mean_proba = pd.DataFrame(np.mean(pred, axis = 0))
result = pd.concat([test_, mean_proba], axis = 1).iloc[:, 1:]
result["id"] = result["id"].apply(cov_type)

# Merge on the shared "id" column, then take over the submission's column names.
result = pd.merge(sample_submission["id"], result)
result.columns = sample_submission.columns

In [16]:
# Display the assembled submission table.
result

Unnamed: 0,id,africa,australia,canada,england,hongkong,us
0,1,0.081180,0.011587,0.227791,0.172586,0.196294,0.310562
1,2,0.140754,0.015116,0.047008,0.395727,0.012825,0.388569
2,3,0.289965,0.024995,0.004752,0.420874,0.024038,0.235376
3,4,0.200857,0.045339,0.007385,0.495348,0.039959,0.211113
4,5,0.104367,0.012193,0.018487,0.183983,0.038093,0.642877
...,...,...,...,...,...,...,...
6095,6096,0.022506,0.010289,0.281649,0.182847,0.196283,0.306426
6096,6097,0.066110,0.018404,0.006888,0.256483,0.033560,0.618554
6097,6098,0.153915,0.032215,0.006315,0.304693,0.205671,0.297192
6098,6099,0.273270,0.022367,0.009398,0.216333,0.116568,0.362065


In [17]:
# Write the submission to DACON.csv without the DataFrame index column.
result.to_csv("DACON.csv", index = False)

baseline은 참가자의 제출을 최우선 목표로 하고 있습니다.  
창의적인 전처리 방법을 적용하고 훌륭한 분석 모델을 개발해 주세요.  
  
감사합니다.