In [59]:
import pandas as pd
from tqdm.auto import tqdm
import os
import librosa
import numpy as np
import csv
import warnings
warnings.filterwarnings(action='ignore') 

# Global settings shared by the feature-extraction cells.
CFG = dict(
    SR=16000,      # sampling rate handed to librosa.load
    N_MFCC=32,     # number of MFCC coefficients extracted per frame
    SEED=41,       # random seed (not referenced in the cells shown here)
    MAX_SIZE=360,  # number of frames every MFCC matrix is cut or zero-padded to
)

# Read the three metadata tables (train / test / unlabeled) in one pass.
train_df, test_df, unlabeled_df = (
    pd.read_csv(csv_path)
    for csv_path in ('./train_data.csv', './test_data.csv', './unlabeled_data.csv')
)

def _fit_width(matrix, width):
    """Return `matrix` cut or zero-padded on the right to exactly `width` columns."""
    n_cols = matrix.shape[1]
    if n_cols > width:
        return matrix[:, :width]
    filler = np.zeros((matrix.shape[0], width - n_cols))
    return np.hstack((matrix, filler))


def get_mfcc_feature(df, data_type, save_path=None, top_db=60):
    """Extract a fixed-size MFCC matrix for every wav file listed in `df`.

    For each value in ``df['id']`` the file
    ``./wav_dataset/<data_type>/<id zero-padded to 5 digits>.wav`` is loaded at
    ``CFG['SR']``, leading/trailing silence is trimmed, ``CFG['N_MFCC']`` MFCCs
    are computed, and the result is cut or zero-padded to ``CFG['MAX_SIZE']``
    frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``id`` column identifying the wav files.
    data_type : str
        Sub-folder of ``./wav_dataset`` holding the files (e.g. ``'train'``).
    save_path : optional
        Currently unused; kept so existing callers keep working.
    top_db : float, default 60
        Silence threshold in dB passed to ``librosa.effects.trim``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), CFG['N_MFCC'], CFG['MAX_SIZE'])``.
    """
    # The data folder is the same for every file, so build it once.
    root_folder = './wav_dataset'
    root_path = os.path.join(root_folder, data_type)
    features = []

    for uid in tqdm(df['id']):
        path = os.path.join(root_path, str(uid).zfill(5) + '.wav')

        # Load the wav file, resampled to the configured rate.
        y, sr = librosa.load(path, sr=CFG['SR'])
        # Trim silence. The threshold must be a dB value passed by keyword:
        # the frame count CFG['MAX_SIZE'] is not a meaningful threshold, and
        # newer librosa releases reject a positional second argument.
        clip, _ = librosa.effects.trim(y, top_db=top_db)
        # Compute the MFCC matrix (n_mfcc x frames).
        mfcc = librosa.feature.mfcc(y=clip, sr=sr, n_mfcc=CFG['N_MFCC'])
        # Keep the full matrix, forced to a fixed number of frames, as the feature.
        features.append(_fit_width(mfcc, CFG['MAX_SIZE']))

    print('Done.')
    return np.array(features)

In [60]:
# Build the MFCC feature array for the training split.
train_matrix = get_mfcc_feature(df=train_df, data_type='train')

  0%|          | 0/3805 [00:00<?, ?it/s]

Done.


In [61]:
# Display the dimensions of the extracted training feature array.
train_matrix.shape

(3805, 32, 360)

In [63]:
# Build the MFCC feature arrays for the test and unlabeled splits.
test_matrix = get_mfcc_feature(df=test_df, data_type='test')
un_matrix = get_mfcc_feature(df=unlabeled_df, data_type='unlabeled')

# Display both shapes as the cell output.
tuple(matrix.shape for matrix in (test_matrix, un_matrix))

  0%|          | 0/5732 [00:00<?, ?it/s]

Done.


  0%|          | 0/1867 [00:00<?, ?it/s]

Done.


((5732, 32, 360), (1867, 32, 360))

In [64]:
# Persist each feature array next to the notebook for later reuse.
np.save(file='./train.npy', arr=train_matrix)
np.save(file='./test.npy', arr=test_matrix)
np.save(file='./unlabeled.npy', arr=un_matrix)