In [6]:
import random
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm
import librosa


In [2]:
##### hyperparameter
CFG = dict(
    SR=16000,    # sampling rate passed to librosa.load
    N_MFCC=128,  # number of MFCC coefficients requested per file (n_mfcc)
    SEED=42,     # random seed, meant to be handed to seed_everything
)

#### fixed random seed
def seed_everything(seed):
    """Seed the global Python and NumPy random generators.

    Also stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable.
    NOTE(review): the interpreter reads PYTHONHASHSEED at startup, so this
    presumably only influences child processes, not the running kernel.

    Args:
        seed: Value forwarded to ``random.seed`` and ``np.random.seed``.
    """
    hash_seed = str(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = hash_seed
    np.random.seed(seed)

In [4]:
# MFCC & mel feature extraction helpers
# Directory handed to get_all_audio_files() when the audio paths are collected
rootdir = './data/Collected_Data/'

def get_all_audio_files(rootdir):
    """Recursively collect the paths of all audio files below ``rootdir``.

    A file counts as audio when its extension is ``.wav`` or ``.mp3``. The
    comparison is case-insensitive, so ``CLIP.WAV`` is picked up as well.

    Args:
        rootdir: Directory to walk. A directory that does not exist yields
            an empty list, because ``os.walk`` ignores listing errors by
            default.

    Returns:
        Sorted list of ``os.path.join(dirpath, filename)`` strings. Sorting
        keeps the order stable between runs; ``os.walk`` itself lists
        directory entries in arbitrary order.
    """
    audio_exts = {'.wav', '.mp3'}
    return sorted(
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(rootdir)
        for name in filenames
        if os.path.splitext(name)[1].lower() in audio_exts
    )

def get_mfcc_feature(files):
    """Extract per-file MFCC summary statistics.

    Each file is loaded with ``librosa.load`` at ``CFG['SR']`` and
    ``CFG['N_MFCC']`` MFCCs are computed. Every coefficient is then reduced
    along axis 1 of the MFCC matrix to its mean, max and min.

    Files that cannot be loaded are reported and skipped, whatever the error.
    This is the same policy ``get_feature_mel`` applies, so one undecodable
    file no longer aborts the whole MFCC pass while the mel pass survives it.

    Args:
        files: Iterable of audio file paths.

    Returns:
        DataFrame with one row per successfully loaded file and the columns
        ``mfcc_mean_i``, ``mfcc_max_i``, ``mfcc_min_i`` (in that block order)
        for ``i`` in ``range(CFG['N_MFCC'])``. An empty DataFrame is returned
        when no file could be processed.
    """
    means, maxes, mins = [], [], []
    for full_path in tqdm(files):
        try:
            y, sr = librosa.load(full_path, sr=CFG['SR'])
        except Exception as e:
            # Best effort: report the unreadable file and move on.
            print(f"Error loading {full_path}. Reason: {e}")
            continue
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CFG['N_MFCC'])
        means.append(np.mean(mfcc, axis=1))
        maxes.append(np.max(mfcc, axis=1))
        mins.append(np.min(mfcc, axis=1))

    if not means:
        print("No valid audio files found.")
        return pd.DataFrame()

    n_mfcc = CFG['N_MFCC']
    # Each list holds one 1-D array per file; build one frame per statistic.
    stat_frames = [
        pd.DataFrame(rows, columns=[f'mfcc_{stat}_{i}' for i in range(n_mfcc)])
        for stat, rows in (('mean', means), ('max', maxes), ('min', mins))
    ]
    return pd.concat(stat_frames, axis=1)


##### mel feature extract function
def get_feature_mel(files):
    """Extract per-file mel-spectrogram summary statistics.

    Each file is loaded with ``librosa.load`` at ``CFG['SR']``. The magnitude
    of its STFT (``n_fft=2048``, ``win_length=2048``, ``hop_length=1024``) is
    mapped onto 128 mel bands, and every band is reduced along axis 1 to its
    mean, max and min.

    Files that fail to load are reported on stdout and skipped.

    Args:
        files: Iterable of audio file paths.

    Returns:
        DataFrame with one row per successfully loaded file and the columns
        ``mel_mean_i``, ``mel_max_i``, ``mel_min_i`` (in that block order) for
        ``i`` in ``range(128)``. An empty DataFrame is returned, after
        printing a notice, when no file could be processed.
    """
    frame_size = 2048  # used for both n_fft and win_length
    hop = 1024
    n_mels = 128
    per_stat = {'mean': [], 'max': [], 'min': []}

    for full_path in tqdm(files):
        try:
            y, sr = librosa.load(full_path, sr=CFG['SR'])
        except Exception as e:
            print(f"Error loading {full_path}. Reason: {e}")
            continue

        magnitude = np.abs(
            librosa.stft(y, n_fft=frame_size, win_length=frame_size, hop_length=hop)
        )
        # NOTE(review): with S= supplied, hop_length/win_length are presumably
        # ignored by melspectrogram; kept so the call matches the original.
        mel = librosa.feature.melspectrogram(
            S=magnitude, sr=sr, n_mels=n_mels, hop_length=hop, win_length=frame_size
        )
        per_stat['mean'].append(mel.mean(axis=1))
        per_stat['max'].append(mel.max(axis=1))
        per_stat['min'].append(mel.min(axis=1))

    if not per_stat['mean']:
        print("No valid audio files found.")
        return pd.DataFrame()

    stat_frames = []
    for stat, rows in per_stat.items():
        col_names = [f'mel_{stat}_{i}' for i in range(n_mels)]
        stat_frames.append(pd.DataFrame(rows, columns=col_names))

    return pd.concat(stat_frames, axis=1)

In [7]:
# Collect the path of every audio file under rootdir.
all_files = get_all_audio_files(rootdir)

# Extract the MFCC and mel summary features from those paths.
train_mfcc = get_mfcc_feature(all_files)
train_mel = get_feature_mel(all_files)

# Put the mel columns first, then the MFCC columns, and save without the index.
feature_frames = [train_mel, train_mfcc]
train_df = pd.concat(feature_frames, axis='columns')
train_df.to_csv('./data/audio_data.csv', index=False)

  0%|          | 0/189 [00:00<?, ?it/s]

  0%|          | 0/189 [00:00<?, ?it/s]