In [19]:
import random
import pandas as pd
import numpy as np
import os
import librosa

import pickle

from tqdm.auto import tqdm

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBClassifier

import warnings
warnings.filterwarnings(action='ignore') 

In [35]:
# Global settings shared by the feature-extraction and modelling cells.
CFG = dict(
    SR=20000,             # sampling rate passed to librosa.load
    N_melspectrogram=30,  # number of mel-spectrogram vectors (mel bands) to extract
    N_MFCC=32,            # number of MFCC vectors (coefficients) to extract
    SEED=41,              # seed used by seed_everything and the model's random_state
)

In [36]:
def seed_everything(seed):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's global generator with ``seed``
    and stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable.

    NOTE(review): an environment variable set at runtime presumably only
    affects processes started afterwards, not the hashing of the running
    interpreter - confirm this is the intent.

    Args:
        seed: Seed value handed to ``random.seed`` and ``np.random.seed``.

    Returns:
        None.
    """
    random.seed(seed)
    os.environ.update(PYTHONHASHSEED=str(seed))
    np.random.seed(seed)

seed_everything(seed=CFG['SEED'])  # fix the seed for the whole notebook

In [37]:
# Dataset root folder and the per-split audio folders below it.
audio_data_path = 'audio_data'
audio_train_data_path, audio_test_data_path = (
    os.path.join(audio_data_path, split_dir) for split_dir in ('train', 'test')
)
# Read train.csv / test.csv from the dataset root into DataFrames.
train_df, test_df = (
    pd.read_csv(os.path.join(audio_data_path, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

In [38]:
def get_mel_feature(df, data_type, save_path, root_folder='./audio_data'):
    """Extract per-band mean log-mel features for every segment and save them as CSV.

    For each ``SegmentId`` in ``df`` the file
    ``<root_folder>/<data_type>/<SegmentId>.wav`` is loaded with librosa at
    ``CFG['SR']``, a mel spectrogram with ``CFG['N_melspectrogram']`` bands is
    computed and converted to dB, and every band is averaged into one value.
    The resulting ``mel_1 .. mel_N`` columns are appended to the columns of
    ``df`` and the combined table is written to ``save_path``.

    Args:
        df: DataFrame with a ``SegmentId`` column (wav file names without the
            ``.wav`` extension). It is not modified.
        data_type: Sub-folder of ``root_folder`` that holds the wav files
            (the notebook passes ``'train'`` / ``'test'``).
        save_path: Destination CSV. If it already exists, nothing is computed.
        root_folder: Folder containing the ``data_type`` sub-folders.

    Returns:
        None. The result is only written to ``save_path``.
    """
    if os.path.exists(save_path):
        print(f'{save_path} is exist.')
        return

    # Create the output folder up front so a missing directory cannot make
    # to_csv fail after the (slow) extraction loop has already finished.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    wav_dir = os.path.join(root_folder, data_type)  # same for every file
    n_mels = CFG['N_melspectrogram']

    features = []
    for uid in tqdm(df['SegmentId']):
        path = os.path.join(wav_dir, f'{uid}.wav')

        # Load the wav file with librosa at the configured sampling rate.
        y, sr = librosa.load(path, sr=CFG['SR'])

        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)

        # Convert the power spectrogram to log (dB) scale.
        log_mel = librosa.power_to_db(S=melspectrogram, ref=1.0)

        # One feature per mel band: the mean of that band's values.
        features.append([np.mean(band) for band in log_mel])

    mel_df = pd.DataFrame(features, columns=[f'mel_{i}' for i in range(1, n_mels + 1)])

    # concat(axis=1) aligns on index labels; resetting the index makes the
    # join positional so row i of the features always matches row i of df.
    df = pd.concat([df.reset_index(drop=True), mel_df], axis=1)
    df.to_csv(save_path, index=False)
    print('Done.')

## mel + mfcc feature extraction

In [25]:
def get_mel_feature2(df, data_type, save_path, root_folder='./audio_data'):
    """Extract mean log-mel and mean MFCC features per segment and save them as CSV.

    For each ``SegmentId`` in ``df`` the file
    ``<root_folder>/<data_type>/<SegmentId>.wav`` is loaded with librosa at
    ``CFG['SR']``. Two feature sets are computed from it:

    * a mel spectrogram with ``CFG['N_melspectrogram']`` bands, converted to
      dB, each band averaged into one value (columns ``mel_1 .. mel_N``);
    * ``CFG['N_MFCC']`` MFCCs, each coefficient averaged into one value
      (columns ``mfcc_1 .. mfcc_M``).

    Both column groups are appended to the columns of ``df`` and the combined
    table is written to ``save_path``.

    Args:
        df: DataFrame with a ``SegmentId`` column (wav file names without the
            ``.wav`` extension). It is not modified.
        data_type: Sub-folder of ``root_folder`` that holds the wav files.
        save_path: Destination CSV. If it already exists, nothing is computed.
        root_folder: Folder containing the ``data_type`` sub-folders.

    Returns:
        None. The result is only written to ``save_path``.
    """
    if os.path.exists(save_path):
        print(f'{save_path} is exist.')
        return

    # Create the output folder up front so a missing directory cannot make
    # to_csv fail after the (slow) extraction loop has already finished.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    wav_dir = os.path.join(root_folder, data_type)  # same for every file
    n_mels = CFG['N_melspectrogram']
    n_mfcc = CFG['N_MFCC']

    mel_rows = []
    mfcc_rows = []
    for uid in tqdm(df['SegmentId']):
        path = os.path.join(wav_dir, f'{uid}.wav')

        # Load the wav file with librosa at the configured sampling rate.
        y, sr = librosa.load(path, sr=CFG['SR'])

        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

        # Convert the power spectrogram to log (dB) scale.
        log_mel = librosa.power_to_db(S=melspectrogram, ref=1.0)

        # One feature per mel band / MFCC coefficient: the mean of its values.
        mel_rows.append([np.mean(band) for band in log_mel])
        mfcc_rows.append([np.mean(coeff) for coeff in mfcc])

    mel_df = pd.DataFrame(mel_rows, columns=[f'mel_{i}' for i in range(1, n_mels + 1)])
    mfcc_df = pd.DataFrame(mfcc_rows, columns=[f'mfcc_{i}' for i in range(1, n_mfcc + 1)])

    # concat(axis=1) aligns on index labels; resetting the index makes the
    # join positional so row i of the features always matches row i of df.
    df = pd.concat([df.reset_index(drop=True), mel_df, mfcc_df], axis=1)
    df.to_csv(save_path, index=False)
    print('Done.')

In [39]:
# Build the mel-feature CSV for each split; get_mel_feature returns early
# when the target file already exists.
get_mel_feature(
    df=train_df,
    data_type='train',
    save_path='preprocessing_data/train_mel_data.csv',
)
get_mel_feature(
    df=test_df,
    data_type='test',
    save_path='preprocessing_data/test_mel_data.csv',
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10769.0), HTML(value='')))

In [None]:
# Load the cached feature tables (audio features joined with the original
# metadata/label columns), replacing the raw train_df / test_df.
# NOTE(review): the original comment called these "MFCC features", but the
# files read here are the *_mel_data.csv outputs - confirm which is intended.
train_df, test_df = (
    pd.read_csv(feature_csv)
    for feature_csv in (
        'preprocessing_data/train_mel_data.csv',
        'preprocessing_data/test_mel_data.csv',
    )
)

In [None]:
# Split the training table into the label (y) and the model inputs (x):
# y is the 'Emotion' column, x is everything except the id/metadata/label columns.
train_y = train_df['Emotion']
train_x = train_df.drop(
    columns=['Unnamed: 0', 'SegmentId', 'time', 'Valence', 'Arousal', 'Emotion'],
)

In [None]:
# Display the feature matrix (the last expression of a cell is rendered as its output).
train_x

In [None]:
# Display the 'Emotion' label column selected above.
train_y

In [None]:
model = MLPClassifier(random_state=CFG['SEED']) # use the multi-layer perceptron classifier provided by scikit-learn, seeded from CFG
model.fit(train_x, train_y) # train the model on the training features and labels

In [None]:
# Prepare the test set the same way as the training set:
# 'Emotion' becomes the label, the id/metadata/label columns are dropped from the inputs.
test_y = test_df['Emotion']
test_x = test_df.drop(
    columns=['Unnamed: 0', 'SegmentId', 'time', 'Valence', 'Arousal', 'Emotion'],
)

# Model inference on the test features.
preds = model.predict(test_x)

## Accuracy

In [None]:
from sklearn.metrics import mean_squared_error, accuracy_score

# Evaluate the predictions: accuracy of `preds` against the test labels.
accuracy = accuracy_score(y_true=test_y, y_pred=preds)

# The printed label is Korean for "model performance (Accuracy)".
print(f"[mel] 모델의 성능(Accuracy): {accuracy}")

## mfcc+mel Accuracy

In [34]:
from sklearn.metrics import mean_squared_error, accuracy_score

# Evaluate the predictions: accuracy of `preds` against the test labels.
# NOTE(review): this cell scores the same `test_y` / `preds` names as the
# [mel] cell; the recorded output presumably came from an earlier run in which
# they were built from mel + MFCC features (get_mel_feature2) - verify before
# trusting the "[mfcc + mel]" label on a fresh Run All.
accuracy = accuracy_score(y_true=test_y, y_pred=preds)

# The printed label is Korean for "model performance (Accuracy)".
print(f"[mfcc + mel] 모델의 성능(Accuracy): {accuracy}")

[mfcc + mel] 모델의 성능(Accuracy): 0.8403267731154845


## save model

In [96]:
# filename = './model/classification_mel_0_84_model.pkl' # Accuracy 0.84 model save
# pickle.dump(model, open(filename,'wb'))