In [1]:
import pandas as pd
import numpy as np
import librosa
import os
import sys
from sklearn.model_selection import train_test_split

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
class Config:
    """Notebook-wide settings, exposed as plain class attributes."""

    # --- Audio / feature extraction ---
    SR = 32000          # sample rate requested from librosa.load
    N_MFCC = 13         # n_mfcc passed to librosa.feature.mfcc

    # --- Dataset ---
    ROOT_FOLDER = './'  # not referenced by the cells visible in this notebook

    # --- Training ---
    # None of these four values is referenced by the cells visible here.
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 5
    LR = 3e-4

    # --- Others ---
    SEED = 42           # random_state for the splits and the seeded estimators


# Single shared instance that the rest of the notebook reads from.
CONFIG = Config()

In [4]:
# Read the training manifest; the preview below shows `id`, `path` and
# `label` columns (label is 'real' or 'fake' in the rows displayed).
df = pd.read_csv(filepath_or_buffer='./train.csv')
df.head(n=5)

Unnamed: 0,id,path,label
0,RUNQPNJF,./train/RUNQPNJF.ogg,real
1,JFAWUOGJ,./train/JFAWUOGJ.ogg,fake
2,RDKEKEVX,./train/RDKEKEVX.ogg,real
3,QYHJDOFK,./train/QYHJDOFK.ogg,real
4,RSPQNHAO,./train/RSPQNHAO.ogg,real


In [5]:
# Hold out 20% of the manifest rows as `val`; the remaining 80% become `train`.
# Only `df` is passed: the label column already travels with each row, and
# splitting it separately produced two throw-away Series bound to `_`, the
# name IPython uses for the last cell output. The shuffle depends only on the
# number of rows, test_size and random_state, so the row assignment is the
# same as before.
train, val = train_test_split(df, test_size=0.2, random_state=CONFIG.SEED)
train.shape, val.shape


((44350, 3), (11088, 3))

In [6]:
def get_mfcc_feature(df, train_mode=True):
    """Extract one time-averaged MFCC vector per audio file listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``path`` column holding audio file paths and, when
        ``train_mode`` is true, a ``label`` column.
    train_mode : bool, default True
        When true, integer labels are collected and returned with the
        features.

    Returns
    -------
    features : list of numpy.ndarray
        One vector per row of ``df``: the MFCC matrix returned by librosa,
        averaged over its second axis.
    labels : list of int
        Returned only when ``train_mode`` is true. 0 for the label ``'fake'``,
        1 for any other label value.
    """
    # `tqdm` was used here without being imported anywhere in the notebook,
    # which raises NameError on a fresh kernel. Import it locally and fall
    # back to a pass-through so the extraction still runs without tqdm.
    try:
        from tqdm.auto import tqdm
    except ImportError:
        def tqdm(iterable, **kwargs):
            return iterable

    features = []
    labels = []
    # iterrows() is a generator with no length, so give the bar its total.
    for _idx, row in tqdm(df.iterrows(), total=len(df)):
        # Load the audio file with librosa at the configured sample rate.
        y, sr = librosa.load(row['path'], sr=CONFIG.SR)
        # Extract the MFCC matrix with librosa, then collapse its second axis
        # so every file yields one fixed-length vector.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        features.append(np.mean(mfcc.T, axis=0))

        if train_mode:
            labels.append(0 if row['label'] == 'fake' else 1)

    if train_mode:
        return features, labels
    return features

In [7]:
# Read the cached feature and label arrays from ./mfcc, in the same order as
# the names on the left-hand side.
# NOTE(review): presumably these files were produced by get_mfcc_feature on
# the train/val split above; the cell that saved them is not visible — confirm.
train_mfcc, val_mfcc, train_labels, val_labels = map(
    np.load,
    (
        './mfcc/train_mfcc.npy',
        './mfcc/val_mfcc.npy',
        './mfcc/train_labels.npy',
        './mfcc/val_labels.npy',
    ),
)

In [13]:
# Carve a further 80/20 hold-out out of the cached training features; the
# model comparison below fits on the *2 training arrays and scores on the *2
# validation arrays.
# NOTE(review): val_mfcc / val_labels loaded earlier are not used by any cell
# visible here.
train_mfcc2, val_mfcc2, train_labels2, val_labels2 = train_test_split(
    train_mfcc,
    train_labels,
    test_size=0.2,
    random_state=CONFIG.SEED,
)

In [16]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import NuSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score

# Candidate classifiers, each built with library defaults apart from the seed.
lr = LogisticRegression(random_state=CONFIG.SEED)
dt = DecisionTreeClassifier(random_state=CONFIG.SEED)
rf = RandomForestClassifier(random_state=CONFIG.SEED)
xgb = XGBClassifier(random_state=CONFIG.SEED)
sgd = SGDClassifier(random_state=CONFIG.SEED)
nusvc = NuSVC(random_state=CONFIG.SEED)
knn = KNeighborsClassifier()  # constructed without a random_state
mlp = MLPClassifier(random_state=CONFIG.SEED)

model_list = [lr, dt, rf, xgb, sgd, nusvc, knn, mlp]

# Fit every candidate on the *2 training arrays and keep the one with the
# highest accuracy on the *2 validation arrays.
# NOTE(review): selection and the reported "Best Score" use the same hold-out,
# so that score is an optimistic estimate.
best_score = 0
best_model = None
best_pred = None

for model in model_list:
    model.fit(train_mfcc2, train_labels2)
    pred = model.predict(val_mfcc2)
    score = accuracy_score(val_labels2, pred)
    print(f'{model.__class__.__name__} - {score}')
    # `best_model is None` guarantees a winner even if every score is 0;
    # the strict `>` keeps the earliest model on ties, as before.
    if best_model is None or score > best_score:
        best_score = score
        best_model = model
        best_pred = pred

# The winner's predictions and score were already computed inside the loop,
# so report them directly instead of predicting and scoring a second time.
print(f'Best Model: {best_model.__class__.__name__}')
print(f'Best Score: {best_score}')

LogisticRegression - 0.7750845546786922
DecisionTreeClassifier - 0.9012401352874859
RandomForestClassifier - 0.9608793686583991
XGBClassifier - 0.9586245772266065
SGDClassifier - 0.773055242390079
NuSVC - 0.8392333709131905
KNeighborsClassifier - 0.9438556933483653
MLPClassifier - 0.939684329199549
Best Model: RandomForestClassifier
Best Score: 0.9608793686583991
