In [None]:
import matplotlib.pyplot as plt
import os
from scipy.io import wavfile
from collections import defaultdict, Counter
from scipy import signal
import numpy as np
import librosa
import sklearn
import random
from unicodedata import normalize
from keras.layers import Dense
from keras import Model
from keras import Input
from keras.utils import to_categorical
from keras.regularizers import l2
from keras.layers import (
    Dense,
    Flatten,
    Conv2D,
    MaxPooling2D,
)

# All locations are derived from the directory the notebook is executed in.
BASE_DIR = os.path.abspath("")
TRAIN_DIR = f"{BASE_DIR}/dataset/train"  # training split
TEST_DIR = f"{BASE_DIR}/dataset/test"  # test split
MODEL_DIR = BASE_DIR  # model files are written next to the notebook

In [None]:
# Per-split containers for raw inputs, derived features and labels.
train_X, train_y = [], []
train_spectrograms, train_mel_spectograms, train_mfccs = [], [], []

test_X, test_y = [], []
test_spectrograms, test_mel_spectograms, test_mfccs = [], [], []


def pad1d(a, i):
    """Return 1-D array `a` truncated, or right-padded with zeros, to length `i`."""
    length = a.shape[0]
    if length > i:
        return a[0:i]
    # hstack with a float zero block, so the padded result is promoted to float.
    return np.hstack((a, np.zeros(i - length)))


def pad2d(a, i):
    """Return 2-D array `a` truncated, or zero-padded, along axis 1 to `i` columns."""
    n_rows, n_cols = a.shape[0], a.shape[1]
    if n_cols > i:
        return a[:, 0:i]
    return np.hstack((a, np.zeros((n_rows, i - n_cols))))


# Accumulators for (features, label) pairs.
dataset0, dataset1 = [], []
test_dataset, train_dataset = [], []

# Framing parameters (presumably in seconds -- TODO confirm).
frame_length = 0.025
frame_stride = 0.0010

In [None]:
from sklearn import preprocessing

def load_mfcc_dataset(directory, n_frames=40, sample_rate=16000, positive_prefix="외"):
    """Build ``(padded_mfcc, label)`` pairs from the .wav files in `directory`.

    Args:
        directory: Folder that is scanned (non-recursively) for ``.wav`` files.
        n_frames: Number of MFCC frames (columns) every sample is cut or
            zero-padded to via ``pad2d``.
        sample_rate: Rate the audio is resampled to on load. The same rate is
            passed to the MFCC computation.
        positive_prefix: A file whose NFC-normalized name starts with this
            character gets label 1; every other file gets label 0.

    Returns:
        A new list of ``(padded_mfcc, label)`` tuples, in sorted file order.

    Raises:
        Exception: Any error raised while loading or transforming a file is
            re-raised after the offending file name has been printed.
    """
    samples = []
    # Sorted so the pre-shuffle order does not depend on the filesystem.
    for raw_name in sorted(os.listdir(directory + "/")):
        # Normalize only for inspecting the name. The file is opened through
        # the name exactly as the OS reported it, because an NFC-normalized
        # path need not exist on filesystems that store decomposed names.
        filename = normalize("NFC", raw_name)
        if not filename.endswith(".wav"):
            continue

        try:
            wav, sr = librosa.load(directory + "/" + raw_name, sr=sample_rate)
            # Pass the real sample rate: without it librosa assumes 22050 Hz
            # and builds the mel filterbank for the wrong frequency range.
            # NOTE(review): any inference code must compute MFCCs the same way.
            mfcc = librosa.feature.mfcc(y=wav, sr=sr)
            padded_mfcc = pad2d(mfcc, n_frames)
        except Exception as e:
            print(filename, e)
            raise

        label = 1 if filename[0] == positive_prefix else 0
        samples.append((padded_mfcc, label))
    return samples


# Rebind instead of appending so that re-running this cell does not duplicate samples.
train_dataset = load_mfcc_dataset(TRAIN_DIR)
test_dataset = load_mfcc_dataset(TEST_DIR)

In [None]:
NUM_CLASSES = 2  # binary task: labels are 0 or 1
SHUFFLE_SEED = 42  # fixed so a fresh Run All shuffles the same way


def split_features_labels(pairs, split_name):
    """Convert ``(mfcc, label)`` pairs into a feature array and one-hot labels.

    Args:
        pairs: Sequence of ``(mfcc_array, int_label)`` tuples.
        split_name: Name used in the error message (e.g. ``"train"``).

    Returns:
        ``(features, labels)`` where ``features`` stacks the MFCC arrays and
        ``labels`` is one-hot encoded with exactly ``NUM_CLASSES`` columns.

    Raises:
        ValueError: If `pairs` is empty.
    """
    if not pairs:
        raise ValueError(f"{split_name} dataset is empty; no .wav files were loaded")
    features = np.array([mfcc for mfcc, _ in pairs])
    # Fix the class count explicitly: inferring it from the data gives a
    # single column whenever a split happens to contain only label 0.
    labels = to_categorical(
        np.array([label for _, label in pairs]), num_classes=NUM_CLASSES
    )
    return features, labels


# Shuffle in place (test first, then train, as before) with a seeded generator.
_shuffle_rng = random.Random(SHUFFLE_SEED)
_shuffle_rng.shuffle(test_dataset)
_shuffle_rng.shuffle(train_dataset)

train_mfccs, train_y = split_features_labels(train_dataset, "train")
test_mfccs, test_y = split_features_labels(test_dataset, "test")

print("train_mfccs:", train_mfccs.shape)
print("train_y:", train_y.shape)

print("test_mfccs:", test_mfccs.shape)
print("test_y:", test_y.shape)

In [None]:
# Add a trailing axis of size 1 to each feature array.
train_X_ex = train_mfccs[..., np.newaxis]
test_X_ex = test_mfccs[..., np.newaxis]
for caption, expanded in (("train X shape:", train_X_ex), ("test X shape:", test_X_ex)):
    print(caption, expanded.shape)

In [None]:
# Small CNN: three 3x3 conv stages (the first two followed by 2x2 max-pooling),
# then a dense head ending in a 2-way softmax.
ip = Input(shape=train_X_ex.shape[1:])

x = ip
for n_filters, use_pool in ((32, True), (64, True), (128, False)):
    x = Conv2D(n_filters, (3, 3), padding="same", activation="relu")(x)
    if use_pool:
        # Each pooling stage halves both spatial dimensions.
        x = MaxPooling2D((2, 2))(x)

x = Flatten()(x)
x = Dense(64, activation="relu")(x)

op = Dense(2, activation="softmax")(x)
model = Model(ip, op)
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for 100 epochs in batches of 32; the test arrays are evaluated after every epoch.
history = model.fit(
    x=train_X_ex,
    y=train_y,
    batch_size=32,
    epochs=100,
    validation_data=(test_X_ex, test_y),
    verbose=1,
)

In [None]:
# Accuracy per epoch for the training data and the validation data.
fig, ax = plt.subplots()
for metric_key, curve_label in (
    ("accuracy", "Train Accuracy"),
    ("val_accuracy", "val_accuracy"),
):
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
ax.legend()

In [None]:
# Write the trained model to MODEL_DIR as an .h5 file.
model.save(f"{MODEL_DIR}/new_filler_determine_model.h5")