In [None]:
import numpy as np
import itertools
import librosa
import librosa.display
import IPython.display as ipd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases removed the old names, so use whichever spelling this install provides.
_white_style = 'seaborn-v0_8-white' if 'seaborn-v0_8-white' in plt.style.available else 'seaborn-white'
plt.style.use(_white_style)

In [None]:
# Source recordings, one per class. Despite the "midi_" prefix both paths point at
# WAV audio files, which the loading cell reads with librosa.load.
# Clips cut from midi_file1 are labelled 0 and clips cut from midi_file2 are labelled 1.
# NOTE(review): absolute Google Drive paths - presumably requires Drive to be mounted
# at /content/drive before running; confirm.
midi_file1 = '/content/drive/MyDrive/PAI_sound.wav'
midi_file2 = '/content/drive/MyDrive/backgroundsound.wav'

In [None]:
num_note1 = 60   # number of clips cut from midi_file1 (label 0)
num_note2 = 120  # number of clips cut from midi_file2 (label 1)
sec = 2          # clip length in seconds; also the stride between clip offsets


def load_clips(path, n_clips, clip_sec):
    """Cut `n_clips` consecutive, non-overlapping clips from one audio file.

    Args:
        path: audio file to read.
        n_clips: how many clips to cut, starting at the beginning of the file.
        clip_sec: length of every clip in seconds; clip k starts at k * clip_sec.

    Returns:
        (clips, rate): list of 1-D sample arrays and the file's native sample
        rate (None when n_clips is 0).
    """
    clips = []
    rate = None
    for clip_idx in range(n_clips):
        # sr=None keeps the file's native sample rate (no resampling).
        # duration follows clip_sec so offset and clip length cannot drift apart.
        clip, rate = librosa.load(path, sr=None, offset=clip_idx * clip_sec, duration=clip_sec)
        clips.append(clip)
    return clips, rate


audio = []  # raw waveforms, one entry per clip
inst = []   # class label per clip, aligned index-by-index with `audio`

for label, (path, n_clips) in enumerate([(midi_file1, num_note1), (midi_file2, num_note2)]):
    clips, sr = load_clips(path, n_clips, sec)
    audio.extend(clips)
    inst.extend([label] * n_clips)

# Later cells stack the per-clip features into one fixed-shape array, so every clip
# must have the same number of samples. A file shorter than n_clips * sec seconds
# yields truncated or empty clips; fail here with a readable message instead.
clip_lengths = sorted({len(clip) for clip in audio})
if len(clip_lengths) > 1:
    raise ValueError(
        'Clips have differing lengths {} (in samples); check that both files are at '
        'least num_note * sec seconds long and share a sample rate.'.format(clip_lengths)
    )

In [None]:
# Compute one MFCC matrix per clip. `audio_mfcc` was referenced here without being
# defined in any visible cell, so a fresh-kernel run failed with NameError.
# librosa's defaults (n_mfcc=20, hop_length=512) give a (20, 173) matrix for a
# 2-second clip at 44.1 kHz, matching the 20 x 173 shape the later cells expect.
# NOTE(review): `sr` is the rate returned by the last librosa.load call - confirm both
# source files share that sample rate.
audio_mfcc = [librosa.feature.mfcc(y=clip, sr=sr) for clip in audio]

mfcc_np = np.array(audio_mfcc, np.float32)  # (n_clips, n_mfcc, n_frames)
inst_np = np.array(inst, np.int16)          # (n_clips,) integer class labels

# Features and labels must stay aligned one-to-one.
assert len(mfcc_np) == len(inst_np), 'feature/label count mismatch'

print(mfcc_np.shape, inst_np.shape)

In [None]:
# Flatten each clip's MFCC matrix into a single row for the scaler.
# The row count is taken from the data instead of a hard-coded 420, which did not
# match the 60 + 120 clips loaded above and made reshape raise ValueError.
# reshape(n, -1) is also safe to re-run on an already flattened array.
mfcc_np = mfcc_np.reshape(len(mfcc_np), -1)

In [None]:
# Fit a min-max scaler on the flattened MFCC rows (one row per clip).
# NOTE(review): only fit() is called here; no transform() appears in the visible
# cells, and the training tensors are later rebuilt from `audio_mfcc`, so this
# scaling does not reach the model input. Confirm whether that is intended.
scaler = MinMaxScaler()
scaler.fit(mfcc_np)

In [None]:
from tensorflow.keras.utils import to_categorical

# Fixed seed so the train/test partition (and the reported scores) can be reproduced.
SPLIT_SEED = 42

# Rebuild the 3-D (n_clips, n_mfcc, n_frames) array because `mfcc_np` was flattened
# by an earlier cell, then add a trailing channel axis for Conv2D.
mfcc_np = np.array(audio_mfcc, np.float32)
mfcc_array = np.expand_dims(mfcc_np, -1)
inst_cat = to_categorical(inst_np)  # one-hot labels, one column per class

# The two classes are imbalanced (num_note1 vs num_note2 clips), so stratify on the
# integer labels to keep the same class ratio in both partitions.
train_x, test_x, train_y, test_y = train_test_split(
    mfcc_array,
    inst_cat,
    test_size=0.2,
    stratify=inst_np,
    random_state=SPLIT_SEED,
)

for split in (train_x, test_x, train_y, test_y):
    print(split.shape)

In [None]:
from keras.models import Sequential, Model
from keras.layers import Input, Dense
from keras.layers import Conv2D, MaxPool2D, Flatten

def model_build(input_shape=(20, 173, 1)):
    """Build and compile the CNN that classifies MFCC clips into two classes.

    Architecture: four Conv2D(3x3, relu, same) + MaxPool2D(2x2, stride 2, same)
    stages with 64/128/256/512 filters, then Flatten, Dense 256/128/64 (relu) and a
    2-unit sigmoid output.

    Args:
        input_shape: shape of one sample passed to the Input layer. Defaults to
            (20, 173, 1), the value that was previously hard-coded.

    Returns:
        The compiled Keras Model (adam optimizer, binary_crossentropy loss, 'acc' metric).
    """
    conv_filters = (64, 128, 256, 512)
    dense_units = (256, 128, 64)

    inputs = Input(shape=input_shape)
    features = inputs

    # Convolutional feature extractor: each stage halves both spatial dimensions.
    for n_filters in conv_filters:
        features = Conv2D(n_filters, 3, strides=1, padding='same', activation='relu')(features)
        features = MaxPool2D(pool_size=(2, 2), strides=2, padding='same')(features)

    # Fully connected classifier head.
    features = Flatten()(features)
    for n_units in dense_units:
        features = Dense(n_units, activation='relu')(features)

    # NOTE(review): two independent sigmoid outputs with binary_crossentropy are kept
    # as in the original; for mutually exclusive one-hot targets, softmax with
    # categorical_crossentropy is the usual choice - confirm before changing.
    outputs = Dense(2, activation='sigmoid')(features)

    model = Model(inputs=[inputs], outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

    return model

In [None]:
# Instantiate the compiled CNN and print its layer-by-layer summary.
model = model_build()
model.summary()

In [None]:
# Train for 70 epochs; validation_split=0.2 has Keras hold out a fraction of
# train_x/train_y for validation. The returned History object is plotted in a later cell.
history = model.fit(train_x, train_y, epochs=70, batch_size=128, validation_split=0.2)

In [None]:
def plot_history(history_dict):
    """Plot training curves side by side: loss on the left, accuracy on the right.

    Args:
        history_dict: mapping of metric name to per-epoch values, e.g.
            `History.history` from `model.fit`. Must contain 'loss' and either
            'acc' or 'accuracy'. The matching 'val_*' series are drawn when present.

    Raises:
        KeyError: if 'loss' or both accuracy keys are missing.
    """
    loss = history_dict['loss']
    val_loss = history_dict.get('val_loss')

    # Keras names the metric after the string given to compile(): 'acc' or 'accuracy'.
    acc_key = 'acc' if 'acc' in history_dict else 'accuracy'
    acc = history_dict[acc_key]
    val_acc = history_dict.get('val_' + acc_key)

    epochs = range(1, len(loss) + 1)
    fig = plt.figure(figsize=(14, 5))

    # (subplot position, y-axis label, train series, validation series, legend labels)
    panels = (
        (1, 'loss', loss, val_loss, 'train_loss', 'val_loss'),
        (2, 'accuracy', acc, val_acc, 'train_accuracy', 'val_accuracy'),
    )
    for position, ylabel, train_curve, val_curve, train_label, val_label in panels:
        ax = fig.add_subplot(1, 2, position)
        ax.plot(epochs, train_curve, 'b--', label=train_label)
        if val_curve is not None:  # no validation data was used during fit
            ax.plot(epochs, val_curve, 'r:', label=val_label)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(ylabel)  # the accuracy panel used to be mislabelled 'loss'
        ax.grid()
        ax.legend()

    plt.show()

In [None]:
# Visualise the per-epoch loss and accuracy recorded by model.fit.
plot_history(history.history)

In [None]:
# Score the trained model on the held-out test split. The return value is not
# captured, and because this is not the cell's last expression it is not echoed
# as cell output either.
model.evaluate(test_x, test_y)

# NOTE(review): load_model is imported here but not used in any visible cell.
from keras.models import load_model

# Optional export of the trained model, left disabled. The numbers in the file name
# are presumably the loss/accuracy of the saved run - confirm.
#model.save('PAI_Model_V2(0.052, 0.976).h5')