In [None]:
# Imports and model class definition
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Input
from sklearn.metrics import confusion_matrix
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import get_data_mfcc

class AudioRecognition:
    """Fully connected Keras classifier for per-sample audio feature arrays.

    The constructor stores the train/test features as NumPy arrays and turns
    the integer class labels into one-hot rows.  The remaining methods build,
    compile, fit, save and evaluate the network on that stored data.

    Args:
        train_data: Sequence of training feature arrays; converted with
            ``np.array``.  Every sample is expected to share one shape, since
            ``train_data[0].shape`` is used as the network input shape.
        train_labels: Sequence of class ids, each convertible with ``int()``.
        test_data: Sequence of test feature arrays (same layout as
            ``train_data``).
        test_labels: Sequence of class ids for ``test_data``.
        num_classes: Number of output classes / width of the one-hot rows.
            Defaults to 10, the value that was previously hard-coded.
    """

    def __init__(self, train_data, train_labels, test_data, test_labels, num_classes=10):
        # NOTE(review): this Adam instance is stored but compile_model() passes
        # the string 'rmsprop' instead -- confirm which optimizer is intended.
        self.optimizer = optimizers.Adam(learning_rate=0.0001)
        self.num_classes = num_classes
        self.train_data = np.array(train_data)
        self.test_data = np.array(test_data)
        self.train_labels = self._one_hot(train_labels, num_classes)
        self.test_labels = self._one_hot(test_labels, num_classes)
        # Placeholder; replaced by the real network in compile_model().
        self.model = Model()

    @staticmethod
    def _one_hot(labels, num_classes):
        """Return a ``(len(labels), num_classes)`` float array of one-hot rows."""
        encoded = np.zeros((len(labels), num_classes))
        for row, val in enumerate(labels):
            encoded[row][int(val)] = 1
        return encoded

    def build_model(self):
        """Create the layer graph and return ``(input_tensor, output_tensor)``.

        The stack is Dense 256 -> 128 -> 64 (each followed by batch
        normalisation and 20% dropout) -> Dense 32 -> softmax over
        ``num_classes``.
        """
        input_tensor = Input(shape=self.train_data[0].shape)
        x = Dense(256, activation='relu')(input_tensor)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
        x = Dense(128, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
        x = Dense(64, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
        x = Dense(32, activation='relu')(x)
        output_tensor = Dense(self.num_classes, activation='softmax')(x)
        return input_tensor, output_tensor

    def compile_model(self):
        """Build the network, compile it and print its summary.

        Calls build_model() internally, so callers do not need to call it
        first.
        """
        input_tensor, output_tensor = self.build_model()
        self.model = Model(input_tensor, output_tensor)
        self.model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
        self.model.summary()

    def fit_model(self):
        """Train for 200 epochs (batch size 64), validating on the test split.

        Returns:
            The object returned by ``self.model.fit`` (the training history).
        """
        history = self.model.fit(
            self.train_data,
            self.train_labels,
            epochs=200,
            batch_size=64,
            # Was `self.test_label`, an attribute that is never set.
            validation_data=(self.test_data, self.test_labels),
        )
        return history

    def save_model(self):
        """Write the model weights to 'weight.ckpt' and 'DNN_model.h5'."""
        # NOTE(review): both calls save weights only; if 'DNN_model.h5' is meant
        # to hold the full model, this should be self.model.save(...) -- confirm.
        self.model.save_weights('weight.ckpt')
        self.model.save_weights('DNN_model.h5')

    def _predict_class_indices(self):
        """Return the arg-max class index for every test sample."""
        # `predict_classes` is not available on a functional `Model`; taking
        # the arg-max of the softmax output yields the class indices instead.
        return np.argmax(self.model.predict(self.test_data), axis=-1)

    def predict_model(self):
        """Return the predicted class index for each test sample."""
        return self._predict_class_indices()

    def predict_probablity_label(self):
        """Predict test classes, print test loss/accuracy, return the classes."""
        pre_label = np.argmax(self.model.predict(self.test_data), axis=1)
        test_loss, test_accuracy = self.model.evaluate(self.test_data, self.test_labels)
        print(f'Test Loss{test_loss},Test Accuacy{test_accuracy}')
        return pre_label

    def predict_class(self):
        """Return the predicted class index for each test sample."""
        return self._predict_class_indices()

    def print_confusion_matrix(self):
        """Print and plot the confusion matrix of the model on the test split.

        Returns:
            The ``(num_classes, num_classes)`` confusion matrix; rows are true
            classes, columns are predicted classes.
        """
        true_classes = np.argmax(self.test_labels, axis=1)
        predicted_classes = self._predict_class_indices()
        # Fix the label set so the matrix shape does not depend on which
        # classes happen to appear in the test split.
        matrix = confusion_matrix(
            true_classes, predicted_classes, labels=np.arange(self.num_classes)
        )
        print(matrix)

        tick_locs = np.arange(self.num_classes)
        plt.figure(dpi=100)
        plt.imshow(matrix, cmap=plt.cm.Blues)
        plt.xticks(tick_locs, [str(i) for i in tick_locs])
        plt.yticks(tick_locs, [str(i) for i in tick_locs])
        plt.ylabel("True number")
        plt.xlabel("Predicted number")
        plt.title("Confusion matrix")
        for row in range(matrix.shape[0]):
            for col in range(matrix.shape[1]):
                count = matrix[row][col]
                if count != 0:
                    # x is the column (predicted), y is the row (true).
                    plt.text(col, row, count, color='red', fontsize=10, va='center', ha='center')
        plt.show()
        return matrix
        

In [None]:
if __name__ == '__main__':
    # Load the dataset through the project's get_data_mfcc helper.  The calls
    # run in the original order: upset_file_name(), then the train split,
    # then the test split.
    data = get_data_mfcc.GetData()
    data.upset_file_name()
    data.get_train_data()
    data.get_test_data()

    # Expose the loaded splits as notebook-level names for the later cells.
    train_data, train_labels = data.train_data, data.train_labels
    test_data, test_labels = data.test_data, data.test_labels

In [None]:
# Training: construct the classifier, compile it and fit it on the loaded data.
# The statements are at column 0 because a notebook cell is compiled on its
# own; a cell whose code starts indented raises IndentationError.
audio = AudioRecognition(train_data, train_labels, test_data, test_labels)
# compile_model() calls build_model() itself, so no separate build step is needed.
audio.compile_model()
audio.fit_model()
# audio.save_model()

In [None]:
# Compute and plot the confusion matrix for the test split.

NUM_CLASSES = 10  # class ids 0-9 are shown on both axes

predict_label = audio.predict_probablity_label()
# Pin the label set so the matrix is always NUM_CLASSES x NUM_CLASSES and stays
# aligned with the tick labels even if a class never occurs in this split.
cm = confusion_matrix(
    list(map(int, test_labels)),
    predict_label.tolist(),
    labels=list(range(NUM_CLASSES)),
)

plt.figure(dpi=100)
plt.imshow(cm, cmap=plt.cm.Blues)
tick_locs = np.arange(NUM_CLASSES)
ticks = ['{}'.format(i) for i in range(0, NUM_CLASSES)]
plt.xticks(tick_locs, ticks)
plt.yticks(tick_locs, ticks)
plt.ylabel("True number")
plt.xlabel("Predicted number")
plt.title("Confusion matrix")

# Annotate every non-zero cell with its count.  Rows of `cm` are true classes
# (y axis) and columns are predicted classes (x axis).
for row in range(cm.shape[0]):
    for col in range(cm.shape[1]):
        count = cm[row][col]
        if count != 0:
            plt.text(col, row, count, color='red', fontsize=10, va='center', ha='center')

plt.show()

In [None]:
# Shell escape: run `dir` to list the contents of the current working directory.
!dir