In [1]:
"""

There are two main components of CNN:
1. Convolution
2. Pooling


Convolution involves the following:
Kernel = grid of weights
Kernel is "applied" to image
Traditionally used for image processing

"""

'\n\nThere are two main components of CNN:\n1. Convolution\n2. Pooling\n\n\nConvolution contains following things \nKernel = gird of weights\nKernel is "applied" to image\nTraditionally used for image processing\n\n'

In [2]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras

# MFCC ---> Mel-Frequency Cepstral Coefficients.


# JSON file read by load_data(); it must provide the "mfcc" and "labels" keys.
# The path is relative, so it is resolved against the current working directory.
DATA_PATH = "data.json"


def load_data(data_path):
    """Read the feature and label arrays stored in a JSON file.

    :param data_path: path to a JSON file holding an object with the
        keys "mfcc" and "labels"
    :return: tuple ``(X, y)`` of NumPy arrays built from the "mfcc" and
        "labels" entries respectively
    """
    with open(data_path, "r") as json_file:
        contents = json.loads(json_file.read())

    # convert the nested JSON lists into arrays
    features = np.array(contents["mfcc"])
    targets = np.array(contents["labels"])
    return features, targets


def prepare_datasets(test_size, validation_size, random_state=None):
    """Load the dataset and split it into train, validation and test sets.

    :param test_size: passed to ``train_test_split`` as ``test_size`` for the
        first split (whole dataset -> train / test)
    :param validation_size: passed to ``train_test_split`` as ``test_size``
        for the second split. It is applied to the training portion left
        after the test split, NOT to the whole dataset.
    :param random_state: seed forwarded to both ``train_test_split`` calls.
        The default ``None`` keeps the original non-deterministic shuffling;
        pass an int to get the same splits on every run.
    :return: ``X_train, X_validation, X_test, y_train, y_validation, y_test``,
        where every ``X_*`` array has one extra trailing axis of length 1
    """
    # load data
    X, y = load_data(DATA_PATH)

    # create train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # create train/validation split out of the remaining training data
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state
    )

    # Conv2D layers expect 4D input: (num_samples, height, width, channels).
    # `...` keeps every existing axis and np.newaxis appends a channel axis of
    # length 1, e.g. (num_samples, 130, 13) -> (num_samples, 130, 13, 1).
    # NOTE(review): the 130 x 13 MFCC shape depends on how data.json was
    # generated and is not checked here - confirm.
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape, num_classes=10):
    """Build the (uncompiled) CNN classifier.

    The network is three convolutional blocks (Conv2D -> MaxPool2D ->
    BatchNormalization) followed by a dense head with dropout and a softmax
    output layer.

    :param input_shape: shape of one input sample, without the batch axis
    :param num_classes: number of units in the softmax output layer; the
        default of 10 matches the previously hard-coded value
    :return: a ``keras.Sequential`` model that still has to be compiled
    """
    # create model
    model = keras.Sequential()

    # The input shape only has to be declared once, at the start of the model;
    # later layers infer their input from the layer before them.
    model.add(keras.Input(shape=input_shape))

    # (convolution kernel size, pooling window size) for each of the three
    # convolutional blocks. Every block uses 32 ReLU filters, a pooling stride
    # of (2, 2) with 'same' padding, and ends with batch normalization.
    conv_blocks = (
        ((3, 3), (3, 3)),  # 1st convolutional layer
        ((3, 3), (3, 3)),  # 2nd convolutional layer
        ((2, 2), (2, 2)),  # 3rd convolutional layer
    )
    for kernel_size, pool_size in conv_blocks:
        model.add(keras.layers.Conv2D(32, kernel_size, activation='relu'))
        model.add(keras.layers.MaxPool2D(pool_size, strides=(2, 2), padding='same'))
        model.add(keras.layers.BatchNormalization())

    # flatten the 3D feature maps into a 1D vector and feed it to a dense layer
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model



def predict(model, X, y):
    """Run the model on a single sample and print expected vs. predicted index.

    :param model: object exposing ``predict(batch)`` that returns one row of
        per-class scores for each sample in the batch
    :param X: a single input sample (without a batch axis)
    :param y: expected class index for ``X``; only used in the printed message
    :return: None - the result is printed, not returned
    """
    # model.predict works on batches, so prepend a batch axis of length 1
    batch = X[np.newaxis]

    # scores looks like [[0.1, 0.2, 0.3, ...]]: one row of per-class scores
    scores = model.predict(batch)

    # index of the highest score in each row; this is the class index that
    # would still need to be mapped back to a label name
    predicted_index = np.argmax(scores, axis=1)

    message = "Expected index: {}, Predicted_index: {}".format(y, predicted_index)
    print(message)


if __name__ == "__main__":
    # Split the data: 0.25 is held out for testing, then 0.2 of what remains
    # is held out for validation.
    (X_train, X_validation, X_test,
     y_train, y_validation, y_test) = prepare_datasets(0.25, 0.2)

    # The network input is the shape of one sample, i.e. axes 1-3 of the
    # training array (everything except the leading sample axis).
    input_shape = tuple(X_train.shape[axis] for axis in (1, 2, 3))
    model = build_model(input_shape)

    # Configure training: Adam optimizer, a loss that takes integer class
    # labels, and accuracy as the reported metric.
    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    # Train, monitoring the validation set after every epoch.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        batch_size=32,
        epochs=30,
    )

    # Score the trained network on the held-out test set; evaluate() returns
    # the loss first, followed by the compiled metrics.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Spot-check a single test sample.
    X, y = X_test[45], y_test[45]
    predict(model, X, y)
    

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.0000e+00 - loss: 8.0749 
Accuracy on test set is: 0.0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
Expected index: 3, Predicted_index: [6]
