### User Input

In [None]:
from pathlib import Path

# Input: JSON file holding the MFCC dataset, located relative to the notebook.
mfccs_json_path = "../mfccs_cnn_humpbackwhale_walrus.json"

# Output folder for the generated figures. It is built from the current user's
# home directory rather than a hard-coded "/Users/<name>" prefix, so the
# notebook runs unchanged on another account; edit this line to save elsewhere.
output_dir = Path.home() / "Documents" / "marine-mammal-call-classification"

# Figure with the training/validation accuracy and loss curves.
accuracy_loss_image = str(output_dir / "acc_loss_metrics.png")

# Figure with the confusion-matrix heatmap.
confusion_mat_image = str(output_dir / "finalCFcnn.png")

### Imports

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd 
import numpy as np
import warnings

### Plot Settings

In [None]:
# color defaults
CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'
color_list = [CB91_Blue, CB91_Green, CB91_Pink, CB91_Purple, CB91_Violet, CB91_Amber]

# matplotlib settings
plt.style.use('ggplot')

# seaborn settings
# A single call is enough: the style applied last wins, so the final look is
# "darkgrid" (the earlier 'ticks' calls were overridden and have been dropped).
sns.set(style='darkgrid')

# Install the custom colour cycle LAST. Both plt.style.use('ggplot') and
# sns.set() assign their own 'axes.prop_cycle', so setting it before them
# (as was done previously) silently discarded color_list.
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=color_list)

# pandas settings
pd.options.display.max_columns = 500
pd.options.display.max_rows = 500

# warning settings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Load MFCC Dataset

In [None]:
from cnn_acoustics_lib import load_cnn_json

# load_cnn_json returns three values for the JSON file chosen in the User Input
# cell; they are unpacked as X, y and L. L is printed below as the species
# mapping and is later reused as the tick labels of the confusion matrix.
dataset = load_cnn_json(mfccs_json_path)
X, y, L = dataset
print(f"mapping the marine mammals: {L}")

In [None]:
# Sanity check: list the distinct label values (returned sorted) together with
# how many samples carry each one.
label_values, label_counts = np.unique(y, return_counts=True)
label_values, label_counts

### Create Training, Validation, and Test Sets

In [None]:
# Split the data into train, validation and test sets.
from cnn_acoustics_lib import prepare_datasets

# Fractions passed positionally to prepare_datasets: test size first,
# validation size second.
test_size = 0.25
validation_size = 0.2

splits = prepare_datasets(X, y, test_size, validation_size)
X_train, X_validation, X_test, y_train, y_validation, y_test = splits

### Build the Model

In [None]:
from cnn_acoustics_lib import build_model

# Shape of a single input: axes 1-3 of X_train (axis 0 is skipped, presumably
# because it indexes the samples).
input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])
model = build_model(input_shape)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [None]:
# Configure the network for training: Adam optimiser with an explicit step
# size, sparse categorical cross-entropy as the loss (labels are passed as
# integer class indices), and accuracy as the reported metric.
learning_rate = 0.0001
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
    optimizer=optimizer,
)

In [None]:
from cnn_acoustics_lib import plot_history

# Fit the CNN on the training split, scoring the validation split after each
# epoch.
fit_options = dict(
    validation_data=(X_validation, y_validation),
    batch_size=32,
    epochs=20,
)
history = model.fit(X_train, y_train, **fit_options)

# Plot the accuracy/error curves for training and validation; the image path
# from the User Input cell is handed to plot_history (presumably the
# destination of the saved figure).
plot_history(history, accuracy_loss_image)

In [None]:
# Score the trained CNN on the test split. evaluate() returns the loss followed
# by the metric requested at compile time (accuracy).
test_metrics = model.evaluate(X_test, y_test, verbose=1)
test_error, test_accuracy = test_metrics
print("Accuracy on test set is: {}".format(test_accuracy))

### Confusion Matrix

In [None]:
# Predict the whole test set in one batched call instead of one predict() call
# per sample: this is far faster, avoids growing an array with np.append inside
# a loop, and no longer overwrites the dataset variable X with a single sample.
test_scores = model.predict(X_test)

# For every sample keep the index of the highest-scoring class. The result is
# an integer array (the per-sample loop produced the same indices as floats).
ypred_array = np.argmax(test_scores, axis=1)

In [None]:
# Cross-tabulate the true test labels against the predicted class indices.
confusion_mtx = tf.math.confusion_matrix(y_test, ypred_array)
num_rows, num_cols = confusion_mtx.get_shape().as_list()

# Remove the first row and the first column: `begin` is the offset where the
# slice starts and `size` is the shape of the resulting matrix.
# NOTE(review): this discards every count that involves class index 0 -
# presumably the labels start at 1; confirm against the dataset.
confusion_mtx = tf.slice(confusion_mtx, begin=[1, 1], size=[num_rows - 1, num_cols - 1])

# Draw the annotated heatmap, using the species mapping L for both axes.
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(confusion_mtx, annot=True, fmt='g',
            xticklabels=L, yticklabels=L, ax=ax)
ax.set_xlabel('Prediction')
ax.set_ylabel('Label')
ax.set_title("Confusion Matrix of CNN")
fig.savefig(confusion_mat_image, bbox_inches='tight')
plt.show()

### Predict a sample

In [None]:
from cnn_acoustics_lib import predict

# Make a prediction on one test sample. Dedicated names are used so that the
# full dataset variables X and y (loaded earlier and needed to re-run the
# train/validation/test split) are not overwritten by a single sample.
sample_index = 65  # position of the sample within the test split
sample_X = X_test[sample_index]
sample_y = y_test[sample_index]
predict(model, sample_X, sample_y)

### Save the model

In [None]:
# Persist the entire trained model at the path below.
# NOTE(review): the path has no file extension; whether that is accepted
# depends on the installed Keras/TensorFlow version - confirm before upgrading.
model_dir = 'saved_model/FinalCnnModel6_2'
model.save(model_dir)