#### * Load required libraries

In [16]:
import sys
import os
import IPython
import math
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
%matplotlib inline

import random
from datetime import datetime
#from include import helpers

from tensorflow.keras import backend as keras_backend
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout,Flatten,BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint 
from tensorflow.keras.regularizers import l2

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

#### * Load MFCC data

Data is an NumPy float32 array of shape (8732, 40, 174), 8732 samples with 40 MFCC coefficients and 174 frames. Each feature was zero-centered and scaled between -1 and 1 during extraction.<br/>
<br/>

In [17]:
data_path = os.path.abspath('C:\\Users\\LENOVO\\Desktop\\SE\\Project')
# Define a labels array for future use
# Pre-processed MFCC coefficients
X = np.load(data_path+"\\train_dataset\\X-mfcc.npy")
y = np.load(data_path+"\\train_dataset\\y-mfcc.npy")

# Metadata
metadata = pd.read_csv(data_path+"\\train_dataset\\train_data.csv",encoding='unicode_escape')
labels = metadata['ebird_code'].unique()
print(labels.shape,X.shape,y.shape)

(12,) (923, 40, 431) (923,)


### 1. Data preparation: features + metadata

#### 1.1 Train / Test split

Note that we are using the same index order for both the MFCC arrays and the metadata to keep track of the origin of each feature.<br/>
<br/>

In [18]:
indexes = []
total = len(metadata)
indexes = list(range(0, total))

# Randomize indexes
random.shuffle(indexes)

# Divide the indexes into Train and Test
test_split_pct = 20
split_offset = math.floor(test_split_pct * total / 100)

# Split the metadata
test_split_idx = indexes[0:split_offset]
train_split_idx = indexes[split_offset:total]


# Split the features with the same indexes
X_test = np.take(X, test_split_idx, axis=0)
y_test = np.take(y, test_split_idx, axis=0)
X_train = np.take(X, train_split_idx, axis=0)
y_train = np.take(y, train_split_idx, axis=0)

# Also split metadata
test_meta = metadata.iloc[test_split_idx]
train_meta = metadata.iloc[train_split_idx]

# Print status
print("Test split: {} \t\t Train split: {}".format(len(test_meta), len(train_meta)))
print("X test shape: {} \t X train shape: {}".format(X_test.shape, X_train.shape))
print("y test shape: {} \t\t y train shape: {}".format(y_test.shape, y_train.shape))

Test split: 184 		 Train split: 739
X test shape: (184, 40, 431) 	 X train shape: (739, 40, 431)
y test shape: (184,) 		 y train shape: (739,)


#### 1.2 One hot encode labels

In [19]:
le = LabelEncoder()
y_test_encoded = to_categorical(le.fit_transform(y_test))
y_train_encoded = to_categorical(le.fit_transform(y_train))

#### 1.3 Reshape data

In [21]:
# How data should be structured
num_rows = 40
num_columns = 431 
num_channels = 1

# Reshape to fit the network input (channel last)
X_train = X_train.reshape(X_train.shape[0], num_rows, num_columns, num_channels)
X_test = X_test.reshape(X_test.shape[0], num_rows, num_columns, num_channels)

# Total number of labels to predict (equal to the network output nodes)
num_labels = y_train_encoded.shape[1]

#### 2.1 Model definition



In [31]:
# Create a secquential object
model = Sequential()


# Conv 1
model.add(Conv2D(filters=32,kernel_size=(3,3),input_shape=(num_rows, num_columns, num_channels)))
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Dropout(0.3))
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Flatten())
#model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
# Softmax output
model.add(Dense(num_labels, activation='softmax'))

In [32]:
adam = Adam(lr=0.001)
model.compile(
    loss='categorical_crossentropy', 
    metrics=['accuracy'], 
    optimizer=adam)
# Display model architecture summary 
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 38, 429, 32)       320       
                                                                 
 conv2d_13 (Conv2D)          (None, 36, 427, 64)       18496     
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 12, 142, 64)      0         
 2D)                                                             
                                                                 
 dropout_12 (Dropout)        (None, 12, 142, 64)       0         
                                                                 
 conv2d_14 (Conv2D)          (None, 10, 140, 64)       36928     
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 3, 46, 64)        0         
 2D)                                                  

#### 2.3 Training the model

In [33]:
num_epochs = 60
num_batch_size = 128
model_file = 'cnn_model1.hdf5'
model_path = "C:\\Users\\LENOVO\\Desktop\SE\\Project\\"+model_file


# Save checkpoints
checkpointer = ModelCheckpoint(filepath=model_path, 
                               verbose=1, 
                               save_best_only=True)
start = datetime.now()
history = model.fit(X_train, 
                    y_train_encoded, 
                    batch_size=num_batch_size, 
                    epochs=num_epochs, 
                    validation_split=1/12.,
                    callbacks=[checkpointer], 
                    verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/60
Epoch 1: val_loss improved from inf to 2.48708, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 2/60
Epoch 2: val_loss improved from 2.48708 to 2.48071, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 3/60
Epoch 3: val_loss improved from 2.48071 to 2.47002, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 4/60
Epoch 4: val_loss improved from 2.47002 to 2.46069, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 5/60
Epoch 5: val_loss improved from 2.46069 to 2.44685, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 6/60
Epoch 6: val_loss improved from 2.44685 to 2.42416, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 7/60
Epoch 7: val_loss improved from 2.42416 to 2.41291, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 8/60
Epoch 8: val_loss improved from 2.41291 to 2.40595, saving model to C:\Users\LENOVO

In [41]:
def evaluate_model(model, X_train, y_train, X_test, y_test):
    train_score = model.evaluate(X_train, y_train, verbose=0)
    test_score = model.evaluate(X_test, y_test, verbose=0)
    return train_score, test_score

In [42]:
def model_evaluation_report(model, X_train, y_train, X_test, y_test, calc_normal=True):
    dash = '-' * 38

    # Compute scores
    train_score, test_score = evaluate_model(model, X_train, y_train, X_test, y_test)

    # Pint Train vs Test report
    print('{:<10s}{:>14s}{:>14s}'.format("", "LOSS", "ACCURACY"))
    print(dash)
    print('{:<10s}{:>14.4f}{:>14.4f}'.format( "Training:", train_score[0], 100 * train_score[1]))
    print('{:<10s}{:>14.4f}{:>14.4f}'.format( "Test:", test_score[0], 100 * test_score[1]))


    # Calculate and report normalized error difference?
    if (calc_normal):
        max_err = max(train_score[0], test_score[0])
        error_diff = max_err - min(train_score[0], test_score[0])
        normal_diff = error_diff * 100 / max_err
        print('{:<10s}{:>13.2f}{:>1s}'.format("Normal diff ", normal_diff, ""))

In [43]:
# Load best saved model
model = load_model(model_path)

model_evaluation_report(model, X_train, y_train_encoded, X_test, y_test_encoded)

                    LOSS      ACCURACY
--------------------------------------
Training:         0.3914       96.3464
Test:             1.3300       57.6087
Normal diff         70.57 


The error difference between Train and Test is small. To measure this different I'm using **the normalized difference between Train and Test error**, I report this as **norm diff** and show it as a percentage. If this difference is somewhere around 3.5% I will estimate the model is not overfitting. If it's negative, it will most probably be underfit.<br/>
<br/>
Train and test loss scores are similar and so the accuracy, with a normal diff of 3.8%, acceptable, but also just about to start overfitting as this amount of loss difference is hardly recoverable with more training.<br/>
With **89.69%** test accuracy we are over the mean score achieved by previous works on this dataset using MFCC features and CNN networks.<br/>
<br/>

#### 2.5 Train vs Test history plot

In [44]:
def plot_train_history(history, x_ticks_vertical=False):
    history = history.history

    # min loss / max accs
    min_loss = min(history['loss'])
    min_val_loss = min(history['val_loss'])
    max_accuracy = max(history['accuracy'])
    max_val_accuracy = max(history['val_accuracy'])

    # x pos for loss / acc min/max
    min_loss_x = history['loss'].index(min_loss)
    min_val_loss_x = history['val_loss'].index(min_val_loss)
    max_accuracy_x = history['accuracy'].index(max_accuracy)
    max_val_accuracy_x = history['val_accuracy'].index(max_val_accuracy)

    # summarize history for loss, display min
    plt.figure(figsize=(16,8))
    plt.plot(history['loss'], color="#1f77b4", alpha=0.7)
    plt.plot(history['val_loss'], color="#ff7f0e", linestyle="--")
    plt.plot(min_loss_x, min_loss, marker='o', markersize=3, color="#1f77b4", alpha=0.7, label='Inline label')
    plt.plot(min_val_loss_x, min_val_loss, marker='o', markersize=3, color="#ff7f0e", alpha=7, label='Inline label')
    plt.title('Model loss', fontsize=20)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.legend(['Train', 
                'Test', 
                ('%.3f' % min_loss), 
                ('%.3f' % min_val_loss)], 
                loc='upper right', 
                fancybox=True, 
                framealpha=0.9, 
                shadow=True, 
                borderpad=1)

    if (x_ticks_vertical):
        plt.xticks(np.arange(0, len(history['loss']), 5.0), rotation='vertical')
    else:
        plt.xticks(np.arange(0, len(history['loss']), 5.0))

    plt.show()

    # summarize history for accuracy, display max
    plt.figure(figsize=(16,6))
    plt.plot(history['accuracy'], alpha=0.7)
    plt.plot(history['val_accuracy'], linestyle="--")
    plt.plot(max_accuracy_x, max_accuracy, marker='o', markersize=3, color="#1f77b4", alpha=7)
    plt.plot(max_val_accuracy_x, max_val_accuracy, marker='o', markersize=3, color="orange", alpha=7)
    plt.title('Model accuracy', fontsize=20)
    plt.ylabel('Accuracy', fontsize=16)
    plt.xlabel('Epoch', fontsize=16)
    plt.legend(['Train', 
                'Test', 
                ('%.2f' % max_accuracy), 
                ('%.2f' % max_val_accuracy)], 
                loc='upper left', 
                fancybox=True, 
                framealpha=0.9, 
                shadow=True, 
                borderpad=1)
    plt.figure(num=1, figsize=(10, 6))

    if (x_ticks_vertical):
        plt.xticks(np.arange(0, len(history['accuracy']), 5.0), rotation='vertical')
    else:
        plt.xticks(np.arange(0, len(history['accuracy']), 5.0))

    plt.show()

The generalization gap in the plot is quite smooth, which is a good sign. We could use a larger batch size and a smoother curve -as the direction of the gradient would be more certain-, but on my test it tended much more to overfit with **256** than **128**. Lower batch size values will add some kind of regularization factor to training -because the direction of the gradient becomes less certain-.<br/>
<br/>
Note that test error is lower than train error: this normal as during training the error is calculated while the model is using dropout (what adds more difficulty).<br/>
<br/>


In [46]:
# Predict probabilities for test set
y_probs = model.predict(X_test, verbose=0)

# Get predicted labels
yhat_probs = np.argmax(y_probs, axis=1)
y_trues = np.argmax(y_test_encoded, axis=1)

# Add "pred" column
test_meta['pred'] = yhat_probs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_meta['pred'] = yhat_probs


In [47]:
# Build classification report
re = classification_report(y_trues, yhat_probs, labels=[0,1,2,3,4,5,6,7,8,9], target_names=labels)
print(re)

              precision    recall  f1-score   support

      amewig       0.27      0.50      0.35         6
      amewoo       0.38      0.59      0.47        17
      amtspa       0.50      0.42      0.45        12
      annhum       0.67      0.73      0.70        22
      astfly       0.83      0.71      0.77        21
      baisan       0.50      0.36      0.42        11
      baleag       0.44      0.67      0.53         6
      balori       0.58      0.65      0.61        17
      banswa       0.58      0.35      0.44        20
      barswa       0.65      0.79      0.71        19

   micro avg       0.56      0.60      0.58       151
   macro avg       0.54      0.58      0.55       151
weighted avg       0.59      0.60      0.58       151



