#### * Load required libraries

In [1]:
import sys
import os
import IPython
import math
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
%matplotlib inline

import random
from datetime import datetime

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout,Flatten,BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint 
from tensorflow.keras.regularizers import l2

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

#### * Load MFCC data

In [7]:
data_path = os.path.abspath('C:\\Users\\LENOVO\\Desktop\\SE\\Project')
# Define a labels array for future use
# Pre-processed MFCC coefficients
X = np.load(data_path+"\\train_dataset\\X-mfcc.npy")
y = np.load(data_path+"\\train_dataset\\y-mfcc.npy")

# Metadata
metadata = pd.read_csv(data_path+"\\train_dataset\\train_data.csv",encoding='unicode_escape')
labels = metadata['ebird_code'].unique()
print(labels.shape,X.shape,y.shape)

(12,) (923, 40, 431) (923,)


### 1. Data preparation: features + metadata

#### 1.1 Train / Test split

Note that we are using the same index order for both the MFCC arrays and the metadata to keep track of the origin of each feature.<br/>
<br/>

In [8]:
indexes = []
total = len(metadata)
indexes = list(range(0, total))

# Randomize indexes
random.shuffle(indexes)

# Divide the indexes into Train and Test
test_split_pct = 15
split_offset = math.floor(test_split_pct * total / 100)

# Split the metadata
test_split_idx = indexes[0:split_offset]
train_split_idx = indexes[split_offset:total]


# Split the features with the same indexes
X_test = np.take(X, test_split_idx, axis=0)
y_test = np.take(y, test_split_idx, axis=0)
X_train = np.take(X, train_split_idx, axis=0)
y_train = np.take(y, train_split_idx, axis=0)

# Also split metadata
test_meta = metadata.iloc[test_split_idx]
train_meta = metadata.iloc[train_split_idx]

# Print status
print("Test split: {} \t\t Train split: {}".format(len(test_meta), len(train_meta)))
print("X test shape: {} \t X train shape: {}".format(X_test.shape, X_train.shape))
print("y test shape: {} \t\t y train shape: {}".format(y_test.shape, y_train.shape))

Test split: 138 		 Train split: 785
X test shape: (138, 40, 431) 	 X train shape: (785, 40, 431)
y test shape: (138,) 		 y train shape: (785,)


#### 1.2 One hot encode labels

In [9]:
le = LabelEncoder()
y_test_encoded = to_categorical(le.fit_transform(y_test))
y_train_encoded = to_categorical(le.fit_transform(y_train))

#### 1.3 Reshape data

In [10]:
# How data should be structured
num_rows = 40
num_columns = 431 
num_channels = 1

# Reshape to fit the network input (channel last)
X_train = X_train.reshape(X_train.shape[0], num_rows, num_columns, num_channels)
X_test = X_test.reshape(X_test.shape[0], num_rows, num_columns, num_channels)

# Total number of labels to predict (equal to the network output nodes)
num_labels = y_train_encoded.shape[1]
print("X test shape: {} \t X train shape: {}".format(X_test.shape, X_train.shape))
print("y test shape: {} \t\t y train shape: {}".format(y_test.shape, y_train.shape))

X test shape: (138, 40, 431, 1) 	 X train shape: (785, 40, 431, 1)
y test shape: (138,) 		 y train shape: (785,)


#### 2.1 Model definition



In [9]:
# Create a secquential object
model = Sequential()


# Conv 1
model.add(Conv2D(filters=32,kernel_size=(3,3),input_shape=(num_rows, num_columns, num_channels)))
model.add(Conv2D(64, kernel_size=(3,3), activation='relu',kernel_regularizer=l2(l=0.01)))
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Dropout(0.3))
model.add(Conv2D(64, kernel_size=(3,3), activation='relu',kernel_regularizer=l2(l=0.01)))
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Flatten())
#model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
# Softmax output
model.add(Dense(num_labels, activation='softmax'))

In [10]:
adam = Adam(lr=0.001)
model.compile(
    loss='categorical_crossentropy', 
    metrics=['accuracy'], 
    optimizer=adam)
# Display model architecture summary 
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 38, 429, 32)       320       
                                                                 
 conv2d_2 (Conv2D)           (None, 36, 427, 64)       18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 142, 64)      0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 12, 142, 64)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 10, 140, 64)       36928     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 3, 46, 64)        0         
 2D)                                                  

  super().__init__(name, **kwargs)


#### 2.3 Training the model

In [12]:
num_epochs = 60
num_batch_size = 128
model_file = 'cnn_model2.hdf5'
model_path = "C:\\Users\\LENOVO\\Desktop\SE\\Project\\"+model_file


# Save checkpoints
checkpointer = ModelCheckpoint(filepath=model_path, 
                               verbose=1, 
                               save_best_only=True)
history = model.fit(X_train, 
                    y_train_encoded, 
                    batch_size=num_batch_size, 
                    epochs=num_epochs, 
                    validation_split=1/12.,
                    callbacks=[checkpointer], 
                    verbose=1)

Epoch 1/60
Epoch 1: val_loss improved from inf to 3.48610, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 2/60
Epoch 2: val_loss improved from 3.48610 to 3.43567, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 3/60
Epoch 3: val_loss improved from 3.43567 to 3.37271, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 4/60
Epoch 4: val_loss improved from 3.37271 to 3.31068, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 5/60
Epoch 5: val_loss improved from 3.31068 to 3.26702, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 6/60
Epoch 6: val_loss improved from 3.26702 to 3.20436, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 7/60
Epoch 7: val_loss improved from 3.20436 to 3.16846, saving model to C:\Users\LENOVO\Desktop\SE\Project\cnn_model1.hdf5
Epoch 8/60
Epoch 8: val_loss improved from 3.16846 to 3.11166, saving model to C:\Users\LENOVO

In [13]:
def evaluate_model(model, X_train, y_train, X_test, y_test):
    train_score = model.evaluate(X_train, y_train, verbose=0)
    test_score = model.evaluate(X_test, y_test, verbose=0)
    return train_score, test_score

In [14]:
def model_evaluation_report(model, X_train, y_train, X_test, y_test, calc_normal=True):
    dash = '-' * 38

    # Compute scores
    train_score, test_score = evaluate_model(model, X_train, y_train, X_test, y_test)

    # Pint Train vs Test report
    print('{:<10s}{:>14s}{:>14s}'.format("", "LOSS", "ACCURACY"))
    print(dash)
    print('{:<10s}{:>14.4f}{:>14.4f}'.format( "Training:", train_score[0], 100 * train_score[1]))
    print('{:<10s}{:>14.4f}{:>14.4f}'.format( "Test:", test_score[0], 100 * test_score[1]))


    # Calculate and report normalized error difference?
    if (calc_normal):
        max_err = max(train_score[0], test_score[0])
        error_diff = max_err - min(train_score[0], test_score[0])
        normal_diff = error_diff * 100 / max_err
        print('{:<10s}{:>13.2f}{:>1s}'.format("Normal diff ", normal_diff, ""))

In [15]:
# Load best saved model
model = load_model(model_path)

model_evaluation_report(model, X_train, y_train_encoded, X_test, y_test_encoded)

                    LOSS      ACCURACY
--------------------------------------
Training:         0.7302       95.6688
Test:             1.5714       63.0435
Normal diff         53.53 


In [16]:
# Predict probabilities for test set
y_probs = model.predict(X_test, verbose=0)
print(y_probs[0:5])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_meta['pred'] = yhat_probs


In [None]:
# Get predicted labels
yhat_probs = np.argmax(y_probs, axis=1)
y_trues = np.argmax(y_test_encoded, axis=1)

# Add "pred" column
test_meta['pred'] = yhat_probs

In [19]:
# Build classification report
re = classification_report(y_trues, yhat_probs, labels=[0,1,2,3,4,5,6,7,8,9,10,11], target_names=labels)
print(re)

              precision    recall  f1-score   support

      amewig       0.67      0.50      0.57         4
      amewoo       0.70      0.44      0.54        16
      amtspa       0.70      0.78      0.74         9
      annhum       0.62      0.80      0.70        10
      astfly       0.54      0.93      0.68        14
      baisan       0.40      0.17      0.24        12
      baleag       0.50      0.25      0.33         4
      balori       0.54      0.68      0.60        19
      banswa       0.67      0.67      0.67         9
      barswa       0.68      0.93      0.79        14
      bawwar       0.75      0.75      0.75        16
     belkin1       1.00      0.27      0.43        11

    accuracy                           0.63       138
   macro avg       0.65      0.60      0.59       138
weighted avg       0.65      0.63      0.60       138

