In [None]:
# Mount Google Drive at /content/drive so later cells can read the dataset stored under MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# One-time setup, kept disabled: copy the shared dataset folder into the Envision2024 directory on Drive.
# shared_folder_path = '/content/drive/MyDrive/Envision_Dataset'
# # https://drive.google.com/drive/folders/1Vq1Oj9-y2MmQOCc1Py0_SpwEefT7bdS3?usp=drive_link
# # List contents of the shared folder
# import os
# os.listdir(shared_folder_path)
# import shutil
# shutil.copytree(shared_folder_path, '/content/drive/MyDrive/Envision2024/dataset')  # Replace the second argument with your desired destination path

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Define the path to the audio data
data_path = '/content/drive/MyDrive/Envision2024/dataset'
def load_data(dataset_folder, n_mfcc=13):
    """Load every audio clip under ``dataset_folder`` and extract its MFCCs.

    The folder is expected to contain one sub-folder per species; the
    sub-folder name is used as the label for every clip inside it.

    Args:
        dataset_folder: Path to the directory holding the species sub-folders.
        n_mfcc: Number of MFCC coefficients to compute per frame (default 13).

    Returns:
        A tuple ``(data, labels, max_length)`` where ``data`` is a list of
        MFCC matrices transposed to ``(time_steps, n_mfcc)``, ``labels`` is
        the parallel list of species folder names, and ``max_length`` is the
        largest number of time steps seen (0 when no clip was loaded).
    """
    data = []
    labels = []
    max_length = 0  # Longest clip, in MFCC frames, seen so far
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for species_folder in sorted(os.listdir(dataset_folder)):
        species_path = os.path.join(dataset_folder, species_folder)
        # Hidden folders such as .ipynb_checkpoints are not species classes.
        if species_folder.startswith('.') or not os.path.isdir(species_path):
            continue
        for filename in sorted(os.listdir(species_path)):
            audio_path = os.path.join(species_path, filename)
            # Skip hidden files (e.g. .DS_Store) and nested directories,
            # neither of which can be decoded as audio.
            if filename.startswith('.') or not os.path.isfile(audio_path):
                continue
            # sr=None keeps the file's native sampling rate instead of resampling.
            signal, sr = librosa.load(audio_path, sr=None)
            mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
            data.append(mfccs.T)  # Transpose MFCCs to match shape (time_steps, n_mfcc)
            labels.append(species_folder)
            max_length = max(max_length, mfccs.shape[1])  # Update max_length
    return data, labels, max_length

In [None]:
# Load each clip's MFCC matrix and label, plus the longest frame count seen across clips.
data, labels, max_length = load_data(data_path)
# Display the first label as a quick sanity check.
labels[0]

'Dicrurus andamanensis'

In [None]:
# Bring every MFCC matrix to exactly max_length time steps so the samples can
# be stacked into a single array.
for i, features in enumerate(data):
    n_frames = features.shape[0]
    if n_frames < max_length:
        # Pad shorter sequences with zeros. The padding uses the sample's own
        # dtype; a default float64 block would upcast the sample on vstack.
        padding = np.zeros((max_length - n_frames, features.shape[1]), dtype=features.dtype)
        data[i] = np.vstack((features, padding))
    elif n_frames > max_length:
        # Truncate longer sequences (only reachable if max_length is set
        # below the longest clip).
        data[i] = features[:max_length]


In [None]:
# Convert the Python lists to NumPy arrays; after padding, the samples stack into one 3-D array.
labels = np.array(labels)
data = np.array(data)

In [None]:
# Shape is (samples, time_steps, n_mfcc).
print(data.shape)
# data = np.expand_dims(data, axis=-1)

(129, 15028, 13)


In [None]:
# NOTE: this binds a second name to the same array; no copy is made, so in-place changes to either name affect both.
data_copy = data

In [None]:
# Labels are still the raw folder-name strings at this point (encoding happens in the next cell).
labels[0]

'Dicrurus andamanensis'

In [None]:
from sklearn.preprocessing import LabelEncoder

# Learn the mapping from each distinct label string to an integer class id,
# then apply it to the full label array.
label_encoder = LabelEncoder()
label_encoder.fit(labels)
labels_encoded = label_encoder.transform(labels)

# One class per distinct label seen by the encoder.
num_classes = len(label_encoder.classes_)

# One-hot encode the integer ids for use with categorical cross-entropy.
labels_categorical = to_categorical(labels_encoded, num_classes)


In [None]:
# Number of distinct labels found by the encoder.
num_classes

5

In [None]:
# Shape of the feature array that is split into train/test sets below.
data_copy.shape

(129, 15028, 13)

# 2D CNN: 2 conv layers, 1 flatten layer, and 1 dense layer (no dropout)

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(data_copy, labels_categorical, test_size=0.2, random_state=42)  # Split data

# Conv2D needs a trailing channel axis. Adding it with np.newaxis avoids
# hard-coding the dataset-specific (time_steps, n_mfcc) dimensions.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# Confirm the training array now has the trailing channel axis.
X_train.shape

(103, 15028, 13, 1)

In [None]:
# Baseline CNN: two conv/max-pool stages, then a dense classifier head.
# The input shape is taken from the training data (everything except the
# batch axis) rather than hard-coded, so the cell follows the dataset.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=X_train.shape[1:]),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    # One softmax output per class.
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

In [None]:

# Categorical cross-entropy matches the one-hot labels; accuracy is tracked for reporting.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train for a fixed 10 epochs, reporting metrics on the held-out split after each epoch.
model.fit(x=X_train, y=y_train, epochs=10, validation_data=(X_test, y_test))

# Final score on the held-out split.
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 0.5


# 2 conv layers, 1 dense layer, with dropout (0.25, 0.25, 0.5 respectively)

In [None]:
# Hold out 20% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(data_copy, labels_categorical, test_size=0.2, random_state=42)  # Split data

# Add the channel axis Conv2D expects without hard-coding the dataset dimensions.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# Same two conv/max-pool stages as the baseline, with dropout after each
# pooling stage (0.25) and after the dense layer (0.5).
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=X_train.shape[1:]),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


# Early stopping picks the best epoch from validation loss. The validation
# data is carved out of the training set (validation_split) instead of
# reusing the test set, so the restored weights are never selected on the
# data used for the final score.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model.fit(X_train, y_train, validation_split=0.2, epochs=100, callbacks=[early_stopping])

# The test set is touched only once, for the final evaluation.
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Test Accuracy: 0.26923078298568726


# 2 conv layers, 1 dense layer, dropout, with **L2 regularization**

In [None]:
from tensorflow.keras import regularizers
# Hold out 20% of the samples for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(data_copy, labels_categorical, test_size=0.2, random_state=42)  # Split data
# Add the channel axis Conv2D expects without hard-coding the dataset dimensions.
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]


# Dropout architecture from the previous experiment, with an L2 penalty
# (0.01) on the kernels of both conv layers and the hidden dense layer.
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                           kernel_regularizer=regularizers.l2(0.01),
                           input_shape=X_train.shape[1:]),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                           kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu',
                          kernel_regularizer=regularizers.l2(0.01)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


# Early stopping picks the best epoch from validation loss. The validation
# data is carved out of the training set (validation_split) instead of
# reusing the test set, so the restored weights are never selected on the
# data used for the final score.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(X_train, y_train, validation_split=0.2, epochs=100, callbacks=[early_stopping])


# The test set is touched only once, for the final evaluation.
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Test Accuracy: 0.3461538553237915


In [None]:
# Disabled experiment: pad/truncate each sample to max_length, then min-max scale it to [-1, 1].
# NOTE(review): scaling runs after zero-padding, so padded entries no longer stay at 0
# (a 0 maps to -1 + 2 * (0 - min) / range); data_range is also 0 for a constant sample,
# which leaves the division undefined.
# def normalize_data(data, max_length):
#   for i in range(len(data)):
#         if data[i].shape[0] < max_length:
#               # Pad shorter sequences with zeros
#           padding = np.zeros((max_length - data[i].shape[0], data[i].shape[1]))
#           data[i] = np.vstack((data[i], padding))
#         elif data[i].shape[0] > max_length:
#             # Truncate longer sequences
#           data[i] = data[i][:max_length]

#           # Normalize MFCC features between -1 and 1
#         data_min_val = np.min(data[i])
#         data_max_val = np.max(data[i])
#         data_range = data_max_val - data_min_val
#         data[i] = -1 + 2 * ((data[i] - data_min_val) / data_range)

#   return data

array([[[-1.        ,  0.73380252,  0.73380252, ...,  0.73380252,
          0.73380252,  0.73380252],
        [-1.        ,  0.73380252,  0.73380252, ...,  0.73380252,
          0.73380252,  0.73380252],
        [-1.        ,  0.73380252,  0.73380252, ...,  0.73380252,
          0.73380252,  0.73380252],
        ...,
        [ 0.73380252,  0.73380252,  0.73380252, ...,  0.73380252,
          0.73380252,  0.73380252],
        [ 0.73380252,  0.73380252,  0.73380252, ...,  0.73380252,
          0.73380252,  0.73380252],
        [ 0.73380252,  0.73380252,  0.73380252, ...,  0.73380252,
          0.73380252,  0.73380252]],

       [[-1.        ,  0.64675808,  0.64675808, ...,  0.64675808,
          0.64675808,  0.64675808],
        [-1.        ,  0.64675808,  0.64675808, ...,  0.64675808,
          0.64675808,  0.64675808],
        [-1.        ,  0.64675808,  0.64675808, ...,  0.64675808,
          0.64675808,  0.64675808],
        ...,
        [ 0.64675808,  0.64675808,  0.64675808, ...,  

In [None]:
# Display the padded MFCC array as a sanity check (NumPy abbreviates the printout).
data

array([[[-6.02393616e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.02393616e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.02393616e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        ...,
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[-6.42114136e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [-6.42114136e+02,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e