In [None]:
# Mount Google Drive into the Colab filesystem; later cells read the dataset
# from (and write the extracted features to) a path under this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import librosa
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder


In [None]:
# Root folder of the audio dataset; each sub-folder name is used as a label.
data_path = '/content/drive/MyDrive/Envision2024/dataset'


def load_data(dataset_folder, normalize=False, n_mfcc=13):
    """Extract MFCC features for every audio file below ``dataset_folder``.

    Every sub-directory of ``dataset_folder`` is treated as one class, and its
    name becomes the label of each audio file found directly inside it.
    Entries of ``dataset_folder`` that are not directories are ignored.

    Parameters
    ----------
    dataset_folder : str
        Directory containing one sub-directory per class.
    normalize : bool, optional
        When True, each MFCC matrix is passed through
        ``librosa.util.normalize`` (called with its default arguments).
    n_mfcc : int, optional
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.

    Returns
    -------
    data : list of numpy.ndarray
        One array per file, transposed to shape ``(time_steps, n_mfcc)``.
        The arrays generally differ in ``time_steps``.
    labels : list of str
        The sub-directory name for each entry of ``data`` (same order).
    max_length : int
        Largest ``time_steps`` seen across all files; 0 if nothing was loaded.
    """
    data = []
    labels = []
    max_length = 0  # longest sequence (in frames) seen so far

    # os.listdir() returns entries in arbitrary order. Sorting makes the sample
    # order deterministic, so separate calls on the same folder (for example
    # with and without normalisation) yield identically ordered results.
    for species_folder in sorted(os.listdir(dataset_folder)):
        species_path = os.path.join(dataset_folder, species_folder)
        if not os.path.isdir(species_path):
            continue
        for filename in sorted(os.listdir(species_path)):
            audio_path = os.path.join(species_path, filename)
            # Skip hidden entries and nested directories (e.g. a
            # ".ipynb_checkpoints" folder) that librosa cannot decode.
            if filename.startswith('.') or not os.path.isfile(audio_path):
                continue
            # sr=None: keep the file's own sampling rate instead of resampling.
            signal, sr = librosa.load(audio_path, sr=None)
            mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
            if normalize:
                mfccs = librosa.util.normalize(mfccs)
            data.append(mfccs.T)  # (n_mfcc, time_steps) -> (time_steps, n_mfcc)
            labels.append(species_folder)
            max_length = max(max_length, mfccs.shape[1])
    return data, labels, max_length

In [None]:
# Raw (un-normalised) 40-coefficient MFCCs for the whole dataset.
data, labels, max_length = load_data(data_path, normalize=False, n_mfcc=40)
labels[0]

'Dicrurus andamanensis'

In [None]:
# Same extraction with normalisation switched on. NOTE(review): this rebinds
# `labels` and `max_length`; presumably they equal the values from the
# un-normalised load -- confirm before relying on them for `data`.
data_n, labels, max_length = load_data(data_path, normalize=True, n_mfcc=40)

In [None]:
# Bring every sequence in `data` to exactly `max_length` frames so they can be
# stacked into a single array afterwards.
for i in range(len(data)):
    n_frames = data[i].shape[0]
    if n_frames < max_length:
        # Zero-pad at the end. The padding is created with the sequence's own
        # dtype: np.zeros defaults to float64, which would silently upcast the
        # padded sequences and leave the list with mixed dtypes.
        padding = np.zeros(
            (max_length - n_frames, data[i].shape[1]), dtype=data[i].dtype
        )
        data[i] = np.vstack((data[i], padding))
    elif n_frames > max_length:
        # Truncate longer sequences
        data[i] = data[i][:max_length]


In [None]:
# Convert the padded sequence list and the label list into NumPy arrays;
# the cell displays the resulting feature-array shape.
data = np.array(data)
labels = np.array(labels)
data.shape

(129, 15028, 40)

In [None]:
def add_padding(data, max_length):
    """Pad or truncate every sequence in ``data`` to ``max_length`` rows.

    Sequences shorter than ``max_length`` get zero rows appended at the end;
    longer ones are cut to their first ``max_length`` rows. Sequences that
    already have ``max_length`` rows are left as they are.

    Parameters
    ----------
    data : list of numpy.ndarray
        2-D arrays indexed as ``(rows, columns)``. Modified in place.
    max_length : int
        Target number of rows for every sequence.

    Returns
    -------
    list of numpy.ndarray
        The same ``data`` object, returned for convenience.
    """
    for i in range(len(data)):
        seq = data[i]
        n_rows = seq.shape[0]
        if n_rows < max_length:
            # Use the sequence's own dtype for the padding: np.zeros defaults
            # to float64, which would upcast the padded sequences and leave
            # the list with mixed dtypes.
            padding = np.zeros((max_length - n_rows, seq.shape[1]), dtype=seq.dtype)
            data[i] = np.vstack((seq, padding))
        elif n_rows > max_length:
            # Truncate longer sequences
            data[i] = seq[:max_length]
    return data

In [None]:
# Pad/truncate the normalised sequences to the common length as well.
data_n = add_padding(data=data_n, max_length=max_length)

In [None]:
# `labels` holds the original label of every sample.
# Step 1: fit an encoder on them and map each label to an integer class index.
label_encoder = LabelEncoder().fit(labels)
labels_encoded = label_encoder.transform(labels)

# Step 2: one-hot encode the integer indices, one column per distinct class.
num_classes = len(label_encoder.classes_)
labels_categorical = to_categorical(labels_encoded, num_classes=num_classes)


In [None]:
# Persist the un-normalised features and their labels inside the dataset
# folder (np.save appends the ".npy" extension).
np.save(file=f"{data_path}/mfcc_40", arr=data)
np.save(file=f"{data_path}/labels_40", arr=labels)

In [None]:
# Persist the normalised, padded features next to the un-normalised ones.
np.save(file=f"{data_path}/mfcc_normalized_40", arr=data_n)
