In [None]:
# Mount Google Drive at /content/drive so that later cells can read the
# dataset from, and write results to, paths under that directory.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import librosa
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder


In [None]:
data_path = '/content/drive/MyDrive/Envision2024/data'


def load_data(dataset_folder, normalize=True, n_mels=128):
    """Load every recording under ``dataset_folder`` as a padded log-mel spectrogram.

    ``dataset_folder`` is expected to contain one sub-directory per class;
    the sub-directory name is used as the label of every file inside it.
    Entries of ``dataset_folder`` that are not directories, and entries of a
    class directory that are not regular files, are skipped.

    Parameters
    ----------
    dataset_folder : str
        Root directory holding one sub-directory per class.
    normalize : bool, default True
        When true, each dB spectrogram is passed through
        ``librosa.util.normalize`` (called with its default arguments).
    n_mels : int, default 128
        Number of mel bands requested from ``librosa.feature.melspectrogram``.

    Returns
    -------
    data : list of numpy.ndarray
        One array per file, shaped ``(max_length, n_mels)``. Shorter
        recordings are zero-padded at the end of the time axis.
    labels : list of str
        Class-directory name for each entry of ``data`` (same order).
    max_length : int
        Largest number of time frames over all files, ``0`` if none were found.

    Notes
    -----
    Files are visited in ``os.listdir`` order, which is not guaranteed to be
    sorted.
    """
    data = []
    labels = []

    for species_folder in os.listdir(dataset_folder):
        species_path = os.path.join(dataset_folder, species_folder)
        if not os.path.isdir(species_path):
            continue

        for filename in os.listdir(species_path):
            audio_path = os.path.join(species_path, filename)
            # Nested directories cannot be decoded as audio; skip them
            # instead of letting the loader fail on them.
            if not os.path.isfile(audio_path):
                continue

            # sr=None keeps each file's native sampling rate.
            signal, sr = librosa.load(audio_path, sr=None)
            mel = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels)
            # melspectrogram yields a *power* spectrogram by default, so the
            # matching dB conversion is power_to_db; amplitude_to_db would
            # square the values again and double every dB figure.
            mel_db = librosa.power_to_db(mel)

            if normalize:
                mel_db = librosa.util.normalize(mel_db)

            # Transpose to (time_steps, n_mels).
            data.append(mel_db.T)
            labels.append(species_folder)

    # Pad once, after every file has been read: the final maximum length is
    # only known here, and padding inside the loop re-scanned all previously
    # stored arrays for every new file.
    max_length = max((item.shape[0] for item in data), default=0)
    # np.pad keeps each array's own dtype while appending zero rows.
    data = [
        np.pad(item, ((0, max_length - item.shape[0]), (0, 0)), mode="constant")
        for item in data
    ]

    return data, labels, max_length





In [None]:
# Extract features for every recording under data_path, requesting 13 mel
# bands per frame; normalize is left at load_data's default (True).
data_n, labels, max_length = load_data(data_path,n_mels = 13)
# Show the first label as a quick sanity check of the load.
labels[0]

'Dicrurus andamanensis'

In [None]:
# Convert the Python lists returned by load_data into NumPy arrays.
# data_n stacks into a single 3-D array because load_data pads every item
# to the same number of time steps.
labels = np.array(labels)
data = np.array(data_n)
# Display the stacked shape (the recorded output below is (129, 15028, 13)).
data.shape

(129, 15028, 13)

In [None]:
# Persist the features and labels inside the dataset folder on Drive.
# np.save appends ".npy" when the file name does not already end with it,
# so these calls write mel_n.npy and labels_1.npy.
np.save(f"{data_path}/mel_n", data)
np.save(f"{data_path}/labels_1", labels)

In [None]:
# Also dump the features as comma-separated text. The path is relative, so
# the file lands in the current working directory rather than under data_path.
# NOTE(review): ndarray.tofile writes the values as one flat stream and stores
# no shape information — confirm that whatever reads this file knows how to
# reshape it, or that this export is still needed alongside the .npy files.
data.tofile('mel_n.csv', sep = ',')

In [None]:
# Display the full label array to inspect which species were loaded.
labels

array(['Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Dicrurus andamanensis', 'Dicrurus andamanensis',
       'Alcippe cinerea', 'Alcippe cinerea', 'Alcippe cinerea',
       'Alcippe cinerea', 'Alcippe cinerea', 'Alcippe cinerea',
       'Alcippe cinerea', 'Alcippe cinerea', 'Alcippe cinerea',
       'Alcippe cinerea', 'Alcippe cinerea', 'Alcippe cinerea',
       'Alcippe cinerea', 'Alcippe cinerea', 'Alcippe cinerea',
       'Alcippe cinerea', 'Alcippe cinerea', 'Alcippe cinerea',
       'Alcippe cinerea', 'Alcippe cinerea', '