In [8]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

# Compatibility shim: re-create the `np.complex` attribute as the builtin `complex`.
# NOTE(review): presumably needed because a dependency (likely the installed
# librosa) still references `np.complex`, which newer NumPy releases no longer
# provide — confirm, and drop this line once that dependency is upgraded.
np.complex = complex

# Source folder, and the audio extensions (mp3 / wav) that will be picked up
audio_dir = r"C:\Users\User\Downloads\Animal Sounds 5\Animal Sounds"
supported_exts = ['.mp3', '.wav']

# Training accumulators: one entry is appended to each per loaded file
spectrograms, types, sub_categories = [], [], []

# Parse labels: "Type - Subcategory" OR fallback
def parse_labels_from_filename(filename):
    """Derive a ``(type, sub_category)`` label pair from an audio file name.

    The extension is dropped first. If the remaining stem contains a dash, the
    text before the FIRST dash is the type (stripped and lower-cased, or
    ``'unknown'`` when empty) and everything after it is the sub-category
    (stripped, case preserved). Without a dash, the first whitespace-separated
    word is the type and the remaining words form the sub-category; a stem with
    fewer than two words yields ``('unknown', stem)``.

    Args:
        filename: File name including its extension.

    Returns:
        A ``(type, sub_category)`` tuple of strings.
    """
    stem, _ext = os.path.splitext(filename)

    # Dash form: split once, at the first dash only.
    head, dash, tail = stem.partition('-')
    if dash:
        label_type = head.strip().lower()
        return (label_type or 'unknown'), tail.strip()

    # No dash: fall back to whitespace-separated words.
    words = stem.split()
    if len(words) < 2:
        return 'unknown', stem.strip()
    return words[0].strip().lower(), ' '.join(words[1:]).strip()

# Load audio files and convert them to fixed-size (128 x 128) log-mel spectrograms.
# Files that fail to load or convert are reported and skipped, so the three
# accumulator lists only ever hold entries for successfully processed files.
for fname in os.listdir(audio_dir):
    if any(fname.lower().endswith(ext) for ext in supported_exts):
        path = os.path.join(audio_dir, fname)
        try:
            y, sr = librosa.load(path, sr=22050)
            S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
            S_DB = librosa.power_to_db(S, ref=np.max)

            if S_DB.shape[1] < 128:
                pad = 128 - S_DB.shape[1]
                # With ref=np.max the loudest bin sits at 0 dB and everything
                # else is negative, so padding with the default 0 would append
                # "maximum energy" frames. Pad with the quietest value instead
                # so the padding reads as silence.
                S_DB = np.pad(
                    S_DB,
                    ((0, 0), (0, pad)),
                    mode='constant',
                    constant_values=S_DB.min(),
                )
            elif S_DB.shape[1] > 128:
                # Keep only the first 128 time frames.
                S_DB = S_DB[:, :128]

            # Resolve the labels BEFORE touching any accumulator so that a
            # failure here cannot leave the three lists with different lengths.
            t, sub = parse_labels_from_filename(fname)
            if not t:
                print(f"⚠️ Empty type parsed from: {fname}")

            spectrograms.append(S_DB)
            types.append([t])
            sub_categories.append([sub])

        except Exception as e:
            # Best effort: report the problem file and carry on with the rest.
            print(f"⚠️ Skipped {fname}: {e}")



# Stack the spectrograms into one array and append a trailing channel axis
spectrograms = np.array(spectrograms)[..., np.newaxis]

# Binarize each label set with its own MultiLabelBinarizer
mlb_types = MultiLabelBinarizer()
y_types = mlb_types.fit_transform(types)

mlb_sub_categories = MultiLabelBinarizer()
y_sub_categories = mlb_sub_categories.fit_transform(sub_categories)

print(f"✅ Loaded {len(spectrograms)} spectrograms")
print("Example labels:", types[:3], sub_categories[:3])


✅ Loaded 32 spectrograms
Example labels: [['unknown'], ['unknown'], ['australian']] [['Black-Currawong-Strepera-fuliginosa'], ['Kingfisher-Todiramphus sanctus'], ['Magpie-Gymnorhina-tibicen']]


In [9]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

# Define the CNN model
def create_model(input_shape, num_types, num_sub_categories):
    """Build and compile a two-headed CNN over spectrogram inputs.

    The shared trunk is three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, followed by Flatten, Dense(256, ReLU) and
    Dropout(0.5). Two sigmoid Dense heads branch off the trunk:
    ``type_output`` and ``sub_category_output``.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_types: Number of units in the ``type_output`` head.
        num_sub_categories: Number of units in the ``sub_category_output`` head.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer, binary
        cross-entropy loss and an accuracy metric on each head.
    """
    inputs = Input(shape=input_shape)

    # Convolutional trunk: each stage is a conv followed by 2x2 pooling.
    features = inputs
    for n_filters in (32, 64, 128):
        features = Conv2D(n_filters, (3, 3), activation='relu')(features)
        features = MaxPooling2D((2, 2))(features)

    # Dense bottleneck shared by both heads.
    features = Flatten()(features)
    features = Dense(256, activation='relu')(features)
    features = Dropout(0.5)(features)

    type_output = Dense(num_types, activation='sigmoid', name='type_output')(features)
    sub_category_output = Dense(
        num_sub_categories, activation='sigmoid', name='sub_category_output'
    )(features)

    model = Model(inputs=inputs, outputs=[type_output, sub_category_output])
    per_head_metrics = {'type_output': 'accuracy', 'sub_category_output': 'accuracy'}
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=per_head_metrics)
    return model

# Input shape and class counts that size the network
input_shape = (128, 128, 1)
num_types = len(mlb_types.classes_)
num_sub_categories = len(mlb_sub_categories.classes_)

# Hold out 20% of the samples for validation (fixed seed for a repeatable split)
(
    X_train, X_val,
    y_train_types, y_val_types,
    y_train_sub_categories, y_val_sub_categories,
) = train_test_split(
    spectrograms, y_types, y_sub_categories,
    test_size=0.2, random_state=42,
)

model = create_model(input_shape, num_types, num_sub_categories)

# Targets are keyed by the names of the model's two output layers
train_targets = {
    'type_output': y_train_types,
    'sub_category_output': y_train_sub_categories,
}
val_targets = {
    'type_output': y_val_types,
    'sub_category_output': y_val_sub_categories,
}

# Train the model
history = model.fit(
    X_train,
    train_targets,
    validation_data=(X_val, val_targets),
    epochs=20,
    batch_size=8,
)





Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
import json

# Build one JSON record per audio file in the training set and save it.
audio_files = os.listdir(audio_dir)

# The directory listing also contains non-audio entries (including
# training_data.json itself once this cell has been run). Filter by extension
# BEFORE pairing with the label lists: zipping the raw listing lets every
# non-audio entry consume a label slot and shifts all labels after it.
labelled_files = [
    fname for fname in audio_files
    if any(fname.lower().endswith(ext) for ext in supported_exts)
]

# `types` / `sub_categories` hold one entry per file that was loaded
# successfully. If a file was skipped during loading, or the folder changed
# since then, positional pairing is no longer trustworthy, so refuse to write
# a mislabelled manifest.
if not (len(labelled_files) == len(types) == len(sub_categories)):
    raise ValueError(
        f"Cannot pair files with labels: found {len(labelled_files)} audio files "
        f"but {len(types)} type labels and {len(sub_categories)} sub-category labels. "
        "Re-run the loading cell and check for skipped files."
    )

training_data = [
    {
        "file": fname,
        "type": t_list[0],
        "sub_category": s_list[0]
    }
    for fname, t_list, s_list in zip(labelled_files, types, sub_categories)
]

# Output path
output_path = os.path.join(audio_dir, "training_data.json")

# Save as JSON
with open(output_path, "w") as f:
    json.dump(training_data, f, indent=4)

print(f"✅ Saved training data to: {output_path}")


✅ Saved training data to: C:\Users\User\Downloads\Animal Sounds 5\Animal Sounds\training_data.json
