<a href="https://colab.research.google.com/github/Anirudh-p1107/Animal_sound/blob/main/index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Mount Google Drive at /content/drive so the dataset stored there can be read
from google.colab import drive
drive.mount('/content/drive')

# Install the third-party packages this notebook relies on
# (a leading "!" runs the line as a shell command from the notebook)
!pip install librosa
!pip install soundfile
!pip install tensorflow

# importing all the necessary libraries
import numpy as np
import os
import librosa
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight

# Seed the TensorFlow and NumPy random generators so repeated runs are comparable
tf.random.set_seed(42)
np.random.seed(42)

# Dataset location and feature-extraction settings
DATASET_PATH = "/content/drive/MyDrive/Train"
N_MELS = 128         # number of mel bands per spectrogram
MAX_PAD_LEN = 174    # spectrograms are padded/truncated to this many frames
SAMPLE_RATE = 22050  # sampling rate (Hz) requested when loading audio

# Mel spectrogram
def extract_mel_spectrogram(file_path):
    """Load an audio file and return a fixed-size, normalised log-mel spectrogram.

    The clip is loaded at SAMPLE_RATE, trimmed with ``librosa.effects.trim``
    (``top_db=20``), converted to an N_MELS-band mel spectrogram in decibels,
    min-max scaled to the range [0, 1], and finally zero-padded or truncated
    along the time axis to exactly MAX_PAD_LEN frames.

    Args:
        file_path: Path of the audio file to read.

    Returns:
        A float array of shape (N_MELS, MAX_PAD_LEN), or None when the file
        could not be loaded or processed (the reason is printed).
    """
    try:
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE)
        y, _ = librosa.effects.trim(y, top_db=20)
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS, n_fft=2048, hop_length=256, fmax=8000)
        # The small offset keeps the log finite for all-zero (silent) frames.
        mel_spec_db = librosa.power_to_db(mel_spec + 1e-9, ref=np.max)
        db_min, db_max = np.min(mel_spec_db), np.max(mel_spec_db)
        # The epsilon avoids division by zero for a constant spectrogram.
        mel_spec_norm = (mel_spec_db - db_min) / (db_max - db_min + 1e-9)
        # Copy at most MAX_PAD_LEN frames; shorter clips keep trailing zeros.
        n_frames = min(MAX_PAD_LEN, mel_spec_norm.shape[1])
        mel_spec_padded = np.zeros((N_MELS, MAX_PAD_LEN))
        mel_spec_padded[:, :n_frames] = mel_spec_norm[:, :n_frames]
        return mel_spec_padded
    except Exception as exc:
        # Best-effort: an unreadable or corrupt file is reported and skipped.
        # Catching Exception (not a bare except) lets Ctrl-C still interrupt.
        print(f"Skipping {file_path}: {exc}")
        return None

# Data Processing
# X collects one spectrogram per clip, y the matching class index, and
# labels maps each class index to its folder (animal) name.
X, y, labels = [], [], {}

# Number only real sub-directories, in sorted order. Stray files in
# DATASET_PATH then cannot leave gaps in the class indices (which would break
# the one-hot encoding below), and the index -> name mapping no longer depends
# on the arbitrary order returned by os.listdir.
class_names = sorted(
    name for name in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, name))
)
for idx, animal in enumerate(class_names):
    labels[idx] = animal
    animal_folder = os.path.join(DATASET_PATH, animal)
    for file_name in sorted(os.listdir(animal_folder)):
        # Accept .wav files regardless of extension case.
        if not file_name.lower().endswith(".wav"):
            continue
        feature = extract_mel_spectrogram(os.path.join(animal_folder, file_name))
        # Files that could not be processed come back as None and are skipped.
        if feature is not None:
            X.append(feature)
            y.append(idx)

# Convert to numpy arrays
if not X:
    raise ValueError("No valid spectrograms found in dataset.")
# Add a trailing channel axis for the CNN and one-hot encode the class indices.
X = np.array(X)[..., np.newaxis]
y = to_categorical(y, num_classes=len(labels))

# Split the dataset (stratified on the class index so both parts keep the
# same class proportions)
class_ids = np.argmax(y, axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=class_ids)

# Balanced class weights, keyed by the actual class index. Keying by position
# (dict(enumerate(...))) would assign weights to the wrong classes whenever a
# class index has no samples.
present_classes = np.unique(class_ids)
balanced_weights = compute_class_weight('balanced', classes=present_classes, y=class_ids)
class_weight_dict = {int(c): float(w) for c, w in zip(present_classes, balanced_weights)}

# CNN Model
def create_model(input_shape, num_classes):
    """Build and compile the spectrogram classifier.

    The network is four Conv2D -> BatchNormalization -> MaxPooling2D stages
    (32, 64, 128 and 256 filters) followed by a dense head ending in a
    softmax over ``num_classes``.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled Sequential model (Adam at 0.0001, categorical
        cross-entropy loss, accuracy metric).
    """
    stack = [Input(shape=input_shape)]

    # Convolutional feature extractor: one stage per filter count.
    for n_filters in (32, 64, 128, 256):
        stack.append(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        stack.append(BatchNormalization())
        stack.append(MaxPooling2D((2, 2)))

    # Dense classification head.
    stack.extend([
        Flatten(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax'),
    ])

    network = Sequential(stack)
    network.compile(
        optimizer=tf.keras.optimizers.Adam(0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

model = create_model((N_MELS, MAX_PAD_LEN, 1), len(labels))

# Training callbacks:
#  - scale the learning rate by 0.5 when val_loss plateaus (patience 5, floor 1e-5)
#  - stop when val_accuracy has not improved for 15 epochs, restoring the best weights
#  - save the model with the best val_accuracy to best_model.keras
lr_on_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-5, verbose=1)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, verbose=1)
best_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, verbose=1)

model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_data=(X_test, y_test),
    class_weight=class_weight_dict,
    callbacks=[lr_on_plateau, early_stopping, best_checkpoint],
    verbose=1,
)

# Persist the model as it stands after training.
model.save("animal_audio.keras")


# Predictions
def predict_animal(AUDIO_FILE):
    """Predict the animal class for an audio file and print the result.

    Args:
        AUDIO_FILE: Path of the audio clip to classify.

    Raises:
        ValueError: If no spectrogram could be extracted from the file.
    """
    # Convert audio to Mel spectrogram
    spectrogram = extract_mel_spectrogram(AUDIO_FILE)
    if spectrogram is None:
        # Fail with a clear message instead of an opaque TypeError on indexing.
        raise ValueError(f"Could not extract a spectrogram from {AUDIO_FILE!r}.")

    # Add batch and channel axes for the CNN:
    # (N_MELS, MAX_PAD_LEN) -> (1, N_MELS, MAX_PAD_LEN, 1)
    spectrogram = spectrogram[np.newaxis, ..., np.newaxis]

    # Make prediction; the highest-scoring class index is looked up in labels.
    predictions = model.predict(spectrogram)
    predicted_label = int(np.argmax(predictions))

    print(f"Predicted Animal: {labels[predicted_label]} (Confidence: {predictions[0][predicted_label]:.2f})")

                                                            # output
# Classify each sample clip in turn; the trailing comment on every path is
# the output noted for it.
for AUDIO_FILE in (
    "/content/drive/MyDrive/Test/Lion_9.wav",     # lion
    "/content/drive/MyDrive/Test/Monkey_51.wav",  # monkey
    "/content/drive/MyDrive/Test/Sheep_10.wav",   # sheep
    "/content/drive/MyDrive/Test/Cow_51.wav",     # cow
    "/content/drive/MyDrive/Test/Horse_51.wav",   # horse
):
    predict_animal(AUDIO_FILE)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/50
24/25 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - accuracy: 0.2242 - loss: 3.0049
Epoch 1: val_accuracy improved from -inf to 0.10000, saving model to best_model.keras
25/25 ━━━━━━━━━━━━━━━━━━━━ 11s 121ms/step - accuracy: 0.2305 - loss: 2.9642 - val_accuracy: 0.1000 - val_loss: 2.3503 - learning_rate: 1.0000e-04
Epoch 2/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - accuracy: 0.6313 - loss: 1.1245
Epoch 2: val_accuracy did not improve from 0.10000
25/25 ━━━━━━━━━━━━━━━━━━━━ 1s 21ms/step - accuracy: 0.6313 - loss: 1.1263 - val_accuracy: 0.1000 - val_loss: 2.5225 - learning_rate: 1.0000e-04
Epoch 3/50
25/25 ━━━━━━━━━━━━━━━━━━━━ 0s 18ms/step - accuracy: 0.7494 - loss: 0.8253
Epoch 3: val_accura