# Using SVM


In [4]:
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import pywt
import IPython.display as ipd
import librosa
import librosa.display
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report


%matplotlib inline



mfccs=librosa.feature.mfcc(y=librosa_audio_data,sr=librosa_sample_rate,n_mfcc=40)
print(mfccs.shape)


mfccs


metadata.head(10)



# Define the animal classes
animal_classes = ['cat', 'dog']

# Define the wavelet family and level
wavelet_family = 'db4'
level = 4

# Define the feature vector and label list
feature_vectors = []
labels = []
animal_class_path1 = "../pgm/datasets/cats_dogs/train/"

# Iterate through each animal class
for animal_class in animal_classes:
    # Get the path to the animal class directory
    animal_class_path = animal_class_path1 + animal_class
    # Iterate through each audio file in the directory
    for audio_file in os.listdir(animal_class_path):
        # Load the audio data
        audio_data, _ = librosa.load(os.path.join(animal_class_path, audio_file), sr=44100)
        # Apply wavelet transform
        cA4, cD4, cD3, cD2, cD1 = pywt.wavedec(audio_data, wavelet_family, level=level)
        # Extract statistical features from the wavelet coefficients, including median
        feature_vector = [np.mean(cA4), np.std(cA4), np.median(cA4),
                          np.mean(cD4), np.std(cD4), np.median(cD4),
                          np.mean(cD3), np.std(cD3), np.median(cD3),
                          np.mean(cD2), np.std(cD2), np.median(cD2),
                          np.mean(cD1), np.std(cD1), np.median(cD1)]
        # Append the feature vector and label to the lists
        feature_vectors.append(feature_vector)
        labels.append(animal_class)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Train the SVM classifier
svm = SVC(kernel='linear', C=1, gamma='auto')
svm.fit(X_train, y_train)

# Test the SVM classifier
y_pred = svm.predict(X_test)

# Print the classification report
print(classification_report(y_test, y_pred))

# Testing a sample audio file
test_audio_file = "../pgm/datasets/cats_dogs/test/cats/cat_42.wav"

test_audio_data, _ = librosa.load(test_audio_file, sr=44100)

cA4, cD4, cD3, cD2, cD1 = pywt.wavedec(test_audio_data, wavelet_family, level=level)

test_feature_vector = [np.mean(cA4), np.std(cA4), np.median(cA4),
                        np.mean(cD4), np.std(cD4), np.median(cD4),
                        np.mean(cD3), np.std(cD3), np.median(cD3),
                        np.mean(cD2), np.std(cD2), np.median(cD2),
                        np.mean(cD1), np.std(cD1), np.median(cD1)]

predicted_class = svm.predict([test_feature_vector])

print(f"The predicted class for the test audio is: {predicted_class[0]}")

(40, 500)
              precision    recall  f1-score   support

         cat       0.62      1.00      0.76        21
         dog       1.00      0.38      0.55        21

    accuracy                           0.69        42
   macro avg       0.81      0.69      0.66        42
weighted avg       0.81      0.69      0.66        42

The predicted class for the test audio is: cat


In [5]:
# Import necessary libraries
import os
import numpy as np
import pywt
import librosa
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Define the animal classes
animal_classes = ['cat', 'dog']

# Define the wavelet family and level
wavelet_family = 'db4'
level = 4

# Define the feature vector and label list
feature_vectors = []
labels = []
animal_class_path1 = "../pgm/datasets/cats_dogs/train/"

# Feature selection (you can customize this)
selected_coefficients = [0, 2, 4, 6, 8]  # Select some coefficients from the wavelet transform

# Iterate through each animal class
for animal_class in animal_classes:
    animal_class_path = os.path.join(animal_class_path1, animal_class)
    for audio_file in os.listdir(animal_class_path):
        audio_data, _ = librosa.load(os.path.join(animal_class_path, audio_file), sr=44100)
        cA4, cD4, cD3, cD2, cD1 = pywt.wavedec(audio_data, wavelet_family, level=level)
        feature_vector = [np.mean(cA4[i]) for i in selected_coefficients] + [np.mean(cD4[i]) for i in selected_coefficients]
        labels.append(animal_class)
        feature_vectors.append(feature_vector)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Train the SVM classifier with optimized hyperparameters
svm = SVC(kernel='rbf', C=1.0, gamma='scale')  # Tune hyperparameters here
svm.fit(X_train, y_train)

# Test the SVM classifier
y_pred = svm.predict(X_test)

# Print the classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

         cat       0.50      1.00      0.67        21
         dog       0.00      0.00      0.00        21

    accuracy                           0.50        42
   macro avg       0.25      0.50      0.33        42
weighted avg       0.25      0.50      0.33        42



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Using Random forest

In [6]:
# Import necessary libraries
import os
import numpy as np
import pywt
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Define the animal classes
animal_classes = ['cat', 'dog']

# Define the wavelet family and level
wavelet_family = 'db4'
level = 4

# Define the feature vector and label list
feature_vectors = []
labels = []
animal_class_path1 = "../pgm/datasets/cats_dogs/train/"

# Feature selection: Extract relevant wavelet coefficients
selected_coefficients = [0, 2, 4, 6, 8]

# Iterate through each animal class
for animal_class in animal_classes:
    animal_class_path = os.path.join(animal_class_path1, animal_class)
    for audio_file in os.listdir(animal_class_path):
        audio_data, _ = librosa.load(os.path.join(animal_class_path, audio_file), sr=44100)
        cA4, cD4, cD3, cD2, cD1 = pywt.wavedec(audio_data, wavelet_family, level=level)
        feature_vector = [np.mean(cA4[i]) for i in selected_coefficients] + [np.mean(cD4[i]) for i in selected_coefficients]
        labels.append(animal_class)
        feature_vectors.append(feature_vector)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Train a Random Forest Classifier with optimized hyperparameters
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)  # Adjust hyperparameters as needed
rf_classifier.fit(X_train, y_train)

# Test the Random Forest Classifier
y_pred = rf_classifier.predict(X_test)

# Print the classification report
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

         cat       0.80      0.95      0.87        21
         dog       0.94      0.76      0.84        21

    accuracy                           0.86        42
   macro avg       0.87      0.86      0.86        42
weighted avg       0.87      0.86      0.86        42



# Using CNN

In [9]:
pip install keras tensorflow


Note: you may need to restart the kernel to use updated packages.


In [12]:
import os
import numpy as np
import pywt
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.utils import to_categorical

# Define the animal classes
animal_classes = ['cat', 'dog']

# Define the wavelet family and level
wavelet_family = 'db4'
level = 4

# Define the feature vector and label list
feature_vectors = []
labels = []
animal_class_path1 = "../pgm/datasets/cats_dogs/train/"

# Feature selection: Extract relevant wavelet coefficients
selected_coefficients = [0, 2, 4, 6, 8]

# Iterate through each animal class
for animal_class in animal_classes:
    animal_class_path = os.path.join(animal_class_path1, animal_class)
    for audio_file in os.listdir(animal_class_path):
        audio_data, _ = librosa.load(os.path.join(animal_class_path, audio_file), sr=44100)
        cA4, cD4, cD3, cD2, cD1 = pywt.wavedec(audio_data, wavelet_family, level=level)
        feature_vector = [np.mean(cA4[i]) for i in selected_coefficients] + [np.mean(cD4[i]) for i in selected_coefficients]
        labels.append(animal_class)
        feature_vectors.append(feature_vector)

# Convert labels to numerical values
label_mapping = {label: i for i, label in enumerate(animal_classes)}
y = np.array([label_mapping[label] for label in labels])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, y, test_size=0.2, random_state=42)

# Reshape feature vectors for input to CNN
X_train = np.array(X_train).reshape(-1, len(selected_coefficients) * 2, 1)
X_test = np.array(X_test).reshape(-1, len(selected_coefficients) * 2, 1)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=len(animal_classes))

# Build a CNN model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 1), activation='relu', input_shape=(len(selected_coefficients) * 2, 1, 1)))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(len(animal_classes), activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=16, verbose=1, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)

# Print the classification report
print(classification_report(y_test, y_pred))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.50      1.00      0.67        21
           1       0.00      0.00      0.00        21

    accuracy                           0.50        42
   macro avg       0.25      0.50      0.33        42
weighted avg       0.25      0.50      0.33        42



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


we can say that CNN doesnt work well for Audio classification and categotization