<a href="https://colab.research.google.com/github/Melo567/3d/blob/main/Audio_Classification_using_Deep_Learning_and_TensorFlow_A_Step_by_Step_Guide.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.image import resize
from tensorflow.keras.models import load_model

# Step 1: Describe where the dataset lives and which classes it contains.
# NOTE: this is a relative path that begins inside the Drive mount folder, so
# Google Drive must already be mounted (see the drive.mount cell) and the
# working directory is presumably /content -- confirm before running.
data_dir = 'drive/MyDrive/dunstan'

# Class names; the position of a name in this list is used as its integer label.
classes = [
    'alone',
    'brup',
    'colic',
    'discomfort',
    'hungry',
    'pond',
    'sleep',
    'thirsty',
    'tooth',
]

# Step 2: Load and preprocess audio data
def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    """Load every WAV file of every class as a fixed-size Mel spectrogram.

    Args:
        data_dir: Root folder that contains one sub-folder per class.
        classes: Sequence of class (sub-folder) names. A file's label is the
            index of its class in this sequence.
        target_shape: (height, width) every spectrogram is resized to.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data`` stacks the resized
        spectrograms (each with a trailing channel axis of size 1) and
        ``labels`` holds the matching integer class indices.

    Raises:
        FileNotFoundError: If a class sub-folder does not exist (raised by
            ``os.listdir``).
    """
    data = []
    labels = []

    for label, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order stable so a seeded train/test split is reproducible.
        for filename in sorted(os.listdir(class_dir)):
            # Match the extension case-insensitively so '.WAV' files are kept.
            if not filename.lower().endswith('.wav'):
                continue

            file_path = os.path.join(class_dir, filename)
            # sr=None keeps the file's native sampling rate.
            audio_data, sample_rate = librosa.load(file_path, sr=None)

            # Mel spectrogram -> add a channel axis -> resize to a common shape.
            mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
            mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)

            # Convert to NumPy right away so the final stacking is a plain
            # array-of-arrays operation rather than a list of tensors.
            data.append(np.asarray(mel_spectrogram))
            labels.append(label)

    return np.array(data), np.array(labels)

# Seed Python, NumPy and TensorFlow random generators so weight initialisation
# and batch shuffling are repeatable from one run to the next.
tf.keras.utils.set_random_seed(42)

# Step 3: Split data into training and testing sets
data, labels = load_and_preprocess_data(data_dir, classes)
labels = to_categorical(labels, num_classes=len(classes))  # Convert labels to one-hot encoding
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Step 4: Create a neural network model
# Two conv/pool stages followed by a small dense head; the input shape is
# taken from the data so it follows whatever size the spectrograms were resized to.
input_shape = X_train[0].shape
input_layer = Input(shape=input_shape)
x = Conv2D(32, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(len(classes), activation='softmax')(x)  # one probability per class
model = Model(input_layer, output_layer)

# Step 5: Compile the model
# categorical_crossentropy matches the one-hot labels produced above.
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Step 6: Train the model
# NOTE: the test split doubles as validation data here, so any later score on
# X_test is computed on data that was already monitored during training.
# The History object is kept so the learning curves can be inspected later
# (and so the cell does not end by echoing its repr).
history = model.fit(X_train, y_train, epochs=200, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 481ms/step - accuracy: 0.1417 - loss: 3.8163 - val_accuracy: 0.2727 - val_loss: 24.5221
Epoch 2/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 235ms/step - accuracy: 0.6292 - loss: 2.3123 - val_accuracy: 0.1818 - val_loss: 22.8129
Epoch 3/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 433ms/step - accuracy: 0.9229 - loss: 0.3966 - val_accuracy: 0.2727 - val_loss: 20.6597
Epoch 4/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 463ms/step - accuracy: 0.9187 - loss: 0.3616 - val_accuracy: 0.1818 - val_loss: 18.4231
Epoch 5/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 253ms/step - accuracy: 0.8917 - loss: 0.3272 - val_accuracy: 0.1818 - val_loss: 17.7888
Epoch 6/200
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 237ms/step - accuracy: 0.9187 - loss: 0.2953 - val_accuracy: 0.1818 - val_loss: 18.4859
Epoch 7/200
[1m2/2[0m [32

<keras.src.callbacks.history.History at 0x78fe6bd55780>

In [2]:
# Mount Google Drive so its contents are available under /content/drive.
# The dataset path used by the data-loading cell starts with 'drive/', so this
# cell has to be executed before that one; its execution count (In [2]) shows it
# was run first even though it appears later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Score the model on the held-out split. With the loss plus the single
# 'accuracy' metric configured at compile time, evaluate() returns
# [loss, accuracy], so index 1 is the accuracy.
test_accuracy = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print(test_accuracy[1])

# Step 7: Persist the trained model in the native Keras format
model.save('dunstan_classification_model.keras')


0.1818181872367859


In [16]:
# Spectrogram size (height, width) fed to the network at inference time
target_shape = (128, 128)

# Label names, indexed by the class position the model predicts
classes = [
    'alone',
    'brup',
    'colic',
    'discomfort',
    'hungry',
    'pond',
    'sleep',
    'thirsty',
    'tooth',
]

# Restore the trained network from the .keras file saved earlier in this notebook
model = load_model('dunstan_classification_model.keras')

# Function to preprocess and classify an audio file
def test_audio(file_path, model):
    # Load and preprocess the audio file
    audio_data, sample_rate = librosa.load(file_path, sr=None)
    mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
    mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
    mel_spectrogram = tf.reshape(mel_spectrogram, (1,) + target_shape + (1,))

    # Make predictions
    predictions = model.predict(mel_spectrogram)

    # Get the class probabilities
    class_probabilities = predictions[0]

    # Get the predicted class index
    predicted_class_index = np.argmax(class_probabilities)

    return class_probabilities, predicted_class_index

# Test an audio file
test_audio_file = 'sample_data/gas_colic.wav'
class_probabilities, predicted_class_index = test_audio(test_audio_file, model)

# Display results for all classes
for i, class_label in enumerate(classes):
    probability = class_probabilities[i]
    print(f'Class: {class_label}, Probability: {probability:.4f}')

# Report the predicted class together with the model's confidence in it.
# The value is the softmax probability of the winning class for this single
# file -- a confidence score, not an accuracy (accuracy needs many labelled
# samples), so it is labelled accordingly.
predicted_class = classes[predicted_class_index]
confidence = class_probabilities[predicted_class_index]
print(f'The audio is classified as: {predicted_class}')
print(f'Confidence: {confidence:.4f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
Class: alone, Probability: 0.0000
Class: brup, Probability: 1.0000
Class: colic, Probability: 0.0000
Class: discomfort, Probability: 0.0000
Class: hungry, Probability: 0.0000
Class: pond, Probability: 0.0000
Class: sleep, Probability: 0.0000
Class: thirsty, Probability: 0.0000
Class: tooth, Probability: 0.0000
The audio is classified as: brup
Accuracy: 1.0000


In [14]:
# Read the trained Keras model back from disk
model = load_model('dunstan_classification_model.keras')

# Build a TFLite converter from the loaded model object and run the conversion;
# the result is the serialized TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode
with open('dunstan_classification_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)

Saved artifact at '/tmp/tmpfq4c_x8i'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 128, 128, 1), dtype=tf.float32, name='input_layer')
Output Type:
  TensorSpec(shape=(None, 9), dtype=tf.float32, name=None)
Captures:
  133034125116864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034138445856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034134152896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034134157296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034135681008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034137380896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034137383360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  133034137376848: TensorSpec(shape=(), dtype=tf.resource, name=None)
