In [None]:
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
def extract_features(file_path, max_pad_len=100):
    """Load an audio file and return its MFCCs as a fixed-width 2-D array.

    The clip is decoded with ``librosa.load`` and converted to 40 MFCC
    coefficients per frame. The frame axis is then forced to exactly
    ``max_pad_len`` columns: clips with fewer frames are zero-padded on the
    right, clips with more frames are truncated.

    Args:
        file_path: Path of the audio file to read.
        max_pad_len: Number of frames (columns) in the returned array.

    Returns:
        A NumPy array of shape ``(40, max_pad_len)``, or ``None`` when the
        file could not be loaded or processed (the failure is printed).
    """
    try:
        # Load the audio file
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')

        # Extract MFCC features from the audio
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

        # Clips longer than max_pad_len frames would give a negative pad
        # width, which np.pad rejects; truncate them instead of dropping them.
        mfccs = mfccs[:, :max_pad_len]

        # Zero-pad the frame axis on the right up to max_pad_len columns
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

        return mfccs
    except Exception as e:
        # Best-effort: report which file failed and why, then signal failure
        # to the caller with None rather than aborting a whole batch.
        print(f"Error encountered while parsing file: {file_path} ({type(e).__name__}: {e})")
        return None

In [None]:
# Define the CNN model
def create_cnn_model(input_shape, num_classes):
    """Assemble an uncompiled Keras ``Sequential`` CNN classifier.

    The network is three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, followed by Flatten, Dense(128, ReLU),
    Dropout(0.5) and a softmax Dense layer with ``num_classes`` units.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv2D layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The ``Sequential`` model; compiling it is left to the caller.
    """
    network = models.Sequential()

    # Convolutional feature extractor: one conv + pool pair per filter count
    for stage, filters in enumerate((32, 64, 128)):
        # Only the very first layer is told the input shape
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        network.add(
            layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs)
        )
        network.add(layers.MaxPooling2D((2, 2)))

    # Classifier head on top of the flattened feature maps
    head = (
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    )
    for layer in head:
        network.add(layer)

    return network

In [None]:
# Example usage
file_path = 'path_to_audio_file.wav'
mfccs = extract_features(file_path)
if mfccs is None:
    # extract_features signals failure with None; stop with a clear message
    # instead of passing None on to NumPy and Keras.
    raise ValueError(f"Could not extract MFCC features from {file_path}")
mfccs = np.expand_dims(mfccs, axis=-1)  # Add channel dimension

# Parameters
# Taken from the extracted sample so the model input matches what
# extract_features produces: (n_mfcc, max_pad_len, 1).
input_shape = mfccs.shape
num_classes = 12  # 10 words + "is_unknown" + "_silence_"

# Create the model
model = create_cnn_model(input_shape, num_classes)

# Compile the model
# NOTE(review): categorical_crossentropy expects one-hot encoded labels;
# confirm y_train / y_test are encoded that way.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# cells visible here; they must come from a data-preparation step, with
# samples shaped like input_shape. The test split doubles as validation data.
history = model.fit(X_train, y_train, epochs=3, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

# Save the model
model.save('audio_classification_cnn.h5')