# In [4]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Input
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

# Function to extract MFCC features
def extract_mfcc(file_path, n_mfcc=13):
    """Return one time-averaged MFCC vector for an audio file.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to compute per frame.

    Returns:
        The mean of the MFCC matrix taken over its frame axis, i.e. one
        value per coefficient.
    """
    # sr=None asks librosa to keep the file's own sampling rate instead of
    # resampling it.
    signal, native_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=native_rate, n_mfcc=n_mfcc)
    # Transposing puts frames on axis 0, so the mean collapses the time axis.
    return np.mean(coefficients.T, axis=0)

# Process dataset and extract MFCC features
def process_dataset(directory_path):
    """Build a table of MFCC features for every speaker recording.

    ``directory_path`` is expected to contain one sub-directory per speaker.
    Each file whose name ends with ``.wav`` inside a speaker directory
    becomes one row. Entries that are not directories, and directories whose
    name starts with ``_``, are skipped.

    Directory listings are sorted because ``os.listdir`` returns entries in
    arbitrary order; a fixed row order keeps any seeded shuffle or split
    applied to the result reproducible across machines.

    Args:
        directory_path: Root directory of the dataset.

    Returns:
        A ``pandas.DataFrame`` with one integer-named column per MFCC
        coefficient plus a ``'label'`` column holding the speaker directory
        name. When no recording is found the frame has no rows and only the
        ``'label'`` column.
    """
    mfcc_features = []
    labels = []

    for speaker in sorted(os.listdir(directory_path)):
        speaker_path = os.path.join(directory_path, speaker)
        if speaker.startswith('_') or not os.path.isdir(speaker_path):
            continue

        for file_name in sorted(os.listdir(speaker_path)):
            if not file_name.endswith('.wav'):
                continue
            file_path = os.path.join(speaker_path, file_name)
            mfcc_features.append(extract_mfcc(file_path))
            labels.append(speaker)

    df = pd.DataFrame(mfcc_features)
    df['label'] = labels
    return df

# Dataset path
dataset_path = "16000_pcm_speeches"

# Process dataset and encode labels
df = process_dataset(dataset_path)
if df.empty:
    # Fail early and name the path, rather than surfacing a less specific
    # error from the encoder or the split below.
    raise ValueError(f"No .wav recordings found under {dataset_path!r}")
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

# Save label classes for later (needed to map predicted indices back to names)
np.save('label_encoder_classes.npy', label_encoder.classes_)

# Split data into train and test sets
X = df.drop('label', axis=1)
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape data for CNN input: (samples, steps, channels).
# The step count is read from the feature table instead of being hard-coded,
# so it stays correct if the number of extracted MFCCs changes.
n_features = X.shape[1]
X_train_cnn = X_train.values.reshape(len(X_train), n_features, 1)
X_test_cnn = X_test.values.reshape(len(X_test), n_features, 1)

# Save test data for later
np.save("X_test_cnn.npy", X_test_cnn)
np.save("y_test.npy", y_test)

# Convert labels to categorical (one-hot) format
total_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(y_train, num_classes=total_classes)
y_test_categorical = to_categorical(y_test, num_classes=total_classes)

# Build CNN model
def build_cnn_model(input_shape, num_classes):
    """Assemble and compile the 1-D convolutional classifier.

    The network stacks three ``Conv1D`` stages (32, 64 and 128 filters; the
    first two each followed by max pooling), flattens the result, and
    finishes with a 128-unit dense layer, dropout, and a softmax output.

    Args:
        input_shape: Shape of a single sample, passed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Every convolution shares these settings. padding='same' stops the
    # convolutions from shortening the sequence, which matters for short inputs.
    conv_kwargs = dict(kernel_size=3, activation='relu', padding='same')

    layers = [
        Input(shape=input_shape),
        # First convolutional block
        Conv1D(filters=32, **conv_kwargs),
        MaxPooling1D(pool_size=2, padding='same'),
        # Second convolutional block
        Conv1D(filters=64, **conv_kwargs),
        MaxPooling1D(pool_size=2, padding='same'),
        # Third convolutional block (no pooling)
        Conv1D(filters=128, **conv_kwargs),
        # Classifier head
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model




# Train and save model
# The input shape is taken from the prepared training array so the model
# always matches the feature layout actually fed to it.
cnn_model = build_cnn_model(X_train_cnn.shape[1:], total_classes)
# NOTE: the test split doubles as validation data here, so the validation
# curves and the final test metrics below describe the same samples.
history = cnn_model.fit(
    X_train_cnn,
    y_train_categorical,
    epochs=15,
    batch_size=32,
    validation_data=(X_test_cnn, y_test_categorical),
)
cnn_model.save("cnn_model.keras")  # Save model in .keras format

# Evaluate the model
loss, accuracy = cnn_model.evaluate(X_test_cnn, y_test_categorical)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# Plot training history: one panel per metric, training vs. validation curve
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, metric, title in zip(axes, ('loss', 'accuracy'), ('Loss', 'Accuracy')):
    ax.plot(history.history[metric], label=f'Training {title}')
    ax.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.show()

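# Recorded notebook output. Presumably from an earlier run in which the
# Conv1D/MaxPooling1D layers used the default padding='valid': 13 steps then
# shrink to 11 -> 5 -> 3 -> 1 before the third convolution. With padding='same'
# as in build_cnn_model above, the third convolution receives 4 steps and this
# error does not occur.
# ValueError: Computed output size would be negative. Received `inputs shape=(None, 1, 64)`, `kernel shape=(3, 64, 128)`, `dilation_rate=[1]`.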