In [38]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from sklearn.metrics import classification_report
from keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint

In [46]:
def extract_features(filename, list_of_features):
    """Load an audio file and build one flat feature vector from it.

    Each requested feature is computed frame by frame with librosa and then
    averaged over the time axis, so every clip yields a vector whose length
    does not depend on the clip's duration.

    Args:
        filename: Path of the audio file, passed to ``librosa.load``.
        list_of_features: Collection of feature names to include. Recognised
            names are ``'mfcc'`` (13 coefficients), ``'chroma'`` and
            ``'melspectogram'``; the correctly spelled ``'melspectrogram'``
            is accepted as an alias. Unrecognised names are ignored. The
            features are always concatenated in the order listed here.

    Returns:
        A list with the time-averaged values of the requested features, or
        ``None`` when the file could not be loaded or processed, so that
        callers can skip the file instead of aborting a long extraction run.
    """
    # Keep the historical (misspelled) key working while also accepting the
    # correct spelling.
    wants_mel = ('melspectogram' in list_of_features
                 or 'melspectrogram' in list_of_features)

    try:
        audio_data, sample_rate = librosa.load(filename)

        features = []
        if 'mfcc' in list_of_features:
            mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
            # Rows are coefficients and columns are frames: average over frames.
            features.extend(np.mean(mfccs, axis=1))
        if 'chroma' in list_of_features:
            chroma = librosa.feature.chroma_stft(y=audio_data, sr=sample_rate)
            features.extend(np.mean(chroma, axis=1))
        if wants_mel:
            melspec = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
            features.extend(np.mean(melspec, axis=1))
    except Exception as exc:
        # One unreadable or corrupt file should not abort the whole dataset
        # scan; report it and let the caller skip it.
        print(f"Skipping {filename}: {exc}")
        return None

    return features

In [45]:
def load_data(test_size=0.2,
              data_dir="/content/drive/MyDrive/Audio_Speech_Actors_01-24"):
    """Build the feature matrix and one-hot labels, then split train/test.

    Every ``.wav`` file below ``data_dir`` is converted to a feature vector
    with ``extract_features``. The label of a file is the third
    dash-separated field of its file name.

    Args:
        test_size: Fraction of the samples placed in the test split
            (forwarded to ``train_test_split``).
        data_dir: Root directory that is searched recursively for ``.wav``
            files. Defaults to the path previously hard-coded in this
            function.

    Returns:
        The result of ``train_test_split``: ``X_train, X_test, y_train,
        y_test``, where the ``y`` arrays are one-hot encoded.

    Raises:
        ValueError: If no usable ``.wav`` file was found under ``data_dir``.
    """
    list_of_features = ['mfcc', 'chroma', 'melspectogram']
    features = []
    labels = []

    for root, dirs, files in os.walk(data_dir):
        # os.walk yields entries in filesystem order; sorting both levels
        # makes the sample order, and therefore the seeded split below,
        # reproducible across machines.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.wav'):
                continue

            name_fields = file.split('-')
            if len(name_fields) < 3:
                # No label field to read; skip instead of raising IndexError.
                print(f"Skipping {file}: unexpected file name format")
                continue

            feature = extract_features(os.path.join(root, file), list_of_features)
            if feature is not None:
                features.append(feature)
                labels.append(name_fields[2])

    if not features:
        raise ValueError(f"No usable .wav files found under {data_dir!r}")

    X = np.array(features)
    y = to_categorical(LabelEncoder().fit_transform(np.array(labels)))

    return train_test_split(X, y, test_size=test_size, random_state=42)

In [41]:
def build_model(input_shape, num_classes):
    """Create and compile the dense classifier.

    Args:
        input_shape: Number of input features per sample (an integer; it is
            wrapped into a one-element shape tuple for the first layer).
        num_classes: Number of output classes, i.e. the width of the softmax
            layer.

    Returns:
        A compiled ``Sequential`` model: Dense(300, relu) -> Dropout(0.5) ->
        Dense(num_classes, softmax), using categorical cross-entropy, the
        Adam optimizer and accuracy as metric.
    """
    stack = [
        Dense(300, activation='relu', input_shape=(input_shape,)),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


In [42]:
def train_model(model, X_train, y_train, X_val, y_val,
                epochs=300, batch_size=256, initial_lr=1e-3, min_lr=1e-5):
    """Fit ``model`` with early stopping, LR decay and checkpointing.

    Args:
        model: Compiled model exposing a Keras-style ``fit`` method.
        X_train, y_train: Training samples and their one-hot labels.
        X_val, y_val: Validation samples and labels; ``val_loss`` on this
            data drives early stopping and checkpoint selection.
        epochs: Maximum number of epochs to run.
        batch_size: Mini-batch size passed to ``fit``.
        initial_lr: Learning rate used for the first epoch.
        min_lr: Lower bound for the decayed learning rate.

    Returns:
        The history object returned by ``model.fit``.

    Side effects:
        Writes the best model seen so far (lowest ``val_loss``) to
        ``best_model.keras`` in the current working directory.
    """
    def decayed_lr(epoch):
        # Divide the rate by 10 every 20 epochs, but never go below min_lr.
        # The previous schedule used a positive exponent, which multiplied
        # the rate by 10 every 20 epochs and made training diverge.
        return max(initial_lr * 10 ** (-epoch / 20), min_lr)

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        LearningRateScheduler(decayed_lr),
        ModelCheckpoint("best_model.keras", monitor='val_loss', save_best_only=True)
    ]

    history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                        epochs=epochs, batch_size=batch_size, callbacks=callbacks)
    return history

In [43]:
def evaluate_model(model, X_test, y_test):
    """Print the test accuracy and a per-class classification report.

    Args:
        model: Model exposing ``predict``; its output is reduced to class
            indices with ``argmax`` along axis 1.
        X_test: Samples passed to ``model.predict``.
        y_test: One-hot labels; reduced to class indices with ``argmax``
            along axis 1.

    Returns:
        None. The results are written to standard output.
    """
    scores = model.predict(X_test)

    # Collapse score rows and one-hot rows to integer class ids.
    predicted = np.argmax(scores, axis=1)
    expected = np.argmax(y_test, axis=1)

    hits = predicted == expected
    accuracy = np.sum(hits) / len(expected)

    print(f"Test Accuracy: {accuracy:.2%}")
    print(classification_report(expected, predicted))


In [None]:
# Hold out the test split first; it is only used for the final evaluation.
X_train, X_test, y_train, y_test = load_data(test_size=0.2)

# Carve a separate validation split out of the training data. Early stopping
# and checkpoint selection monitor val_loss, so validating on the test split
# would leak it into model selection and inflate the reported test accuracy.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42)

model = build_model(X_fit.shape[1], y_fit.shape[1])
history = train_model(model, X_fit, y_fit, X_val, y_val)
evaluate_model(model, X_test, y_test)