In [None]:
Feature Extraction

In [None]:
# Plot a single MFCC heatmap in its own figure.
def plot_mfccs(mfccs):
    """Display one MFCC matrix as a heatmap with a colorbar.

    Args:
        mfccs: Array handed straight to ``librosa.display.specshow``
            (in this notebook, the output of ``librosa.feature.mfcc``).

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    heatmap = librosa.display.specshow(mfccs, x_axis='time', ax=ax)
    fig.colorbar(heatmap, ax=ax)
    ax.set_title('MFCC')
    fig.tight_layout()
    plt.show()

# Draw one MFCC heatmap into a cell of a subplot grid.
def plot_mfccs_subplot(mfccs, ax):
    """Render an MFCC matrix onto an existing matplotlib axes.

    Args:
        mfccs: Array handed straight to ``librosa.display.specshow``.
        ax: The axes to draw into.

    Returns:
        Whatever ``librosa.display.specshow`` returns for the drawn heatmap;
        callers use it as the mappable for a shared colorbar.
    """
    return librosa.display.specshow(mfccs, x_axis='time', ax=ax)

In [None]:
# Set the number of Mel-Frequency Cepstral Coefficients (MFCCs) to extract.
num_mfcc = 20

# Initialize empty lists to hold the per-file feature vectors and the corresponding labels.
X_train = []
y_train = []

# Set the number of rows and columns for the subplot grid.
num_rows = 4
num_cols = 4
max_plots = num_rows * num_cols

# 1-based index of the next free subplot; stops advancing once the grid is full.
subplot_idx = 1

# Handle of the most recently drawn heatmap; stays None if no .wav file is found.
img = None

# Create a new figure for the grid of subplots.
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))
fig.tight_layout(pad=2.0)

# Iterate through all of the .wav files in the training directory.
for file in sorted(os.listdir(data_path)):
    if not file.endswith(".wav"):
        continue

    # Load the audio file using librosa and our files in Google Drive.
    audio_data, sample_rate = librosa.load(os.path.join(data_path, file))

    # Determine the MFCC features from the audio data provided above.
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=num_mfcc)

    # Only the first `max_plots` files are drawn. Filling the grid must NOT
    # stop the loop: features are still needed for every remaining file.
    if subplot_idx <= max_plots:
        row, col = divmod(subplot_idx - 1, num_cols)
        img = plot_mfccs_subplot(mfccs, axes[row, col])
        axes[row, col].set_title(file)
        subplot_idx += 1

    # Summarize each coefficient over time by its mean and standard deviation.
    mfccs_avg = np.mean(mfccs, axis=1)
    mfccs_std = np.std(mfccs, axis=1)

    # Add the MFCC features (means followed by standard deviations) to the data matrix.
    X_train.append(np.concatenate((mfccs_avg, mfccs_std)))

    # The emotion label is the part of the filename that precedes the first digit.
    # Scanning only the stem means a name without digits yields the whole stem
    # instead of running past the end of the string.
    stem = os.path.splitext(file)[0]
    label = ""
    for ch in stem:
        if ch.isdigit():
            break
        label += ch
    y_train.append(label)

# Add a shared colorbar (scaled to the last heatmap drawn), if anything was plotted.
if img is not None:
    cbar = fig.colorbar(img, ax=axes.ravel().tolist())
    cbar.set_label('Amplitude (dB)')

# Show the figure.
plt.show()

Split the data into training and validation sets, then train and evaluate the SVM

In [None]:
# Convert our data into np.arrays for SVM.
X_train = np.array(X_train)
y_train = np.array(y_train)

# Use stratified sampling to split the data into a smaller training set (80%)
# and a validation set (20%); the fixed random_state makes the split reproducible.
X_train_new, X_val, y_train_new, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

# Initialize the SVM classifier.
# `gamma` is deliberately not passed: it is a kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels only and has no effect with a linear kernel.
svm_classifier = SVC(kernel='linear', C=10)

# Train the SVM classifier on the smaller training set.
svm_classifier.fit(X_train_new, y_train_new)

# Predict the labels of the validation set using the trained SVM classifier.
y_pred_val = svm_classifier.predict(X_val)

# Print the classification report and confusion matrix for the validation set.
print(classification_report(y_val, y_pred_val))
print(confusion_matrix(y_val, y_pred_val))