In [1]:
import os
import librosa
import numpy as np

# === HELPER: Extract MFCC from one file ===
def extract_mfcc(file_path, n_mfcc=40):
    """Load one audio file and return its MFCC matrix, time-major.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute per frame (default 40).

    Returns
    -------
    numpy.ndarray
        MFCCs transposed so that rows are time steps and columns are
        coefficients, i.e. shape (time_steps, n_mfcc).
    """
    # sr=None asks librosa to keep the file's own sampling rate instead of
    # resampling to its default rate.
    signal, sample_rate = librosa.load(file_path, sr=None)
    return librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc).T

In [37]:
# Build the song dataset: one [padded_mfcc, label] row per audio file found
# under songs_path/<actor>/<file>.
songs_path = r'../data/Unprocessed/Audio_Song_Actors'
mfcc_list_songs = []
label_list = []
max_len = 0
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the resulting array reproducible between runs and machines.
for actor in sorted(os.listdir(songs_path)):
    actor_dir = os.path.join(songs_path, actor)
    if not os.path.isdir(actor_dir):
        # Stray files next to the actor folders would make the inner listdir fail.
        continue
    for audio in sorted(os.listdir(actor_dir)):
        # extract_mfcc loads the file itself, so the audio is decoded only once
        # (a separate librosa.load here would just repeat that work).
        mfcc = extract_mfcc(os.path.join(actor_dir, audio), n_mfcc=40)
        mfcc_list_songs.append(mfcc)
        # Label is the file name without its extension.
        label_list.append(os.path.splitext(audio)[0])
        # Track the longest clip (in time steps) so every clip can be padded to it.
        max_len = max(max_len, mfcc.shape[0])

# Zero-pad every MFCC matrix at the end of the time axis up to max_len;
# the coefficient axis is left untouched.
padded_mfccs = [
    np.pad(mfcc, ((0, max_len - mfcc.shape[0]), (0, 0)), mode='constant')
    for mfcc in mfcc_list_songs
]
print("mfcc_list_songs", len(mfcc_list_songs))
# Pair each padded MFCC matrix with its label and store the pairs as an
# object array (the rows mix an ndarray and a string).
merged = [[a, b] for a, b in zip(padded_mfccs, label_list)]
processed_audio_songs = np.array(merged, dtype=object)

mfcc_list_songs 1012


In [40]:
# Build the speech dataset: one [padded_mfcc, label] row per audio file found
# under speech_path/<actor>/<file>.
speech_path = r'../data/Unprocessed/Audio_Speech_Actors'
mfcc_list_speech = []
label_list = []
max_len = 0
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the resulting array reproducible between runs and machines.
for actor in sorted(os.listdir(speech_path)):
    actor_dir = os.path.join(speech_path, actor)
    if not os.path.isdir(actor_dir):
        # Stray files next to the actor folders would make the inner listdir fail.
        continue
    for audio in sorted(os.listdir(actor_dir)):
        # extract_mfcc loads the file itself, so the audio is decoded only once
        # (a separate librosa.load here would just repeat that work).
        mfcc = extract_mfcc(os.path.join(actor_dir, audio), n_mfcc=40)
        mfcc_list_speech.append(mfcc)
        # Label is the file name without its extension.
        label_list.append(os.path.splitext(audio)[0])
        # Track the longest clip (in time steps) so every clip can be padded to it.
        max_len = max(max_len, mfcc.shape[0])

# Zero-pad every MFCC matrix at the end of the time axis up to max_len;
# the coefficient axis is left untouched.
padded_mfccs = [
    np.pad(mfcc, ((0, max_len - mfcc.shape[0]), (0, 0)), mode='constant')
    for mfcc in mfcc_list_speech
]
print("mfcc_list_speech", len(mfcc_list_speech))
# Pair each padded MFCC matrix with its label and store the pairs as an
# object array (the rows mix an ndarray and a string).
merged = [[a, b] for a, b in zip(padded_mfccs, label_list)]
processed_audio_speech = np.array(merged, dtype=object)

mfcc_list_speech 1440


In [41]:
# Stack the song rows on top of the speech rows (row-wise, along axis 0).
# NOTE(review): each dataset was padded to its own max_len, so the MFCC
# matrices in the two halves may have different numbers of time steps --
# confirm that downstream code handles this.
processed_audio = np.concatenate([processed_audio_songs, processed_audio_speech])
# Persist the combined object array to disk as a .npy file.
np.save('processed_audio.npy', processed_audio)
print("processed_audio", processed_audio.shape[0])

processed_audio 2452
