In [12]:
import os
import numpy as np
import librosa

# Function to compute the 2D spectrogram from the audio data
def compute_spectrogram(audio_data, sample_rate, n_fft=2048, hop_length=512):
    """Return the magnitude of the short-time Fourier transform of a signal.

    Parameters
    ----------
    audio_data
        The signal handed to ``librosa.stft``.
    sample_rate
        Accepted so callers can pass it alongside the signal; it is not used
        in the computation.
    n_fft, hop_length
        Forwarded unchanged to ``librosa.stft``.

    Returns
    -------
    The element-wise absolute value of the complex STFT matrix.
    """
    stft_options = {"n_fft": n_fft, "hop_length": hop_length}
    complex_frames = librosa.stft(audio_data, **stft_options)

    # Discard the phase and keep only the magnitude of every bin
    return np.abs(complex_frames)

# Helper: force a 2D spectrogram to a fixed number of time frames
def _fit_spectrogram_frames(spectrogram, time_steps):
    """Return ``spectrogram`` with exactly ``time_steps`` columns.

    Longer inputs are cut after ``time_steps`` columns; shorter inputs are
    copied into a zero-filled array so the missing columns on the right are 0.
    """
    frame_count = spectrogram.shape[1]
    if frame_count >= time_steps:
        return spectrogram[:, :time_steps]

    # Only allocate the zero buffer when padding is actually required
    padded = np.zeros((spectrogram.shape[0], time_steps))
    padded[:, :frame_count] = spectrogram
    return padded


# Function to generate the 3D spectrogram tensor
def generate_3d_spectrogram(audio_folder_path, output_folder_path, time_steps=300):
    """Stack fixed-length magnitude spectrograms of every file in a folder.

    Each regular file in ``audio_folder_path`` (processed in sorted name order)
    is loaded with ``librosa.load`` at its native sample rate as mono, turned
    into a spectrogram with ``compute_spectrogram``, truncated or zero-padded
    to ``time_steps`` columns, and stacked along a new leading axis. The
    stacked array is written to ``re_3d_spectrogram.npy`` inside
    ``output_folder_path``, which is created if missing.

    Parameters
    ----------
    audio_folder_path
        Folder containing the audio files. Directory entries that are not
        regular files (e.g. sub-folders) are skipped.
    output_folder_path
        Folder that receives ``re_3d_spectrogram.npy``.
    time_steps
        Number of time frames kept per file.

    Raises
    ------
    ValueError
        If ``audio_folder_path`` contains no regular files.

    Returns
    -------
    None. The function intentionally returns nothing so that calling it as the
    last statement of a notebook cell does not echo a large array.
    """
    # Keep only regular files: handing a sub-directory to librosa.load would abort the run
    file_list = sorted(
        entry
        for entry in os.listdir(audio_folder_path)
        if os.path.isfile(os.path.join(audio_folder_path, entry))
    )

    # Fail early with a clear message instead of numpy's "need at least one array to stack"
    if not file_list:
        raise ValueError(f"No audio files found in {audio_folder_path!r}")

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder_path, exist_ok=True)

    # Collect one fixed-length 2D spectrogram per audio file
    spectrogram_tensors = []
    for file_name in file_list:
        # Load the audio file at its native sample rate and convert it to mono (if stereo)
        audio_path = os.path.join(audio_folder_path, file_name)
        audio, sr = librosa.load(audio_path, sr=None, mono=True)

        # Compute the 2D spectrogram and force it to the fixed time length
        spectrogram = compute_spectrogram(audio, sr)
        spectrogram_tensors.append(_fit_spectrogram_frames(spectrogram, time_steps))

    # Convert the list of 2D spectrogram slices to a 3D tensor
    spectrogram_tensor = np.stack(spectrogram_tensors)

    # Save the 3D spectrogram tensor as an .npy file
    output_file_name = 're_3d_spectrogram.npy'
    output_file_path = os.path.join(output_folder_path, output_file_name)
    np.save(output_file_path, spectrogram_tensor)

    # Report the file name that was actually written
    print(f"3D spectrogram tensor saved successfully as {output_file_name} in the output folder.")

# Source directory holding the resampled recordings
audio_folder_path = 'D:/Actor_01_Resampled_Audio'

# Destination directory for the stacked spectrogram file
output_folder_path = 'D:/re_3d_spectrogram'

# Build the tensor from every recording and write it to disk
generate_3d_spectrogram(
    audio_folder_path=audio_folder_path,
    output_folder_path=output_folder_path,
)


3D spectrogram tensor saved successfully as 3d_spectrogram.npy in the output folder.


In [13]:
import os
import numpy as np

# Read the stacked spectrogram tensor back from the folder it was saved in
output_folder_path = 'D:/re_3d_spectrogram'
spectrogram_tensor_path = os.path.join(
    output_folder_path,
    're_3d_spectrogram.npy',
)
spectrogram_tensor = np.load(spectrogram_tensor_path)

# Report the dimensions of the loaded array
print("Shape of the 3D spectrogram tensor:", np.shape(spectrogram_tensor))

# View the numpy array
#print("3D spectrogram tensor:")
#print(spectrogram_tensor)


Shape of the 3D spectrogram tensor: (60, 1025, 300)


In [14]:
import os
import numpy as np
import librosa

# Function to compute various audio features from the audio data
def compute_features(audio_data, sample_rate, n_fft=2048, hop_length=512):
    """Compute a magnitude spectrogram, MFCCs and chroma features for a signal.

    The same ``n_fft`` and ``hop_length`` are passed to every librosa call so
    the three features are framed consistently. Previously only the STFT
    honoured these arguments while MFCC and chroma silently used the library
    defaults.

    Parameters
    ----------
    audio_data
        The signal handed to librosa.
    sample_rate
        Sampling rate of ``audio_data``; used by the MFCC and chroma
        computations.
    n_fft
        FFT window size used for all three features.
    hop_length
        Hop between successive frames used for all three features.

    Returns
    -------
    tuple
        ``(magnitude_spectrogram, mfccs, chroma)`` where
        ``magnitude_spectrogram`` is the absolute value of the STFT, ``mfccs``
        holds 13 coefficients per frame (``n_mfcc=13``) and ``chroma`` is the
        output of ``librosa.feature.chroma_stft``.
    """
    # Compute the Short-Time Fourier Transform (STFT) and keep only its magnitude
    stft = librosa.stft(audio_data, n_fft=n_fft, hop_length=hop_length)
    magnitude_spectrogram = np.abs(stft)

    # Compute MFCC features; n_fft/hop_length are forwarded to the underlying
    # mel spectrogram so the framing matches the STFT above
    mfccs = librosa.feature.mfcc(
        y=audio_data,
        sr=sample_rate,
        n_mfcc=13,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    # Compute chroma features with the same framing
    chroma = librosa.feature.chroma_stft(
        y=audio_data,
        sr=sample_rate,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    return magnitude_spectrogram, mfccs, chroma

# Helper: force a 2D feature matrix to a fixed number of time frames
def _fit_feature_frames(feature, time_steps):
    """Return ``feature`` with exactly ``time_steps`` columns.

    Longer inputs are cut after ``time_steps`` columns; shorter inputs are
    copied into a zero-filled array so the missing columns on the right are 0.
    """
    frame_count = feature.shape[1]
    if frame_count >= time_steps:
        return feature[:, :time_steps]

    # Only allocate the zero buffer when padding is actually required
    padded = np.zeros((feature.shape[0], time_steps))
    padded[:, :frame_count] = feature
    return padded


# Function to generate the 3D feature tensor
def generate_3d_features(audio_folder_path, output_folder_path, time_steps=300):
    """Build and save fixed-length feature tensors for every file in a folder.

    Each regular file in ``audio_folder_path`` (processed in sorted name order)
    is loaded with ``librosa.load`` at its native sample rate as mono and passed
    to ``compute_features``. Every returned feature is truncated or zero-padded
    to ``time_steps`` columns based on its own frame count, then the per-file
    results are stacked along a new leading axis and written to
    ``magnitude_spectrogram.npy``, ``mfcc.npy`` and ``chroma.npy`` inside
    ``output_folder_path``, which is created if missing.

    Parameters
    ----------
    audio_folder_path
        Folder containing the audio files. Directory entries that are not
        regular files (e.g. sub-folders) are skipped.
    output_folder_path
        Folder that receives the three ``.npy`` files.
    time_steps
        Number of time frames kept per file and feature.

    Raises
    ------
    ValueError
        If ``audio_folder_path`` contains no regular files.

    Returns
    -------
    None. The function intentionally returns nothing so that calling it as the
    last statement of a notebook cell does not echo large arrays.
    """
    # Keep only regular files: handing a sub-directory to librosa.load would abort the run
    file_list = sorted(
        entry
        for entry in os.listdir(audio_folder_path)
        if os.path.isfile(os.path.join(audio_folder_path, entry))
    )

    # Fail early with a clear message instead of numpy's "need at least one array to stack"
    if not file_list:
        raise ValueError(f"No audio files found in {audio_folder_path!r}")

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder_path, exist_ok=True)

    # Per-file fixed-length features, one list per feature type
    magnitude_spectrogram_list = []
    mfcc_list = []
    chroma_list = []

    for file_name in file_list:
        # Load the audio file at its native sample rate and convert it to mono (if stereo)
        audio_path = os.path.join(audio_folder_path, file_name)
        audio, sr = librosa.load(audio_path, sr=None, mono=True)

        # Compute various audio features from the audio data
        magnitude_spectrogram, mfccs, chroma = compute_features(audio, sr)

        # Fit each feature by its own length; keying the decision on the
        # spectrogram alone breaks when the features have different frame counts
        magnitude_spectrogram_list.append(_fit_feature_frames(magnitude_spectrogram, time_steps))
        mfcc_list.append(_fit_feature_frames(mfccs, time_steps))
        chroma_list.append(_fit_feature_frames(chroma, time_steps))

    # Convert the lists of features to 3D tensors
    magnitude_spectrogram_tensor = np.stack(magnitude_spectrogram_list)
    mfcc_tensor = np.stack(mfcc_list)
    chroma_tensor = np.stack(chroma_list)

    # Save the 3D feature tensors as .npy files
    np.save(os.path.join(output_folder_path, 'magnitude_spectrogram.npy'), magnitude_spectrogram_tensor)
    np.save(os.path.join(output_folder_path, 'mfcc.npy'), mfcc_tensor)
    np.save(os.path.join(output_folder_path, 'chroma.npy'), chroma_tensor)

    # Print a message to indicate that the process is complete
    print("3D feature tensors saved successfully in the output folder.")

# Source directory holding the resampled recordings
audio_folder_path = 'D:/Actor_01_Resampled_Audio'

# Destination directory for the three feature tensor files
output_folder_path = 'D:/3D_Features'

# Extract the features from every recording and write the tensors to disk
generate_3d_features(
    audio_folder_path=audio_folder_path,
    output_folder_path=output_folder_path,
)


3D feature tensors saved successfully in the output folder.


In [15]:
import numpy as np

# Read each saved feature tensor back from disk, in the order they were written
magnitude_spectrogram_tensor = np.load(
    'D:/3D_Features/magnitude_spectrogram.npy'
)
mfcc_tensor = np.load(
    'D:/3D_Features/mfcc.npy'
)
chroma_tensor = np.load(
    'D:/3D_Features/chroma.npy'
)

# Report the dimensions of every loaded array
print("Shape of the magnitude spectrogram tensor:", np.shape(magnitude_spectrogram_tensor))
print("Shape of the MFCC tensor:", np.shape(mfcc_tensor))
print("Shape of the chroma tensor:", np.shape(chroma_tensor))


Shape of the magnitude spectrogram tensor: (60, 1025, 300)
Shape of the MFCC tensor: (60, 13, 300)
Shape of the chroma tensor: (60, 12, 300)
