In [1]:
import os
import glob
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
from IPython.display import display, HTML
import nbformat as nbf


In [2]:
# Directory whose sub-folders are scanned for recordings by find_audio_files().
base_dir = 'SpeechData/pilot'  # Update this to the correct path

# Phoneme labels to look for; each label is inserted verbatim into the
# expected .wav file name, so the spelling here must match the files on disk.
phonemes = [
    'i', 'e', 'ɐ', 'ӕː',
    'ie', 'p', 'b', 't',
    'd', 'k', 'g', 'm',
    'r', 's', 'h', 'ʣ',
]
# Function to find audio files
def find_audio_files(base_dir, phonemes):
    """Collect the recording paths for each phoneme found under ``base_dir``.

    Every sub-directory of ``base_dir`` whose name contains at least one
    underscore is scanned. The text between the first and second underscore
    of the folder name is used as the folder ID, and for each phoneme the file
    ``ID_<folder_id>_level1__<phoneme>_.wav`` is looked up inside that folder.

    Parameters
    ----------
    base_dir : str
        Directory whose immediate sub-directories hold the recordings.
    phonemes : list of str
        Phoneme labels. Leading/trailing ``/`` characters are stripped before
        the label is inserted into the expected file name; the unstripped
        label is used as the key of the returned dict.

    Returns
    -------
    dict
        Maps every entry of ``phonemes`` to a list of matching file paths.
        The list is empty when no file was found for that phoneme.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``base_dir`` cannot be listed.
    """
    audio_files = {phoneme: [] for phoneme in phonemes}
    # Sorted so the order of the collected paths does not depend on the
    # arbitrary order in which the OS lists the directory.
    for folder in sorted(os.listdir(base_dir)):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        name_parts = folder.split('_')
        if len(name_parts) < 2:
            # Folder name carries no '<prefix>_<id>' structure, so there is
            # no ID to build a file name from; skip it instead of crashing.
            continue
        # Extract the ID number from the folder name
        folder_id = name_parts[1]
        # Escape the directory part so characters such as '[' or '*' in the
        # path are matched literally rather than interpreted as glob syntax.
        escaped_folder = glob.escape(folder_path)
        for phoneme in phonemes:
            phoneme_clean = phoneme.strip('/')
            pattern = os.path.join(escaped_folder, f'ID_{folder_id}_level1__{phoneme_clean}_.wav')
            audio_files[phoneme].extend(glob.glob(pattern))
    return audio_files

# Map each phoneme label to the list of recording paths found under base_dir.
audio_files = find_audio_files(base_dir=base_dir, phonemes=phonemes)


In [3]:
# audio_files

In [4]:
# Decode every discovered recording at a 16 kHz sampling rate and group the
# resulting sample arrays by phoneme.
audio_data = {}
for phoneme in phonemes:
    loaded = audio_data.setdefault(phoneme, [])
    for wav_path in audio_files[phoneme]:
        # `sr` is kept as a notebook-level name in case later cells read it.
        samples, sr = librosa.load(wav_path, sr=16000)
        loaded.append(samples)

In [5]:
# For every recording: cut it into three consecutive pieces and trim each
# piece with librosa.effects.trim (top_db=35). The result per recording is a
# list of three trimmed arrays.
audio_data_truncated = {phoneme: [] for phoneme in phonemes}
for phoneme in phonemes:
    for data in audio_data[phoneme]:
        third = len(data) // 3
        # The last slice is open-ended so it also receives the samples left
        # over when the length is not divisible by three.
        bounds = ((None, third), (third, 2 * third), (2 * third, None))
        # trim() returns (trimmed_signal, index_interval); only the signal is kept.
        trimmed_parts = [
            librosa.effects.trim(data[start:stop], top_db=35)[0]
            for start, stop in bounds
        ]
        audio_data_truncated[phoneme].append(trimmed_parts)

In [None]:

# Save one spectrogram figure per phoneme into SpectrogramPlots/<phoneme>.png.
# Each row of a figure is one recording, each column one of its three parts.

# Create the directory if it does not exist
output_dir = 'SpectrogramPlots'
os.makedirs(output_dir, exist_ok=True)

# Prepare data for plotting
data_truncated_by_phoneme = {phoneme: audio_data_truncated[phoneme] for phoneme in phonemes}

# Create plots for each phoneme
for phoneme, data_truncated in data_truncated_by_phoneme.items():
    # plt.subplots() rejects a grid with zero rows, so a phoneme for which no
    # recording was found is skipped instead of aborting the whole loop.
    if not data_truncated:
        print(f'No recordings for phoneme {phoneme}; skipping spectrogram plot.')
        continue

    num_plots = len(data_truncated) * 3  # Each phoneme has 3 parts for each segment
    num_cols = 3  # Number of columns per row (3 parts)
    num_rows = (num_plots + num_cols - 1) // num_cols  # Calculate number of rows needed

    # squeeze=False makes subplots() always return a 2-D array of axes, so the
    # flatten() below also works when there is only a single row.
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows),
                            sharex=True, sharey=True, squeeze=False)

    # Flatten the axes array for easy indexing
    axs = axs.flatten()

    # Iterate through each segment and plot in a subplot
    plot_index = 0
    for segment in data_truncated:
        for part in segment:
            # Compute STFT
            transformed_part = librosa.stft(part)

            # Convert to dB
            db = librosa.amplitude_to_db(np.abs(transformed_part))

            # Plot spectrogram. imshow() draws against array indices, so the
            # y-limits below are STFT bin indices and the x axis is the frame
            # index, not Hz / seconds.
            ax_spectrogram = axs[plot_index]
            ax_spectrogram.imshow(db, aspect='auto', origin='lower', cmap='inferno')
            ax_spectrogram.set_title(f'Spectrogram Part {plot_index % 3 + 1}')
            ax_spectrogram.set_xlabel('Time')
            ax_spectrogram.set_ylabel('Frequency')
            ax_spectrogram.set_ylim(0, 400)

            plot_index += 1

    # Hide any unused subplots
    for j in range(plot_index, len(axs)):
        axs[j].axis('off')

    # Adjust layout and add a title for the phoneme
    fig.suptitle(f'Spectrograms for phoneme: {phoneme}', fontsize=16)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Save the plot as an image file
    plot_file_path = os.path.join(output_dir, f'{phoneme}.png')
    fig.savefig(plot_file_path)

    # The figure is only needed on disk; closing it releases its memory
    # before the next phoneme's figure is created.
    plt.close(fig)


In [None]:

# Save (and display) one waveform figure per phoneme into
# SpectrogramPlots/<phoneme>_wave.png. Each row of a figure is one recording,
# each column one of its three parts.

# Create the directory if it does not exist
output_dir = 'SpectrogramPlots'
os.makedirs(output_dir, exist_ok=True)

# Prepare data for plotting
data_truncated_by_phoneme = {phoneme: audio_data_truncated[phoneme] for phoneme in phonemes}

# Create plots for each phoneme
for phoneme, data_truncated in data_truncated_by_phoneme.items():
    # plt.subplots() rejects a grid with zero rows, so a phoneme for which no
    # recording was found is skipped instead of aborting the whole loop.
    if not data_truncated:
        print(f'No recordings for phoneme {phoneme}; skipping waveform plot.')
        continue

    num_plots = len(data_truncated) * 3  # Each phoneme has 3 parts for each segment
    num_cols = 3  # Number of columns per row (3 parts)
    num_rows = (num_plots + num_cols - 1) // num_cols  # Calculate number of rows needed

    # squeeze=False makes subplots() always return a 2-D array of axes, so the
    # flatten() below also works when there is only a single row.
    fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows),
                            sharex=True, sharey=True, squeeze=False)

    # Flatten the axes array for easy indexing
    axs = axs.flatten()

    # Iterate through each segment and plot in a subplot
    plot_index = 0
    for segment in data_truncated:
        for part in segment:
            ax_waveform = axs[plot_index]
            # x axis is the sample index within the trimmed part.
            ax_waveform.plot(part, lw=1)
            ax_waveform.set_title(f'Waveform Part {plot_index % 3 + 1}')
            ax_waveform.set_xlabel('Time')
            ax_waveform.set_ylabel('Amplitude')

            plot_index += 1

    # Hide any unused subplots
    for j in range(plot_index, len(axs)):
        axs[j].axis('off')

    # Adjust layout and add a title for the phoneme
    fig.suptitle(f'WaveForms for phoneme: {phoneme}', fontsize=16)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Save the plot as an image file
    plot_file_path = os.path.join(output_dir, f'{phoneme}_wave.png')
    fig.savefig(plot_file_path)
    plt.show()

    # Release the figure once it has been saved and shown so figures do not
    # pile up in memory across loop iterations.
    plt.close(fig)


In [8]:

# num_phonemes = len(phonemes)

# # Prepare data for plotting
# data_truncated_by_phoneme = {phoneme: [] for phoneme in phonemes}
# for phoneme in phonemes:
#     data_truncated_by_phoneme[phoneme] = audio_data_truncated[phoneme]

# # Create plots for each phoneme
# for phoneme, data_truncated in data_truncated_by_phoneme.items():
#     num_plots = len(data_truncated)
#     num_cols = 3  # Number of columns per row (3 parts)
#     num_rows = (num_plots + num_cols - 1) // num_cols  # Calculate number of rows needed

#     # Create a figure and axis objects for the current phoneme
#     fig, axs = plt.subplots(num_rows, num_cols, figsize=(15, 5*num_rows), sharex=True, sharey=True)
    
#     # Flatten the axes array for easy indexing
#     axs = axs.flatten() if num_rows > 1 else [axs]
    
#     # Iterate through each segment and plot in a subplot
#     for i, audio_data in enumerate(data_truncated):
#         # Compute STFT
#         transformed_part = librosa.stft(audio_data)
        
#         # Convert to dB
#         db = librosa.amplitude_to_db(abs(transformed_part))
        
#         # Plot spectrogram
#         axs[i].imshow(db, aspect='auto', origin='lower', cmap='inferno')
#         axs[i].set_title(f'Part {i+1}')
#         axs[i].set_xlabel('Time')
#         axs[i].set_ylabel('Frequency')
#         axs[i].set_ylim(0, 400)
    
#     # Hide any unused subplots
#     for j in range(num_plots, len(axs)):
#         axs[j].axis('off')

#     # Adjust layout and add a title for the phoneme
#     plt.suptitle(f'Spectrograms for phoneme: {phoneme}', fontsize=16)
#     plt.tight_layout(rect=[0, 0.03, 1, 0.95])
#     plt.show()


In [9]:

# # plot all the audio data with all data of a phoneme in one row
# fig, axs = plt.subplots(len(phonemes), 1, figsize=(20, 20))
# for i, phoneme in enumerate(phonemes):
#     print(audio_data[phoneme])
#     for data in audio_data[phoneme]:
#         # plot like plt.plot(data[0],lw=1) with the axis
#         axs[i].plot(data, lw=1)
#     axs[i].set_title(phoneme)