In [8]:
import os
import glob
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
from IPython.display import display, HTML
import nbformat as nbf


In [10]:
# Root directory that is scanned for per-speaker recording folders.
base_dir = "../../Speech-Data/pilot"  # Update this to the correct path

# Earlier word list, kept for reference only:
# words  = ['హరితం', 'మనస్సు', 'పోరాటం', 'ఫలితము', 'పరిషత్', 'తరువాత', 'ఎగుమతి', 'పట్ట', 'బట్ట', 'తన్ను', 'వామి', 'ఫాలు', 'కసాయి', 'ఆవు', 'వాని', 'కల', 'పాటు', 'పాడు', 'రాలు', 'కాశి', 'కాటు', 'గాటు', 'హాయి', 'బేణ్డు', 'అను', 'పిల్లి', 'మెట్టు']
# word_english = ['haritham','manassu','poraatam','phalitamu','parishath','taruvath','egumathi','patta','batta','thannu','vaami','faalu','kasaayi','aavu','vaani','kala','paatu','paadu','raalu','kaashi','kaatu','gaatu','haayi','baendu','anu','pilli','mettu']

# Words whose recordings are looked up. `word_english[i]` is the Latin-script
# label used in figure titles for `words[i]`, so both lists must stay in the
# same order and have the same length.
words = [
    "బట్ట", "పట్ట", "అట్ట",
    "పాడు", "పాటు", "పావు",
    "తన్ను", "దన్ను", "అన్ను",
    "కాటు", "గాటు", "ఆటు",
    "ఫాలు", "వాలు", "ఆలు",
    "హాయి", "ఆయి",
]
word_english = [
    "batta", "patta", "atta",
    "paadu", "paatu", "paavu",
    "thannu", "dannu", "annu",
    "kaatu", "gaatu", "aatu",
    "faalu", "vaalu", "aalu",
    "haayi", "aayi",
]
def find_audio_files(base_dir, words):
    """Locate one level-2 recording per word inside every speaker folder.

    Every sub-directory of ``base_dir`` whose name contains an underscore is
    treated as a speaker folder; the text between the first and second
    underscore is used as its ID. For each word the file
    ``ID_<id>_level2_<word>.wav`` is preferred, with
    ``ID_<id>_level2_1_<word>.wav`` as the fallback.

    Args:
        base_dir: Directory that contains the speaker folders.
        words: Words to look up. Leading/trailing ``/`` characters are
            stripped before the file name is built.

    Returns:
        A dict mapping each entry of ``words`` to a list of
        ``(folder_id, path)`` tuples, ordered by folder name. Words without
        any recording map to an empty list.
    """
    audio_files = {word: [] for word in words}

    # Sorted so the result (and therefore the plot row order) is reproducible;
    # os.listdir returns entries in arbitrary order.
    for folder in sorted(os.listdir(base_dir)):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        name_parts = folder.split('_')
        if len(name_parts) < 2:
            # Not a "<prefix>_<id>" folder: there is no ID to build file names from.
            continue
        folder_id = name_parts[1]

        for word in words:
            word_clean = word.strip('/')
            candidates = (
                f'ID_{folder_id}_level2_{word_clean}.wav',
                f'ID_{folder_id}_level2_1_{word_clean}.wav',
            )
            for file_name in candidates:
                candidate_path = os.path.join(folder_path, file_name)
                # Exact-name lookup: a plain existence check avoids glob
                # treating characters such as "[" in the path as wildcards.
                if os.path.isfile(candidate_path):
                    audio_files[word].append((folder_id, candidate_path))
                    break
    return audio_files

# Map every word to the (folder_id, path) pairs found under base_dir.
audio_files = find_audio_files(base_dir=base_dir, words=words)


In [9]:
# audio_files

In [11]:
# Decode every located recording with librosa.load's default settings.
# Each entry keeps the speaker ID, the sample array and the sample rate that
# librosa reports, so later cells can convert sample counts to seconds.
audio_data = {word: [] for word in words}
for word in words:
    # `speaker_id` rather than `id`: a loop variable named `id` would replace
    # the builtin for the rest of the kernel session.
    for speaker_id, path in audio_files[word]:
        samples, sample_rate = librosa.load(path)
        audio_data[word].append({
            'id': speaker_id,
            'data': samples,
            'sr': sample_rate,
        })

In [12]:
# For every recording: cut the signal into three consecutive slices and run
# librosa.effects.trim (top_db=35) on each slice. The result per word is a
# list of (id, [slice_1, slice_2, slice_3]) tuples.
audio_data_truncated = {word: [] for word in words}
for word in words:
    for entry in audio_data[word]:
        samples = entry['data']
        third = len(samples) // 3

        # The first two slices have `third` samples each; the last slice runs
        # to the end and therefore absorbs the remainder of the division.
        thirds = (samples[:third], samples[third:2 * third], samples[2 * third:])

        # trim returns (trimmed_signal, index_interval); only the signal is kept.
        trimmed_thirds = [librosa.effects.trim(chunk, top_db=35)[0] for chunk in thirds]

        audio_data_truncated[word].append((entry['id'], trimmed_thirds))

In [13]:

# Folder that receives the generated PNG files. exist_ok=True replaces the
# separate existence check, so the cell is safe to re-run and has no
# check-then-create race.
output_dir = 'SpectrogramPlotsWords_new_list'
os.makedirs(output_dir, exist_ok=True)

# Per-word view of the trimmed recordings, in the order of `words`, as
# consumed by the plotting cells below.
data_truncated_by_word = {word: audio_data_truncated[word] for word in words}

Spectrograms

In [None]:


# One spectrogram figure per word: one row per recording, one column per
# third of that recording. Each figure is written to output_dir as <word>.png.
for word, data_truncated in data_truncated_by_word.items():
    if not data_truncated:
        # No recording was found for this word; plt.subplots cannot build a
        # grid with zero rows, so there is nothing to draw.
        continue

    num_plots = len(data_truncated) * 3  # Each recording contributes 3 parts
    num_cols = 3  # Number of columns per row (3 parts)
    num_rows = (num_plots + num_cols - 1) // num_cols  # Rows needed for all parts

    # squeeze=False always yields a 2-D array of axes, so flattening works the
    # same way for one recording as for many.
    fig, axs = plt.subplots(
        num_rows, num_cols, figsize=(15, 5 * num_rows), squeeze=False
    )
    axs = axs.ravel()

    plot_index = 0
    # `speaker_id` rather than `id`, which would overwrite the builtin.
    for speaker_id, segment in data_truncated:
        for part in segment:
            # Magnitude of the STFT, converted to decibels.
            db = librosa.amplitude_to_db(np.abs(librosa.stft(part)))

            # imshow receives the raw matrix, so the axis ticks are STFT
            # frame / frequency-bin indices rather than seconds / Hz.
            ax_spectrogram = axs[plot_index]
            ax_spectrogram.imshow(db, aspect='auto', origin='lower', cmap='inferno')
            ax_spectrogram.set_title(f'Spectrogram Part {plot_index % 3 + 1} of {speaker_id}')
            ax_spectrogram.set_xlabel('Time')
            ax_spectrogram.set_ylabel('Frequency')

            plot_index += 1

    # Hide any unused subplots
    for unused_ax in axs[plot_index:]:
        unused_ax.axis('off')

    # The figure title uses the Latin-script label that matches this word.
    word_index = words.index(word)
    fig.suptitle(f'Spectrograms for word: {word_english[word_index]}', fontsize=16)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Save the plot as an image file
    plot_file_path = os.path.join(output_dir, f'{word}.png')
    fig.savefig(plot_file_path)

    # Release the figure: one large figure per word would otherwise stay in
    # memory for the whole session. Remove this line to also show it inline.
    plt.close(fig)


Waveforms

In [None]:
# One waveform figure per word: one row per recording, one column per third
# of that recording. Each figure is written to output_dir as <word>_wave.png.
for word, data_truncated in data_truncated_by_word.items():
    if not data_truncated:
        # No recording was found for this word; plt.subplots cannot build a
        # grid with zero rows, so there is nothing to draw.
        continue

    num_plots = len(data_truncated) * 3  # Each recording contributes 3 parts
    num_cols = 3  # Number of columns per row (3 parts)
    num_rows = (num_plots + num_cols - 1) // num_cols  # Rows needed for all parts

    # squeeze=False always yields a 2-D array of axes, so flattening works the
    # same way for one recording as for many.
    fig, axs = plt.subplots(
        num_rows, num_cols, figsize=(15, 5 * num_rows), squeeze=False
    )
    axs = axs.ravel()

    plot_index = 0
    # `speaker_id` rather than `id`, which would overwrite the builtin.
    for speaker_id, segment in data_truncated:
        for part in segment:
            # The x axis is the sample index because only y values are passed.
            ax_waveform = axs[plot_index]
            ax_waveform.plot(part, lw=1)
            ax_waveform.set_title(f'Waveform Part {plot_index % 3 + 1} of {speaker_id}')
            ax_waveform.set_xlabel('Time')
            ax_waveform.set_ylabel('Amplitude')

            plot_index += 1

    # Hide any unused subplots
    for unused_ax in axs[plot_index:]:
        unused_ax.axis('off')

    # The figure title uses the Latin-script label that matches this word.
    word_index = words.index(word)
    fig.suptitle(f'WaveForms for word: {word_english[word_index]}', fontsize=16)
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Save the plot as an image file
    plot_file_path = os.path.join(output_dir, f'{word}_wave.png')
    fig.savefig(plot_file_path)

    # Release the figure: one large figure per word would otherwise stay in
    # memory for the whole session. Remove this line to also show it inline.
    plt.close(fig)