In [10]:
#pip install noisereduce
#pip install scipy

In [1]:
# Global imports
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy import signal
from PIL import Image
import re
from mpl_toolkits.mplot3d import Axes3D
import librosa
import librosa.display

In [2]:
# Path to the directory containing folders with .wav files.
# Written as a raw string: the Windows path is full of backslashes that would
# otherwise be parsed as (invalid) escape sequences such as "\S" or "\T".
# NOTE(review): absolute, machine-specific path - adjust for your environment.
root_path = r"d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data"

# Define the root path for saving progress (a text file in the current
# working directory).
PROGRESS_PATH = os.path.join(os.getcwd(), 'progress.txt')

In [3]:

# Function to generate spectrogram
    
def generate_mel_spectrogram(dataa, rate, save_path, resize_path):
    """Render a dB-scaled Mel spectrogram of an audio signal to image files.

    Parameters:
        dataa (numpy array): Audio samples; cast to float32 before analysis.
        rate (int): Sampling rate, forwarded to librosa as ``sr``.
        save_path (str): Path the full-size figure is written to.
        resize_path (str): Path the 792x252 resized copy is written to.
    """
    samples = dataa.astype(np.float32)

    # Power Mel spectrogram, then converted to dB relative to its own maximum
    power_mel = librosa.feature.melspectrogram(
        y=samples, sr=rate, n_fft=2048, hop_length=1024
    )
    mel_db = librosa.power_to_db(power_mel, ref=np.max)

    # Draw on an explicit figure/axes pair, with no axes decorations or padding
    fig, ax = plt.subplots(figsize=(10, 4))
    librosa.display.specshow(mel_db, y_axis='mel', fmax=8000, x_axis='time', ax=ax)
    ax.axis('off')
    fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

    # Produce the fixed-size copy from the file that was just written
    resize_image(save_path, (792, 252), resize_path)
    
# Function to resize image
def resize_image(image_path, target_size, save_path):
    """Resize an image with nearest-neighbour resampling and save the result.

    Parameters:
        image_path (str): Path of the image to read.
        target_size (tuple): Size forwarded to ``Image.resize``.
        save_path (str): Path the resized image is written to.
    """
    # The context manager releases the source file handle deterministically,
    # even if resizing raises, and before save_path is written (which matters
    # when image_path and save_path are the same file).
    with Image.open(image_path) as img:
        img_resized = img.resize(target_size, resample=Image.Resampling.NEAREST)
    img_resized.save(save_path)

In [4]:
def apply_frequency_mask(spec, max_mask_pct=0.2, num_masks=5):
    """
    Apply frequency masking with multiple segments to a spectrogram.

    Every mask zeroes a band of ``int(max_mask_pct * num_freqs)`` consecutive
    rows (axis 0). The band length is fixed; only the start row is random, and
    bands may overlap. Start rows come from NumPy's global random state, so
    call ``np.random.seed`` beforehand for reproducible output.

    Parameters:
        spec (numpy array): Input spectrogram with at least two axes; axis 0
            is treated as frequency.
        max_mask_pct (float): Fraction of the rows covered by each mask.
            The resulting length is clamped to ``[0, num_freqs]``.
        num_masks (int): Number of masks to apply.

    Returns:
        masked_spec (numpy array): Copy of ``spec`` with the masks applied;
            the input array is left untouched.
    """
    num_freqs = spec.shape[0]
    mask_length = min(max(int(max_mask_pct * num_freqs), 0), num_freqs)

    # randint's upper bound is exclusive, hence the +1: it makes the last
    # valid start row reachable (so the final rows can be masked too) and
    # keeps the bound positive when a mask spans the whole axis.
    start_idxs = np.random.randint(0, num_freqs - mask_length + 1, size=num_masks)

    # Apply masks to a copy of the spectrogram
    masked_spec = np.copy(spec)
    for start_idx in start_idxs:
        masked_spec[start_idx:start_idx + mask_length, :] = 0

    return masked_spec

In [5]:
def apply_time_mask(spec, max_mask_pct=0.2, num_masks=5):
    """
    Apply time masking with multiple segments to a spectrogram.

    Every mask zeroes a band of ``int(max_mask_pct * num_time_steps)``
    consecutive columns (axis 1). The band length is fixed; only the start
    column is random, and bands may overlap. Start columns come from NumPy's
    global random state, so call ``np.random.seed`` beforehand for
    reproducible output.

    Parameters:
        spec (numpy array): Input spectrogram with at least two axes; axis 1
            is treated as time.
        max_mask_pct (float): Fraction of the columns covered by each mask.
            The resulting length is clamped to ``[0, num_time_steps]``.
        num_masks (int): Number of masks to apply.

    Returns:
        masked_spec (numpy array): Copy of ``spec`` with the masks applied;
            the input array is left untouched.
    """
    num_time_steps = spec.shape[1]
    mask_length = min(max(int(max_mask_pct * num_time_steps), 0), num_time_steps)

    # randint's upper bound is exclusive, hence the +1: it makes the last
    # valid start column reachable (so the final columns can be masked too)
    # and keeps the bound positive when a mask spans the whole axis.
    start_idxs = np.random.randint(0, num_time_steps - mask_length + 1, size=num_masks)

    # Apply masks to a copy of the spectrogram
    masked_spec = np.copy(spec)
    for start_idx in start_idxs:
        masked_spec[:, start_idx:start_idx + mask_length] = 0

    return masked_spec

In [6]:
# Function to save progress
def save_progress(wav_file_path):
    """Overwrite the progress file at PROGRESS_PATH with ``wav_file_path``."""
    with open(PROGRESS_PATH, 'w') as progress_out:
        progress_out.write(wav_file_path)

# Function to load progress
def load_progress():
    """Return the stripped contents of the progress file, or None if it does not exist."""
    if not os.path.exists(PROGRESS_PATH):
        return None

    with open(PROGRESS_PATH, 'r') as progress_in:
        stored = progress_in.read()
    return stored.strip()

In [7]:
def extract_numbers_from_string(input_string):
    """Return every run of consecutive digits in ``input_string`` as a list of strings.

    The runs are listed in order of appearance; the list is empty when the
    string contains no digits.
    """
    return [match.group(0) for match in re.finditer(r'\d+', input_string)]

In [8]:
def load_spectrogram(file_path):
    """Load a spectrogram from an image file.

    Parameters:
        file_path (str): Path of the image file to read.

    Returns:
        spec (numpy array): The image converted with ``np.array``.
    """
    # The context manager closes the underlying file deterministically once
    # the pixel data has been copied into the array.
    with Image.open(file_path) as img:
        spec = np.array(img)

    return spec

In [9]:
def save_spectrogram(spec, save_path, rotate_180=True):
    """Plot a spectrogram array without axes and write it to ``save_path``.

    Parameters:
        spec (numpy array): Data handed to ``librosa.display.specshow``.
        save_path (str): Output image path.
        rotate_180 (bool): When True, the saved image is read back, flipped
            top-to-bottom and written to the same path again. (A 180-degree
            rotation followed by a left-right mirror is exactly a vertical
            flip, which is what is applied here.)
    """
    fig, ax = plt.subplots(figsize=(10, 4))
    librosa.display.specshow(spec, y_axis='mel', fmax=8000, x_axis='time', ax=ax)
    ax.axis('off')  # no axis labels in the exported image
    fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

    if not rotate_180:
        return

    # Re-read the file that was just written and overwrite it with the
    # vertically flipped pixels.
    saved_img = plt.imread(save_path)
    plt.imsave(save_path, np.flipud(saved_img))


In [10]:
def apply_augmentation(save_path, spec, max_mask_pct=0.01, num_masks=10):
    """Write a frequency-masked and a time-masked variant of a spectrogram.

    Both variants are derived independently from ``spec`` and saved as
    ``<save_path>_FREQ.jpg`` and ``<save_path>_TIME.jpg``.

    Parameters:
        save_path (str): Output path prefix (without suffix or extension).
        spec (numpy array): Spectrogram to augment.
        max_mask_pct (float): Forwarded to both masking functions.
        num_masks (int): Forwarded to both masking functions.
    """
    freq_masked = apply_frequency_mask(spec, max_mask_pct, num_masks)
    time_masked = apply_time_mask(spec, max_mask_pct, num_masks)

    variants = (('_FREQ.jpg', freq_masked), ('_TIME.jpg', time_masked))
    for suffix, masked in variants:
        save_spectrogram(masked, save_path + suffix)

In [11]:
def resize_and_save_image(input_path, output_path, target_size=(792, 252)):
    """Resize an image and save it to the output path.

    Parameters:
        input_path (str): Path of the image to read.
        output_path (str): Path the resized image is written to. Its parent
            directory is created when it does not exist yet.
        target_size (tuple): Size forwarded to ``Image.resize``.
    """
    # The context manager releases the source file handle deterministically,
    # even if resizing raises.
    with Image.open(input_path) as img:
        img_resized = img.resize(target_size)

    # os.makedirs('') raises FileNotFoundError, so only create the directory
    # when output_path actually has a directory component.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the resized image
    img_resized.save(output_path)

In [13]:

# Load progress
# NOTE: the stored value is only printed below; no folder is skipped based on
# it (the resume check that used to live here was disabled).
last_wav_file_path = load_progress()

# Seek and get
for folder_name in os.listdir(root_path):
    folder_path = os.path.join(root_path, folder_name)
    if not os.path.isdir(folder_path):
        continue

    print(folder_path)
    print(last_wav_file_path)

    # The first run of digits in the folder name selects the spectrogram
    # files. A directory without any digits cannot be mapped to files, so it
    # is skipped instead of failing with an IndexError.
    number_file = extract_numbers_from_string(folder_name)
    if not number_file:
        print(f'Skipping {folder_path}: no number found in folder name')
        continue
    participant = number_file[0]

    normal_path = os.path.join(folder_path, 'normal')
    resized_path = os.path.join(folder_path, 'resized')

    # Augment the "cleaned" spectrogram
    cleaned_stem = f'spectrogram_cleaned_{participant}_AUDIO'
    original_spec = load_spectrogram(os.path.join(normal_path, cleaned_stem + '.jpg'))
    apply_augmentation(os.path.join(normal_path, cleaned_stem), original_spec)

    # Augment the "cleaned_preprocessed" spectrogram.
    # NOTE(review): the output prefix drops "cleaned_" relative to the input
    # file name; kept unchanged because existing outputs use this naming -
    # confirm this is intended.
    preprocessed_spec = load_spectrogram(
        os.path.join(normal_path, f'spectrogram_cleaned_preprocessed_{participant}_AUDIO.jpg')
    )
    apply_augmentation(
        os.path.join(normal_path, f'spectrogram_preprocessed_{participant}_AUDIO'),
        preprocessed_spec,
    )

    # Resize every .jpg in the folder (originals and the augmented variants
    # written just above) into the "resized" sub-folder.
    for image_file in glob.glob(os.path.join(normal_path, '*.jpg')):
        # Extract the image file name without extension
        image_file_name = os.path.splitext(os.path.basename(image_file))[0]
        print(image_file_name)
        resize_and_save_image(image_file, os.path.join(resized_path, image_file_name + '.jpg'))

    # Save progress once per folder, after all of its images are done, so the
    # progress file never points at a partially processed folder.
    save_progress(folder_path)

d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data\300_P
d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data\316_P
spectrogram_cleaned_300_AUDIO
spectrogram_cleaned_300_AUDIO_FREQ
spectrogram_cleaned_300_AUDIO_TIME
spectrogram_cleaned_preprocessed_300_AUDIO
spectrogram_preprocessed_300_AUDIO_FREQ
spectrogram_preprocessed_300_AUDIO_TIME
d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data\301_P
d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data\316_P
spectrogram_cleaned_301_AUDIO
spectrogram_cleaned_301_AUDIO_FREQ
spectrogram_cleaned_301_AUDIO_TIME
spectrogram_cleaned_preprocessed_301_AUDIO
spectrogram_preprocessed_301_AUDIO_FREQ
spectrogram_preprocessed_301_AUDIO_TIME
d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data\302_P
d:\Sistema\Escritorio\Escritorio\Tesis\DAIC-WOZ\data\316_P
spectrogram_cleaned_302_AUDIO
spectrogram_cleaned_302_AUDIO_FREQ
spectrogram_cleaned_302_AUDIO_TIME
spectrogram_cleaned_preprocessed_302_AUDIO
spectrogram_preprocessed_302_AUDIO_FREQ
spectrogram_prepr