# Entorno

In [1]:
import torch
import librosa
import torchaudio
import numpy as np
from pydub import AudioSegment
from torch.utils.data import Dataset
from scripts.plot import plot_audio_sample
from scripts.extract import load_heart_noised_dict

# Diccionario de relación de datos

In [2]:
# Build the lookup that relates the clean recordings directory to the
# noised recordings directory; it is indexed below by a clean file path.
heart_noised_dict = load_heart_noised_dict(
    clean_dir="data/heart_sound_test_small",
    noised_dir="data/heart_noised_test_small",
)

In [4]:
# Inspect the entry stored for one clean validation recording.
heart_noised_dict["data/heart_sound_test_small/val/healthy/e00047.wav"]

['data/heart_noised_test_small/-val-healthy-e00047_4dB_seg_560noise.wav',
 'data/heart_noised_test_small/-val-healthy-e00047_2dB_seg_303noise.wav',
 'data/heart_noised_test_small/-val-healthy-e00047_1dB_seg_326noise.wav',
 'data/heart_noised_test_small/-val-healthy-e00047_4dB_seg_81noise.wav',
 'data/heart_noised_test_small/-val-healthy-e00047_1dB_seg_142noise.wav']

# ¿Cómo suena?

In [5]:
# Load one clean recording and one of its noised versions with pydub.
audio_clean = AudioSegment.from_file("data/heart_sound_test_small/val/healthy/e00047.wav")
audio_noisy = AudioSegment.from_file("data/heart_noised_test_small/-val-healthy-e00047_4dB_seg_560noise.wav")

In [6]:
# Show the clean recording as this cell's output.
audio_clean

In [7]:
# Show the noised recording as this cell's output.
audio_noisy

# ¿Cómo se ve?

In [9]:
# Convert both pydub segments into NumPy arrays of their sample values.
clean_array = np.array(audio_clean.get_array_of_samples())
noisy_array = np.array(audio_noisy.get_array_of_samples())

# Waveform plots of both arrays; currently disabled.
# plot_audio_sample(clean_array, 'Audio Limpio')
# plot_audio_sample(noisy_array, 'Audio con Ruido')

In [10]:
# Compare the number of samples in the clean and the noised array.
clean_array.shape, noisy_array.shape

((53056,), (53056,))

# Transformations

In [15]:
def normalize_audio(audio):
    """
    Standardize an audio tensor to zero mean and unit standard deviation.

    Args:
        audio (torch.Tensor): Floating-point audio samples of any shape.
            The input tensor is not modified.

    Returns:
        torch.Tensor: The standardized samples reshaped to
        ``(1, 1, num_samples)``. When the standard deviation is zero
        (constant signal) or not finite (e.g. fewer than two samples) the
        signal is only mean-centred instead of being divided by it.
    """
    centered = audio - audio.mean()
    std = audio.std()
    # Dividing by a zero or NaN deviation would fill the result with
    # NaN/inf and poison every feature concatenated after it, so scaling
    # is skipped for such degenerate signals.
    if torch.isfinite(std) and std > 0:
        centered = centered / std
    return centered.reshape(1, 1, -1)


def create_spectrogram(audio, sample_rate, n_fft=400, hop_length=160, win_length=400):
    """
    Compute the mel spectrogram of an audio tensor and flatten it.

    Args:
        audio (torch.Tensor): Tensor holding the audio samples.
        sample_rate (int): Sampling rate of the audio.
        n_fft (int): FFT size used by the short-time Fourier transform.
        hop_length (int): Hop between successive windows, in samples.
        win_length (int): Size of the analysis window, in samples.

    Returns:
        torch.Tensor: The mel spectrogram values reshaped to ``(1, 1, -1)``.
    """
    mel_options = {
        "sample_rate": sample_rate,
        "n_fft": n_fft,
        "hop_length": hop_length,
        "win_length": win_length,
    }
    to_mel = torchaudio.transforms.MelSpectrogram(**mel_options)
    mel = to_mel(audio)
    return mel.reshape(1, 1, -1)


def compute_mfcc(audio, sample_rate, n_mfcc=13):
    """
    Compute the mel-frequency cepstral coefficients (MFCC) of an audio signal.

    Args:
        audio (numpy.ndarray): Audio signal.
        sample_rate (int): Sampling rate of the audio.
        n_mfcc (int): Number of MFCC coefficients to compute.

    Returns:
        torch.Tensor: The coefficients computed by librosa, converted to a
        tensor and reshaped to ``(1, 1, -1)``.
    """
    coefficients = torch.Tensor(
        librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    )
    return coefficients.reshape(1, 1, -1)


def full_transform(audio, sample_rate):
    """
    Build a single feature tensor from an audio tensor.

    The standardized waveform, the flattened mel spectrogram and the
    flattened MFCCs are concatenated, in that order, along the last axis.
    The spectrogram and the MFCCs are computed from the audio as given,
    not from the standardized waveform.

    Args:
        audio (torch.Tensor): Audio samples.
        sample_rate (int): Sampling rate of the audio.

    Returns:
        torch.Tensor: The concatenated features, shape ``(1, 1, -1)``.
    """
    features = [
        normalize_audio(audio),
        create_spectrogram(audio, sample_rate),
        compute_mfcc(audio.numpy(), sample_rate),
    ]
    return torch.cat(features, dim=2)

In [16]:
class CustomAudioDataset(Dataset):
    """
    Dataset pairing each clean recording with its noised versions.

    Args:
        data_dict (dict): Maps a clean audio file path to the list of noisy
            audio file paths associated with it.
        transform (callable, optional): Called as
            ``transform(audio, sample_rate)`` on the clean audio and on each
            noisy audio. When falsy, the loaded tensors are returned as is.
    """

    def __init__(self, data_dict, transform=None):
        self.data_dict = data_dict
        self.transform = transform
        # Snapshot the key order once: rebuilding the key list on every
        # item access costs O(len(data_dict)) per sample. Keys added to
        # ``data_dict`` after construction are therefore not indexed.
        self._clean_paths = list(data_dict.keys())

    def __len__(self):
        """Return the number of clean recordings."""
        return len(self._clean_paths)

    def __getitem__(self, idx):
        """
        Load the clean recording at position ``idx`` and its noisy versions.

        Returns:
            tuple: ``(clean_audio, noisy_audios)`` where ``noisy_audios`` is
            the stack of all noisy tensors along a new leading dimension.

        Raises:
            IndexError: If ``idx`` is outside the range of clean recordings.
        """
        clean_audio_path = self._clean_paths[idx]
        noisy_audio_paths = self.data_dict[clean_audio_path]
        clean_audio, sample_rate = torchaudio.load(clean_audio_path)

        # NOTE(review): the clean file is decoded by torchaudio while the
        # noisy files are decoded by pydub -- confirm that both decoders
        # yield the same amplitude scale and channel layout.
        noisy_audios = [
            torch.Tensor(AudioSegment.from_file(path).get_array_of_samples())
            for path in noisy_audio_paths
        ]

        if self.transform:
            clean_audio = self.transform(clean_audio, sample_rate)
            # The noisy audios are transformed with the sample rate that
            # was read from the clean file.
            noisy_audios = [
                self.transform(noisy_audio, sample_rate)
                for noisy_audio in noisy_audios
            ]

        return clean_audio, torch.stack(noisy_audios)

# Dataset over the clean-to-noisy mapping, using full_transform as its transform.
cad = CustomAudioDataset(heart_noised_dict, full_transform)

In [20]:
# Shapes of the clean and of the stacked noisy output for the first item
# (indexing cad[0] twice loads and transforms that item twice).
cad[0][0].shape, cad[0][1].shape

(torch.Size([1, 1, 100438]), torch.Size([5, 1, 1, 100438]))