In [1]:
import sys
sys.path.append("..")

from src.data.utils.spectrogram_normalizer import SpectrogramNormalizer
from src.data.utils.label_normalizer import LabelNormalizer
from src.data.datasets.melody_dataset import MelodyDataset
from src.data.datasets.audio_dataset import AudioDataset

In [2]:
# Build the training AudioDataset from the processed split on disk and keep a
# handle to the pipeline it carries; the following cells call that pipeline's
# steps one at a time.
TRAIN_DIR = "../datasets/melody_extraction/processed/train"
train_dataset = AudioDataset.from_path(TRAIN_DIR)
pipeline = train_dataset.pipeline

Slicing audio: 100%|██████████| 103/103 [00:18<00:00,  5.45it/s]


In [3]:
def _db_spectrogram(audio):
    """Return the dB-scaled spectrogram for one item of the audio dataset.

    Applies, in order: silence trimming, the pipeline's audio preprocessing,
    spectrogram computation, and amplitude-to-dB conversion.
    """
    # The return value of trim_silence() is discarded, so it presumably edits
    # the item in place -- TODO confirm against the audio class.
    audio.trim_silence()
    prepared = pipeline._preprocess_audio(audio)
    # Only the `.spectrogram` attribute of the returned object is converted.
    raw = pipeline._get_spectrogram(prepared).spectrogram
    return pipeline.amplitude_to_db(raw)


# One dB spectrogram per item of the training set, in dataset order.
spectrograms = [_db_spectrogram(item) for item in train_dataset.audio]

In [5]:
# Fit a normalizer on the dB spectrograms to obtain the dataset statistics.
normalizer1 = SpectrogramNormalizer()
normalizer1.fit(spectrograms)

print(f"Вычисленное среднее: {normalizer1.mean}")
print(f"Вычисленное стандартное отклонение: {normalizer1.std}")

# normalizer2 exists only to *measure* the statistics of the normalized data.
# It is created without a preset mean/std: fit() recomputes both, so values
# passed to the constructor were simply overwritten (the recorded run printed
# ~0 and ~1 after fit, not the preset statistics).
normalizer2 = SpectrogramNormalizer()
normalizer2.fit(
    spectrograms=[
        normalizer1.transform(spectrogram)
        for spectrogram in spectrograms
    ],
)

print(f"Среднее после нормализации: {normalizer2.mean}")
print(f"Стандартное отклонение после нормализации: {normalizer2.std}")

# Turn the visual check into a hard one: after normalization the data must have
# (approximately) zero mean and unit standard deviation.
TOLERANCE = 1e-4
assert abs(normalizer2.mean) < TOLERANCE, (
    f"normalized mean should be ~0, got {normalizer2.mean}"
)
assert abs(normalizer2.std - 1.0) < TOLERANCE, (
    f"normalized std should be ~1, got {normalizer2.std}"
)

Calculating mean and std: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Вычисленное среднее: -1.0866488218307495
Вычисленное стандартное отклонение: 17.541074344830257


Calculating mean and std: 100%|██████████| 4/4 [00:02<00:00,  1.88it/s]

Среднее после нормализации: -9.405615841728832e-09
Стандартное отклонение после нормализации: 0.9999999997562181





In [2]:
# Load the same processed training split, this time as a MelodyDataset.
# NOTE(review): this rebinds `train_dataset` and `pipeline`, which the
# AudioDataset cell further up also assigns; any cell executed after this one
# sees the melody versions.
train_dataset = MelodyDataset.from_path(
    "../datasets/melody_extraction/processed/train"
)
pipeline = train_dataset.pipeline

Slicing audio and melody: 100%|██████████| 103/103 [00:07<00:00, 13.20it/s]


In [6]:
# Turn every sliced melody into a label via the pipeline, keeping dataset order.
labels = list(map(pipeline._get_label, train_dataset.sliced_melody))

In [7]:
# Fit the label normalizer on all training labels, then report the ranges it
# recorded for intervals, durations and sequence lengths.
label_normalizer = LabelNormalizer()
label_normalizer.fit_from_labels(labels)

fitted_ranges = [
    f"interval_min = {label_normalizer.interval_min}",
    f"interval_max = {label_normalizer.interval_max}",
    f"dur_min = {label_normalizer.dur_min}",
    f"dur_max = {label_normalizer.dur_max}",
    f"seq_len_min = {label_normalizer.seq_len_min}",
    f"seq_len_max = {label_normalizer.seq_len_max}",
]
# A single print of the joined lines emits the same text as one print per line.
print("\n".join(fitted_ranges))

interval_min = -22.0
interval_max = 26.0
dur_min = 7.856235606595874e-05
dur_max = 9.999990463256836
seq_len_min = 2
seq_len_max = 49


In [24]:
# Show the un-normalized interval tensor of one example label.
SAMPLE_INDEX = 42
labels[SAMPLE_INDEX].intervals

tensor([-inf, inf,  0., -inf, inf,  2., -2.,  9.])

In [25]:
# Normalize the example label with the fitted normalizer and display the
# resulting intervals.
example_label = labels[42]
label = label_normalizer.transform_label(example_label)
label.intervals

tensor([-0.1000,  1.1000,  0.4583, -0.1000,  1.1000,  0.5000,  0.4167,  0.6458])

In [26]:
# Undo the normalization to check that the round trip restores the original
# intervals. The result is bound to a new name instead of overwriting `label`,
# so re-running this cell never feeds an already-restored label back into
# inverse_transform_label.
restored_label = label_normalizer.inverse_transform_label(label)
restored_label.intervals

tensor([-inf, inf,  0., -inf, inf,  2., -2.,  9.])