In [1]:
import sys
# Put the parent directory (resolved against the kernel's working directory)
# on the import path so the `src.*` imports below can be found.
# NOTE(review): presumably the notebook sits one level below the project root — confirm.
sys.path.append("..")

from src.data.utils.spectrogram_normalizer import SpectrogramNormalizer
from src.data.utils.label_normalizer import LabelNormalizer
from src.data.datasets.melody_dataset import MelodyDataset
from src.data.datasets.audio_dataset import AudioDataset

In [2]:
# Load the processed training split as an AudioDataset and keep a handle on
# the dataset's own pipeline object, which the following cells call directly.
train_dataset = AudioDataset.from_path(
    "../datasets/melody_extraction/processed/train"
)
pipeline = train_dataset.pipeline

Slicing audio: 100%|██████████| 103/103 [00:07<00:00, 14.04it/s]


In [3]:
# Build one dB-scaled spectrogram per training audio item; the list is the
# input for fitting the spectrogram normalizer.
spectrograms = []

for audio in train_dataset.audio:
    # NOTE(review): the return value is discarded, so this presumably trims
    # the audio object in place — confirm against the audio class.
    audio.trim_silence()
    preprocessed = pipeline._preprocess_audio(audio)

    # Only the `.spectrogram` payload of the returned object is converted to dB.
    raw = pipeline._get_spectrogram(preprocessed)
    spectrograms.append(pipeline.amplitude_to_db(raw.spectrogram))

In [5]:
# Fit the normalization statistics on the dB spectrograms of the training set.
normalizer1 = SpectrogramNormalizer()
normalizer1.fit(spectrograms)

print(f"Вычисленное среднее: {normalizer1.mean}")
print(f"Вычисленное стандартное отклонение: {normalizer1.std}")

# Sanity check: fit a second, fresh normalizer on the already-normalized data.
# It is created without mean/std arguments on purpose: the recorded output
# shows that fit() replaces whatever was passed to the constructor, so seeding
# it with normalizer1's statistics only obscured what is being measured.
normalizer2 = SpectrogramNormalizer()
normalizer2.fit(
    spectrograms=[normalizer1.transform(item) for item in spectrograms],
)

print(f"Среднее после нормализации: {normalizer2.mean}")
print(f"Стандартное отклонение после нормализации: {normalizer2.std}")

# Turn the eyeball check into a hard failure: normalized data must have
# (approximately) zero mean and unit standard deviation. The tolerance leaves
# ample room for float rounding (the recorded run is off by ~1e-8).
stats_tolerance = 1e-4
assert abs(normalizer2.mean) < stats_tolerance, normalizer2.mean
assert abs(normalizer2.std - 1.0) < stats_tolerance, normalizer2.std

Calculating mean and std: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]


Вычисленное среднее: -1.0866488218307495
Вычисленное стандартное отклонение: 17.541074344830257


Calculating mean and std: 100%|██████████| 4/4 [00:02<00:00,  1.88it/s]

Среднее после нормализации: -9.405615841728832e-09
Стандартное отклонение после нормализации: 0.9999999997562181





In [2]:
# Load the same processed training split again, this time as a MelodyDataset.
# NOTE(review): this rebinds `train_dataset` and `pipeline`, which an earlier
# cell bound to an AudioDataset and its pipeline; cells that need the audio
# variant must be run before this one.
train_dataset = MelodyDataset.from_path(
    "../datasets/melody_extraction/processed/train"
)
pipeline = train_dataset.pipeline

Slicing audio and melody: 100%|██████████| 103/103 [00:08<00:00, 11.52it/s]


In [3]:
# One label per sliced melody, produced by the pipeline's label builder.
labels = list(map(pipeline._get_label, train_dataset.sliced_melody))

In [4]:
# Fit the label normalizer on every training label, then report the ranges it
# recorded (frequency, duration and sequence-length minima/maxima).
label_normalizer = LabelNormalizer()
label_normalizer.fit_from_labels(labels)

print(
    f"freqs_min = {label_normalizer.freq_min}",
    f"freqs_max = {label_normalizer.freq_max}",
    f"durations_min = {label_normalizer.dur_min}",
    f"durations_max = {label_normalizer.dur_max}",
    f"seq_len_min = {label_normalizer.seq_len_min}",
    f"seq_len_max = {label_normalizer.seq_len_max}",
    sep="\n",
)

freqs_min = 0.0
freqs_max = 1567.981689453125
durations_min = 0.25
durations_max = 10.0
seq_len_min = 3
seq_len_max = 58


In [13]:
# Show the un-normalized frequencies of one sample label; they serve as the
# reference for the transform / inverse-transform cells that follow.
# NOTE(review): index 42 looks like an arbitrary example — confirm.
labels[42].freqs

tensor([493.8833,   0.0000, 880.0000, 880.0000,   0.0000, 440.0000, 493.8833,
        440.0000, 739.9888])

In [14]:
# Normalize the same sample label with the fitted normalizer and display the
# resulting frequencies (the recorded output lies in [0, 1], zeros stay zero).
label = label_normalizer.transform_label(labels[42])
label.freqs

tensor([0.6265, 0.0000, 0.6847, 0.6847, 0.0000, 0.6148, 0.6265, 0.6148, 0.6672])

In [15]:
# Map the normalized label back to its original scale.
# The result gets its own name instead of overwriting `label`: re-running this
# cell then always inverts the normalized label exactly once, rather than
# applying the inverse transform to an already-restored label.
label_restored = label_normalizer.inverse_transform_label(label)

# Round-trip check against the source label. The tolerances absorb float
# rounding (the recorded output deviates from the input by about 1e-4).
assert label_restored.freqs.allclose(
    labels[42].freqs, rtol=1e-4, atol=1e-3
), label_restored.freqs

label_restored.freqs

tensor([493.8834,   0.0000, 879.9999, 879.9999,   0.0000, 439.9999, 493.8834,
        439.9999, 739.9888])