In [1]:
import os
import pydub
import librosa
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Scale factor between float samples and 16-bit PCM integers: signals are
# multiplied by it before the int16 cast and divided by it afterwards.
AMPLITUDE = 32767
# Passed as `duration` to librosa.load for each music file (seconds, per librosa's API).
MUSIC_DURATION = 30  # Following [1]
# Passed as `duration` to librosa.load for the noise recording (seconds, per librosa's API).
NOISE_DURATION = 235 # Following [2]
# Analysis window length in seconds; multiplied by the sampling rate to get n_fft.
FRAME_WIDTH = 0.025  # Following [3]
# Hop between successive frames in seconds; multiplied by the sampling rate to get hop_length.
FRAME_SHIFT = 0.010  # Following [3]

def wav2im(sig, rate, out_path):
    """Render the mel spectrogram of an audio signal and save it as an image.

    The spectrogram uses a window of ``FRAME_WIDTH`` seconds and a hop of
    ``FRAME_SHIFT`` seconds (both converted to samples with ``rate``). The
    figure is saved without axes or padding.

    Args:
        sig: Audio samples, forwarded to librosa as ``y``.
        rate: Sampling rate of ``sig``.
        out_path: Destination image path. Its parent directory is created
            when it does not exist yet.

    Returns:
        None. The only effect is the image written to ``out_path``.
    """
    mel_power = librosa.feature.melspectrogram(
        y=sig,
        sr=rate,
        hop_length=int(FRAME_SHIFT * rate),
        n_fft=int(FRAME_WIDTH * rate),
    )
    # melspectrogram returns a *power* spectrogram by default, so the matching
    # conversion is power_to_db. amplitude_to_db would square the values a
    # second time, doubling every dB value and halving the visible dynamic range.
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    # savefig does not create missing directories on its own.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(1, 1)
    try:
        # NOTE(review): y_axis='log' is kept from the original, although the
        # data is mel-scaled ('mel' would be the matching axis type). Changing
        # it alters the geometry of the generated images -- confirm before switching.
        librosa.display.specshow(mel_db, y_axis='log', x_axis='time', ax=ax)
        ax.axis('off')
        ax.axis('tight')
        fig.tight_layout()
        ax.set_axis_off()
        # dpi is tied to the number of mel bands (rows of the spectrogram).
        fig.savefig(
            out_path,
            bbox_inches='tight',
            pad_inches=0,
            dpi=mel_db.shape[0],
            transparent=False,
        )
    finally:
        # Close only the figure created here, even if rendering fails, so that
        # figures are not leaked across the many calls made by the batch loop
        # and unrelated open figures are left alone.
        plt.close(fig)

def _float_to_pcm16(samples):
    """Convert already-scaled float samples to int16, saturating instead of overflowing."""
    limits = np.iinfo(np.int16)
    return np.clip(samples, limits.min, limits.max).astype(np.int16)


def overlay_signals(sig1, rate1, sig2, rate2, noise_attenuation=10):
    """Mix an attenuated second signal on top of a first one using pydub.

    Both float signals are scaled by ``AMPLITUDE`` to 16-bit mono PCM, wrapped
    in ``pydub.AudioSegment`` objects and combined with ``AudioSegment.overlay``.

    Args:
        sig1: Base signal as a NumPy float array (nominally within [-1, 1]).
        rate1: Sampling rate of ``sig1``.
        sig2: Signal to overlay, as a NumPy float array.
        rate2: Sampling rate of ``sig2``.
        noise_attenuation: Divisor applied to ``sig2``'s amplitude before
            mixing. The default of 10 reproduces the previous hard-coded value.

    Returns:
        A tuple ``(overlayed, sig, rate)``: the mixed ``AudioSegment``, its
        samples as a 1-D float array divided by ``AMPLITUDE``, and its frame rate.
    """
    # Saturate before the int16 cast: samples slightly outside [-1, 1]
    # (e.g. after resampling) would otherwise fall outside the int16 range
    # and corrupt the audio instead of merely clipping.
    base_pcm = _float_to_pcm16(sig1 * AMPLITUDE)
    noise_pcm = _float_to_pcm16(sig2 * AMPLITUDE / noise_attenuation)

    base_segment = pydub.AudioSegment(
        base_pcm.tobytes(),
        frame_rate=rate1,
        sample_width=2,  # bytes per sample, i.e. 16-bit PCM
        channels=1
    )

    noise_segment = pydub.AudioSegment(
        noise_pcm.tobytes(),
        frame_rate=rate2,
        sample_width=2,  # bytes per sample, i.e. 16-bit PCM
        channels=1
    )

    # NOTE(review): per pydub's documentation, overlay keeps the length of the
    # segment it is called on and truncates the overlaid one -- confirm this is
    # the intended behaviour for noise recordings longer than the music clip.
    overlayed = base_segment.overlay(noise_segment)
    sig = np.array(overlayed.get_array_of_samples()).reshape(-1,).astype(float) / AMPLITUDE
    rate = overlayed.frame_rate

    return overlayed, sig, rate


np.random.seed(0)

NOISE_PATH = './noises'
MUSIC_PATH = './genres_original'
OUT_PATH = "./data/spectrograms"
NOISE_TYPES = [
    "factory2"
]
# os.listdir returns entries in arbitrary order; sort so that the class index
# assigned to each genre by enumerate() is the same on every run and machine.
GENRES = sorted(os.listdir(MUSIC_PATH))
fmat = []

i = 0
# For every (noise type, genre, file): load the clip, optionally mix in the
# noise, and write the resulting spectrogram image under
# OUT_PATH/<noise type or 'original'>/<genre>/.
for ndomain, noise_type in enumerate(NOISE_TYPES):
    _noise_type = noise_type if noise_type is not None else 'original'

    # The noise recording is the same for every file of this domain, so it is
    # loaded once here instead of once per music file.
    if noise_type is not None:
        (noise, nrate) = librosa.load(os.path.join(NOISE_PATH, noise_type + '.wav'), mono=True, duration=NOISE_DURATION)

    for nclass, genre in enumerate(GENRES):
        gen_dir = os.path.join(MUSIC_PATH, genre)
        out_dir = os.path.join(OUT_PATH, _noise_type, genre)
        # Make sure the destination exists before any image is written.
        os.makedirs(out_dir, exist_ok=True)
        filenames = os.listdir(gen_dir)
        print("Genre: {}, Noise: {}".format(genre, noise_type))
        print("Class: {}, Domain: {}".format(nclass, ndomain))
        for filename in tqdm(filenames):
            try:
                (sig, rate) = librosa.load(os.path.join(gen_dir, filename), mono=True, duration=MUSIC_DURATION)
            except Exception:
                print("Error while reading file {}".format(filename))
                # Skip unreadable files. Without this, the previous iteration's
                # (already noise-mixed) signal would be saved under this name.
                continue

            if noise_type is not None:
                _, sig, rate = overlay_signals(sig1=sig, rate1=rate, sig2=noise, rate2=nrate)

            wav2im(sig, rate, out_path=os.path.join(out_dir, filename).replace('.wav', '') + '.jpg')

Genre: blues, Noise: factory2
Class: 0, Domain: 0


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [01:31<00:00,  1.09it/s]


Genre: classical, Noise: factory2
Class: 1, Domain: 0


100%|██████████| 100/100 [01:26<00:00,  1.16it/s]


Genre: country, Noise: factory2
Class: 2, Domain: 0


100%|██████████| 100/100 [01:09<00:00,  1.44it/s]


Genre: disco, Noise: factory2
Class: 3, Domain: 0


100%|██████████| 100/100 [01:07<00:00,  1.48it/s]


Genre: hiphop, Noise: factory2
Class: 4, Domain: 0


100%|██████████| 100/100 [01:24<00:00,  1.18it/s]


Genre: jazz, Noise: factory2
Class: 5, Domain: 0


  (sig, rate) = librosa.load(os.path.join(gen_dir, filename), mono=True, duration=MUSIC_DURATION)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error while reading file jazz.00054.wav


100%|██████████| 100/100 [01:18<00:00,  1.27it/s]


Genre: metal, Noise: factory2
Class: 6, Domain: 0


100%|██████████| 100/100 [01:15<00:00,  1.33it/s]


Genre: pop, Noise: factory2
Class: 7, Domain: 0


100%|██████████| 100/100 [01:11<00:00,  1.39it/s]


Genre: reggae, Noise: factory2
Class: 8, Domain: 0


100%|██████████| 100/100 [01:13<00:00,  1.36it/s]


Genre: rock, Noise: factory2
Class: 9, Domain: 0


100%|██████████| 100/100 [01:13<00:00,  1.35it/s]
