In [71]:
import numpy as np
import os

In [72]:
import sys
sys.path.insert(1, '../src/modeling')

from vq_vae import VQ_VAE
from train_vq import *

In [73]:
import sys
sys.path.insert(1, '../src')

from generate import *

In [74]:
SPECTROGRAMS_SAVE_DIR = "./data/processed/maestro_spectrograms/"
SPECTROGRAMS_PATH = "../data/processed/maestro_spectrograms/"

LEARNING_RATE = 0.0005
BATCH_SIZE = 128
EPOCHS = 150

In [75]:
def load_maestro(path):
    x_train = []
    file_paths = []
    
    for root, _, file_names in os.walk(path):
        for file_name in file_names:
            if file_name.endswith(".npy"):
                file_path = os.path.join(root, file_name)
                spectrogram = np.load(file_path) # (n_bins, n_frames, 1)
                x_train.append(spectrogram)
                file_paths.append(file_path)
    
    x_train = np.array(x_train)
    x_train = x_train[..., np.newaxis] # -> (3000, 256, 64, 1)

    return x_train, file_paths

In [76]:
x_train, _ = load_maestro(SPECTROGRAMS_PATH)

In [77]:
print(x_train.shape)

(163, 256, 1801, 1)


In [78]:
data_variance = np.var(x_train / 255.0)

In [79]:
VQVAE = VQ_VAE(
    input_shape=(256, x_train.shape[2], 1),
    conv_filters=(512, 256, 128, 64, 32),
    conv_kernels=(3, 3, 3, 3, 3),
    conv_strides=(2, 2, 2, 2, (2, 1)),
    data_variance=data_variance,
    embeddings_size=256,
    latent_space_dim=128
)
# VQVAE.summary()


In [80]:
VQVAE.compile(LEARNING_RATE)

In [81]:
VQVAE.train(x_train, BATCH_SIZE, EPOCHS)

Epoch 1/150


ValueError: Dimensions must be equal, but are 1801 and 1808 for '{{node sub}} = Sub[T=DT_FLOAT](data, vq_vae_6_1/decoder_1/sigmoid_output_1/Sigmoid)' with input shapes: [?,256,1801,1], [?,256,1808,1].

In [35]:
VQVAE.save("../../model/vq_vae_maestro2011")

Model saved successfully in folder: ../../model/vq_vae_maestro2011


## Generate


In [36]:
sound_generator = SoundGenerator(VQVAE, HOP_LENGTH)

In [37]:
MIN_MAX_VALUES_PATH = "../data/raw/maestro-v3.0.0/2011/min_max_values.pkl"
SAVE_DIR_ORIGINAL = "../samples/vq_vae_maestro2011/original/"
SAVE_DIR_GENERATED = "../samples/vq_vae_maestro2011/generated/"

In [38]:
# Load spectrograms + min max values
with open(MIN_MAX_VALUES_PATH, "rb") as f:
    min_max_values = pickle.load(f)
specs, file_paths = load_fsdd(SPECTROGRAMS_PATH)


In [39]:
# Sample spectrograms + min max values

file_paths_selected = file_paths

sampled_indexes = np.random.choice(range(len(specs)), 5)
sampled_spectrogrmas = specs[sampled_indexes]

file_paths_selected = [file_paths_selected[index] for index in sampled_indexes]
file_paths_selected =  list(map(lambda st: str.replace(st, "\\", "/"), file_paths_selected))
file_paths_selected =  list(map(lambda st: str.replace(st, "..", "."), file_paths_selected))

sampled_min_max_values = [min_max_values[file_path] for file_path in file_paths_selected]

print(file_paths_selected)
print(sampled_min_max_values)

['./data/processed/maestro_spectrograms/MIDI-Unprocessed_09_R1_2011_MID--AUDIO_R1-D3_12_Track12_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_23_R1_2011_MID--AUDIO_R1-D9_03_Track03_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_10_R1_2011_MID--AUDIO_R1-D4_05_Track05_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_01_R1_2011_MID--AUDIO_R1-D1_06_Track06_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_16_R2_2011_MID--AUDIO_R2-D4_09_Track09_wav.wav.npy']
[{'min': -84.00179, 'max': -4.001792}, {'min': -83.94058, 'max': -3.9405813}, {'min': -88.7076, 'max': -8.707606}, {'min': -48.742622, 'max': 31.257378}, {'min': -82.653946, 'max': -2.6539454}]


In [40]:
# Generate audio for sampled spectrograms
signals, _ = sound_generator.generate(sampled_spectrogrmas, sampled_min_max_values)

original_signals = sound_generator.convert_spectrograms_to_audio(sampled_spectrogrmas, sampled_min_max_values)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step


In [41]:
save_signals(signals, SAVE_DIR_GENERATED)
save_signals(original_signals, SAVE_DIR_ORIGINAL)