In [3]:
import sys
sys.path.insert(1, '../src/modeling')

from vq_vae import VQ_VAE
from train_vq import *

In [4]:
import sys
sys.path.insert(1, '../src')

from generate import *

In [5]:
SPECTROGRAMS_SAVE_DIR = "./data/processed/maestro_spectrograms/"
SPECTROGRAMS_PATH = "../data/processed/maestro_spectrograms/"

LEARNING_RATE = 0.0005
BATCH_SIZE = 64
EPOCHS = 200

In [6]:
def load_maestro(path):
    x_train = []
    file_paths = []
    
    for root, _, file_names in os.walk(path):
        for file_name in file_names:
            if file_name.endswith(".npy"):
                file_path = os.path.join(root, file_name)
                spectrogram = np.load(file_path) # (n_bins, n_frames, 1)
                x_train.append(spectrogram)
                file_paths.append(file_path)
    
    x_train = np.array(x_train)
    x_train = x_train[..., np.newaxis] # -> (3000, 256, 64, 1)

    return x_train, file_paths

In [7]:
x_train, _ = load_maestro(SPECTROGRAMS_PATH)

In [8]:
print(x_train.shape)

(163, 256, 1264, 1)


In [9]:
data_variance = np.var(x_train / 255.0)

In [10]:
VQVAE = VQ_VAE(
    input_shape=(256, x_train.shape[2], 1),
    conv_filters=(256, 128, 64, 32),
    conv_kernels=(3, 3, 3, 3),
    conv_strides=(2, 2, 2, (2, 1)),
    data_variance=data_variance,
    embeddings_size=256,
    latent_space_dim=128
)
VQVAE.summary()





In [14]:
VQVAE.compile(LEARNING_RATE)

In [15]:
import tensorflow as tf

In [16]:
# run_options = tf.compat.v1.RunOptions(report_tensor_allocations_upon_oom=True)

In [17]:
VQVAE.train(x_train, BATCH_SIZE, EPOCHS)

Epoch 1/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 36s/step - reconstruction_loss: 340384.3125 - total_loss: 340668.4375 - vq_loss: 8.5343e-04
Epoch 2/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 56s/step - reconstruction_loss: 336396.3750 - total_loss: 336627.0625 - vq_loss: 0.0013
Epoch 3/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 54s/step - reconstruction_loss: 331797.3125 - total_loss: 332141.6250 - vq_loss: 0.0073
Epoch 4/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 60s/step - reconstruction_loss: 323598.7812 - total_loss: 323641.6875 - vq_loss: 0.0506
Epoch 5/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 42s/step - reconstruction_loss: 311189.0000 - total_loss: 311772.6250 - vq_loss: 0.2821
Epoch 6/200
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 74s/step - reconstruction_loss: 292106.7812 - total_loss: 292981.0000 - vq_loss: 1.3427
Epoch 7/200


In [18]:
VQVAE.save("../../model/vq_vae_maestro2011")

Model saved successfully in folder: ../../model/vq_vae_maestro2011


In [11]:
VQVAE.load("../../model/vq_vae_maestro2011")

An unexpected error occurred: VQ_VAE.__init__() missing 1 required positional argument: 'data_variance'


## Generate


In [19]:
sound_generator = SoundGenerator(VQVAE, HOP_LENGTH)

In [20]:
MIN_MAX_VALUES_PATH = "../data/raw/maestro-v3.0.0/2011/min_max_values.pkl"
SAVE_DIR_ORIGINAL = "../samples/vq_vae_maestro2011/original/"
SAVE_DIR_GENERATED = "../samples/vq_vae_maestro2011/generated/"

In [45]:
# Load spectrograms + min max values
with open(MIN_MAX_VALUES_PATH, "rb") as f:
    min_max_values = pickle.load(f)
specs, file_paths = load_fsdd(SPECTROGRAMS_PATH)


In [46]:
# Sample spectrograms + min max values

file_paths_selected = file_paths

sampled_indexes = np.random.choice(range(len(specs)), 5)
sampled_spectrogrmas = specs[sampled_indexes]

file_paths_selected = [file_paths_selected[index] for index in sampled_indexes]
file_paths_selected =  list(map(lambda st: str.replace(st, "\\", "/"), file_paths_selected))
file_paths_selected =  list(map(lambda st: str.replace(st, "..", "."), file_paths_selected))

sampled_min_max_values = [min_max_values[file_path] for file_path in file_paths_selected]

print(file_paths_selected)
print(sampled_min_max_values)

['./data/processed/maestro_spectrograms/MIDI-Unprocessed_03_R2_2011_MID--AUDIO_R2-D1_06_Track06_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_09_R1_2011_MID--AUDIO_R1-D3_15_Track15_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_17_R3_2011_MID--AUDIO_R3-D6_04_Track04_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_16_R1_2011_MID--AUDIO_R1-D6_13_Track13_wav.wav.npy', './data/processed/maestro_spectrograms/MIDI-Unprocessed_11_R1_2011_MID--AUDIO_R1-D4_07_Track07_wav.wav.npy']
[{'min': -48.496315, 'max': 31.503687}, {'min': -59.46514, 'max': 20.53486}, {'min': -51.82123, 'max': 28.178768}, {'min': -56.189335, 'max': 23.810665}, {'min': -51.204254, 'max': 28.795748}]


In [47]:
# Generate audio for sampled spectrograms
signals, _ = sound_generator.generate(sampled_spectrogrmas, sampled_min_max_values)

original_signals = sound_generator.convert_spectrograms_to_audio(sampled_spectrogrmas, sampled_min_max_values)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step


In [48]:
save_signals(signals, SAVE_DIR_GENERATED)
save_signals(original_signals, SAVE_DIR_ORIGINAL)