In [1]:
import sys

# Put the parent directory on the import path (presumably where the `rhythm`
# package imported below lives -- verify against the repo layout).
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate '../' entry to sys.path each time.
if '../' not in sys.path:
    sys.path.append('../')

In [2]:
import random

import numpy as np
from IPython.display import Audio, display

from rhythm.models import build_cond_mlp_enc, build_cond_mlp_dec, RhythmVAE
from rhythm.preprocessing import LEN_SEQ, N_DRUMS
from rhythm.util import create_drum_pretty_midi

In [3]:
# Load the preprocessed drum patterns (onset and velocity matrices) from disk.
training_data_path = '../../data/rhythm/groove_v2_nob2_qnr4_nod8_mo8.npz'

# np.load on an .npz archive returns a lazily-read NpzFile that keeps the
# underlying file open. Reading the arrays inside a `with` block materialises
# them and then releases the file handle instead of leaking it.
with np.load(training_data_path) as data:
    onset_matrices = data['drum_onset_matrices']
    vel_matrices = data['drum_vel_matrices']

# Later cells index both arrays with the same sample index, so they must line up.
assert onset_matrices.shape == vel_matrices.shape, (
    'onset and velocity matrices have different shapes: '
    f'{onset_matrices.shape} vs {vel_matrices.shape}'
)

print(onset_matrices.shape)
print(vel_matrices.shape)

(20204, 32, 8)
(20204, 32, 8)


In [4]:
# Listen to a handful of randomly chosen patterns from the training set.
bpm = 120.0
n_samples = 4

# Draw distinct pattern indices (sampling without replacement).
sample_indices = random.sample(range(len(onset_matrices)), n_samples)
print(sample_indices)

for idx in sample_indices:
    # Pick the onset / velocity matrices belonging to the same pattern.
    onset_matrix, vel_matrix = onset_matrices[idx], vel_matrices[idx]

    # Convert the pattern to MIDI, synthesise it and embed an audio player.
    pm = create_drum_pretty_midi(onset_matrix, vel_matrix, bpm=bpm)
    waveform = pm.fluidsynth()
    display(Audio(waveform, rate=44100))

[2616, 998, 6907, 6696]


In [5]:
# Model hyperparameters passed to the encoder/decoder builders.
# Sequence length and number of drum voices come from the preprocessing module.
len_seq, n_notes = LEN_SEQ, N_DRUMS

# Latent dimensionality and dropout rate; these look like they correspond to
# the `nz128` / `do40` tags in the checkpoint filename -- confirm when changing.
n_z = 128
dropout = 0.4

In [6]:
# Rebuild the conditional encoder / decoder and wrap them in the VAE.
enc = build_cond_mlp_enc(len_seq, n_notes, n_z, dropout)
dec = build_cond_mlp_dec(len_seq, n_notes, n_z, dropout)
vae = RhythmVAE(enc, dec)

# Push one all-ones dummy batch through the model before restoring the
# checkpoint (presumably so the model's variables are created and
# load_weights has something to fill -- TODO confirm).
# Inputs: two (1, len_seq, n_notes) sequences and one (1, n_notes) vector.
dummy_seq_shape = (1, len_seq, n_notes)
vae.predict([
    np.ones(dummy_seq_shape),
    np.ones(dummy_seq_shape),
    np.ones((1, n_notes)),
])

# Restore the trained weights.
model_path = '../../models/vae_mlp_cond_nz128_do40_e40_vl0.1519.h5'
vae.load_weights(model_path)

Model: "cond_encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
onset_input (InputLayer)        [(None, 32, 8)]      0                                            
__________________________________________________________________________________________________
vel_input (InputLayer)          [(None, 32, 8)]      0                                            
__________________________________________________________________________________________________
flatten (Flatten)               (None, 256)          0           onset_input[0][0]                
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 256)          0           vel_input[0][0]                  
_______________________________________________________________________________________

In [7]:
# Generate new patterns by decoding random latent vectors.
bpm = 120.0
n_samples = 8

# Conditioning vector with one entry per drum; all zeros by default.
cond = np.zeros((1, n_notes))
# Uncomment any of the lines below to condition different drums
# cond[0, 4] = 3.0
# cond[0, 6] = 3.0
# cond[0, 6] = 0.0
# cond[0, 7] = 2.0
# cond[0, 0] = -2.0
# cond[0, 1] = -2.0


for _ in range(n_samples):
    # Draw one latent vector from a standard normal distribution.
    random_z = np.random.normal(loc=0.0, scale=1.0, size=(1, n_z))

    # Decode it and strip the length-1 axes from both decoder outputs.
    onset_m, vel_m = (np.squeeze(out) for out in dec.predict([random_z, cond]))

    # Use maximum volume for onsets: the decoded velocities (vel_m) are not
    # used for playback, an all-ones velocity matrix is passed instead.
    full_velocity = np.ones((len_seq, n_notes))
    pm = create_drum_pretty_midi(onset_m, full_velocity, bpm=bpm)
    display(Audio(pm.fluidsynth(), rate=44100))