In [77]:
import numpy as np
from tqdm import tqdm

import models
import preprocessing
import score_utils


In [226]:
scores = preprocessing.get_score_tensors(n=1000)

Loading score tensors from disk


100%|██████████| 1001/1001 [00:00<00:00, 2959.26it/s]


In [228]:
metas = preprocessing.get_metadata_tensors(n=1000)

Loading metadata tensors from disk


100%|██████████| 1001/1001 [00:00<00:00, 1624.36it/s]


In [229]:
metas[0].shape

(5, 720, 18)

In [239]:
all_notes = np.hstack(np.asarray([np.ravel(score) for score in scores]))

In [241]:
all_notes.shape

(3257408,)

In [6]:
max_pitch = np.max([np.max(t) for t in scores])
min_pitch = np.min([np.min(t[t > 0]) for t in scores])

In [10]:
min_pitch = 31.0
max_pitch = 88.0

In [11]:
preprocessing.make_input_sequence(scores[0], metas[0], 0, max_pitch).shape

(720, 4, 32, 5, 3)

In [12]:
model = models.conv2dlstm_model()
model.load_weights('./weights/conv2dlstm.weights.best.hdf5')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
conv_lst_m2d_2_input (InputLaye (None, None, 32, Non 0                                            
__________________________________________________________________________________________________
conv_lst_m2d_2 (ConvLSTM2D)     (None, None, 32, Non 40448       conv_lst_m2d_2_input[0][0]       
__________________________________________________________________________________________________
max_pooling3d_1 (MaxPooling3D)  (None, None, 16, Non 0           conv_lst_m2d_2[0][0]             
__________________________________________________________________________________________________
conv_lst_m2d_3 (ConvLSTM2D)     (None, 16, None, 64) 221440      max_pooling3d_1[0][0]            
__________________________________________________________________________________________________
global_ave

In [13]:
score_length = 4 * 16  # let's start with 4 bars
score_length

64

In [14]:
# The model is conditioned on a fixed beat pattern
beats = np.zeros((score_length, 16))
for i in range(score_length):
    beats[i, i % 16] = 1
beats

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  1.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  1., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  1.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.]])

In [15]:
# Let's first try a score with 5 voices
n_voices = 5
sequence_steps = 4
window_size = 32
gibbs_steps = 100


In [63]:
def sample_softmax(preds, temperature=1.0):
    # taken from https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
    # helper function to sample an index from a probability array
    preds = preds.astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [64]:
def sample_softmax_multi(preds, temperature=1.0):
    # taken from https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py
    # helper function to sample an index from a probability array
    preds = preds.astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds, axis=1).reshape((-1, 1))
    probas = np.apply_along_axis(lambda pred: np.random.multinomial(1, pred, 1), 1, preds)
    return probas.argmax(axis=-1)

In [65]:
score = np.zeros((n_voices, score_length))
meta = np.zeros((n_voices, score_length, 18))
for i in range(n_voices):
    meta[i, :, 2:] = beats

In [119]:
def sample_1(n_voices, score_length, gibbs_steps, temperature):
    score = np.zeros((n_voices, score_length))
    meta = np.zeros((n_voices, score_length, 18))
    beats = np.zeros((score_length, 16))
    for i in range(score_length):
        beats[i, i % 16] = 1
    for i in range(n_voices):
        meta[i, :, 2:] = beats
    for step in tqdm(range(gibbs_steps)):
        for time_step in range(score_length):
            for voice in range(n_voices):
                input_ = preprocessing.make_input_sequence(score, meta, voice, max_pitch)
                note, slur = model.predict([
                    input_[time_step:time_step+1], beats[time_step:time_step+1]])
                sampled_note = sample_softmax(note[0], temperature)
                slur_proba = np.squeeze(np.array([1 - slur[0], slur[0]]))
                sampled_slur = sample_softmax(slur_proba, temperature)
                if sampled_note > 0:
                    score[voice, time_step] = sampled_note + min_pitch - 1
                else:
                    meta[voice, time_step, score_utils.idx_rest] = 1
                meta[voice, time_step, score_utils.idx_slur] = sampled_slur
    return score, meta

In [242]:
def sample_2(n_voices, score_length, gibbs_steps, temperature):
    score = np.random.choice(all_notes, size=(n_voices, score_length))
    meta = np.zeros((n_voices, score_length, 18))
    beats = np.zeros((score_length, 16))
    for i in range(score_length):
        beats[i, i % 16] = 1
    for i in range(n_voices):
        meta[i, :, 2:] = beats
    for step in tqdm(range(gibbs_steps)):
        for voice in range(n_voices):
            input_ = preprocessing.make_input_sequence(score, meta, voice, max_pitch)
            note, slur = model.predict([
                input_, beats])
            sampled_notes = np.squeeze(sample_softmax_multi(note, temperature))
            slur_proba = np.hstack([1 - slur, slur])
            sampled_slurs = np.squeeze(sample_softmax_multi(slur_proba, temperature))
            
            score[voice] = sampled_notes + ((sampled_notes > 1) * (min_pitch - 1))
            meta[voice, :, score_utils.idx_rest] = sampled_notes == 0
            meta[voice, :, score_utils.idx_slur] = sampled_slurs
    return score, meta

In [243]:
def sample_conditioned(score_template, meta_template, voices, gibbs_steps, temperature):
    score = np.zeros(score_template.shape)
    meta = np.zeros(meta_template.shape)
    
    for voice in range(score.shape[0]):
        if voice not in voices:
            score[voice] = score_template[voice]
            meta[voice] = meta_template[voice]
        
    for time_step in tqdm(range(score.shape[1])):
        for _ in range(gibbs_steps):
            for voice in voices:
                input_ = preprocessing.make_input_sequence(
                    score, meta, voice, max_pitch)
                beats = preprocessing.make_input_beat(meta, voice)
                note, slur = model.predict([
                    input_[time_step:time_step+1], beats[time_step:time_step+1]])
                sampled_note = sample_softmax(note[0], temperature)
                slur_proba = np.squeeze(np.array([1 - slur[0], slur[0]]))
                sampled_slur = sample_softmax(slur_proba, temperature)
                if sampled_note > 0:
                    score[voice, time_step] = sampled_note + min_pitch - 1
                else:
                    meta[voice, time_step, score_utils.idx_rest] = 1
                meta[voice, time_step, score_utils.idx_slur] = sampled_slur
    return score, meta

In [244]:
def sample_conditioned_2(score_template, meta_template, voices, gibbs_steps, temperature):
    score = np.zeros(score_template.shape)
    meta = np.zeros(meta_template.shape)
    
    for voice in range(score.shape[0]):
        if voice not in voices:
            score[voice] = score_template[voice]
            meta[voice] = meta_template[voice]
            
    for _ in tqdm(range(gibbs_steps)):
        for voice in voices:
            input_ = preprocessing.make_input_sequence(score, meta, voice, max_pitch)
            beats = preprocessing.make_input_beat(meta, voice)
            note, slur = model.predict([
                input_, beats])
            sampled_notes = np.squeeze(sample_softmax_multi(note, temperature))
            slur_proba = np.hstack([1 - slur, slur])
            sampled_slurs = np.squeeze(sample_softmax_multi(slur_proba, temperature))
            
            score[voice] = sampled_notes + ((sampled_notes > 1) * (min_pitch - 1))
            meta[voice, :, score_utils.idx_rest] = sampled_notes == 0
            meta[voice, :, score_utils.idx_slur] = sampled_slurs
    return score, meta

In [245]:
score, meta = sample_2(4, 64, 500, 1.0)

100%|██████████| 500/500 [00:39<00:00, 12.64it/s]


In [246]:
# score, meta = sample_2(scores[0], metas[0], [1], 50, 1.1)

In [247]:
idx = -1
voice = 4
a = preprocessing.make_input_sequence(scores[idx], metas[idx], voice, 88.0)
b = preprocessing.make_input_beat(metas[idx], voice)
notes, slurs = model.predict([a, b])
n = np.argmax(notes, axis=-1)
np.mean(scores[idx][voice] == (n + ((n > 1) * (min_pitch - 1))))

IndexError: index 4 is out of bounds for axis 2 with size 4

In [None]:
(n + ((n > 1) * (min_pitch - 1))) - scores[idx][voice]

In [250]:
music = score_utils.tensor_to_score(score, meta)

[[ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.
   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5
   7.   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.
   6.5  7.   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5
   6.   6.5  7.   7.5]
 [ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.
   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5
   7.   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.
   6.5  7.   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5
   6.   6.5  7.   7.5]
 [ 0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5  7.
   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.   6.5
   7.   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5  6.
   6.5  7.   7.5  0.   0.5  1.   1.5  2.   2.5  3.   3.5  4.   4.5  5.   5.5
   6.   6.5  7.   7.5]
 [ 0.   0.5  

In [251]:
music

<music21.stream.Score 0x7f374d45a860>

In [252]:
music.show('midi')

In [248]:
score.shape

(4, 64)

In [249]:
meta[2, :, 0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
        1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  0.,  0.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,
        0.,  1.,  1.,  1.,  0.,  1.,  1.,  1.,  0.,  1.,  1.,  0.])