In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import librosa

import shelve
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops

import matplotlib

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import librosa
from tqdm import tqdm
import pretty_midi

from os import listdir
from os.path import isfile, join
import sys
sys.path.append('..')

from Pre_Production.Midi_Pre_Processor import *
from Pre_Production.Model_Generator import *
from Shared_Files.Music_Pallete import *
from Pre_Production.Model_Generator import *
from Pre_Production.Music_Translation import *

In [2]:
pre_processor_obj = None

# Load the MIDI pre-processor from its on-disk shelf, building and caching it
# on the first run. The `with` block guarantees the shelf is closed even when
# unpickling or `MidiPreProcessor(...)` raises, so a failed run cannot leave
# the underlying database file open.
# NOTE(review): shelve unpickles stored objects; only open shelves you created.
with shelve.open(ABS_PATHS.SHELVES_PATH
                 + SHELVE_NAMES.PRE_PROCESSOR) as pre_processor_shelve:

    # Check to see if the object already exists
    if "pre_processor" in pre_processor_shelve:
        print("Found stored pre processor!")
        pre_processor_obj = pre_processor_shelve["pre_processor"]

    # Pre-processor not found generate pre-processor
    else:
        print("Generating pre processor!")
        pre_processor_obj = MidiPreProcessor(
            ABS_PATHS.TRAINING_DATASET_DIRECTORY_PATH)

        # Cache the freshly built object for subsequent runs.
        pre_processor_shelve["pre_processor"] = pre_processor_obj

# Per-instrument-family flags handed to `get_instr_wave_forms`.
# NOTE(review): the meaning of True/False is defined by that helper, which is
# not visible in this notebook -- confirm before changing the flags.
instrument_name_contains = {
    "Guitar": False,
    "Piano": False,
    "Brass": False,
    "Synth": False,
    "Drums": True,
}

all_instruments = pre_processor_obj.return_all_instruments()
instr_note_pairs_dict = pre_processor_obj.return_instr_note_pairs_dict()


print("Synthesizing wanted instr/note pairs...")
instr_wave_forms = get_instr_wave_forms(
    instrument_name_contains=instrument_name_contains,
    all_instruments=all_instruments,
    instr_note_pairs_dict=instr_note_pairs_dict,
    unique_matrix=True,
)


# Collect one waveform matrix per instrument, in the mapping's iteration
# order; the position in this list is used later as the instrument index.
inst_waves_list = []
for instr_name, wave_matrix in instr_wave_forms.items():
    print("instr:{0} Matrix_Shape: {1}".format(instr_name, wave_matrix.shape))
    inst_waves_list.append(wave_matrix)

Found stored pre processor!


  0%|          | 0/5 [00:00<?, ?it/s, Guitar]

Synthesizing wanted instr/note pairs...


100%|██████████| 5/5 [01:40<00:00, 20.76s/it, Drums]


instr:Guitar Matrix_Shape: (529, 63063)
instr:Piano Matrix_Shape: (584, 63063)
instr:Brass Matrix_Shape: (456, 63063)
instr:Synth Matrix_Shape: (1260, 63063)
instr:Drums Matrix_Shape: (182, 63063)


In [3]:
# Number of instrument domains: one entry of `inst_waves_list` per instrument.
# Used below for the domain classifier's output size and the number of decoders.
INSTRUMENTS_NUM = len(inst_waves_list)

In [4]:
def mulaw(x, MU):
    """Apply mu-law companding to `x`.

    Computes ``sign(x) * log(1 + MU * |x|) / log(1 + MU)`` with TensorFlow ops.

    Args:
        x: tensor of samples (assumed to lie in [-1, 1] -- TODO confirm).
        MU: companding constant.

    Returns:
        Tensor with the same shape as `x` holding the companded values.
    """
    compressed_magnitude = tf.log(1. + MU * tf.abs(x))
    normaliser = tf.log(1. + MU)
    return tf.sign(x) * compressed_magnitude / normaliser

def inv_mulaw(x, MU):
    """Invert the companding performed by `mulaw`.

    Computes ``sign(x) * (1 / MU) * ((1 + MU) ** |x| - 1)`` with TensorFlow ops.

    Args:
        x: tensor of companded values.
        MU: companding constant; must match the one used for `mulaw`.

    Returns:
        Tensor with the same shape as `x` holding the expanded values.
    """
    expanded_magnitude = tf.pow(1. + MU, tf.abs(x)) - 1.
    return tf.sign(x) * (1. / MU) * expanded_magnitude
    
def naive_wavenet(inputs, condition, layers, h_filters, out_filters, name='naive_wavenet', reuse=False):
    """Build a stack of gated, dilated 1-D convolutions (WaveNet-style).

    All variables are created inside ``tf.variable_scope(name, reuse=reuse)``.
    The layers rely on TensorFlow's auto-generated names, so the order in
    which they are created here determines the variable names stored in a
    checkpoint -- do not reorder the layer calls.

    Args:
        inputs: rank-3 tensor; axis 1 is padded on the left before every
            convolution (assumed layout (batch, time, channels) -- TODO confirm).
        condition: optional tensor added (through per-layer dense projections)
            to the filter and gate pre-activations, or None for no conditioning.
        layers: number of gated layers; layer ``k`` uses dilation ``2 ** k``.
        h_filters: channel width of the hidden layers.
        out_filters: channel width of the two final dense layers.
        name: variable scope name.
        reuse: passed to ``tf.variable_scope`` to share existing variables.

    Returns:
        Tuple ``(dilation_sum, outputs)``: ``dilation_sum`` is 1 plus the sum
        of all layer dilations; ``outputs`` is the result of the final linear
        dense layer with ``out_filters`` channels.
    """
    with tf.variable_scope(name, reuse=reuse):
        
        # Initial kernel-size-2 convolution; one step of left padding on axis 1
        # compensates for the kernel width.
        outputs = tf.pad(inputs, [[0, 0], [1, 0], [0, 0]])
        outputs = tf.layers.conv1d(inputs=outputs, filters=h_filters, 
                                       kernel_size=2, dilation_rate=1, use_bias=False)
        # Starts at 1 for the initial convolution, then accumulates each layer's dilation.
        dilation_sum = 1
        skips = []

        for layer in range(layers):
            dilation = 2 ** layer
            dilation_sum += dilation
            # Left-pad by the dilation before the dilated kernel-size-2 convolutions.
            layer_outputs = tf.pad(outputs, [[0, 0], [dilation, 0], [0, 0]])
            filter_outputs = tf.layers.conv1d(inputs=layer_outputs, filters=h_filters, 
                                       kernel_size=2, dilation_rate=dilation, use_bias=False)
            gate_outputs = tf.layers.conv1d(inputs=layer_outputs, filters=h_filters, 
                                       kernel_size=2, dilation_rate=dilation, use_bias=False)
            if condition is not None:
                # Separate dense projections of the condition for filter and gate.
                filter_condition = tf.layers.dense(condition, h_filters)
                gate_condition = tf.layers.dense(condition, h_filters)
            else:
                filter_condition = 0
                gate_condition = 0

            # Gated activation unit: tanh(filter) * sigmoid(gate).
            layer_outputs = tf.nn.tanh(filter_outputs + filter_condition) * \
                            tf.nn.sigmoid(gate_outputs + gate_condition)

            # Residual connection feeding the next layer.
            residual = tf.layers.dense(layer_outputs, h_filters)
            outputs += residual

            # Skip connection collected for the output head.
            skip = tf.layers.dense(layer_outputs, h_filters)
            skips.append(skip)

        # Output head: ReLU over the summed skips, one ReLU dense layer, one linear dense layer.
        outputs = tf.nn.relu(sum(skips))
        outputs = tf.layers.dense(outputs, out_filters, activation=tf.nn.relu)
        outputs = tf.layers.dense(outputs, out_filters, activation=None)

    return dilation_sum, outputs

def downsample(inputs, pool_size, channel):
    """Average-pool `inputs` with window and stride both equal to `pool_size`.

    Args:
        inputs: tensor accepted by ``tf.layers.average_pooling1d``.
        pool_size: pooling window length, also used as the stride.
        channel: accepted for interface symmetry with `upsample`; not used.

    Returns:
        Tuple ``(pad_size, outputs)`` where ``pad_size`` is always 1 and
        ``outputs`` is the pooled tensor.
    """
    pooled = tf.layers.average_pooling1d(
        inputs=inputs,
        pool_size=pool_size,
        strides=pool_size,
    )
    return 1, pooled

def upsample(inputs, output_size, channel):
    """Stretch `inputs` along axis 1 to `output_size` by nearest-neighbour resizing.

    A dummy axis is inserted so the tensor can go through
    ``tf.image.resize_nearest_neighbor`` and is removed again afterwards.

    Args:
        inputs: rank-3 tensor (assumed (batch, time, channel) -- TODO confirm).
        output_size: target length of axis 1 (may be a scalar tensor).
        channel: static size of the last axis, restored via reshape.

    Returns:
        Tensor whose axis 1 has been resized to `output_size`, sliced to its
        last `output_size` steps.
    """
    as_image = tf.expand_dims(inputs, axis=1)
    resized = tf.image.resize_nearest_neighbor(as_image, [1, output_size])
    stretched = tf.squeeze(resized, axis=1)
    # Re-attach the static channel size, which is lost by the ops above.
    dynamic_shape = tf.shape(stretched)
    stretched = tf.reshape(stretched, [dynamic_shape[0], dynamic_shape[1], channel])
    return stretched[:, -output_size:]

def domain_confusion(inputs, layers, domain_num, h_filters):
    """Build the domain (instrument) classifier applied to the latent sequence.

    Stacks `layers` kernel-size-2 convolutions with ELU activation, followed
    by a tanh dense layer and a linear dense layer that emits `domain_num`
    values per position.

    The convolutions are deliberately left undilated (``dilation_rate=1``):
    the original code computed an unused ``2 ** layers`` value but never
    applied it. That dead local has been removed; the computation is
    unchanged, so existing checkpoints keep producing the same outputs.

    Args:
        inputs: tensor accepted by ``tf.layers.conv1d``.
        layers: number of convolution layers.
        domain_num: number of domains to discriminate between.
        h_filters: number of filters in every convolution.

    Returns:
        Tensor of unnormalised per-position domain scores (last axis has
        size `domain_num`).
    """
    outputs = inputs
    for _ in range(layers):
        outputs = tf.layers.conv1d(inputs=outputs, filters=h_filters, kernel_size=2,
                                   dilation_rate=1, activation=tf.nn.elu)

    outputs = tf.layers.dense(outputs, domain_num, activation=tf.nn.tanh)
    outputs = tf.layers.dense(outputs, domain_num)
    return outputs

# Marker printed once every helper definition in this cell has been evaluated.
print('done')

done


In [5]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.framework import ops


class FlipGradientBuilder(object):
    """Factory for gradient-reversal ops.

    Calling an instance returns ``tf.identity(x)`` whose gradient is
    overridden so that, during back-propagation, the incoming gradient is
    negated and scaled by `l`. The forward pass is unchanged.

    Every call registers a new gradient function under a unique name.
    TensorFlow's gradient registry is process-wide and rejects duplicate
    names, so the name includes a random suffix; a plain per-instance counter
    would collide as soon as this cell is re-run in the same kernel (the
    counter restarts at 0 while the registry still holds the old names).
    """

    def __init__(self):
        # Number of reversal ops built by this instance so far.
        self.num_calls = 0

    def __call__(self, x, l=1.0):
        """Return an identity op on `x` with a reversed, `l`-scaled gradient.

        Args:
            x: tensor to pass through unchanged in the forward pass.
            l: scale applied to the negated gradient.

        Returns:
            Tensor equal to `x` in the forward pass.
        """
        import uuid  # local import: only needed to build a unique registry key

        grad_name = "FlipGradient%d_%s" % (self.num_calls, uuid.uuid4().hex)

        @ops.RegisterGradient(grad_name)
        def _flip_gradients(op, grad):
            return [tf.negative(grad) * l]

        g = tf.get_default_graph()
        # Only the Identity op created inside this block uses the override.
        with g.gradient_override_map({"Identity": grad_name}):
            y = tf.identity(x)

        self.num_calls += 1
        return y


flip_gradient = FlipGradientBuilder()

In [6]:
# hyper-parameters
MU = 256  # mu-law constant; also the one-hot depth / number of output classes of the decoders
LATENT_DIM = 64  # channel width of the encoder output (latent code)
POOL_SIZE = 400  # average-pooling window and stride used to downsample the latents


In [9]:
# Start from an empty default graph so re-running this cell rebuilds the model from scratch.
tf.reset_default_graph()

'''

INPUT LAYER

'''
# wave input
# Rank-2 float placeholder; the training cell feeds it a stack of waveforms (one per row).
x_holder = tf.placeholder(dtype=tf.float32, shape=[None, None])
x_mulaw = mulaw(x_holder, MU)
# Map the companded values from [-1, 1] to integer class indices clipped to [0, MU - 1].
x_onehot_index = tf.clip_by_value(tf.cast((x_mulaw + 1.) * 0.5 * MU, tf.int32), 0, MU - 1)
x_onehot = tf.one_hot(x_onehot_index, depth=MU)

# label input
# Scalar instrument index; one label applies to the whole fed batch.
label_holder = tf.placeholder(dtype=tf.int32, shape=())

'''

ENCODER LAYER

'''

# encode
# Unconditioned WaveNet stack on the raw waveform (a trailing channel axis is added);
# the returned dilation sum is not needed here.
_, latents = naive_wavenet(inputs=tf.expand_dims(x_holder, axis=-1), condition=None, 
                           layers=9, h_filters=64, out_filters=LATENT_DIM, name='wavenet_encoder')

# downsample
# Average-pool the latents with window/stride POOL_SIZE; the first return value is ignored.
_, down_latents = downsample(latents, POOL_SIZE, LATENT_DIM)

# upsample
# Stretch the pooled latents back to the length of axis 1 of the input wave.
up_latents = upsample(down_latents, tf.shape(x_holder)[1], LATENT_DIM)

'''

DOMAIN CONFUSION LAYER

'''

# gradient reversal layer
# Identity in the forward pass; the gradient flowing back into the encoder is negated.
flipped_down_latents = flip_gradient(down_latents)
#flipped_down_latents = down_latents

# domain predict
# 3 convolution layers with 128 filters, producing INSTRUMENTS_NUM scores per position.
label_predicts = domain_confusion(flipped_down_latents, 3, INSTRUMENTS_NUM, 128)
# Average the scores over axis 1 to obtain one logit vector per batch element.
label_predicts = tf.reduce_mean(label_predicts, axis=1)
label_predicts_prob = tf.nn.softmax(label_predicts)
# Repeat the scalar label once per batch element.
label_tiled = tf.tile(tf.expand_dims(label_holder, axis=0), [tf.shape(label_predicts)[0]])

# loss
domain_confusion_loss = tf.losses.sparse_softmax_cross_entropy(labels=label_tiled, logits=label_predicts)

'''

DECODER LAYER for traininng

'''
decode_losses = []
samples_list = []
# One decoder per instrument, each in its own 'wavenet_decoder_<index>' variable scope.
for instrument_index in range(INSTRUMENTS_NUM):
    # decode
    # The decoder sees the one-hot wave and is conditioned on the upsampled latents.
    dilation_sum, outputs = naive_wavenet(inputs=x_onehot, condition=up_latents, 
                                          layers=9, h_filters=64, out_filters=MU, 
                                          name='wavenet_decoder_' + str(instrument_index))
    outputs_probs = tf.nn.softmax(outputs)
    
    # sample from outputs
    # Draw a class index per position, rescale it to [-1, 1) and undo the mu-law companding.
    dist = tf.distributions.Categorical(probs=outputs_probs)
    samples = inv_mulaw(tf.cast(dist.sample(), tf.float32) / MU * 2. - 1., MU)
    samples_list.append(samples)

    # loss
    # Next-sample prediction: logits at position t are scored against the class at t + 1,
    # skipping the first `dilation_sum` positions.
    decode_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=x_onehot_index[:, dilation_sum + 1:],
                                                                 logits=outputs[:, dilation_sum:-1])
    decode_loss = tf.reduce_mean(decode_loss)
    decode_losses.append(decode_loss)

# Mask the per-decoder losses so only the decoder of the fed instrument contributes;
# the mean below still divides by INSTRUMENTS_NUM.
decode_losses = tf.stack(decode_losses, axis=0) * tf.one_hot(label_holder, depth=INSTRUMENTS_NUM)
decode_losses = tf.reduce_mean(decode_losses)

# Total objective: reconstruction loss plus the domain confusion loss weighted by 1e-2.
loss = decode_losses + 1e-2 * domain_confusion_loss
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)

'''

DECODER LAYER for inference

'''

# input for decoder
# Latents are fed from outside at inference time instead of coming from the encoder.
latents_holder = tf.placeholder(dtype=tf.float32, shape=[None, None, LATENT_DIM])

inference_sample_list = []

for instrument_index in range(INSTRUMENTS_NUM):
    # decode
    # reuse=True shares the weights of the training decoder with the same scope name.
    _, outputs = naive_wavenet(inputs=x_onehot, condition=latents_holder, 
                               layers=9, h_filters=64, out_filters=MU, 
                               name='wavenet_decoder_' + str(instrument_index), reuse=True)
    outputs_probs = tf.nn.softmax(outputs)

    # sample from outputs
    # Only the distribution at the last position is sampled (one new sample per run).
    dist = tf.distributions.Categorical(probs=outputs_probs[:, -1])
    sample = inv_mulaw(tf.cast(dist.sample(), tf.float32) / MU * 2. - 1., MU)
    inference_sample_list.append(sample)

'''

SESSION CREATE

'''

# Session configured with a GPU device count of 0.
config = tf.ConfigProto(
        device_count = {'GPU': 0}
    )
sess = tf.Session(config=config)

#sess = tf.Session()
# Initialise every variable first; the restore below then overwrites them with saved values.
sess.run(tf.global_variables_initializer())

# The Saver is created after the whole graph is built, so it covers all variables.
saver = tf.train.Saver()
# Restore variables from disk.
# NOTE(review): the recorded output of this cell shows a restore of "model.ckpt-119"
# failing with "Key wavenet_decoder_0/conv1d/kernel not found in checkpoint", while the
# code below asks for step 115 -- the output looks stale and the checkpoint may not match
# this graph. Confirm which checkpoint was saved from this exact graph definition.
saver.restore(sess, ABS_PATHS.SAVED_MODELS_PATH_LYREBIRD_TN + "/model.ckpt-" + str(115))
# Reached only when the restore above did not raise.
print("Model restored.")

print('Tensorflow graph created.')


INFO:tensorflow:Restoring parameters from /home/eric/Desktop/LyreBird/Data_Dump/Saved_Models/LyreBird_TN/model.ckpt-119


NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key wavenet_decoder_0/conv1d/kernel not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-9-68a9a7f35281>:120)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]

Caused by op 'save/RestoreV2', defined at:
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-68a9a7f35281>", line 120, in <module>
    saver = tf.train.Saver()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1102, in __init__
    self.build()
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 795, in _build_internal
    restore_sequentially, reshape)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 406, in _AddRestoreOps
    restore_sequentially)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 862, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1466, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/eric/anaconda3/envs/LyreBird/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key wavenet_decoder_0/conv1d/kernel not found in checkpoint
	 [[node save/RestoreV2 (defined at <ipython-input-9-68a9a7f35281>:120)  = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]


In [None]:
def pitch_shift(inputs, start_index, end_index, n_steps, sr=8000):
    """Pitch-shift one segment of a waveform, leaving the rest untouched.

    Args:
        inputs: 1-D NumPy array holding the waveform.
        start_index: first index of the segment to shift (inclusive).
        end_index: end of the segment to shift (exclusive).
        n_steps: number of (possibly fractional) steps to shift by, as
            understood by ``librosa.effects.pitch_shift``.
        sr: sampling rate of `inputs` in Hz. Defaults to 8000, the value that
            was previously hard-coded.

    Returns:
        New array: the samples before the segment, the shifted segment, and
        the samples after it, concatenated along axis 0.
    """
    # `sr` and `n_steps` are passed by keyword: recent librosa releases make
    # them keyword-only, while older releases accept keywords as well.
    shifted = librosa.effects.pitch_shift(inputs[start_index:end_index],
                                          sr=sr, n_steps=n_steps)
    return np.concatenate([inputs[:start_index], shifted, inputs[end_index:]], axis=0)

def wave_augmentation(inputs):
    """Randomly pitch-shift one random segment of `inputs`.

    A segment of 2000-3999 samples is placed at a random offset and shifted
    by a random amount drawn from [-0.5, 0.5) steps via `pitch_shift`.

    Args:
        inputs: 1-D waveform; must be longer than the drawn segment length.

    Returns:
        The augmented waveform returned by `pitch_shift`.
    """
    segment_len = np.random.randint(2000, 4000, 1)[0]
    segment_start = np.random.randint(0, len(inputs) - segment_len, 1)[0]
    shift_steps = float(np.random.ranf(1)[0] - 0.5)
    return pitch_shift(inputs, segment_start, segment_start + segment_len, shift_steps)

In [None]:
from IPython.display import clear_output

# Alternate forever between one training pass over every instrument and a
# visual check of the current model. Interrupt the kernel to stop.
# Every figure is closed explicitly after being shown: the loop never ends,
# so figures left open would accumulate without bound on backends that do
# not close them in `plt.show()`.
while True:
    for i in range(1):
        for instrument_index in range(INSTRUMENTS_NUM):
            batch_size = 3
            # Pick `batch_size` random waveforms (with replacement) of this instrument.
            indexes = np.random.randint(0, inst_waves_list[instrument_index].shape[0], batch_size)
            augmented = np.stack([wave_augmentation(_wave)
                                  for _wave in inst_waves_list[instrument_index][indexes]],
                                 axis=0)
            _, _loss = sess.run([train_step, loss], feed_dict={x_holder: augmented,
                                                               label_holder: instrument_index})
            print(i, _loss)

    clear_output()

    # Build a test batch holding one random waveform per instrument (row k = instrument k).
    inst_waves_test = []
    for instrument_index in range(INSTRUMENTS_NUM):
        index = np.random.randint(0, inst_waves_list[instrument_index].shape[0], 1)
        inst_waves_test.append(inst_waves_list[instrument_index][index])

    inst_waves_test = np.vstack(inst_waves_test)
    _down_latents, _label_predicts_prob, _samples_list = sess.run(
        [down_latents, label_predicts_prob, samples_list],
        feed_dict={x_holder: inst_waves_test})

    # Overlay a test waveform with decoder output: `_samples_list[a][b]` is
    # decoder `a` applied to test row `b`.
    # NOTE(review): the reference curve is indexed by the decoder index, not by
    # the input row -- confirm this pairing is intended.
    for instrument_index1 in range(INSTRUMENTS_NUM):
        for instrument_index2 in range(INSTRUMENTS_NUM):
            fig = plt.figure(figsize=[18, 3])
            plt.plot(inst_waves_test[instrument_index1], alpha=0.5)
            plt.plot(_samples_list[instrument_index1][instrument_index2], alpha=0.5)
            plt.ylim([-1., 1.])
            plt.show()
            plt.close(fig)

    # Show each test waveform next to its downsampled latent code.
    for _down_latent, inst_waves_test1 in zip(_down_latents, inst_waves_test):
        fig = plt.figure(figsize=[18, 3])
        plt.plot(inst_waves_test1)
        plt.show()
        plt.close(fig)

        fig = plt.figure(figsize=[18, 3])
        plt.plot(_down_latent)
        plt.show()
        plt.close(fig)

    # Domain classifier probabilities for the first test row.
    fig = plt.figure(figsize=[18, 3])
    plt.plot(_label_predicts_prob[0, :])
    plt.ylim([0., 1.])
    plt.show()
    plt.close(fig)
    

In [None]:
# Positions in `inst_waves_list`: the instrument whose waveform is encoded ...
src_instrument_index = 2
# ... and the instrument whose decoder generates the output.
dest_instrument_index = 0

In [None]:
# Random row of the source instrument's waveform matrix, kept as a length-1 array
# so that indexing with it preserves the leading axis.
index = np.random.randint(0, inst_waves_list[src_instrument_index].shape[0], 1)

In [None]:
# Source waveform selected by `index`; indexing with the length-1 array keeps a leading axis.
_src = inst_waves_list[src_instrument_index][index]

# Encode the source wave and fetch the upsampled latents that condition the decoders.
_latents = sess.run(up_latents, feed_dict={x_holder: _src})

print(_latents.shape)

# Source waveform.
plt.figure(figsize=[18, 2])
plt.plot(_src[0])
plt.show()

# Latent channels of the source waveform.
plt.figure(figsize=[18, 2])
plt.plot(_latents[0])
plt.show()

import IPython.display as ipd
ipd.Audio(_src[0], rate=8000) # audio player for the source waveform, played at 8000 Hz

In [None]:
from tqdm import tqdm
from IPython.display import clear_output

# Autoregressive generation with the destination instrument's decoder.
#
# `T` was referenced but never defined anywhere in this notebook, so the cell
# failed with a NameError on a fresh kernel. It is set to the length of the
# source waveform, which is also the length of the latent sequence.
# NOTE(review): assumes the whole source should be regenerated -- lower `T`
# for a quicker preview.
T = _src.shape[1]

# Number of past samples / latent steps fed to the decoder at every step.
_context_len = 1024

# Seed the output with silence so the first window is full.
_samples = np.zeros([1, _context_len])
# Pad into a NEW array instead of overwriting `_latents`, so re-running this
# cell does not prepend the zero block a second time.
_padded_latents = np.concatenate([np.zeros([1, _context_len, LATENT_DIM]), _latents], axis=1)
for i in tqdm(range(T)):
    # Feed the most recent samples together with the latent window aligned to them.
    _inference_sample_list = sess.run(
        inference_sample_list,
        feed_dict={x_holder: _samples[:, -_context_len:],
                   latents_holder: _padded_latents[:, i:i + _context_len]})
    # Keep only the sample drawn by the destination instrument's decoder.
    _samples = np.concatenate(
        [_samples, np.expand_dims(_inference_sample_list[dest_instrument_index], axis=0)],
        axis=-1)
    if i % 200 == 0:
        # Periodic progress view: source consumed so far vs. audio generated so far.
        clear_output()
        plt.plot(_src[0, :i])
        plt.show()

        plt.plot(_samples[0, _context_len:])
        plt.show()

print(_samples.shape)


In [None]:
import IPython.display as ipd
ipd.Audio(_src[0], rate=8000) # audio player for the source waveform, played at 8000 Hz


In [None]:
ipd.Audio(_samples[0], rate=8000) # audio player for the generated samples (including the leading zero seed), played at 8000 Hz