## A13 DeepComposer

In [37]:
# Environment setup: installs the pinned TensorFlow / NumPy / pypianoroll versions
# plus the MIDI and plotting helpers used by the rest of this notebook.
# NOTE(review): the captured output of this cell shows "conda: command not found",
# so the conda line had no effect in the environment it was last run in.
!conda update --all --y 
!pip install tensorflow-gpu==1.14.0
# NOTE(review): the captured output reports that moviepy 1.0.3 requires
# numpy>=1.17.3, which conflicts with this pin — confirm which NumPy version ends
# up installed after the moviepy line below has run.
!pip install numpy==1.16.4
!pip install pretty_midi
!pip install pypianoroll==0.5.3
!pip install music21
!pip install seaborn
!pip install --ignore-installed moviepy

/bin/bash: conda: command not found
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m
Collecting numpy==1.16.4
  Using cached numpy-1.16.4-cp36-cp36m-manylinux1_x86_64.whl (17.3 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.5
    Uninstalling numpy-1.19.5:
      Successfully uninstalled numpy-1.19.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
moviepy 1.0.3 requires numpy>=1.17.3; python_version != "2.7", but you have numpy 1.16.4 which is incompatible.[0m
Successfully installed numpy-1.16.4
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/usr/local/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via th

In [1]:
import os 
import numpy as np
from numpy import asarray
from numpy import save
from PIL import Image
import logging
import pypianoroll
from pypianoroll import Multitrack, Track
import scipy.stats
import pickle
import music21
from IPython import display
import matplotlib.pyplot as plt

# TensorFlow setup. The calls below (tf.logging, tf.enable_eager_execution) are
# TF 1.x APIs; the captured output of this cell reports version 1.14.0.
import tensorflow as tf
print(tf.__version__)
# Only show TensorFlow log messages of severity ERROR.
tf.logging.set_verbosity(tf.logging.ERROR)
# Run ops eagerly so tensors can be converted with .numpy() later in the notebook.
tf.enable_eager_execution()

# Expose only GPU 0 and order devices by PCI bus id.
# NOTE(review): these variables are set after TensorFlow has been imported; they are
# usually exported before the import — confirm they take effect here.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

# Project-local helper modules (not part of this notebook).
from utils import display_utils, metrics_utils, path_utils, inference_utils, midi_utils

LOGGER = logging.getLogger("gan.train")
# Render matplotlib figures inline in the notebook.
%matplotlib inline

1.14.0


In [2]:
# Directory layout for this experiment. Everything model-related lives under
# <root_dir>/style; the evaluation split is written to a separate folder.
root_dir = './music'

# Folder that is scanned for the input ``.mid`` files.
demo_midi_location = './sample_midi/'

model_dir = os.path.join(root_dir, 'style')

# Receives the training ``.npy`` file and the piano-roll plots saved while training.
train_dir = os.path.join(model_dir, 'train')

# Directory handed to tf.train.CheckpointManager for saving / restoring checkpoints.
check_dir = os.path.join(model_dir, 'preload')

# Receives the MIDI samples generated while training.
sample_dir = os.path.join(model_dir, 'sample')

# Receives the MIDI files generated at inference time.
eval_dir = os.path.join(model_dir, 'eval')

# Receives the held-out ``eval.npy`` split.
dataset_eval_dir = './dataset/'

# Create every directory this notebook writes into up front. dataset_eval_dir is
# included because np.save does not create missing parent directories, so saving
# the evaluation split would otherwise fail on a fresh checkout.
for _output_dir in (train_dir, eval_dir, sample_dir, dataset_eval_dir):
    os.makedirs(_output_dir, exist_ok=True)


In [3]:
# Survey the demo MIDI files: print each distinct track name (with the program
# number of its first occurrence) and finally report how many names were found.
instrument_list = []

for filename in os.listdir(demo_midi_location):
    # Only MIDI files are of interest; anything else in the folder is ignored.
    if not filename.endswith(".mid"):
        continue

    try:
        music_tracks = pypianoroll.Multitrack(beat_resolution=4)
        music_tracks.parse_midi(demo_midi_location + filename)

        for track in music_tracks.tracks:
            # Names are de-duplicated across files, so each one is printed once.
            if track.name in instrument_list:
                continue
            print(track.name, "   ", track.program)
            instrument_list.append(track.name)
    except Exception as e:
        # A file that cannot be parsed is reported and skipped.
        print("**********ERROR**************")
        print(e)

print("The amount of instruments across tracks: ", len(instrument_list))
print(instrument_list)

FRETLSSS     35
ORGAN 2     17
CLAVINET     7
MUTED GTR     28
CLEAN GTR     27
VIBRAPHONE     11
DRUMS     0
The amount of instruments across tracks:  7
['FRETLSSS', 'ORGAN 2', 'CLAVINET', 'MUTED GTR', 'CLEAN GTR', 'VIBRAPHONE', 'DRUMS']


In [4]:
def store_track(track, collection):
    """Add ``track`` to ``collection`` if it belongs to a supported instrument family.

    Supported families are Piano, Organ, Bass and Guitar, identified by the
    track's MIDI program number. Tracks outside these families, and drum tracks,
    are reported on stdout and left out.

    Args:
        track: object exposing ``program`` (int) and ``name``; an optional
            ``is_drum`` flag marks percussion tracks.
        collection: either a dict with the keys ``'Piano'``, ``'Organ'``,
            ``'Bass'`` and ``'Guitar'`` mapping to lists (the track is appended
            to the list of its family), or a plain list (the track is appended
            directly).

    Returns:
        The same ``collection`` object, mutated in place.
    """
    # Program numbers are zero-based here: the instrument survey in this notebook
    # prints CLAVINET as 7 and FRETLSSS as 35, i.e. General MIDI numbering minus
    # one. The ranges therefore start at 0/16/32/24 rather than 1/17/33/25.
    instrument_families = (
        ('Piano', range(0, 8)),
        ('Organ', range(16, 24)),
        ('Bass', range(32, 40)),
        ('Guitar', range(24, 32)),
    )

    family = None
    # Drum tracks carry program 0 as well (DRUMS is printed with program 0), so they
    # must be excluded explicitly or they would be mistaken for a piano.
    if not getattr(track, 'is_drum', False):
        for family_name, programs in instrument_families:
            if track.program in programs:
                family = family_name
                break

    if family is None:
        print("Skipping this instrument------------------->", track.name)
    elif isinstance(collection, dict):
        collection[family].append(track)
    else:  # collection will hold chosen tracks
        collection.append(track)

    return collection

In [5]:
def get_merged(music_tracks, filename):
    """Reduce one parsed MIDI file to a single 4-channel piano-roll array.

    Args:
        music_tracks: object whose ``tracks`` attribute is iterable; each track
            exposes a ``pianoroll`` array that can be reshaped to (-1, 32, 128).
        filename: accepted for the caller's convenience; not used here.

    Returns:
        Array reshaped to (-1, 32, 128, 4), one channel per instrument family in
        the order Piano, Organ, Bass, Guitar.

    Raises:
        ValueError: from ``np.stack`` / ``reshape`` when the file does not yield
            exactly four instrument channels.
    """
    # Pass 1: keep only the tracks that store_track accepts.
    selected = []
    for candidate in music_tracks.tracks:
        selected = store_track(candidate, selected)

    # Pass 2: cut every kept track into blocks of 32 time steps x 128 pitches
    # (in place, on the track object) and bucket it by instrument family.
    by_instrument = {'Piano': [], 'Organ': [], 'Bass': [], 'Guitar': []}
    for position, kept in enumerate(selected):
        try:
            kept.pianoroll = kept.pianoroll.reshape(-1, 32, 128)
            by_instrument = store_track(kept, by_instrument)
        except Exception as e:
            print("ERROR!!!!!----> Skipping track # ", position, " with error ", e)

    # Pass 3: produce one (32, 128) boolean plane per instrument that has tracks.
    per_instrument = []
    for instrument, family_tracks in by_instrument.items():
        if len(family_tracks) == 0:
            continue
        try:
            stacked = np.stack([t.pianoroll for t in family_tracks], -1)
            # NOTE(review): index 0 on the stacked axis keeps only the first track
            # of the family, and np.any over axis 0 ORs all 32-step blocks of that
            # track together — confirm this collapsing is intended.
            first_track = stacked[:, :, :, 0]
            per_instrument.append(np.any(first_track, axis=0))
        except Exception as e:
            print("ERROR!!!!!----> Cannot concatenate/merge track for instrument", instrument, " with error ", e)

    combined = np.stack(per_instrument, -1)
    return combined.reshape(-1, 32, 128, 4)

In [6]:
# Accumulate the samples of every ``.mid`` file in demo_midi_location into one
# (N, 32, 128, 4) array.
track_list = np.empty(shape=(0, 32, 128, 4))

# NOTE(review): a single Multitrack instance is re-used for all files; presumably
# parse_midi replaces its previous contents — confirm.
music_tracks = pypianoroll.Multitrack(beat_resolution=4)

for filename in os.listdir(demo_midi_location):
    midi_path = demo_midi_location + filename
    print("Starting to process filename---->", midi_path)

    if not filename.endswith(".mid"):
        continue

    try:
        music_tracks.parse_midi(midi_path)
        # Pad so that every track can be cut into blocks of 32 time steps and all
        # tracks of the file share the same length.
        music_tracks.pad_to_multiple(32)
        music_tracks.pad_to_same()
        file_samples = get_merged(music_tracks, filename)
        # New samples are placed in front of the ones collected so far.
        track_list = np.concatenate((file_samples, track_list))

        print("Successfully processed filename---->", midi_path)
    except Exception as e:
        print("**********ERROR**************It's possible that not all 4 instruments exist in this track; at least one is 0")
        print("Skipping file---->", filename, e)
        print(e)

# Re-code the piano rolls: cells equal to 0 become -1, then every cell that is
# still >= 0 becomes 1.
track_list[track_list == 0] = -1
track_list[track_list >= 0] = 1
track_list[track_list >= 0] = 1


Starting to process filename----> ./sample_midi/sample (5).mid
Skipping this instrument-------------------> VIBRAPHONE
Skipping this instrument-------------------> DRUMS
Successfully processed filename----> ./sample_midi/sample (5).mid
Starting to process filename----> ./sample_midi/sample (9).mid
Skipping this instrument-------------------> VIBRAPHONE
Skipping this instrument-------------------> DRUMS
Successfully processed filename----> ./sample_midi/sample (9).mid
Starting to process filename----> ./sample_midi/sample (1).mid
Skipping this instrument-------------------> VIBRAPHONE
Skipping this instrument-------------------> DRUMS
Successfully processed filename----> ./sample_midi/sample (1).mid
Starting to process filename----> ./sample_midi/sample (10).mid
Skipping this instrument-------------------> VIBRAPHONE
Skipping this instrument-------------------> DRUMS
Successfully processed filename----> ./sample_midi/sample (10).mid
Starting to process filename----> ./sample_midi/sample

In [7]:
# Split the samples into a training half and an evaluation half and persist both.
# np.array_split is used instead of np.split so an odd number of samples no longer
# raises; for an even count the two halves are the same as np.split would return.
training_data, eval_data = np.array_split(track_list, 2)

# np.save does not create missing directories, so make sure both targets exist.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(dataset_eval_dir, exist_ok=True)

save(os.path.join(train_dir, 'reggae-train.npy'), np.array(training_data))
save(os.path.join(dataset_eval_dir, 'eval.npy'), np.array(eval_data))

In [8]:
# Number of samples per training batch; also the leading dimension of the latent
# noise tensors drawn in the train-step functions.
BATCH_SIZE = 5

# Buffer size passed to tf.data's shuffle().
SHUFFLE_BUFFER_SIZE = 10

# Number of batches passed to tf.data's prefetch().
PREFETCH_SIZE = 2

In [9]:
def prepare_dataset(filename):
    """Build the shuffled, endlessly repeating, batched input pipeline.

    Args:
        filename: path of the ``.npy`` file holding the training samples.

    Returns:
        A ``tf.data.Dataset`` yielding float32 batches of ``BATCH_SIZE`` samples
        (incomplete batches are dropped), with ``PREFETCH_SIZE`` batches prefetched.
    """
    # Load from the path supplied by the caller. The argument used to be ignored in
    # favour of a hard-coded file, which made the function unusable for other data.
    data = np.load(filename)
    data = np.asarray(data, dtype=np.float32)  # {-1, 1}
    print('data shape = {}'.format(data.shape))

    dataset = tf.data.Dataset.from_tensor_slices(data)
    # repeat() without a count makes the dataset infinite, so iterators never run dry.
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE).repeat()
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
    dataset = dataset.prefetch(PREFETCH_SIZE)

    return dataset

# Build the training pipeline for the training split saved under train_dir.
dataset = prepare_dataset(train_dir + '/reggae-train.npy')

data shape = (5, 32, 128, 4)


In [10]:
def _conv2d(layer_input, filters, f_size=4, bn=True):
    """Encoder stage: stride-2 'same' convolution, LeakyReLU(0.2), optional batch norm.

    Args:
        layer_input: Keras tensor to transform.
        filters: number of convolution filters.
        f_size: convolution kernel size.
        bn: when true, a BatchNormalization(momentum=0.8) layer is appended.

    Returns:
        The transformed Keras tensor.
    """
    convolution = tf.keras.layers.Conv2D(filters, kernel_size=f_size, strides=2,
                                         padding='same')
    features = convolution(layer_input)
    features = tf.keras.layers.LeakyReLU(alpha=0.2)(features)
    if not bn:
        return features
    return tf.keras.layers.BatchNormalization(momentum=0.8)(features)


def _deconv2d(layer_input, pre_input, filters, f_size=4, dropout_rate=0):
    """Decoder stage: 2x upsampling, convolution, batch norm, ReLU, then skip concat.

    Args:
        layer_input: Keras tensor coming from the previous decoder stage.
        pre_input: skip-connection tensor concatenated onto the result.
        filters: number of convolution filters.
        f_size: convolution kernel size.
        dropout_rate: when truthy, a Dropout layer with this rate is inserted
            before the concatenation.

    Returns:
        The concatenation of the decoded tensor and ``pre_input``.
    """
    upsampled = tf.keras.layers.UpSampling2D(size=2)(layer_input)
    convolution = tf.keras.layers.Conv2D(filters, kernel_size=f_size, strides=1,
                                         padding='same')
    decoded = convolution(upsampled)
    decoded = tf.keras.layers.BatchNormalization(momentum=0.8)(decoded)
    decoded = tf.keras.layers.ReLU()(decoded)

    if dropout_rate:
        decoded = tf.keras.layers.Dropout(dropout_rate)(decoded)

    return tf.keras.layers.Concatenate()([decoded, pre_input])

    
def build_generator(condition_input_shape=(32, 128, 1), filters=64,
                    instruments=4, latent_shape=(2, 8, 512)):
    """Assemble the encoder/decoder generator with skip connections.

    The model takes a condition piano roll and a latent tensor and emits a
    tanh-activated piano roll with ``instruments`` channels.

    Args:
        condition_input_shape: shape of the condition input (without batch axis).
        filters: filter count of the first encoder stage; deeper stages use
            multiples of it.
        instruments: number of output channels.
        latent_shape: shape of the latent input; it is concatenated channel-wise
            with the deepest encoder output, so its leading dimensions must match
            that tensor.

    Returns:
        A ``tf.keras.models.Model`` named ``'Generator'`` with inputs
        ``[condition, latent]``.
    """
    c_input = tf.keras.layers.Input(shape=condition_input_shape)
    z_input = tf.keras.layers.Input(shape=latent_shape)

    # Encoder: four stride-2 stages; the first one skips batch normalisation.
    encoder_outputs = []
    current = c_input
    for depth, multiplier in enumerate((1, 2, 4, 8)):
        current = _conv2d(current, filters * multiplier, bn=(depth > 0))
        encoder_outputs.append(current)
    enc1, enc2, enc3, bottleneck = encoder_outputs

    # Inject the latent tensor at the bottleneck.
    current = tf.keras.layers.Concatenate(axis=-1)([bottleneck, z_input])

    # Decoder: mirror the encoder, concatenating the matching encoder output.
    for skip, multiplier in ((enc3, 4), (enc2, 2), (enc1, 1)):
        current = _deconv2d(current, skip, filters * multiplier)

    upsampled = tf.keras.layers.UpSampling2D(size=2)(current)
    final_conv = tf.keras.layers.Conv2D(instruments, kernel_size=4, strides=1,
                                        padding='same', activation='tanh')
    output = final_conv(upsampled)  # 32, 128, 4

    return tf.keras.models.Model([c_input, z_input], output, name='Generator')

In [11]:
# Instantiate the generator with its default shapes and print its layer table.
generator = build_generator()
generator.summary()

Model: "Generator"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 32, 128, 1)] 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 16, 64, 64)   1088        input_1[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 16, 64, 64)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 8, 32, 128)   131200      leaky_re_lu[0][0]                
__________________________________________________________________________________________

In [12]:
def _build_critic_layer(layer_input, filters, f_size=4):
    """Critic stage: stride-2 'same' convolution followed by LeakyReLU(0.2).

    Args:
        layer_input: Keras tensor to transform.
        filters: number of convolution filters.
        f_size: convolution kernel size.

    Returns:
        The transformed Keras tensor.
    """
    convolution = tf.keras.layers.Conv2D(filters, kernel_size=f_size, strides=2,
                                         padding='same')
    activation = tf.keras.layers.LeakyReLU(alpha=0.2)
    return activation(convolution(layer_input))


def build_critic(pianoroll_shape=(32, 128, 4), filters=64,
                 condition_input_shape=(32, 128, 1)):
    """Assemble the conditional critic.

    The critic scores a piano roll together with the condition track it was
    generated from: both inputs are concatenated channel-wise, passed through
    four stride-2 convolution stages and reduced to a single unbounded logit.

    Args:
        pianoroll_shape: shape of the piano-roll input (without batch axis).
        filters: filter count of the first stage; deeper stages use multiples.
        condition_input_shape: shape of the condition input. Exposed as a
            parameter (it used to be hard-coded) so it can be kept in step with
            the generator's ``condition_input_shape``; the default preserves the
            previous behaviour.

    Returns:
        A ``tf.keras.models.Model`` named ``'Critic'`` with inputs
        ``[pianoroll, condition]`` and one output logit per sample.
    """
    groundtruth_pianoroll = tf.keras.layers.Input(shape=pianoroll_shape)
    condition_input = tf.keras.layers.Input(shape=condition_input_shape)
    combined_imgs = tf.keras.layers.Concatenate(axis=-1)([groundtruth_pianoroll, condition_input])

    d1 = _build_critic_layer(combined_imgs, filters)
    d2 = _build_critic_layer(d1, filters * 2)
    d3 = _build_critic_layer(d2, filters * 4)
    d4 = _build_critic_layer(d3, filters * 8)

    x = tf.keras.layers.Flatten()(d4)
    # No activation: the Wasserstein critic outputs an unbounded score.
    logit = tf.keras.layers.Dense(1)(x)

    critic = tf.keras.models.Model([groundtruth_pianoroll, condition_input], logit,
                                   name='Critic')

    return critic

In [13]:
# Instantiate the critic with its default shapes and print its layer table.
critic = build_critic()
critic.summary() 

Model: "Critic"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 32, 128, 4)] 0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 32, 128, 1)] 0                                            
__________________________________________________________________________________________________
concatenate_4 (Concatenate)     (None, 32, 128, 5)   0           input_3[0][0]                    
                                                                 input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 16, 64, 64)   5184        concatenate_4[0][0]         

In [14]:
def generator_loss(critic_fake_output):
    """Generator objective: the negated mean critic score of generated samples.

    Args:
        critic_fake_output: critic scores for generated samples.

    Returns:
        Scalar loss; minimising it raises the critic's score for generated data.
    """
    mean_fake_score = tf.reduce_mean(critic_fake_output)
    return -mean_fake_score


def wasserstein_loss(critic_real_output, critic_fake_output):
    """Critic objective: mean score on generated samples minus mean score on real ones.

    Args:
        critic_real_output: critic scores for real samples.
        critic_fake_output: critic scores for generated samples.

    Returns:
        Scalar loss.
    """
    mean_fake_score = tf.reduce_mean(critic_fake_output)
    mean_real_score = tf.reduce_mean(critic_real_output)
    return mean_fake_score - mean_real_score


def compute_gradient_penalty(critic, x, fake_x, condition_track_idx=0):
    """Gradient penalty on random interpolations between real and generated samples.

    Args:
        critic: callable taking ``(pianoroll, condition)`` and ``training=``.
        x: batch of real samples; its first dimension must be statically known.
        fake_x: batch of generated samples with the same shape as ``x``.
        condition_track_idx: channel of ``x`` used as the critic's condition
            input. Defaults to 0, the value that used to be hard-coded, so that
            callers conditioning on another track can pass the same index here.

    Returns:
        Scalar tensor: mean of ``(||grad|| - 1)^2`` over the batch.
    """
    c = tf.expand_dims(x[..., condition_track_idx], -1)
    batch_size = x.get_shape().as_list()[0]
    # One interpolation weight per sample, broadcast over all remaining axes:
    # the shape is [B, 1, ..., 1] with the same rank as x.
    eps_x = tf.random.uniform(
        [batch_size] + [1] * (len(x.get_shape()) - 1))
    inter = eps_x * x + (1.0 - eps_x) * fake_x

    with tf.GradientTape() as g:
        # `inter` is a plain tensor, not a variable, so it must be watched explicitly.
        g.watch(inter)
        disc_inter_output = critic((inter, c), training=True)
    grads = g.gradient(disc_inter_output, inter)

    # Per-sample gradient norm: sum over every axis except the batch axis. `axis`
    # replaces the deprecated `reduction_indices` alias; 1e-8 keeps sqrt
    # differentiable at zero.
    non_batch_axes = list(range(1, grads.get_shape().ndims))
    slopes = tf.sqrt(1e-8 + tf.reduce_sum(tf.square(grads), axis=non_batch_axes))
    gradient_penalty = tf.reduce_mean(tf.square(slopes - 1.0))

    return gradient_penalty


In [15]:
# Separate Adam optimizers for the two networks, configured identically
# (learning rate 1e-3, beta_1=0.5, beta_2=0.9).
generator_optimizer = tf.keras.optimizers.Adam(1e-3, beta_1=0.5, beta_2=0.9)
critic_optimizer = tf.keras.optimizers.Adam(1e-3, beta_1=0.5, beta_2=0.9)

# The checkpoint tracks both networks and both optimizers.
ckpt = tf.train.Checkpoint(generator=generator,
                           generator_optimizer=generator_optimizer,
                           critic=critic,
                           critic_optimizer=critic_optimizer)
# Checkpoints are managed under check_dir, with max_to_keep=5.
ckpt_manager = tf.train.CheckpointManager(ckpt, check_dir, max_to_keep=5)

In [16]:
@tf.function
def generator_train_step(x, condition_track_idx=0):
    """Run one optimisation step of the generator.

    Args:
        x: batch of real samples; only the channel selected by
            ``condition_track_idx`` is used, as the condition input.
        condition_track_idx: channel of ``x`` to condition on.

    Returns:
        The generator loss for this step.
    """
    condition = tf.expand_dims(x[..., condition_track_idx], -1)
    noise = tf.random.truncated_normal([BATCH_SIZE, 2, 8, 512])

    with tf.GradientTape() as tape:
        generated = generator((condition, noise), training=True)
        # The critic only scores here; it is called with training=False.
        critic_score = critic((generated, condition), training=False)
        gen_loss = generator_loss(critic_score)

    # Only the generator's variables are updated in this step.
    generator_variables = generator.trainable_variables
    generator_gradients = tape.gradient(gen_loss, generator_variables)
    generator_optimizer.apply_gradients(
        zip(generator_gradients, generator_variables))

    return gen_loss


In [17]:
@tf.function
def critic_train_step(x, condition_track_idx=0):
    """Run one optimisation step of the critic (Wasserstein loss + gradient penalty).

    Args:
        x: batch of real samples.
        condition_track_idx: channel of ``x`` used as the condition input for the
            generator and the critic. Note that the gradient penalty is computed
            by ``compute_gradient_penalty(critic, x, fake_x)``, which derives its
            own condition from ``x``.

    Returns:
        Wasserstein loss plus the weighted gradient penalty for this step.
    """
    c = tf.expand_dims(x[..., condition_track_idx], -1)

    z = tf.random.truncated_normal([BATCH_SIZE, 2, 8, 512])

    # Generated outside the tapes: the generator is not updated in this step.
    fake_x = generator((c, z), training=False)

    critic_variables = critic.trainable_variables

    with tf.GradientTape() as tape:
        real_output = critic((x, c), training=True)
        fake_output = critic((fake_x, c), training=True)
        critic_loss = wasserstein_loss(real_output, fake_output)

    grads_of_critic = tape.gradient(critic_loss, critic_variables)

    with tf.GradientTape() as tape:
        gp_loss = compute_gradient_penalty(critic, x, fake_x)
        gp_loss *= 10.0  # gradient-penalty weight

    grads_gp = tape.gradient(gp_loss, critic_variables)

    # Combine both gradient lists position by position. The penalty gradient is
    # None for variables it does not depend on; those entries must be kept (with
    # the Wasserstein gradient alone) rather than dropped, otherwise the list gets
    # shorter than `critic_variables` and the zip below pairs gradients with the
    # wrong variables or silently skips the trailing ones.
    gradients_of_critic = []
    for g, ggp in zip(grads_of_critic, grads_gp):
        if ggp is None:
            gradients_of_critic.append(g)
        elif g is None:
            gradients_of_critic.append(ggp)
        else:
            gradients_of_critic.append(g + ggp)

    critic_optimizer.apply_gradients(
        zip(gradients_of_critic, critic_variables))

    return critic_loss + gp_loss


In [18]:
# Load 5 melody samples via the project's inference_utils helper. sample_x is later
# sliced to obtain the condition track and sample_z is fed to the generator as its
# second input when producing preview samples during training.
sample_x, sample_z = inference_utils.load_melody_samples(n_sample=5)

Loaded 5 melody samples


In [19]:
# Number of generator updates performed by the training loop.
iterations = 5000
# Critic updates performed before each generator update.
n_dis_updates_per_gen_update = 5
# Channel of the samples used as the condition track for the preview samples.
# NOTE(review): the training loop calls the train-step functions without this
# value, so they fall back to their own default of 0 — keep the two in sync.
condition_track_idx = 0 
# Condition input for the preview samples: the selected channel with a trailing
# channel axis added.
sample_c = tf.expand_dims(sample_x[..., condition_track_idx], -1)

In [20]:
# Training loop: alternates critic and generator updates, redraws the loss curves
# after every iteration, and periodically records metrics, checkpoints, plots and
# MIDI samples.
metrics_utils.metrics_manager.initialize()

# Per-iteration loss history used for the live plot below.
c_losses = []
g_losses = []

# `dataset` was built with repeat(), so the iterator does not run out of batches.
it = iter(dataset)

for iteration in range(iterations):

    # Several critic updates per generator update, each on a fresh batch. Only the
    # loss of the last critic update is kept.
    for _ in range(n_dis_updates_per_gen_update):
        c_loss = critic_train_step(next(it))

    # One generator update on a fresh batch.
    g_loss = generator_train_step(next(it))


    c_losses.append(c_loss)
    g_losses.append(g_loss)

    # Replace the previous cell output with an up-to-date loss plot; the figure is
    # closed afterwards so figures do not accumulate over the iterations.
    display.clear_output(wait=True)
    fig = plt.figure(figsize=(15, 5))
    line1, = plt.plot(range(iteration+1), c_losses, 'r')
    line2, = plt.plot(range(iteration+1), g_losses, 'k')
    plt.xlabel('Iterations')
    plt.ylabel('Losses')
    plt.legend((line1, line2), ('C-loss', 'G-loss'))
    display.display(fig)
    plt.close(fig)
    

    print('Iteration {}, c_loss={:.2f}, g_loss={:.2f}'.format(iteration, c_loss, g_loss))
    

    # Metrics are recorded for each of the first 100 iterations and then every 50th.
    if iteration < 100 or iteration % 50 == 0 :

        # Preview samples generated from the fixed condition / latent inputs.
        fake_sample_x = generator((sample_c, sample_z), training=False)
        metrics_utils.metrics_manager.append_metrics_for_iteration(fake_sample_x.numpy(), iteration)

        # Every 50th iteration: checkpoint, piano-roll plot and MIDI export.
        if iteration % 50 == 0:

            ckpt_manager.save(checkpoint_number=iteration) 
        
            fake_sample_x = fake_sample_x.numpy()
    

            display_utils.plot_pianoroll(iteration, sample_x[:4], fake_sample_x[:4], save_dir=train_dir)


            destination_path = path_utils.generated_midi_path_for_iteration(iteration, saveto_dir=sample_dir)
            

            # NOTE(review): `programs` selects the instruments written to the MIDI
            # file; confirm the numbers match the instrument order of the training data.
            midi_utils.save_pianoroll_as_midi(fake_sample_x[:4], programs=[17, 28, 27, 11], destination_path=destination_path)




KeyboardInterrupt: 

In [26]:
# Restore the most recent generator weights for inference.
# Only the generator is tracked here, so expect_partial() is used to silence
# warnings about the critic / optimizer values stored in the checkpoint file.
# NOTE: this rebinds `ckpt` and `ckpt_manager`; checkpoints saved through this
# manager afterwards would contain the generator only.
ckpt = tf.train.Checkpoint(generator=generator)
ckpt_manager = tf.train.CheckpointManager(ckpt, check_dir, max_to_keep=5)

latest_checkpoint = ckpt_manager.latest_checkpoint
if latest_checkpoint is None:
    # Nothing to restore: say so instead of claiming that "None" was restored.
    print('No checkpoint found in {}; generator weights were not restored.'.format(check_dir))
else:
    ckpt.restore(latest_checkpoint).expect_partial()
    print('Latest checkpoint {} restored.'.format(latest_checkpoint))

Latest checkpoint ./music/style/preload/ckpt-300 restored.


In [33]:
# Generate an accompaniment for the melody in './321.midi' with the current
# generator and save it as a MIDI file under eval_dir.
conditioned_track = midi_utils.get_conditioned_track(midi='./321.midi')
generated_pianoroll = inference_utils.generate_pianoroll(generator, conditioned_track)

destination_path = path_utils.new_temp_midi_path(saveto_dir=eval_dir)

# Program numbers as printed by the instrument survey earlier in this notebook:
# 17 = ORGAN 2, 28 = MUTED GTR, 27 = CLEAN GTR, 11 = VIBRAPHONE.
# NOTE(review): assumes save_pianoroll_as_midi uses the same (zero-based) numbering
# as that survey — confirm.
# TODO: CHANGE THIS BASED ON YOUR SPECIFIC DATASET
midi_utils.save_pianoroll_as_midi(generated_pianoroll.numpy(), destination_path=destination_path, programs=[17, 28, 27, 11],)

# Remember where the most recent MIDI file was written.
latest_midi = destination_path

Midi saved to  ./music/style/eval/temp-20.mid
