In [1]:
import numpy as np
import glob
import mido as md
import tensorflow as tf
from tensorflow.keras import layers
from music21 import converter, instrument, note, chord, stream
from tensorflow.keras.layers import Input, Dense, Reshape, Dropout, LSTM, Bidirectional
from tensorflow.keras.layers import BatchNormalization, Activation, ZeroPadding2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
#from tensorflow.keras.utils import np_utils

#how many notes to generate
output_filepath = "composedSongs"

latent_dim = 1000
disc_loss = []
gen_loss = []

network_input = np.load("inputs_150.npy")
#network_input = np.load("inputs_mogus.npy")


#labels will not be needed for this network
#network_output = np.load("labels_50.npy") 

normalization_values = np.load("normalization_values.npy")

seq_shape = (network_input.shape[1], network_input.shape[2])

biggest_note = normalization_values[0]
smallest_note = normalization_values[1]
std_note = normalization_values[2]

biggest_time = normalization_values[3]
smallest_time = normalization_values[4]
std_time = normalization_values[5]

In [2]:
#old generator
###def get_generator():
###        model = Sequential()
###        model.add(Dense(256, input_dim=latent_dim))
###        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
###        model.add(BatchNormalization(momentum=0.8))
###        model.add(Dense(512))
###        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
###        model.add(BatchNormalization(momentum=0.8))
###        model.add(Dense(1024))
###        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
###        model.add(BatchNormalization(momentum=0.8))
###        model.add(Dense(np.prod(seq_shape), activation='sigmoid'))
###        model.add(Reshape(seq_shape))
###       
###        noise = Input(shape=(latent_dim,))
###        seq = model(noise)
###   
###        return Model(noise, seq)

def get_generator():
        model = Sequential()
        
        model.add(Dense(256, input_dim=latent_dim))
        model.add(BatchNormalization(momentum=0.8))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
        model.add(Dropout(0.2))
        
        model.add(Dense(512))
        model.add(BatchNormalization(momentum=0.8))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
        model.add(Dropout(0.3))
        
        model.add(Dense(1024))
        model.add(BatchNormalization(momentum=0.8))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))        
        model.add(Dropout(0.3))     
        
        model.add(Dense(1024))
        model.add(BatchNormalization(momentum=0.8))
        model.add(tf.keras.layers.LeakyReLU(alpha=0.2))        
        model.add(Dropout(0.3))   
        
        model.add(Dense(np.prod(seq_shape), activation='sigmoid'))
        model.add(Reshape(seq_shape))
       
        noise = Input(shape=(latent_dim,))
        seq = model(noise)
   
        return Model(noise, seq)

In [3]:
def get_discriminator():
    model = Sequential()
    model.add(LSTM(512, input_shape=seq_shape, return_sequences=True))
    model.add(Bidirectional(LSTM(512)))
    
    model.add(Dense(512))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(Dropout(0.2))   

    model.add(Dense(256))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(Dropout(0.2))   
    
    model.add(Dense(512))
    model.add(tf.keras.layers.LeakyReLU(alpha=0.2))
    model.add(Dropout(0.3))
    
    model.add(Dense(1, activation='sigmoid'))
    
    seq = Input(shape = seq_shape)
    validity = model(seq)
    
    return Model(seq, validity)

In [4]:
def train_generator(model, num_epochs=50, batch_size=32):
    filepath = "generator/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"    
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
                      filepath, monitor='loss' 
                    , verbose=0        
                    , save_best_only=True        
                    , mode='min')    
    callbacks_list = [checkpoint]     
    model.fit(network_input, network_output, epochs=num_epochs, batch_size=64, callbacks=callbacks_list)

In [5]:
def generate_notes(model, generation_length=100):
    start = np.random.randint(0, len(network_input) - 1)   
    pattern = network_input[start]
    
    prediction_output = []
    
    for note_index in range(generation_length):
        prediction_input = np.reshape(pattern, (1, len(pattern), 2))
        prediction = model.predict(prediction_input, verbose=0)
        
        pattern = pattern[1:len(pattern)]
        pattern = np.append(pattern, prediction, axis=0)
        prediction_output.append(prediction[0])
        
    prediction_output = np.reshape(prediction_output, (generation_length, 2))
    
    return prediction_output 

In [6]:
from matplotlib.pyplot import figure
import matplotlib.pyplot as plt

def plot_loss():
       # figure(figsize=(8, 6), dpi=80)
        plt.plot(disc_loss, c='red')
        plt.plot(gen_loss, c='blue')
        plt.title("GAN Loss per Epoch")  
        plt.legend(['Discriminator', 'Generator'])
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.savefig('GAN_Loss_per_Epoch_final3.png', transparent=True)
        plt.close()

In [7]:
plot_loss()

In [8]:
def train_gan(generator, discriminator, combined, epochs, batch_size=128, sample_interval=50):

        # Adversarial ground truths
        real = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        
        # Training the model
        for epoch in range(epochs):

            # Training the discriminator
            # Select a random batch of note sequences
            idx = np.random.randint(0, network_input.shape[0], batch_size)
            real_seqs = network_input[idx]

            #noise = np.random.choice(range(484), (batch_size, self.latent_dim))
            #noise = (noise-242)/242
            noise = np.random.normal(0, 1, (batch_size, latent_dim))

            # Generate a batch of new note sequences
            gen_seqs = generator.predict(noise)

            # Train the discriminator
            d_loss_real = discriminator.train_on_batch(real_seqs, real)
            d_loss_fake = discriminator.train_on_batch(gen_seqs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)


            #  Training the Generator
            noise = np.random.normal(0, 1, (batch_size, latent_dim))

            # Train the generator (to have the discriminator label samples as real)
            g_loss = combined.train_on_batch(noise, real)

            # Print the progress and save into loss lists
            if epoch % sample_interval == 0:
                print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
                
                generator.save_weights('checkpoints/generator', overwrite = True)
                discriminator.save_weights('checkpoints/discriminator', overwrite = True)
                combined.save_weights('checkpoints/combined', overwrite = True)
                disc_loss.append(d_loss[0])
                gen_loss.append(g_loss)
                
                np.save("disc_loss.npy", disc_loss)
                np.save("gen_loss.npy", gen_loss)

In [9]:
def get_gan():
    optimizer = Adam(0.0002, 0.5)
    
    # Build and compile the discriminator
    discriminator = get_discriminator()
    discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    # Build the generator
    generator = get_generator()
    
    # The generator takes noise as input and generates note sequences
    z = Input(shape=(latent_dim,))
    generated_seq = generator(z)
    
    # For the combined model we will only train the generator
    discriminator.trainable = False
    
    # The discriminator takes generated images as input and determines validity
    validity = discriminator(generated_seq)
    
    # The combined model  (stacked generator and discriminator)
    # Trains the generator to fool the discriminator
    combined = Model(z, validity)
    combined.compile(loss='binary_crossentropy', optimizer=optimizer)
    
    return generator, discriminator, combined


In [10]:
def convert_to_midi(prediction_output, vel=64, time_scale=1):
    mid = md.MidiFile()
    track = md.MidiTrack()
    mid.tracks.append(track)
    prediction_output_aux = np.empty_like(prediction_output)
    
    #copy the array
    prediction_output_aux[:] = prediction_output
    
    
    #queue we will use to end each note
    notes_queue = []
    
    #after how many notes the current note in the queue will end
    note_end_interval = 8
    
    #convert the data into valid values 
    #for generating a midi
    for pair in prediction_output_aux:
        
        #Denormalise data
        pair[0] = round(pair[0] * std_note + smallest_note)
        pair[1] = round((pair[1] * std_time + smallest_time) * time_scale) 
        
        pitch = pair.astype(int)[0]
        ticks = int(pair[1])

        print(pair)
        msg = md.Message( 'note_on'
                         , note = pitch
                         , time = ticks
                         , velocity=vel )
        
        notes_queue.append(pitch)
        
        if len(notes_queue) == note_end_interval:
            end_msg = md.Message( 'note_off'
                                 , note = notes_queue[0]
                                 , time = 0
                                 , velocity=0 )
            track.append(end_msg)
            del notes_queue[0]
                     
        track.append(msg)
    
    #mid.save((output_filepath + '/generatedSong.mid'))
    
    for note_pitch in notes_queue:
        end_msg = md.Message( 'note_off'
                             , note = note_pitch
                             , time = 100
                             , velocity=0 )
        track.append(end_msg)
       
   # for msg in mid:
      #  print(msg)
        
    return mid

In [11]:
def load_checkpoint():
    generator.load_weights('checkpoints/generator')
    discriminator.load_weights('checkpoints/discriminator')
    combined.load_weights('checkpoints/combined')
    gen_loss = np.load('gen_loss.npy').tolist()
    disc_loss = np.load('disc_loss.npy').tolist()

generator, discriminator, combined = get_gan()

load_checkpoint()
    
#resume training
 ##  generator.load_weights('checkpoints/generator')
 ##  discriminator.load_weights('checkpoints/discriminator')
 ##  combined.load_weights('checkpoints/combined')

 ##  gen_loss = np.load('gen_loss.npy').tolist()
 ##  disc_loss = np.load('disc_loss.npy').tolist()

In [12]:
#change learning rate of the models
from tensorflow.keras import backend as K
learning_rate = 0.0002
K.set_value(combined.optimizer.learning_rate, learning_rate)
K.set_value(discriminator.optimizer.learning_rate, learning_rate)

In [13]:
plot_loss()

In [24]:
noise = np.random.normal(0, 1, (1, latent_dim))

# Generate a batch of new note sequences
gen_seqs = generator.predict(noise)
#midi = convert_to_midi(gen_seqs[0])


In [26]:
midi = convert_to_midi(gen_seqs[0], time_scale=2)
midi.save("ohboi1.mid")

[ 59. 317.]
[  70. 1391.]
[ 42. 955.]
[ 35. 577.]
[ 49. 861.]
[ 50. 257.]
[ 40. 458.]
[ 67. 285.]
[ 49. 298.]
[ 46. 321.]
[ 88. 442.]
[ 96. 433.]
[100. 391.]
[ 90. 153.]
[ 82. 457.]
[ 40. 341.]
[ 30. 358.]
[ 44. 460.]
[ 59. 592.]
[ 85. 764.]
[  61. 1252.]
[ 64. 705.]
[ 48. 797.]
[ 40. 932.]
[ 74. 769.]
[  77. 1385.]
[ 39. 260.]
[ 50. 603.]
[ 52. 563.]
[ 67. 681.]
[ 91. 653.]
[ 88. 655.]
[ 62. 529.]
[ 39. 710.]
[ 41. 526.]
[ 55. 374.]
[ 39. 256.]
[ 36. 197.]
[ 32. 383.]
[ 39. 770.]
[ 36. 263.]
[ 30. 582.]
[ 31. 381.]
[ 45. 775.]
[ 35. 296.]
[ 31. 285.]
[ 42. 359.]
[ 45. 224.]
[ 44. 251.]
[ 49. 352.]
[  44. 1097.]
[  87. 5127.]
[ 100. 2958.]
[  98. 2432.]
[  98. 1361.]
[  67. 2880.]
[ 65. 812.]
[  65. 2508.]
[  67. 1463.]
[ 92. 606.]
[ 62. 922.]
[  43. 1285.]
[ 32. 597.]
[ 64. 525.]
[ 77. 727.]
[ 92. 983.]
[ 86. 759.]
[ 72. 593.]
[ 45. 603.]
[ 85. 512.]
[ 82. 398.]
[ 36. 490.]
[ 32. 427.]
[ 35. 736.]
[ 37. 417.]
[101. 746.]
[ 98. 515.]
[ 31. 891.]
[ 40. 679.]
[  57. 1309.]
[ 59. 212.]
[ 