In [None]:
!pip install mido

Collecting mido
[?25l  Downloading https://files.pythonhosted.org/packages/b5/6d/e18a5b59ff086e1cd61d7fbf943d86c5f593a4e68bfc60215ab74210b22b/mido-1.2.10-py2.py3-none-any.whl (51kB)
[K     |██████▍                         | 10kB 24.3MB/s eta 0:00:01[K     |████████████▉                   | 20kB 29.4MB/s eta 0:00:01[K     |███████████████████▎            | 30kB 19.3MB/s eta 0:00:01[K     |█████████████████████████▋      | 40kB 16.5MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 4.6MB/s 
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.10


In [3]:
import os, sys, mido
import numpy as np
from random import randrange
from mido import MidiFile, MidiTrack, Message

from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Bidirectional, LSTM, Reshape, RepeatVector, TimeDistributed
from keras.layers import BatchNormalization, Activation
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model, load_model
from keras.optimizers import Adam

def note_reg(note):
    """Reduce a MIDI note number to its pitch class.

    Returns an int from 0 (C) through 11 (B) when `note` equals one of the
    MIDI note numbers 0..127, and the string 'fail' for any other value.
    """
    for pitch_class in range(12):
        # All notes sharing this pitch class: pitch_class, pitch_class + 12, ... (capped at 127).
        if note in range(pitch_class, 128, 12):
            return pitch_class
    return 'fail'


def read_inputFiles(inputPath, dataPath):
  """Build the training set from every MIDI file found under `inputPath`.

  For each file, every non-meta 'note_on' message is reduced to its pitch
  class (via note_reg) and divided by 11, giving values in [0, 1]. Each song
  is then cut into overlapping windows of 16 consecutive notes (stride 1,
  starting at the song's first note). Songs with fewer than 16 notes
  contribute no windows.

  Args:
    inputPath: directory that is walked recursively for '.mid' / '.midi' files.
    dataPath: destination passed to np.save; the saved array has shape
      (n_windows, 16, 1) when at least one window was produced.
  """
  window_size = 16

  paths = []
  for root, _dirs, files in os.walk(inputPath):
      for name in files:
          # Match on the extension only, so names like 'x.mid.bak' are not parsed as MIDI.
          if name.lower().endswith(('.mid', '.midi')):
              paths.append(os.path.join(root, name))
  # os.walk order depends on the filesystem; sort so the dataset is reproducible.
  paths.sort()

  dataset = []
  for path in paths:
      song = MidiFile(path, type = 1)
      # Keep only note_on events (meta messages and control changes are dropped).
      # NOTE(review): note_on with velocity 0 is conventionally a note-off in MIDI;
      # such messages are still counted as notes here - confirm this is intended.
      notes = [[note_reg(msg.note)/11]
               for msg in song
               if not msg.is_meta and msg.type == 'note_on']
      # Sliding windows over the whole song, including its first note.
      for start in range(len(notes) - window_size + 1):
          dataset.append(notes[start:start + window_size])

  dataset = np.array(dataset)
  np.save(dataPath, dataset)


def load_data(dataPath):
  """Load the array stored at `dataPath` and return it reshaped to (n, 4, 4).

  `n` is the length of the loaded array along its first axis.
  """
  windows = np.load(dataPath, allow_pickle=True)
  n_windows = len(windows)
  return windows.reshape((n_windows, 4, 4))


class GAN():
  """Recurrent GAN over 16-step note sequences.

  The generator maps a (4, 4) noise tensor to a (16, 1) sequence; the
  discriminator maps a (16, 1) sequence to a (1, 1) validity score.
  `combined` stacks generator -> frozen discriminator and is used to train
  the generator.
  """

  def __init__(self):
    # Layout of one training sample as returned by load_data (4 x 4 = 16 notes).
    self.img_rows = 4
    self.img_cols = 4
    self.img_shape = (self.img_rows, self.img_cols)
    self.latent_dim = 16

    # Build and compile the discriminator.
    # Each compiled model gets its own Adam instance (learning rate 0.0001,
    # beta_1 0.4) so optimizer state is never shared between two models.
    self.discriminator = self.build_discriminator()
    self.discriminator.compile(loss='binary_crossentropy',
        optimizer=Adam(0.0001, 0.4),
        metrics=['accuracy'])

    # Build the generator
    self.generator = self.build_generator()

    # The generator takes noise as input and generates a note sequence
    z = Input(shape=(4,4))
    img = self.generator(z)

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # The discriminator scores the generated sequence
    valid = self.discriminator(img)

    # The combined model (stacked generator and discriminator)
    # trains the generator to fool the discriminator
    self.combined = Model(z, valid)
    self.combined.compile(loss='mean_squared_error', optimizer=Adam(0.0001, 0.4))

  def build_generator(self):
    """Return the generator: (4, 4) noise in, (16, 1) sequence out."""

    model = Sequential()
    #encoder
    model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(4, 4)))
    model.add(Dropout(0.2))
    model.add(Bidirectional(LSTM(128)))
    model.add(Dropout(0.2))
    #specifying output to have 16 timesteps
    model.add(RepeatVector(16))
    #decoder
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.2))
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(Dropout(0.2))
    model.add(TimeDistributed(Dense(256)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(TimeDistributed(Dense(1)))
    model.add(LeakyReLU(alpha=0.2))
    model.summary()

    noise = Input(shape=(4,4))
    img = model(noise)

    return Model(noise, img)

  def build_discriminator(self):
    """Return the discriminator: (16, 1) sequence in, (1, 1) score out."""

    model = Sequential()

    model.add(Bidirectional(LSTM(256, return_sequences=True), input_shape=(16, 1)))
    model.add(Dropout(0.2))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Bidirectional(LSTM(256)))
    model.add(Dropout(0.2))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    # RepeatVector(1) re-adds a time axis so the TimeDistributed head yields (1, 1).
    model.add(RepeatVector(1))
    model.add(TimeDistributed(Dense(300)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(TimeDistributed(Dense(300)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(TimeDistributed(Dense(300)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    # NOTE(review): the output is linear (no sigmoid) although the model is
    # compiled with binary_crossentropy - confirm this is intended.
    model.add(TimeDistributed(Dense(1)))
    model.summary()

    img = Input(shape=(16,1))
    validity = model(img)

    return Model(img, validity)


  def train(self, dataPath, modelPath, epochs, batch_size=128, save_interval=50):
    """Train the GAN and periodically save the generator.

    Args:
      dataPath: .npy file read through load_data.
      modelPath: where the generator is saved (overwritten on every save).
      epochs: number of training iterations; each one trains on a single batch.
      batch_size: samples drawn (with replacement) per iteration.
      save_interval: the generator is saved whenever epoch % save_interval == 0,
        and additionally after the final iteration.

    Returns:
      (g_loss_epochs, d_loss_epochs): two arrays of shape (epochs, 1) holding
      the generator loss and the averaged discriminator loss per iteration.

    Raises:
      ValueError: if the dataset contains no samples.
    """

    # Load the dataset. The values are used as stored: read_inputFiles already
    # scales them to [0, 1] (pitch class / 11), so no further rescaling is applied.
    X_train = load_data(dataPath)
    if X_train.shape[0] == 0:
      raise ValueError("training data at %r contains no samples" % (dataPath,))

    # Adversarial ground truths
    valid = np.ones((batch_size,1,1))
    fake = np.zeros((batch_size,1,1))

    g_loss_epochs = np.zeros((epochs, 1))
    d_loss_epochs = np.zeros((epochs, 1))

    for epoch in range(epochs):

      # ---------------------
      #  Train Discriminator
      # ---------------------

      # Select a random batch of real sequences
      idx = np.random.randint(0, X_train.shape[0], batch_size)
      imgs = X_train[idx]
      imgs = np.array(imgs)
      imgs = imgs.reshape(len(imgs),16,1)

      # Sample noise and generate a batch of new sequences
      noise = np.random.normal(0, 1, (batch_size,4,4))
      gen_imgs = self.generator.predict(noise)

      # Train the discriminator (real classified as ones and generated as zeros)
      d_loss_real = self.discriminator.train_on_batch(imgs, valid)
      d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
      d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

      # ---------------------
      #  Train Generator
      # ---------------------

      # Train the generator (wants discriminator to mistake generated sequences as real)
      g_loss = self.combined.train_on_batch(noise, valid)

      #save loss history
      g_loss_epochs[epoch] = g_loss
      d_loss_epochs[epoch] = d_loss[0]

      # Print the progress
      print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

      # Save at every interval, and always after the last iteration so the
      # file on disk reflects the fully trained generator.
      is_last_epoch = epoch == epochs - 1
      if epoch % save_interval == 0 or is_last_epoch:
          self.generator.save(modelPath)
    return g_loss_epochs, d_loss_epochs


def predict_newSong(model):
  """Sample one 16-step sequence from `model` and scale it into MIDI range.

  Noise of shape (1, 4, 4) is drawn from N(0, 1) - the same distribution the
  generator is trained on - and passed to model.predict. Each of the 16
  outputs is then rescaled in place: its absolute value is multiplied by 11,
  multiplied by 10 until it is at least 10, and divided by 10 while it
  exceeds 127. Outputs that are exactly zero or non-finite (NaN/inf) cannot
  be scaled this way and are set to 0 instead of looping forever.

  Args:
    model: object whose predict(noise) returns a writable array indexable as
      [0][step][0] for step in 0..15.

  Returns:
    The array returned by model.predict, with the rescaled values.
  """
  noise = np.random.normal(0, 1, (1,4,4))
  newSong = model.predict(noise)
  for step in range(16):
    value = abs(float(newSong[0][step][0]) * 11)
    if value == 0 or not np.isfinite(value):
      # Multiplying 0 by 10 (or dividing inf by 10) never terminates.
      newSong[0][step][0] = 0
      continue
    while value < 10:
      value = value * 10
    while value > 127:
      value = value / 10
    newSong[0][step][0] = value
  return newSong


def generate_newMidFile(timingFilePath, resultPath, newSong):
  """Write a four-track MIDI file: a generated melody over a fixed chord loop.

  Track 1 carries the melody: the 16 values of `newSong`, each reduced to a
  pitch class with note_reg and placed in the octave starting at note 72,
  played twice. Tracks 2-4 each carry one voice of a fixed four-chord
  progression, also played twice, with every chord held for 1920 ticks.

  Args:
    timingFilePath: .npy file holding a sequence of dict-like entries; one is
      picked at random and its values under the keys '0'..'15' are used as
      the note_off delta times of the melody notes.
    resultPath: path the MIDI file is saved to.
    newSong: array indexable as [0][i][0] for i in 0..15, holding values that
      int() maps into 0..127 (as produced by predict_newSong).
  """
  # NOTE: allow_pickle=True unpickles Python objects and can execute arbitrary
  # code - only load timing files from a trusted source.
  timing = np.load(timingFilePath, allow_pickle=True)

  mid = MidiFile()
  track = MidiTrack()
  chord_tracks = [MidiTrack(), MidiTrack(), MidiTrack()]
  all_tracks = [track] + chord_tracks
  for trk in all_tracks:
      mid.tracks.append(trk)

  octave = 12 * 6
  t = randrange(len(timing))

  for trk in all_tracks:
      trk.append(Message('program_change', program=2, time=0))

  # One tuple per chord; the three notes go to tracks 2, 3 and 4 respectively.
  chords = [(68, 64, 59), (69, 66, 62), (73, 68, 64), (69, 66, 62)]
  for _ in range(2):
      for chord in chords:
          for trk, chord_note in zip(chord_tracks, chord):
              trk.append(Message('note_on', note=chord_note, velocity=randrange(70,80), time=0))
              trk.append(Message('note_off', note=chord_note, velocity=randrange(70,80), time=1920))

  for _ in range(2):
      for i in range(16):
          pitch = octave + note_reg(int(newSong[0][i][0]))
          # The very first note of each pass starts immediately; later ones after 20 ticks.
          onset_delay = 0 if i == 0 else 20
          track.append(Message('note_on', note=pitch, velocity=randrange(90,110), time=onset_delay))
          track.append(Message('note_off', note=pitch, velocity=randrange(90,110), time=timing[t].get(str(i))))
  mid.save(resultPath)


def main():
  """Run the full pipeline: build dataset, train the GAN, write a new MIDI file."""
  input_dir = "input/"
  dataset_file = "data/data.npy"
  generator_file = "data/GAN_generator.h5"
  timing_file = "data/csp.npy"
  output_midi = "data/newSong.mid"

  # 1. Convert the input MIDI files into the training array.
  read_inputFiles(input_dir, dataset_file)

  # 2. Train; the generator is written to generator_file during training.
  gan = GAN()
  g_loss, d_loss = gan.train(dataset_file, generator_file, epochs=301, batch_size=128, save_interval=100)

  # 3. Reload the saved generator and turn one sample into a MIDI file.
  trained_generator = load_model(generator_file)
  generated_song = predict_newSong(trained_generator)
  generate_newMidFile(timing_file, output_midi, generated_song)
  
# Run the pipeline only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
  main()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_6 (Bidirection (None, 16, 512)           528384    
_________________________________________________________________
dropout_11 (Dropout)         (None, 16, 512)           0         
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 16, 512)           0         
_________________________________________________________________
bidirectional_7 (Bidirection (None, 512)               1574912   
_________________________________________________________________
dropout_12 (Dropout)         (None, 512)               0         
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 512)               0         
_________________________________________________________________
dropout_13 (Dropout)         (None, 512)              