In [1]:
import pandas as pd
import numpy as np
from mido import MidiFile, MidiTrack, Message
from keras.layers import Input, Dense, Reshape, Flatten
from keras.layers import BatchNormalization, Activation, Conv2D, Conv2DTranspose
from keras.models import Model
from keras.optimizers import Adam

In [5]:
# Define the generator
def build_generator(input_shape):
    inputs = Input(input_shape)
    
    x = Dense(128 * 32 * 1, activation="relu")(inputs)
    x = Reshape((32, 128, 1))(x)
    
    x = Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same")(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Activation("relu")(x)
    
    x = Conv2DTranspose(64, (4, 4), strides=(2, 2), padding="same")(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Activation("relu")(x)
    
    x = Conv2DTranspose(1, (4, 4), strides=(2, 2), padding="same")(x)
    outputs = Activation("tanh")(x)
    
    generator = Model(inputs, outputs)
    return generator

# Define the discriminator
def build_discriminator(input_shape):
    inputs = Input(input_shape)
    
    x = Conv2D(32, (4, 4), strides=(2, 2), padding="same")(inputs)
    x = Activation("relu")(x)
    
    x = Conv2D(64, (4, 4), strides=(2, 2), padding="same")(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Activation("relu")(x)
    
    x = Conv2D(128, (4, 4), strides=(2, 2), padding="same")(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Activation("relu")(x)
    
    x = Flatten()(x)
    outputs = Dense(1, activation="sigmoid")(x)
    
    discriminator = Model(inputs, outputs)
    return discriminator

# Define the GAN
def build_gan(generator, discriminator):
    discriminator.trainable = False
    
    inputs = Input(generator.input_shape[1:])
    generated_data = generator(inputs)
    outputs = discriminator(generated_data)
    
    gan = Model(inputs, outputs)
    gan.compile(optimizer=Adam(0.0002, 0.5), loss="binary_crossentropy")
    return gan

# Load the MIDI data
midi_data = MidiFile('C:/Users/Ody/Desktop/Work/ComputerSciencePostgraduate/ComputerSciencePostgraduate/Deep Learning\Music_Generation_Project/maestro-v3.0.0-midi/maestro-v3.0.0/2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi')
note_array = []
for i, track in enumerate(midi_data.tracks):
    for msg in track:
        if msg.type == "note_on":
            note_array.append(msg.note)

# Normalize the note values to between -1 and 1
note_array = (np.array(note_array) / 127.5) - 1

# Reshape the data to be compatible with the GAN
note_array = note_array.reshape((-1, 1, 1, len(note_array)))

# Set the input shape
input_shape = (1, 1, len(note_array))

# Build the generator, discriminator, and GAN
generator = build_generator(input_shape)
discriminator = build_discriminator(input_shape)
gan = build_gan(generator, discriminator)

# Train the GAN
epochs = 10000
batch_size = 128

X_train = note_array.astype(np.float32)

noise = np.random.normal(0, 1, (batch_size, 1, 1, 100))

for epoch in range(epochs):
# Generate a batch of MIDI data
    idx = np.random.randint(0, X_train.shape[0], batch_size)
    real_data = X_train[idx]

    # Generate a batch of noise to use as input to the generator
    noise = np.random.normal(0, 1, (batch_size, 1, 1, 100))

    # Generate a batch of fake MIDI data using the generator
    generated_data = generator.predict(noise)

    # Train the discriminator on the real and fake MIDI data
    d_loss_real = discriminator.train_on_batch(real_data, np.ones((batch_size, 1)))
    d_loss_fake = discriminator.train_on_batch(generated_data, np.zeros((batch_size, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Train the generator to fool the discriminator
    g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1)))

    # Print the progress
    print(f"Epoch {epoch+1}, Discriminator Loss: {d_loss}, Generator Loss: {g_loss}")

    # Save generated MIDI data every 1000 epochs
    if epoch % 1000 == 0:
        generated_data = generator.predict(noise)
        generated_data = (generated_data.reshape((-1, len(note_array))) + 1) * 127.5
        generated_midi = MidiFile()
        track = MidiTrack()
        for note in generated_data[0]:
            track.append(Message('note_on', note=int(note), velocity=127, time=0))
        generated_midi.tracks.append(track)
        generated_midi.save(f"generated_midi_{epoch}.mid")





ValueError: Exception encountered when calling layer "model_1" (type Functional).

Input 0 of layer "dense_1" is incompatible with the layer: expected axis -1 of input shape to have value 128, but received input with shape (None, 524288)

Call arguments received by layer "model_1" (type Functional):
  • inputs=tf.Tensor(shape=(None, 256, 1024, 1), dtype=float32)
  • training=False
  • mask=None