In [13]:
import collections
import datetime
import fluidsynth
import glob
import numpy as np
import pathlib
import pandas as pd
import pretty_midi
import seaborn as sns
import tensorflow as tf

import keras_tuner as kt
from tensorflow.keras import layers
from tensorflow.keras.losses import SparseCategoricalCrossentropy, MeanSquaredError
from tensorflow.keras.optimizers import Adam

from IPython import display
from matplotlib import pyplot as plt
from typing import Optional

In [14]:
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Sampling rate for audio playback
_SAMPLING_RATE = 16000

In [15]:
# Download Maestro = Midi Files 
data_dir = pathlib.Path('data/maestro-v3.0.0')
if not data_dir.exists():
  tf.keras.utils.get_file(
     'maestro-v3.0.0-midi.zip',
      origin='https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip',
      extract=True,
      cache_dir='.', cache_subdir='data',
  )

In [16]:
filenames = glob.glob(str(data_dir/'**/*.mid*'))
print('Number of files:', len(filenames))

Number of files: 1276


In [17]:
# Process a Midi file 
sample_file = filenames[1]
print(sample_file)

data\maestro-v3.0.0\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.midi


In [18]:
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
  # Take a sample of the generated waveform to mitigate kernel resets
  waveform_short = waveform[:seconds*_SAMPLING_RATE]
  return display.Audio(waveform_short, rate=_SAMPLING_RATE)

In [19]:
def midi_to_notes(midi_file: str) -> pd.DataFrame:
  pm = pretty_midi.PrettyMIDI(midi_file)
  instrument = pm.instruments[0]
  notes = collections.defaultdict(list)

  # Sort the notes by start time
  sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
  prev_start = sorted_notes[0].start

  for note in sorted_notes:
    start = note.start
    end = note.end
    notes['pitch'].append(note.pitch)
    notes['start'].append(start)
    notes['end'].append(end)
    notes['step'].append(start - prev_start)
    notes['duration'].append(end - start)
    prev_start = start

  return pd.DataFrame({name: np.array(value) for name, value in notes.items()})

In [20]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str, 
  instrument_name: str,
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:

  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          instrument_name))

  prev_start = 0
  for i, note in notes.iterrows():
    start = float(prev_start + note['step'])
    end = float(start + note['duration'])
    note = pretty_midi.Note(
        velocity=velocity,
        pitch=int(note['pitch']),
        start=start,
        end=end,
    )
    instrument.notes.append(note)
    prev_start = start

  pm.instruments.append(instrument)
  pm.write(out_file)
  return pm

In [21]:
num_files = 5
all_notes = []
for f in filenames[:num_files]:
  notes = midi_to_notes(f)
  all_notes.append(notes)

all_notes = pd.concat(all_notes)

In [22]:
n_notes = len(all_notes)
print('Number of notes parsed:', n_notes)

Number of notes parsed: 37241


In [23]:
key_order = ['pitch', 'step', 'duration']
train_notes = np.stack([all_notes[key] for key in key_order], axis=1)

In [27]:
notes_ds = tf.data.Dataset.from_tensor_slices(train_notes)
notes_ds.element_spec

TensorSpec(shape=(3,), dtype=tf.float64, name=None)

In [28]:
def create_sequences(
    dataset: tf.data.Dataset, 
    seq_length: int,
    vocab_size = 128,
) -> tf.data.Dataset:
  """Returns TF Dataset of sequence and label examples."""
  seq_length = seq_length+1

  # Take 1 extra for the labels
  windows = dataset.window(seq_length, shift=1, stride=1,
                              drop_remainder=True)

  # `flat_map` flattens the" dataset of datasets" into a dataset of tensors
  flatten = lambda x: x.batch(seq_length, drop_remainder=True)
  sequences = windows.flat_map(flatten)

  # Normalize note pitch
  def scale_pitch(x):
    x = x/[vocab_size,1.0,1.0]
    return x

  # Split the labels
  def split_labels(sequences):
    inputs = sequences[:-1]
    labels_dense = sequences[-1]
    labels = {key:labels_dense[i] for i,key in enumerate(key_order)}

    return scale_pitch(inputs), labels

  return sequences.map(split_labels, num_parallel_calls=tf.data.AUTOTUNE)

In [29]:
seq_length = 25
vocab_size = 128
seq_ds = create_sequences(notes_ds, seq_length, vocab_size)
seq_ds.element_spec

(TensorSpec(shape=(25, 3), dtype=tf.float64, name=None),
 {'pitch': TensorSpec(shape=(), dtype=tf.float64, name=None),
  'step': TensorSpec(shape=(), dtype=tf.float64, name=None),
  'duration': TensorSpec(shape=(), dtype=tf.float64, name=None)})

In [15]:
batch_size = 64
buffer_size = n_notes - seq_length  # the number of items in the dataset
train_ds = (seq_ds
            .shuffle(buffer_size)
            .batch(batch_size, drop_remainder=True)
            .cache()
            .prefetch(tf.data.experimental.AUTOTUNE))

In [16]:
train_ds.element_spec

(TensorSpec(shape=(64, 25, 3), dtype=tf.float64, name=None),
 {'pitch': TensorSpec(shape=(64,), dtype=tf.float64, name=None),
  'step': TensorSpec(shape=(64,), dtype=tf.float64, name=None),
  'duration': TensorSpec(shape=(64,), dtype=tf.float64, name=None)})

In [17]:
def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
  mse = (y_true - y_pred) ** 2
  positive_pressure = 10 * tf.maximum(-y_pred, 0.0)
  return tf.reduce_mean(mse + positive_pressure)

In [18]:
def build_model(hp):
    inputs = tf.keras.Input(input_shape)
    x = tf.keras.layers.LSTM(hp.Int('units', min_value=64, max_value=256, step=32))(inputs)  # Hier variieren wir die Anzahl der LSTM-Einheiten

    pitch_output = tf.keras.layers.Dense(128, name='pitch')(x)
    step_output = tf.keras.layers.Dense(1, name='step')(x)
    duration_output = tf.keras.layers.Dense(1, name='duration')(x)

    model = tf.keras.Model(inputs, [pitch_output, step_output, duration_output])

    # Kompilieren des Modells mit der optimierten Lernrate
    model.compile(
        loss=loss,
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])  # Hier variieren wir die Lernrate
        )
    )
    return model

In [20]:
# new version only
#loss = {
#    'pitch': SparseCategoricalCrossentropy(from_logits=True),
#    'step': MeanSquaredError(),    
#    'duration': MeanSquaredError()
#}

#input_shape = (seq_length, 3)
#learning_rate = 0.005
#optimizer = Adam(learning_rate=learning_rate)

#model = build_model(input_shape, learning_rate)
#model.summary()

In [21]:
# og version
"""
input_shape = (seq_length, 3)
learning_rate = 0.005

inputs = tf.keras.Input(input_shape)
x = tf.keras.layers.LSTM(128)(inputs)

outputs = {
  'pitch': tf.keras.layers.Dense(128, name='pitch')(x),
  'step': tf.keras.layers.Dense(1, name='step')(x),
  'duration': tf.keras.layers.Dense(1, name='duration')(x),
}

model = tf.keras.Model(inputs, outputs)

loss = {
      'pitch': tf.keras.losses.SparseCategoricalCrossentropy(
          from_logits=True),
      'step': mse_with_positive_pressure,
      'duration': mse_with_positive_pressure,
}

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model.compile(loss=loss, optimizer=optimizer)

model.summary()
"""

"\ninput_shape = (seq_length, 3)\nlearning_rate = 0.005\n\ninputs = tf.keras.Input(input_shape)\nx = tf.keras.layers.LSTM(128)(inputs)\n\noutputs = {\n  'pitch': tf.keras.layers.Dense(128, name='pitch')(x),\n  'step': tf.keras.layers.Dense(1, name='step')(x),\n  'duration': tf.keras.layers.Dense(1, name='duration')(x),\n}\n\nmodel = tf.keras.Model(inputs, outputs)\n\nloss = {\n      'pitch': tf.keras.losses.SparseCategoricalCrossentropy(\n          from_logits=True),\n      'step': mse_with_positive_pressure,\n      'duration': mse_with_positive_pressure,\n}\n\noptimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n\nmodel.compile(loss=loss, optimizer=optimizer)\n\nmodel.summary()\n"

In [22]:
losses = model.evaluate(train_ds, return_dict=True)
losses

NameError: name 'model' is not defined

In [27]:
#model.compile(
#    loss=loss,
#    loss_weights={
#        'pitch': 0.05,
#        'step': 1.0,
#        'duration':1.0,
#    },
#    optimizer=optimizer,
#)
#old version compiled 

tuner = kt.Hyperband(
    build_model,
    objective='loss',
    max_epochs=10,
    hyperband_iterations=2
)

# Definition des Hyperparameter-Raums und Suche nach den besten Hyperparametern
tuner.search(train_ds)

# Erhalten der besten Hyperparametern
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Erstellen des finalen Modells mit den besten Hyperparametern
best_model = tuner.hypermodel.build(best_hps)

Trial 24 Complete [00h 02m 12s]
loss: 3.3743269443511963

Best loss So Far: 3.3743269443511963
Total elapsed time: 00h 21m 43s


NameError: name 'x_train' is not defined

In [33]:
best_model.save('models/tuner_best_mode.keras')

In [None]:
"""callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath='./training_checkpoints/ckpt_{epoch}',
        save_weights_only=True),
    tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        patience=5,
        verbose=1,
        restore_best_weights=True),
]"""

In [None]:
"""%%time
epochs = 10 # change back to 50

history = model.fit(
    train_ds,
    epochs=epochs,
    callbacks=callbacks,
)"""

In [34]:
plt.plot(history.epoch, history.history['loss'], label='total loss')
plt.show()

NameError: name 'history' is not defined

In [None]:
# og Version 
""" def predict_next_note(
    notes: np.ndarray, 
    model: tf.keras.Model, 
    temperature: float = 1.0) -> tuple[int, float, float]:
  """Generates a note as a tuple of (pitch, step, duration), using a trained sequence model."""

  assert temperature > 0

  # Add batch dimension
  inputs = tf.expand_dims(notes, 0)

  predictions = model.predict(inputs)
  pitch_logits = predictions['pitch']
  step = predictions['step']
  duration = predictions['duration']

  pitch_logits /= temperature
  pitch = tf.random.categorical(pitch_logits, num_samples=1)
  pitch = tf.squeeze(pitch, axis=-1)
  duration = tf.squeeze(duration, axis=-1)
  step = tf.squeeze(step, axis=-1)

  # `step` and `duration` values should be non-negative
  step = tf.maximum(0, step)
  duration = tf.maximum(0, duration)

  return int(pitch), float(step), float(duration)
  """

In [31]:
#new version
def predict_next_note(
    notes: np.ndarray, 
    model: tf.keras.Model, 
    temperature: float = 1.0) -> tuple[int, float, float]:
  """Generates a note as a tuple of (pitch, step, duration), using a trained sequence model."""

  assert temperature > 0

  # Add batch dimension
  inputs = tf.expand_dims(notes, 0)

  predictions = model.predict(inputs)
  pitch_logits = predictions[0]
  step = predictions[1]
  duration = predictions[2]

  pitch_logits /= temperature
  pitch = tf.random.categorical(pitch_logits, num_samples=1)
  pitch = tf.squeeze(pitch, axis=-1)
  duration = tf.squeeze(duration, axis=-1)
  step = tf.squeeze(step, axis=-1)

  # `step` and `duration` values should be non-negative
  step = tf.maximum(0, step)
  duration = tf.maximum(0, duration)

  return int(pitch), float(step), float(duration)

In [1]:
sample_file = filenames[1]
raw_notes = midi_to_notes(sample_file)

pm = pretty_midi.PrettyMIDI(sample_file)

instrument = pm.instruments[0]
instrument_name = pretty_midi.program_to_instrument_name(instrument.program)

NameError: name 'filenames' is not defined

In [2]:
temperature = 2.0
num_predictions = 120

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# The initial sequence of notes; pitch is normalized similar to training
# sequences
input_notes = (
    sample_notes[:seq_length] / np.array([vocab_size, 1, 1]))

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
  pitch, step, duration = predict_next_note(input_notes, model, temperature)
  start = prev_start + step
  end = start + duration
  input_note = (pitch, step, duration)
  generated_notes.append((*input_note, start, end))
  input_notes = np.delete(input_notes, 0, axis=0)
  input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
  prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))

NameError: name 'np' is not defined

In [39]:
out_file = 'output.mid'
out_pm = notes_to_midi(
    generated_notes, out_file=out_file, instrument_name=instrument_name)
display_audio(out_pm)

In [None]:
# todo: notwenig? 
#from google.colab import files
#files.download(out_file)

In [38]:
# load with best model
sample_file = filenames[40]
raw_notes = midi_to_notes(sample_file)

pm = pretty_midi.PrettyMIDI(sample_file)

model = tf.keras.saving.load_model('models/tuner_best_mode.keras')


instrument = pm.instruments[0]
instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
# model nutzen was gespeichert wurde (hier muss nicht vorher model gesucht werden)
temperature = 2.0
num_predictions = 120

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# The initial sequence of notes; pitch is normalized similar to training
# sequences
input_notes = (
    sample_notes[:seq_length] / np.array([vocab_size, 1, 1]))

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
  pitch, step, duration = predict_next_note(input_notes, model, temperature)
  start = prev_start + step
  end = start + duration
  input_note = (pitch, step, duration)
  generated_notes.append((*input_note, start, end))
  input_notes = np.delete(input_notes, 0, axis=0)
  input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
  prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))





In [None]:
#plot_piano_roll(generated_notes)

In [None]:
#plot_distributions(generated_notes)

In [None]:
# One of the alternatives to using RNNs for music generation is using GANs. 
#Rather than generating audio, a GAN-based approach can generate an entire sequence in parallel. 
# The Magenta team has done impressive work on this approach with GANSynth. 
#You can also find many wonderful music and art projects and open-source code on Magenta project website.