In [None]:
%pip install pretty_midi
%pip install tensorflow
%pip install music21
%pip uninstall fluidsynth -y
%pip install --upgrade fluidsynth 

In [None]:
import pandas as pd
import numpy as np
import pretty_midi
import pathlib
import collections
import datetime
import glob
import music21
import tensorflow as tf
import fluidsynth
import my_functions
import importlib


In [None]:
# Global settings used throughout the notebook.
# Reload the local helper module so edits to my_functions.py are picked up
# without restarting the kernel.
importlib.reload(my_functions)


# Seed both TensorFlow and NumPy so that runs are repeatable.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Sampling rate for audio playback
_SAMPLING_RATE = 16000

# Length of the training sequences (number of notes fed to the model per example)
seq_length = 20

# Size of the pitch vocabulary
vocab_size = 128

# Columns extracted into the training set, in this order. These are the inputs to the model.
key_order = ['pitch', 'contour', 'step', 'duration', 'tempo']

# Per-column divisors used to normalize the model inputs (element-wise division).
# MUST have the same length and ordering as key_order.
key_normalizer = [vocab_size, 1, 1, 1, 1]

In [None]:
# Load the dataset and derive the per-note features used for training

# This loads up all the notes in the dataset
pitchInst = my_functions.extract_notes()

# Gap between the start times of consecutive notes. The difference is taken
# per melody ('melid') so that the first note of a song is never compared with
# the last note of the previous song; a melody's first note has no predecessor
# and gets a step of 0.
pitchInst['step'] = pitchInst.groupby('melid')['start'].diff().fillna(0)

# Pitch interval between successive notes, again per melody (0 for the first note)
pitchInst['interval'] = pitchInst.groupby('melid')['pitch'].diff().fillna(0)

# Map each interval through the contour function from my_functions
pitchInst['contour'] = pitchInst['interval'].apply(my_functions.contour)

n_notes = len(pitchInst)
pitchInst.head()


In [None]:
# Get rid of partial training sequences: keep, for every melody, only the
# largest prefix whose length is a multiple of seq_length.

# First melody id included in the training subset. Use
# pitchInst['melid'].min() to train on every melody, or e.g. 200 for a smaller
# subset. (Melody 25, below this cut-off, is used later as the generation seed.)
first_melid = 50
# range() is exclusive at the top, so add 1 to include the last melody too.
last_melid = int(pitchInst['melid'].max())

# Collect the trimmed melodies and concatenate once; growing a DataFrame with
# pd.concat inside the loop is quadratic and starts from an untyped empty frame.
trimmed_songs = []
for melid in range(first_melid, last_melid + 1):
    song_notes = pitchInst[pitchInst['melid'] == melid]
    usable = len(song_notes) - len(song_notes) % seq_length
    if usable:
        trimmed_songs.append(song_notes.head(usable))

if trimmed_songs:
    train_sub = pd.concat(trimmed_songs, ignore_index=True)
else:
    # No melody in range: keep the columns and dtypes, with zero rows.
    train_sub = pitchInst.iloc[0:0].copy()

# One row per note, one column per entry in key_order
train_notes = np.stack([train_sub[key] for key in key_order], axis=1)
notes_ds = tf.data.Dataset.from_tensor_slices(train_notes)
notes_ds.element_spec

In [None]:
# Number of notes kept for training after trimming partial sequences
len(train_sub)

In [None]:
# Adapted from the TensorFlow music-generation tutorial
def create_sequences(
    dataset: tf.data.Dataset,
    seq_length: int,
    vocab_size: int,
) -> tf.data.Dataset:
  """Turns a dataset of notes into (input sequence, next-note labels) examples.

  Args:
    dataset: dataset whose elements are single notes (one value per entry of
      the global `key_order`).
    seq_length: number of notes in each input sequence.
    vocab_size: accepted for interface compatibility; not used in the body
      (scaling comes from the global `key_normalizer`).

  Returns:
    A dataset of `(inputs, labels)` pairs. `inputs` holds `seq_length`
    consecutive notes divided element-wise by `key_normalizer`; `labels` is a
    dict keyed by `key_order` holding the un-normalized values of the note that
    follows them.
  """
  # One extra note per window: it becomes the label.
  window_size = seq_length + 1

  def window_to_tensor(window):
    # Each window is itself a small dataset; batching it yields one tensor.
    return window.batch(window_size, drop_remainder=True)

  def to_example(notes):
    context = notes[:-1]
    target = notes[-1]
    labels = {name: target[pos] for pos, name in enumerate(key_order)}
    # Only the inputs are scaled; labels keep their raw values.
    return context / key_normalizer, labels

  sliding = dataset.window(window_size, shift=1, stride=1,
                           drop_remainder=True)
  stacked = sliding.flat_map(window_to_tensor)
  return stacked.map(to_example, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
seq_ds = create_sequences(notes_ds, seq_length, vocab_size)
# Show the structure of one (inputs, labels) example. A bare `seq_ds.__len__`
# only displays a bound-method repr, and the dataset's length is not statically
# known after windowing, so element_spec is the useful thing to inspect here.
seq_ds.element_spec

In [None]:
batch_size = 64
# n_notes counts every note in pitchInst, so this is at least as large as the
# number of training examples and the shuffle covers the whole dataset.
buffer_size = n_notes - seq_length
# cache() comes before shuffle(): caching after shuffle/batch would store the
# first epoch's order and replay the exact same batches in every later epoch.
train_ds = (seq_ds
            .cache()
            .shuffle(buffer_size)
            .batch(batch_size, drop_remainder=True)
            .prefetch(tf.data.AUTOTUNE))

In [None]:
# Inspect the structure (shapes and dtypes) of one batched training example
train_ds.element_spec

In [None]:
def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
  """Mean squared error plus a penalty that discourages negative predictions.

  Each element contributes its squared error plus 10 * max(-y_pred, 0), i.e.
  the penalty is zero for non-negative predictions and grows linearly below
  zero. The mean over all elements is returned.
  """
  squared_error = (y_true - y_pred) ** 2
  negative_penalty = 10 * tf.maximum(-y_pred, 0.0)
  per_element = squared_error + negative_penalty
  return tf.reduce_mean(per_element)

In [None]:
# Definition of the inputs: seq_length notes, each with one feature per entry
# in the key_order global.
input_shape = (seq_length, len(key_order))
learning_rate = 0.010

inputs = tf.keras.Input(input_shape)
# Hidden layers: a stack of LSTMs; only the last one collapses the sequence
# dimension (return_sequences=False) so the heads see a single vector.
x = tf.keras.layers.LSTM(128, return_sequences=True, activation='tanh')(inputs)
x = tf.keras.layers.LSTM(16, return_sequences=True)(x)
x = tf.keras.layers.LSTM(16, return_sequences=True)(x)
x = tf.keras.layers.LSTM(16, return_sequences=False)(x)

# Three output heads. The pitch head emits one logit per pitch class, so its
# width is tied to vocab_size instead of a hard-coded 128.
outputs = {
  'pitch': tf.keras.layers.Dense(vocab_size, name='pitch')(x),
  'step': tf.keras.layers.Dense(1, name='step')(x),
  'duration': tf.keras.layers.Dense(1, name='duration')(x),
}

model = tf.keras.Model(inputs, outputs)

# Pitch is a classification over logits; step and duration are regressions
# that are additionally penalized for negative predictions.
loss = {
      'pitch':tf.keras.losses.SparseCategoricalCrossentropy(
          from_logits=True),
      'step': mse_with_positive_pressure,
      'duration': mse_with_positive_pressure,
}

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model.compile(loss=loss, optimizer=optimizer)

model.summary()

In [None]:
# Compile again, this time weighting the losses of the model's three outputs
# (pitch, step, duration) when they are summed into the total loss.
model.compile(
    optimizer=optimizer,
    loss=loss,
    loss_weights=dict(pitch=0.80, step=1.0, duration=1.0),
)

In [None]:
# Evaluate the model on the training set (in notebook order this runs before
# fit is called); return_dict=True returns the losses keyed by name.
model.evaluate(train_ds, return_dict=True)

In [None]:
# Write a weights-only checkpoint per epoch, named after the epoch number.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath='./training_checkpoints/ckpt_{epoch}.weights.h5',
    save_weights_only=True,
)
# Stop when the training loss has not improved for 5 epochs and roll the model
# back to the best weights seen.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=5,
    verbose=1,
    restore_best_weights=True,
)
callbacks = [checkpoint_cb, early_stopping_cb]

In [None]:
%%time
# Upper bound on the number of training epochs; the EarlyStopping callback in
# `callbacks` may end training sooner.
epochs = 50

# Train on the batched dataset; `history` keeps the object returned by fit.
history = model.fit(
    train_ds,
    epochs=epochs,
    callbacks=callbacks,
)

In [None]:
def predict_next_note(
    notes: np.ndarray,
    model: tf.keras.Model,
    temperature: float = 1.0) -> tuple[int, float, float]:
  """Generates a note as a tuple of (pitch, step, duration), using a trained sequence model.

  Args:
    notes: the input sequence for one example, without a batch dimension; it
      must already be scaled the same way as the training inputs.
    model: model whose `predict` returns a dict with 'pitch' logits and
      'step' / 'duration' values.
    temperature: divisor applied to the pitch logits before sampling; must be
      positive. Larger values flatten the distribution.

  Returns:
    `(pitch, step, duration)` as plain Python `int`, `float`, `float`. `step`
    and `duration` are clamped to be non-negative.
  """

  assert temperature > 0

  # Add batch dimension
  inputs = tf.expand_dims(notes, 0)

  # verbose=0: this is called once per generated note, so the default progress
  # bar would flood the cell output.
  predictions = model.predict(inputs, verbose=0)

  # Divide into a new array instead of mutating the model's output in place.
  pitch_logits = predictions['pitch'] / temperature
  pitch = tf.random.categorical(pitch_logits, num_samples=1)

  # Squeeze every size-1 dimension so each value is a true scalar before it is
  # converted with int()/float(); converting shape-(1,) arrays is deprecated.
  pitch = tf.squeeze(pitch)
  step = tf.squeeze(predictions['step'])
  duration = tf.squeeze(predictions['duration'])

  # `step` and `duration` values should be non-negative
  step = tf.maximum(0.0, step)
  duration = tf.maximum(0.0, duration)

  return int(pitch), float(step), float(duration)

In [None]:
# Locate the MIDI files that accompany the dataset
data_dir = pathlib.Path('Jazzomat_midi_folder')
filenames = glob.glob(str(data_dir/"*.mid"))
sorted_filenames = sorted(filenames)
# Index the sorted list: glob returns files in arbitrary, filesystem-dependent
# order, so indexing `filenames` could pick a different file on each machine.
sample_file = sorted_filenames[8]


In [None]:
temperature = 1.0
num_predictions = 60

# Seed the generator with the opening notes of one melody from the dataset.
song = 25
test_notes = pitchInst[pitchInst['melid'] == song].reset_index()
test_notes.loc[0, 'step'] = 0.0
sample_notes = np.stack([test_notes[key] for key in key_order], axis=1)

# Per-feature divisors, the same scaling that is applied to training inputs
normalizer = np.array(key_normalizer, dtype=float)
pitch_col = key_order.index('pitch')

# The initial sequence of notes, scaled like the training sequences
input_notes = sample_notes[:seq_length] / normalizer

# This is the tempo of the midi input file...
tempo = test_notes['tempo'].iloc[seq_length]

generated_notes = []
# NOTE(review): generated start/end times are measured from 0, not from the end
# of the seed notes -- confirm that is what plot_piano_roll(full_sequence) expects.
prev_start = 0
# Raw (un-normalized) pitch of the last seed note. The interval of a new note
# must be measured against a raw pitch, not against the scaled model input.
prev_pitch = int(sample_notes[seq_length - 1, pitch_col])
for _ in range(num_predictions):
  pitch, step, duration = predict_next_note(input_notes, model, temperature)
  start = prev_start + step
  end = start + duration
  contour = my_functions.contour(pitch - prev_pitch)
  # This tuple must hold one value per entry of key_order, in the same order,
  # because it is fed back into the model. Update it whenever key_order changes.
  input_note = (pitch, contour, step, duration, tempo)
  generated_notes.append((*input_note, start, end))
  # Scale the new note exactly like the seed before it enters the model's
  # input window; the window would otherwise mix scaled and raw pitches.
  scaled_note = np.asarray(input_note, dtype=float) / normalizer
  # Slide the window: drop the oldest note, append the newest.
  input_notes = np.vstack([input_notes[1:], scaled_note])
  prev_start = start
  prev_pitch = pitch

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))

generated_notes.head()

In [None]:
# Keep only the seed notes that were fed to the model, without the helper
# 'interval' column, so they can be joined with the generated notes.
test_notes = test_notes.iloc[:seq_length]
start_df = test_notes.drop(columns=['interval'])
start_df.head()

In [None]:
# Join the seed notes (start_df) and the generated notes into one sequence,
# renumbering the rows, then preview the columns used for MIDI output.
full_sequence = pd.concat([start_df, generated_notes], ignore_index=True)
full_sequence[['pitch', 'step', 'duration']].head()


In [None]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str,
  instrument_name: str,
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:
  """Writes a table of notes to a MIDI file and returns the PrettyMIDI object.

  Args:
    notes: frame with 'pitch', 'step' and 'duration' columns; 'step' is the
      offset of a note's start from the previous note's start.
    out_file: path the MIDI data is written to.
    instrument_name: instrument name, resolved to a program number through
      pretty_midi.
    velocity: velocity assigned to every note.

  Returns:
    The PrettyMIDI object that was written, holding a single instrument.
  """
  program = pretty_midi.instrument_name_to_program(instrument_name)
  track = pretty_midi.Instrument(program=program)

  # Running start time: each note begins `step` after the previous one began.
  onset = 0
  for pitch, step, duration in zip(
      notes['pitch'], notes['step'], notes['duration']):
    onset = float(onset + step)
    track.notes.append(
        pretty_midi.Note(
            velocity=velocity,
            pitch=int(pitch),
            start=onset,
            end=float(onset + duration),
        ))

  midi = pretty_midi.PrettyMIDI()
  midi.instruments.append(track)
  midi.write(out_file)
  return midi

In [None]:
# Reuse the instrument of the first track of the sample MIDI file for the output
pm = pretty_midi.PrettyMIDI(sample_file)
example_file = 'contourExampleTempo.midi'
instrument = pm.instruments[0]
instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
# Write the seed + generated notes to example_file.
# NOTE(review): this calls my_functions.notes_to_midi, not the notes_to_midi
# defined in this notebook -- confirm which one is intended.
example_pm = my_functions.notes_to_midi(full_sequence[['pitch', 'step', 'duration']], out_file=example_file, instrument_name=instrument_name)

In [None]:
# Piano roll of the seed notes followed by the generated notes
my_functions.plot_piano_roll(full_sequence)

In [None]:
# Distribution plots for the generated notes only
my_functions.plot_distributions(generated_notes)

In [None]:
# Same plots for test_notes, which by this point holds only the first
# seq_length seed notes of the chosen melody
my_functions.plot_distributions(test_notes)