# Midi Sound Generation

## Setup

Importing the necessary Python modules

In [1]:
import ipywidgets as widgets
from IPython.display import display, Audio
import os
import pretty_midi
import fluidsynth
import pandas as pd
import collections
import numpy as np
import tensorflow as tf
from typing import Optional
import keras




In [2]:
# Sampling rate (samples per second) used both when synthesizing the waveform
# and when handing it to the notebook audio player
_SAMPLING_RATE = 16000

# Path of the MIDI file to work on; starts empty and is filled in by
# browse_and_display once the uploaded file has been located on disk
full_path = ""

## Midi file upload

This section provides functionality to upload a MIDI file and play audio from the selected file. The code includes a file upload button where you can select a MIDI file. Upon selection, the file's full path is displayed, and the audio is rendered for preview.

In [3]:
# Upload widget limited to a single file with the '.mid' extension
upload_button = widgets.FileUpload(accept='.mid', multiple=False)

In [4]:
display(upload_button)

FileUpload(value=(), accept='.mid', description='Upload')

In [5]:
def find_file(file_name, search_path='.'):
    """Return the path of the first file named ``file_name`` under ``search_path``.

    The directory tree is traversed with ``os.walk``; the first directory (in
    walk order) whose file list contains ``file_name`` wins.

    Args:
        file_name: Exact base name of the file to look for.
        search_path: Directory where the recursive search starts.

    Returns:
        The directory joined with ``file_name`` for the first match, or
        ``None`` when no directory in the tree contains such a file.
    """
    hits = (
        os.path.join(folder, file_name)
        for folder, _subfolders, entries in os.walk(search_path)
        if file_name in entries
    )
    return next(hits, None)

def _uploaded_file_name():
    """Return the name of the first uploaded file, or None if nothing is uploaded."""
    first_upload = next(iter(upload_button.value), None)
    return None if first_upload is None else first_upload.name


def browse_and_display(b):
    """Locate the uploaded file on disk and report the outcome in ``result_label``.

    Click handler for the "Find File" button. The upload widget is read on
    every click, so the handler works even when the file is uploaded after
    this cell has run, and it always reflects the most recent upload.

    Args:
        b: The button instance passed in by ``on_click`` (unused).

    Side effects:
        Updates the module-level ``file_name``; sets the module-level
        ``full_path`` when the file is found; writes a status message to
        ``result_label``.
    """
    global full_path, file_name
    file_name = _uploaded_file_name()
    if file_name is None:
        result_label.value = "No file uploaded yet. Upload a MIDI file first."
        return
    file_path = find_file(file_name)
    if file_path:
        result_label.value = "Full path for '{}' is: {}".format(file_name, file_path)
        full_path = file_path
    else:
        result_label.value = "File '{}' not found.".format(file_name)


# Name of the uploaded file, or None while nothing has been uploaded; it is
# refreshed on every button click
file_name = _uploaded_file_name()

# Create a label widget to display the result (before wiring the callback
# that writes to it)
result_label = widgets.HTML(value="")

# Create a button widget
browse_button = widgets.Button(description="Find File")
browse_button.on_click(browse_and_display)

# Display the widgets
display(browse_button, result_label)

Button(description='Find File', style=ButtonStyle())

HTML(value='')

In [6]:
# Fail early with an actionable message when no file has been located yet
if not full_path:
    raise FileNotFoundError(
        "No MIDI file selected: upload a file and click 'Find File' first.")
pm = pretty_midi.PrettyMIDI(full_path)


def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
    """Synthesize ``pm`` and return a playable clip of its beginning.

    Args:
        pm: The MIDI object to render with ``pm.fluidsynth``.
        seconds: Length of the clip to keep, counted from the start of the
            waveform. May be fractional.

    Returns:
        An ``IPython.display.Audio`` object holding the truncated waveform at
        ``_SAMPLING_RATE``.
    """
    waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
    # Take a sample of the generated waveform to mitigate kernel resets.
    # The sample count is cast to int so a fractional `seconds` is a valid
    # slice bound.
    waveform_short = waveform[:int(seconds * _SAMPLING_RATE)]
    return Audio(waveform_short, rate=_SAMPLING_RATE)


display_audio(pm)

## Midi details

This section displays details about the MIDI file, such as the number of instruments it contains, and provides an option to select a specific instrument for further analysis. The code includes a dropdown menu where you can choose the instrument of your choice. Upon selection, details of the selected instrument are displayed.

In [7]:
print('Number of instruments:', len(pm.instruments))

# Dropdown entries are (label, value) pairs: the label is the General MIDI
# instrument name, the value is the index into pm.instruments
options = []
for i, item in enumerate(pm.instruments):
    # NOTE: `instrument_name` stays defined after this loop and is read again
    # by the cell that writes the output MIDI file, so it is kept as a
    # module-level name
    instrument_name = pretty_midi.program_to_instrument_name(item.program)
    options.append((instrument_name, i))


print("Kindly pick the instrument of your choice")

# Preselect the second instrument when there is one, fall back to the first
# for single-instrument files, and select nothing when there are none. A
# hard-coded value of 1 is rejected by the widget unless index 1 exists.
default_index = min(1, len(options) - 1) if options else None

instrument_selection = widgets.Dropdown(
    options=options,
    value=default_index,
    description='Instrument Name:',
)
display(instrument_selection)

Number of instruments: 2
Kindly pick the instrument of your choice


Dropdown(description='Instrument Name:', index=1, options=(('Acoustic Grand Piano', 0), ('Acoustic Grand Piano…

In [8]:
# Look up the instrument currently chosen in the dropdown; the dropdown value
# is an index into pm.instruments
instrument = pm.instruments[instrument_selection.value]

# Bare expression so the notebook shows the instrument as the cell output
instrument

Instrument(program=0, is_drum=False, name="Piano left")

## Midi data generation

This section generates structured data from the MIDI file based on the selected instrument. The code extracts musical notes from the chosen instrument in the MIDI file and organizes them into a DataFrame. The extracted data includes the pitch, start time, end time, step, and duration of each note.

In [9]:
def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """Extract the notes of the selected instrument into a DataFrame.

    The instrument is chosen by the current value of the module-level
    ``instrument_selection`` dropdown, used as an index into the instruments
    of the loaded file.

    Args:
        midi_file: Path of the MIDI file to load.

    Returns:
        A DataFrame with one row per note, ordered by start time, and the
        columns ``pitch``, ``start``, ``end``, ``step`` (start minus the
        previous note's start; 0 for the first note) and ``duration``
        (end minus start). An instrument without notes yields an empty
        DataFrame with the same columns.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[instrument_selection.value]

    columns = ('pitch', 'start', 'end', 'step', 'duration')
    notes = {name: [] for name in columns}

    # Sort the notes by start time
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
    # Seeding with the first note's start makes its step 0; the fallback only
    # avoids indexing into an empty list when the instrument has no notes
    prev_start = sorted_notes[0].start if sorted_notes else 0.0

    for note in sorted_notes:
        start = note.start
        end = note.end
        notes['pitch'].append(note.pitch)
        notes['start'].append(start)
        notes['end'].append(end)
        notes['step'].append(start - prev_start)
        notes['duration'].append(end - start)
        prev_start = start

    return pd.DataFrame({name: np.array(value) for name, value in notes.items()})

In [10]:
# Extract the selected instrument's notes from the located MIDI file
raw_notes = midi_to_notes(full_path)
# Preview the first rows of the resulting DataFrame
raw_notes.head()

Unnamed: 0,pitch,start,end,step,duration
0,59,0.0,0.126051,0.0,0.126051
1,63,0.252101,0.378152,0.252101,0.126051
2,66,0.504202,0.627153,0.252101,0.122951
3,64,0.750104,0.833765,0.245901,0.083661
4,66,0.833765,0.917426,0.083661,0.083661


## Model loading

This section loads a pre-trained music generation model from a saved file named 'trained_midi_gen'. The model is loaded into memory, allowing us to use it for inference. Additionally, a custom loss function named 'mse_with_positive_pressure' is provided during model loading to ensure proper reconstruction of the model.

In [11]:
def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Mean squared error plus a penalty on negative predictions.

    Args:
        y_true: Target values.
        y_pred: Predicted values.

    Returns:
        The mean over all elements of the squared error plus ten times the
        amount by which each prediction falls below zero.
    """
    squared_error = (y_true - y_pred) ** 2
    # max(-y_pred, 0) is zero for non-negative predictions, so only negative
    # outputs are penalized
    below_zero = tf.maximum(-y_pred, 0.0)
    per_element_loss = squared_error + 10 * below_zero
    return tf.reduce_mean(per_element_loss)

In [12]:
# Restore the saved model from 'trained_midi_gen'; the custom loss is passed
# under its own name so the loader can resolve it while rebuilding the model
model = tf.keras.models.load_model('trained_midi_gen', custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure})





## Note prediction

This section uses the pre-trained sequence model to predict the next musical notes based on a given sequence of input notes. The code generates a series of predicted notes using the trained model, considering factors such as pitch, step, and duration. These predicted notes are then organized into a DataFrame, representing the generated musical sequence.

In [13]:
def predict_next_note(
    notes: np.ndarray,
    model: tf.keras.Model,
    temperature: float = 1.0,
) -> tuple[int, float, float]:
    """Generate one note as (pitch, step, duration) using a trained sequence model.

    Args:
        notes: The input sequence of notes; a batch dimension is added before
            it is passed to the model.
        model: Model whose prediction is a mapping with the keys ``'pitch'``
            (logits), ``'step'`` and ``'duration'``.
        temperature: Divisor applied to the pitch logits before sampling;
            must be greater than zero.

    Returns:
        A tuple ``(pitch, step, duration)`` where ``pitch`` is sampled from
        the temperature-scaled logits and ``step`` / ``duration`` are clamped
        to be non-negative.

    Raises:
        ValueError: If ``temperature`` is not greater than zero.
    """
    # Explicit check instead of `assert`, which is removed under `python -O`.
    # Written as `not >` so NaN is rejected as well.
    if not temperature > 0:
        raise ValueError(
            f"temperature must be greater than 0, got {temperature!r}")

    # Add batch dimension
    inputs = tf.expand_dims(notes, 0)

    # verbose=0 suppresses the per-call progress bar, which otherwise floods
    # the output when this function is called in a generation loop
    predictions = model.predict(inputs, verbose=0)
    pitch_logits = predictions['pitch'] / temperature
    step = predictions['step']
    duration = predictions['duration']

    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)
    duration = tf.squeeze(duration, axis=-1)
    step = tf.squeeze(step, axis=-1)

    # `step` and `duration` values should be non-negative
    step = tf.maximum(0, step)
    duration = tf.maximum(0, duration)

    # Each tensor still carries the batch axis of size 1 added above. Index
    # it away so int()/float() receive true scalars and do not rely on
    # converting a 1-element array to a Python number.
    return int(pitch[0]), float(step[0]), float(duration[0])

In [14]:
temperature = 0.5
num_predictions = 120
key_order = ['pitch', 'step', 'duration']
seq_length = 150
vocab_size = 128

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# Per-column divisor applied to every note fed to the model: pitch is
# normalized by the vocabulary size, step and duration are left unchanged
note_scale = np.array([vocab_size, 1, 1])

# The initial sequence of notes; pitch is normalized similar to training
# sequences
input_notes = sample_notes[:seq_length] / note_scale

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
    pitch, step, duration = predict_next_note(input_notes, model, temperature)
    start = prev_start + step
    end = start + duration
    input_note = (pitch, step, duration)
    # The output keeps the raw (un-normalized) pitch
    generated_notes.append((*input_note, start, end))
    # Slide the window: drop the oldest note and append the new one. The new
    # note is normalized like the seed notes so the whole window stays on
    # the same scale.
    next_row = np.array(input_note) / note_scale
    input_notes = np.vstack([input_notes[1:], next_row])
    prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))



## Generated midi

This section converts the generated musical notes into a MIDI file. The code takes the predicted notes generated by the model and converts them into MIDI format, assigning instrument properties and note attributes accordingly. The resulting MIDI file, named 'output.mid', is then played back to provide an auditory representation of the generated musical sequence.

In [15]:
def notes_to_midi(
    notes: pd.DataFrame,
    out_file: str,
    instrument_name: str,
    velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:
    """Build a single-instrument MIDI object from ``notes`` and write it to disk.

    Args:
        notes: DataFrame read row by row; the ``'pitch'``, ``'step'`` and
            ``'duration'`` columns are used.
        out_file: Path the MIDI data is written to.
        instrument_name: Instrument name, converted to a program number with
            ``pretty_midi.instrument_name_to_program``.
        velocity: Velocity assigned to every note.

    Returns:
        The ``PrettyMIDI`` object that was written to ``out_file``.
    """
    midi = pretty_midi.PrettyMIDI()
    program = pretty_midi.instrument_name_to_program(instrument_name)
    track = pretty_midi.Instrument(program=program)

    # Running start time: each note begins `step` after the previous start
    onset = 0
    for _, row in notes.iterrows():
        onset = float(onset + row['step'])
        release = float(onset + row['duration'])
        track.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(row['pitch']),
                start=onset,
                end=release,
            )
        )

    midi.instruments.append(track)
    midi.write(out_file)
    return midi

In [16]:
out_file = 'output.mid'
# Name the output track after the instrument chosen in the dropdown. The
# `instrument_name` left over from the dropdown-building loop is always the
# last instrument in the file, whatever the selection is.
selected_program = pm.instruments[instrument_selection.value].program
instrument_name = pretty_midi.program_to_instrument_name(selected_program)
out_pm = notes_to_midi(
    generated_notes, out_file=out_file, instrument_name=instrument_name)
display_audio(out_pm)