In [30]:
!pip install pretty_midi
!sudo apt install -y fluidsynth
!pip install --upgrade pyfluidsynth

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
fluidsynth is already the newest version (2.2.5-1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [2]:
import numpy as np
import tensorflow as tf
import pandas as pd
import collections
import fluidsynth
import glob
import pretty_midi
from IPython import display
from typing import Dict, List, Optional, Sequence, Tuple

Dataset Loading

In [4]:
from google.colab import files
uploaded = files.upload()  # Upload your ZIP file here

                                                                        # create a directory to extract the dataset
                              # replace the zip file name as per your file name

Saving archive (8).zip to archive (8) (1).zip


In [9]:
!mkdir -p music-midi-dataset
!ls  # check the exact filename
!unzip "archive (8).zip" -d music-midi-dataset  # Use the correct name


'archive (8) (1).zip'  'archive (8).zip'   music-midi-dataset   sample_data
Archive:  archive (8).zip
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (1).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (10).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (11).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (12).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (13).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (14).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (15).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (16).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (17).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (18).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (19).mid  
  inflating: music-midi-dataset/midi_dataset/midi_dataset/x (2).mid  
  inflating: music-midi-dataset/midi_dataset/mid

In [10]:
sampling_rate = 44100

def display_audio(pm, seconds=30):
	waveform = pm.fluidsynth(fs=sampling_rate)
  # Take a sample of the generated waveform to mitigate kernel resets
	waveform_short = waveform[:seconds*sampling_rate]
	return display.Audio(waveform_short, rate=sampling_rate)

pm = pretty_midi.PrettyMIDI()
# Create an instrument instance and add it to the PrettyMIDI object
instrument = pretty_midi.Instrument(program=0, is_drum=False, name='acoustic grand piano')
pm.instruments.append(instrument)
print(pm.instruments)
instrument = pm.instruments[0]



[Instrument(program=0, is_drum=False, name="acoustic grand piano")]


In [6]:
from google.colab import files
uploaded = files.upload()

Saving x (43).mid to x (43) (1).mid


In [11]:
!ls


'archive (8) (1).zip'  'archive (8).zip'  'x (43) (1).mid'
'archive (8) (2).zip'   sample_data	  'x (43).mid'


In [16]:
import pretty_midi
import numpy as np
import pandas as pd
import collections

def midi_to_notes(midi_file):
    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]
    notes = collections.defaultdict(list)
    sorted_notes = sorted(instrument.notes , key=lambda note:note.start)
    prev_start = sorted_notes[0].start

    for note in sorted_notes:
        start = note.start
        end = note.end
        notes["pitch"].append(note.pitch)
        notes["start"].append(start)
        notes["end"].append(end)
        notes["step"].append(start - prev_start)
        notes["duration"].append(end - start)
        prev_start = start

    return pd.DataFrame({name: np.array(value) for name, value in notes.items()})

# Replace with exact name
raw_notes = midi_to_notes("x (43).mid")
note_names = np.vectorize(pretty_midi.note_number_to_name)
sample_note_names = note_names(raw_notes["pitch"])




In [17]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str,
  instrument_name: str,
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:

  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          instrument_name))

  prev_start = 0
  for i, note in notes.iterrows():
    start = float(prev_start + note['step'])
    end = float(start + note['duration'])
    note = pretty_midi.Note(
        velocity=velocity,
        pitch=int(note['pitch']),
        start=start,
        end=end,
    )
    instrument.notes.append(note)
    prev_start = start

  pm.instruments.append(instrument)
  pm.write(out_file)
  return pm

In [20]:
import glob
import pandas as pd
import numpy as np
import tensorflow as tf
num_files = 5
all_notes = []
filenames = glob.glob('*.mid') # Get a list of all MIDI files in the current directory
for f in filenames[:num_files] :
	notes = midi_to_notes(f)
	all_notes.append(notes)
all_notes = pd.concat(all_notes)
print(all_notes)
key_order = ["pitch" , "step" , "duration"]
train_notes = np.stack([all_notes[key] for key in key_order] , axis = 1)
notes_ds=tf.data.Dataset.from_tensor_slices(train_notes)
notes_ds.element_spec



     pitch   start      end   step  duration
0       66   0.000   0.2500  0.000    0.2500
1       66   0.500   0.6250  0.500    0.1250
2       64   0.625   0.7500  0.125    0.1250
3       66   0.750   0.8750  0.125    0.1250
4       66   1.000   1.2500  0.250    0.2500
..     ...     ...      ...    ...       ...
244     69  77.750  77.9375  0.125    0.1875
245     62  78.000  78.1875  0.250    0.1875
246     61  78.500  78.6250  0.500    0.1250
247     61  78.750  78.8750  0.250    0.1250
248     59  79.000  79.1250  0.250    0.1250

[498 rows x 5 columns]




TensorSpec(shape=(3,), dtype=tf.float64, name=None)

Creating Music Sequence

In [22]:
seq_length = 20
vocab_size = 128

def create_sequences(dataset, seq_length, vocab_size=128):
    sequences = []
    targets = []
    num_seq = train_notes.shape[0] - seq_length
    for i in range(num_seq):
        sequence = train_notes[i:i+seq_length - 1, :] / [vocab_size, 1, 1]
        target = train_notes[i+seq_length] / vocab_size
        sequences.append(sequence)
        targets.append(target)

    sequences = np.array(sequences)
    targets = np.array(targets)
    print(sequences.shape, targets.shape)

    dataset = tf.data.Dataset.from_tensor_slices(
        (sequences, {
            "pitch": targets[:, 0],
            "step": targets[:, 1],
            "duration": targets[:, 2]
        })
    )
    return dataset

# Create sequence dataset
seq_ds = create_sequences(notes_ds, 21, vocab_size)

# Shuffle and batch
batch_size = 64
buffer_size = 5000
train_ds = seq_ds.shuffle(buffer_size).batch(batch_size)

# Confirm element spec
train_ds.element_spec


(477, 20, 3) (477, 3)


(TensorSpec(shape=(None, 20, 3), dtype=tf.float64, name=None),
 {'pitch': TensorSpec(shape=(None,), dtype=tf.float64, name=None),
  'step': TensorSpec(shape=(None,), dtype=tf.float64, name=None),
  'duration': TensorSpec(shape=(None,), dtype=tf.float64, name=None)})

Define LSTM Model

In [26]:
import tensorflow as tf

layer = tf.keras.layers
learning_rate = 0.005
seq_length = 20  # or your actual value

# Input layer with shape (sequence_length, 3 features: pitch, step, duration)
input_data = tf.keras.Input(shape=(seq_length, 3))

# LSTM layer
x = layer.LSTM(128)(input_data)

# Output layers (multi-output model)
outputs = {
    "pitch": tf.keras.layers.Dense(64, name="pitch")(x),
    "step": tf.keras.layers.Dense(1, name="step")(x),
    "duration": tf.keras.layers.Dense(1, name="duration")(x),
}

# Build the model
model = tf.keras.Model(input_data, outputs)

# Define loss functions for each output
loss = {
    "pitch": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    "step": tf.keras.losses.MeanSquaredError(),
    "duration": tf.keras.losses.MeanSquaredError(),
}

# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Compile the model with custom loss weights
model.compile(
    loss={
        "pitch": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        "step": tf.keras.losses.MeanSquaredError(),
        "duration": tf.keras.losses.MeanSquaredError()
    },
    loss_weights={
        "pitch": 0.05,
        "step": 1.0,
        "duration": 1.0
    },
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005)
)

model.compile(
    loss={
        "pitch": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        "step": tf.keras.losses.MeanSquaredError(),
        "duration": tf.keras.losses.MeanSquaredError()
    },
    loss_weights={
        "pitch": 0.05,
        "step": 1.0,
        "duration": 1.0
    },
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005)
)

model.summary()

In [28]:
# Train the model
model.fit(train_ds, epochs=10)

# Predict on the same training data
hist = model.predict(train_ds)

# Print the shape of the predicted durations
print(hist["duration"].shape)


Epoch 1/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step - duration_loss: 0.0067 - loss: 0.2225 - pitch_loss: 4.0497 - step_loss: 0.0129
Epoch 2/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - duration_loss: 0.0152 - loss: 0.1345 - pitch_loss: 2.2211 - step_loss: 0.0085
Epoch 3/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - duration_loss: 0.0306 - loss: 0.0373 - pitch_loss: 0.0458 - step_loss: 0.0045
Epoch 4/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - duration_loss: 0.0098 - loss: 0.0134 - pitch_loss: 0.0230 - step_loss: 0.0022
Epoch 5/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - duration_loss: 0.0051 - loss: 0.0060 - pitch_loss: 0.0163 - step_loss: 1.1859e-04
Epoch 6/10
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - duration_loss: 0.0020 - loss: 0.0031 - pitch_loss: 0.0116 - step_loss: 4.5226e-04
Epoch 7/10
[1

Prediction of Notes

In [31]:
def predict_next_note(notes, keras_model, temperature):
    assert temperature > 0
    inputs = np.expand_dims(notes, 0)
    predictions = keras_model.predict(inputs)

    pitch_logits = predictions["pitch"]
    step = predictions["step"]
    duration = predictions["duration"]

    pitch_logits /= temperature
    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)

    duration = tf.squeeze(duration, axis=-1)
    step = tf.squeeze(step, axis=-1)

    step = tf.maximum(0, step)
    duration = tf.maximum(0, duration)

    return int(pitch), float(step), float(duration)


# Parameters
temperature = 2.0
num_predictions = 1200

# Prepare seed input
sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)
input_notes = sample_notes[:seq_length] / np.array([vocab_size, 1, 1])

# Generate notes
generated_notes = []
prev_start = 0

for _ in range(num_predictions):
    pitch, step, duration = predict_next_note(input_notes, model, temperature)
    start = prev_start + step
    end = start + duration
    input_note = (pitch, step, duration)

    generated_notes.append((*input_note, start, end))

    input_notes = np.delete(input_notes, 0, axis=0)
    input_notes = np.append(input_notes, np.expand_dims(input_note, 0), axis=0)
    prev_start = start

# Convert to DataFrame
generated_notes = pd.DataFrame(generated_notes, columns=(*key_order, 'start', 'end'))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 352ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

Music Generation

In [33]:
import pretty_midi

def notes_to_midi(df, out_file='output.mid', instrument_name="Acoustic Grand Piano"):
    # Create a PrettyMIDI object
    pm = pretty_midi.PrettyMIDI()
    program = pretty_midi.instrument_name_to_program(instrument_name)
    instrument = pretty_midi.Instrument(program=program)

    for _, note in df.iterrows():
        midi_note = pretty_midi.Note(
            velocity=100,
            pitch=int(note['pitch']),
            start=float(note['start']),
            end=float(note['end'])
        )
        instrument.notes.append(midi_note)

    pm.instruments.append(instrument)
    pm.write(out_file)
    return pm


In [35]:
from IPython import display

sampling_rate = 44100

def display_audio(pm, seconds=30):
    waveform = pm.fluidsynth(fs=sampling_rate)
    waveform_short = waveform[:seconds * sampling_rate]
    return display.Audio(waveform_short, rate=sampling_rate)

# Define output filename and instrument
out_file = 'gfgmusicgenerate.mid'
instrument_program = 0  # Acoustic Grand Piano
instrument_name = pretty_midi.program_to_instrument_name(instrument_program)

# Convert notes to MIDI
out_pm = notes_to_midi(generated_notes, out_file=out_file, instrument_name=instrument_name)

# Play the audio (you need to define the display_audio function if not done yet)
display_audio(out_pm, seconds=30)