<a href="https://colab.research.google.com/github/Hridaybekal/Music-gen/blob/main/genai_music.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the FluidSynth system package plus the pyfluidsynth and pretty_midi Python packages
!apt-get install -y fluidsynth
!pip install pyfluidsynth
!pip install pretty_midi

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fluid-soundfont-gm libevdev2 libfluidsynth3 libgudev-1.0-0 libinput-bin libinput10
  libinstpatch-1.0-2 libmd4c0 libmtdev1 libqt5core5a libqt5dbus5 libqt5gui5 libqt5network5
  libqt5svg5 libqt5widgets5 libwacom-bin libwacom-common libwacom9 libxcb-icccm4 libxcb-image0
  libxcb-keysyms1 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xinput0 libxcb-xkb1
  libxkbcommon-x11-0 qsynth qt5-gtk-platformtheme qttranslations5-l10n timgm6mb-soundfont
Suggested packages:
  fluid-soundfont-gs qt5-image-formats-plugins qtwayland5 jackd
The following NEW packages will be installed:
  fluid-soundfont-gm fluidsynth libevdev2 libfluidsynth3 libgudev-1.0-0 libinput-bin libinput10
  libinstpatch-1.0-2 libmd4c0 libmtdev1 libqt5core5a libqt5dbus5 libqt5gui5 libqt5network5
  libqt5svg5 libqt5widgets5 libwacom-bin libwacom-common libwacom9 libxcb-icc

In [None]:
# Install the required libraries
!pip install pretty_midi
!pip install pyfluidsynth
!apt-get update -qq && apt-get install -qq libfluidsynth1 fluid-soundfont-gm build-essential libasound2-dev libjack-dev
!pip install nest_asyncio


# Import necessary libraries
import numpy as np
import tensorflow as tf
import pandas as pd
import collections
import pretty_midi
import glob
from IPython.display import Audio
from typing import Dict, List, Optional, Sequence, Tuple


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
E: Package 'libfluidsynth1' has no installation candidate


In [None]:
# Location of the MIDI file used throughout the notebook (parsed further below)
sample_file = '/content/x (5).mid'  # Path to the uploaded MIDI file

# Function to display audio
def display_audio(pm, seconds=30, sampling_rate=16000):
    """Synthesize a MIDI object and return an inline audio player for its start.

    Args:
        pm: Object exposing ``fluidsynth(fs=...)`` that returns a 1-D waveform
            (the notebook passes ``pretty_midi.PrettyMIDI`` instances).
        seconds: Length of the excerpt to keep, in seconds. May be fractional.
        sampling_rate: Sample rate in Hz used both for synthesis and playback.

    Returns:
        An ``IPython.display.Audio`` widget containing at most ``seconds``
        seconds of audio.
    """
    waveform = pm.fluidsynth(fs=sampling_rate)
    # Slice bounds must be integers; int() keeps fractional `seconds` usable.
    num_samples = int(seconds * sampling_rate)
    return Audio(waveform[:num_samples], rate=sampling_rate)

# Parse the MIDI file and list the instrument tracks it contains
pm = pretty_midi.PrettyMIDI(sample_file)
print(pm.instruments)  # Display the instruments in the MIDI file


[Instrument(program=85, is_drum=False, name="Lead"), Instrument(program=73, is_drum=False, name="Voice"), Instrument(program=87, is_drum=False, name="Bass"), Instrument(program=0, is_drum=False, name="Chords"), Instrument(program=0, is_drum=True, name="Drums")]




In [None]:
def midi_to_notes(midi_file):
    """Extract the notes of a MIDI file's first instrument into a DataFrame.

    Args:
        midi_file: Path (or file object) accepted by ``pretty_midi.PrettyMIDI``.

    Returns:
        A ``pandas.DataFrame`` with one row per note, ordered by start time,
        and the columns ``pitch``, ``start``, ``end``, ``step`` (start minus
        the previous note's start; 0 for the first note) and ``duration``
        (end minus start). An instrument without notes yields an empty frame
        with the same columns instead of raising ``IndexError``.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)

    columns = ("pitch", "start", "end", "step", "duration")
    notes = {name: [] for name in columns}

    # Guard the first-note lookup so an empty track does not crash.
    prev_start = sorted_notes[0].start if sorted_notes else 0.0
    for note in sorted_notes:
        start = note.start
        end = note.end
        notes["pitch"].append(note.pitch)
        notes["start"].append(start)
        notes["end"].append(end)
        notes["step"].append(start - prev_start)
        notes["duration"].append(end - start)
        prev_start = start

    # Explicit dtypes keep the empty frame consistent with a populated one.
    return pd.DataFrame({
        name: np.array(values, dtype=int if name == "pitch" else float)
        for name, values in notes.items()
    })

# Extract the first instrument's notes from the sample MIDI file
raw_notes = midi_to_notes(sample_file)
print(raw_notes.head())  # Display the first few notes


   pitch  start    end   step  duration
0     69  1.500  1.625  0.000     0.125
1     69  1.875  2.000  0.375     0.125
2     66  2.125  2.250  0.250     0.125
3     64  2.500  2.750  0.375     0.250
4     62  2.750  2.875  0.250     0.125


In [None]:
def notes_to_midi(
    notes: pd.DataFrame,
    out_file: str,
    instrument_name: str,
    velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:
    """Render a note table as a single-instrument MIDI file.

    Args:
        notes: Frame with ``pitch``, ``step`` and ``duration`` columns. Each
            note starts ``step`` after the previous note's start and lasts
            ``duration``.
        out_file: Path the MIDI file is written to.
        instrument_name: Instrument name understood by
            ``pretty_midi.instrument_name_to_program``.
        velocity: Velocity assigned to every note.

    Returns:
        The ``pretty_midi.PrettyMIDI`` object that was written to ``out_file``.
    """
    program = pretty_midi.instrument_name_to_program(instrument_name)
    track = pretty_midi.Instrument(program=program)

    # Running onset: each row's start is the previous start plus its step.
    onset = 0
    for _, row in notes.iterrows():
        onset = float(onset + row['step'])
        track.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(row['pitch']),
                start=onset,
                end=float(onset + row['duration']),
            )
        )

    midi = pretty_midi.PrettyMIDI()
    midi.instruments.append(track)
    midi.write(out_file)
    return midi

# Round-trip check: write the extracted notes back out as a MIDI file
out_file = 'recreated.mid'
# Reuse the program number of the source file's first instrument
instrument_name = pretty_midi.program_to_instrument_name(pm.instruments[0].program)
out_pm = notes_to_midi(raw_notes, out_file=out_file, instrument_name=instrument_name)


# Display the audio of the recreated MIDI
display_audio(out_pm, 30)


In [None]:
# Prepare the dataset
# Column order used for every note vector in the rest of the notebook
key_order = ["pitch", "step", "duration"]
# Stack the three feature columns side by side: one row per note
train_notes = np.stack([raw_notes[key] for key in key_order], axis=1)
notes_ds = tf.data.Dataset.from_tensor_slices(train_notes)

# Check the dataset
print(notes_ds.element_spec)



TensorSpec(shape=(3,), dtype=tf.float64, name=None)


In [None]:
seq_length = 20
vocab_size = 128

def create_sequences(dataset, seq_length, vocab_size=128):
    """Build (input window, next-note target) training pairs.

    Every window holds ``seq_length`` consecutive notes and its target is the
    note immediately following the window.

    Args:
        dataset: Notes as rows of ``(pitch, step, duration)``; either a
            ``tf.data.Dataset`` of such rows or an array-like of shape
            ``(num_notes, 3)``.
        seq_length: Number of notes in each input window.
        vocab_size: Divisor applied to the pitch column.

    Returns:
        A ``tf.data.Dataset`` yielding ``(sequence, targets)`` where
        ``sequence`` has shape ``(seq_length, 3)`` and ``targets`` is a dict
        with the scalar entries ``pitch``, ``step`` and ``duration``. Inputs
        and targets share the same scaling: pitch is divided by
        ``vocab_size`` while step and duration are left untouched. Multiply
        the pitch target by ``vocab_size`` to recover the pitch number.

    Raises:
        ValueError: If the notes are not shaped ``(num_notes, 3)`` or there
            are not more than ``seq_length`` notes.
    """
    # Use the data that was passed in rather than a notebook-level global.
    if isinstance(dataset, tf.data.Dataset):
        notes = np.array(list(dataset.as_numpy_iterator()))
    else:
        notes = np.asarray(dataset)
    if notes.ndim != 2 or notes.shape[1] != 3:
        raise ValueError(
            f"expected notes of shape (num_notes, 3), got {notes.shape}")

    num_seq = notes.shape[0] - seq_length
    if num_seq <= 0:
        raise ValueError(
            f"need more than {seq_length} notes to build a sequence, "
            f"got {notes.shape[0]}")

    # One scale for inputs and targets; step/duration must not be divided
    # by vocab_size.
    scaled = notes / np.array([vocab_size, 1, 1])
    sequences = np.stack([scaled[i:i + seq_length] for i in range(num_seq)])
    # Target of window i is note i + seq_length, i.e. the very next note.
    targets = scaled[seq_length:]
    print(sequences.shape, targets.shape)
    dataset = tf.data.Dataset.from_tensor_slices((sequences, {"pitch": targets[:, 0], "step": targets[:, 1], "duration": targets[:, 2]}))
    return dataset

# The window length now equals seq_length, matching the model's input shape.
seq_ds = create_sequences(notes_ds, seq_length, vocab_size)
batch_size = 64
buffer_size = 5000
train_ds = seq_ds.shuffle(buffer_size).batch(batch_size)

print(train_ds.element_spec)  # Check the training dataset


(31, 20, 3) (31, 3)
(TensorSpec(shape=(None, 20, 3), dtype=tf.float64, name=None), {'pitch': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'step': TensorSpec(shape=(None,), dtype=tf.float64, name=None), 'duration': TensorSpec(shape=(None,), dtype=tf.float64, name=None)})


In [None]:
layer = tf.keras.layers
learning_rate = 0.005

# One LSTM encoder over the note window feeding three heads, one per note feature.
input_data = tf.keras.Input(shape=(seq_length, 3))
x = layer.LSTM(128)(input_data)
outputs = {
    # One logit per pitch; a narrower head can neither be trained on nor
    # generate the upper part of the pitch range.
    "pitch": layer.Dense(vocab_size, name="pitch")(x),
    "step": layer.Dense(1, name="step")(x),
    "duration": layer.Dense(1, name="duration")(x),
}
model = tf.keras.Model(input_data, outputs)

loss = {
    "pitch": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    "step": tf.keras.losses.MeanSquaredError(),
    "duration": tf.keras.losses.MeanSquaredError(),
}
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# The pitch loss is down-weighted relative to the two regression losses.
model.compile(loss=loss, loss_weights={
    'pitch': 0.05,
    'step': 1.0,
    'duration': 1.0,
}, optimizer=optimizer)

model.summary()


def pitch_target_to_class_id(features, targets):
    """Turn the pitch target stored in train_ds into an integer class id.

    train_ds carries the pitch target divided by vocab_size, but the sparse
    categorical loss needs integer class indices. Fractional labels would be
    truncated to class 0.
    """
    targets = dict(targets)
    targets["pitch"] = tf.cast(
        tf.round(targets["pitch"] * vocab_size), tf.int64)
    return features, targets


# Train the model
model.fit(train_ds.map(pitch_target_to_class_id), epochs=10)


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 20, 3)]              0         []                            
                                                                                                  
 lstm_1 (LSTM)               (None, 128)                  67584     ['input_2[0][0]']             
                                                                                                  
 duration (Dense)            (None, 1)                    129       ['lstm_1[0][0]']              
                                                                                                  
 pitch (Dense)               (None, 64)                   8256      ['lstm_1[0][0]']              
                                                                                            

<keras.src.callbacks.History at 0x7963d8fbf670>

In [None]:
def predict_next_note(
    notes, keras_model, temperature):
    """Sample the next note from a trained model.

    Args:
        notes: Window of preceding notes without a batch dimension. The
            notebook passes an array of shape ``(seq_length, 3)``.
        keras_model: Model whose ``predict`` returns a dict with the keys
            ``pitch`` (logits), ``step`` and ``duration``.
        temperature: Positive divisor applied to the pitch logits before
            sampling; larger values make the sampling more random.

    Returns:
        Tuple ``(pitch, step, duration)`` as ``(int, float, float)``, with
        step and duration clamped to be non-negative.
    """
    assert temperature > 0

    # The model expects a leading batch dimension.
    inputs = np.expand_dims(notes, 0)
    # Use the model handed in by the caller, not a notebook-level global;
    # verbose=0 avoids one progress bar per generated note.
    predictions = keras_model.predict(inputs, verbose=0)

    pitch_logits = predictions['pitch'] / temperature
    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)

    duration = tf.squeeze(predictions["duration"], axis=-1)
    step = tf.squeeze(predictions["step"], axis=-1)
    # Negative times are meaningless, so clamp the regression outputs at zero.
    step = tf.maximum(0, step)
    duration = tf.maximum(0, duration)
    return int(pitch), float(step), float(duration)

# Generate new notes
temperature = 2.0
num_predictions = 1200

sample_notes = np.stack([raw_notes[key] for key in key_order], axis=1)

# Per-column scale of the model inputs: only the pitch column is normalized.
input_scale = np.array([vocab_size, 1, 1])

# The initial sequence of notes and the pitch is normalized similar to training sequences
input_notes = sample_notes[:seq_length] / input_scale

generated_notes = []
prev_start = 0
for _ in range(num_predictions):
    pitch, step, duration = predict_next_note(input_notes, model, temperature)
    start = prev_start + step
    end = start + duration
    input_note = (pitch, step, duration)
    generated_notes.append((*input_note, start, end))
    # Slide the window: drop the oldest note and append the new one. The new
    # note is scaled like the rest of the window; feeding the raw pitch back
    # would put it on a different scale from the other window rows.
    next_input = np.expand_dims(input_note, 0) / input_scale
    input_notes = np.concatenate([input_notes[1:], next_input], axis=0)
    prev_start = start

generated_notes = pd.DataFrame(
    generated_notes, columns=(*key_order, 'start', 'end'))

# Convert the generated notes back to MIDI and play it
generated_out_file = 'generated_music.mid'
generated_pm = notes_to_midi(
    generated_notes, out_file=generated_out_file, instrument_name=instrument_name)

display_audio(generated_pm, 30)




In [None]:
# Inspect the generated note table (pandas abbreviates long frames when printed)
print(generated_notes)

      pitch      step  duration       start         end
0         0  0.220745  0.000000    0.220745    0.220745
1         0  0.231923  0.000000    0.452668    0.452668
2         0  0.232856  0.000000    0.685523    0.685523
3        60  0.232752  0.000000    0.918276    0.918276
4        33  0.084753  0.263543    1.003028    1.266572
...     ...       ...       ...         ...         ...
1195      9  0.145969  0.236752  197.835146  198.071899
1196     19  0.164980  0.177190  198.000126  198.177316
1197     41  0.118371  0.214963  198.118497  198.333460
1198      8  0.122919  0.236434  198.241416  198.477850
1199      0  0.155121  0.166824  198.396537  198.563361

[1200 rows x 5 columns]


In [None]:
# Install the timidity system package and the midi2audio Python package
!apt-get install -y timidity
!pip install midi2audio


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libao-common libao4
Suggested packages:
  libaudio2 libsndio6.1 fluid-soundfont-gs freepats pmidi timidity-daemon
The following NEW packages will be installed:
  libao-common libao4 timidity
0 upgraded, 3 newly installed, 0 to remove and 59 not upgraded.
Need to get 723 kB of archives.
After this operation, 1,848 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libao-common all 1.2.2+20180113-1.1ubuntu3 [6,568 B]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libao4 amd64 1.2.2+20180113-1.1ubuntu3 [35.2 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 timidity amd64 2.14.0-8ubuntu1.22.04.1 [681 kB]
Fetched 723 kB in 1s (625 kB/s)
Selecting previously unselected package libao-common.
(Reading database ... 124664 files and directories currently installed.)
Pr

In [None]:
# Write the generated MIDI next to the notebook's other files under /content
# rather than into the filesystem root.
midi_path = '/content/generated_midi.mid'
generated_pm.write(midi_path)


In [None]:
from midi2audio import FluidSynth

# midi2audio renders through FluidSynth (not timidity) and needs a SoundFont.
# Point it at the General MIDI font shipped by the `fluid-soundfont-gm` package
# installed at the top of the notebook instead of relying on a per-user default
# that this notebook never creates.
sound_font_path = '/usr/share/sounds/sf2/FluidR3_GM.sf2'
fs = FluidSynth(sound_font_path)

# Convert the MIDI file to an audio file (WAV format)
output_wav_path = '/content/generated_audio.wav'
fs.midi_to_audio(midi_path, output_wav_path)

print(f'Audio saved to {output_wav_path}')


Audio saved to /content/generated_audio.wav


In [None]:
# Colab-only helper module; this cell is not usable outside Google Colab
from google.colab import files

# Download the WAV file
files.download(output_wav_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>