This program is adapted from:
> https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5  

Reference article explaining how to improve the program:
> https://david-exiga.medium.com/music-generation-using-lstm-neural-networks-44f6780a4c5  

Additional explanation of the program (in Chinese):
> https://github.com/xitu/gold-miner/blob/master/TODO1/how-to-generate-music-using-a-lstm-neural-network-in-keras.md

In [35]:
# Install dependency 
# music21 Introduction: https://juejin.cn/post/7063827463058489352
! pip install music21 keras tensorflow[and-cuda] 

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [36]:
# For reading files
import glob
# array processing
import numpy
from matplotlib import pyplot
# keras for building deep learning model
import keras
from keras.models import Sequential
from keras.layers import Dense, TimeDistributed
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint


### Get all notes and chords from midi files in a directory

In [37]:
"""
Extract all notes and chords from MIDI files in a directory.

This script uses music21 to process MIDI files. It reads all `.mid` files in the 
specified directory, extracts notes and chords, and appends them to the `notes` list.
"""

# Import necessary modules from music21 for MIDI processing
from music21 import converter, instrument, note, chord
import glob

# Flat list of every note/chord token found, in file order.
# Notes are stored as pitch names (e.g. "E4"); chords as dot-joined
# normal-order pitch classes (e.g. "3.7").
notes = []

# Specify the path to the MIDI files (modify as needed)
midi_path = "./midi_songs/*.mid"

# Loop through all MIDI files in the specified directory
for file in glob.glob(midi_path):
    print(f"Parsing {file}")

    # Parse the MIDI file using music21
    midi = converter.parse(file)

    # Holds the iterable of elements to scan for notes and chords
    notes_to_parse = None

    try:
        # If the MIDI file contains instrument parts, use only the first part
        s2 = instrument.partitionByInstrument(midi)
        notes_to_parse = s2.parts[0].recurse()  # Access notes recursively
    except (AttributeError, IndexError):
        # AttributeError: partitioning produced no object with `.parts`.
        # IndexError: partitioning produced no parts at all.
        # Either way, fall back to the flattened stream.
        # `.flatten()` replaces the deprecated `.flat` property.
        notes_to_parse = midi.flatten().notes

    # Extract notes and chords from the parsed MIDI data
    for element in notes_to_parse:
        if isinstance(element, note.Note):
            # A Note is stored as its pitch name
            notes.append(str(element.pitch))
        elif isinstance(element, chord.Chord):
            # A Chord is stored as its normal order joined by dots
            notes.append('.'.join(str(n) for n in element.normalOrder))

# Output the total number of notes and chords extracted
print(f"Total notes and chords extracted: {len(notes)}")


Parsing ./midi_songs/Bohemian Rhapsody.mid
Parsing ./midi_songs/Never-Gonna-Give-You-Up.mid
Parsing ./midi_songs/Tokyo Ghoul - Unravel.mid
Parsing ./midi_songs/One Night In Tokyo.mid
Parsing ./midi_songs/Linkin Park - One Step Closer.mid
Parsing ./midi_songs/The-Final-Countdown.mid
Parsing ./midi_songs/Daft Punk - Aerodynamic.mid
Total notes and chords extracted: 897


### Prepare input and output for neural network use

In [38]:
# Build the vocabulary and the (input window -> next note) training pairs

# Sorted unique note/chord names; a name's position in this list is its numeric ID
pitchnames = sorted(set(notes))

# Vocabulary size (number of distinct note/chord names)
n_vocab = len(pitchnames)

# Lookup table from note/chord name to numeric ID
note_to_int = {name: idx for idx, name in enumerate(pitchnames)}

print("\n===== Explanation of Variables =====\n")
print("notes: A list containing all the musical notes as strings.")
print(f"Total number of notes in the score: {len(notes)}")
print(f"Total unique note types in the score: {n_vocab}")
print(f"Unique note types: {pitchnames}")
print(f"Mapping of note types to IDs: {note_to_int}")

# Number of consecutive notes fed to the network per training example
sequence_length = 100

# Stay empty when there are too few notes to form a single window
network_input = []
network_output = []

if len(notes) <= sequence_length:
    print(f"Error: The notes list must be longer than the sequence length ({sequence_length}).")
else:
    # Encode the whole score once, then slice windows out of the encoded list
    encoded_notes = [note_to_int[name] for name in notes]

    # Window k covers encoded_notes[k : k + sequence_length] ...
    network_input = [
        encoded_notes[begin:begin + sequence_length]
        for begin in range(len(encoded_notes) - sequence_length)
    ]
    # ... and its target is the note immediately after the window
    network_output = encoded_notes[sequence_length:]

    # Slice selecting the last 10 positions of a window
    tail = slice(sequence_length - 10, sequence_length)

    print("\n===================\n")
    print(f"Total notes: {len(notes)}")
    print(f"Each {sequence_length} notes are converted into a training data set.")
    print(f"network_input: {len(network_input)} sequences, each containing {len(network_input[0])} numeric IDs.")
    print(f"network_output: {len(network_output)} numeric IDs, each corresponding to the next note in the sequence.")
    print("\n===================\n")
    print("Notes from index sequence_length - 10 to sequence_length:")
    print(notes[tail])
    print("Corresponding numeric IDs:")
    print(encoded_notes[tail])
    print("")
    print(f"Last 10 IDs of the 0th sequence in network_input: {network_input[0][tail]}")
    print(f"Last 10 IDs of the 1st sequence in network_input: {network_input[1][tail]}")
    print(f"Last 10 IDs of the 2nd sequence in network_input: {network_input[2][tail]}")
    print("First three outputs in network_output:", network_output[0:3])

    # Number of training windows
    n_patterns = len(network_input)

    # Shape (n_patterns, sequence_length, 1) for the LSTM, scaled by vocabulary size
    normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1)) / float(n_vocab)

    # One-hot encode the targets
    network_output = to_categorical(network_output, n_vocab)

    print("\n===== After Reshaping Data =====\n")
    print("normalized_input.shape:", normalized_input.shape)
    print("network_output.shape:", network_output.shape)



===== Explanation of Variables =====

notes: A list containing all the musical notes as strings.
Total number of notes in the score: 897
Total unique note types in the score: 51
Unique note types: ['0', '0.5', '2.7', '3.7', '3.9', '5.10', '5.8', '7.0', '8.0', '9.0', 'A2', 'A4', 'A5', 'A6', 'B-4', 'B-5', 'B1', 'B2', 'B3', 'B4', 'B5', 'C#2', 'C#4', 'C#5', 'C1', 'C3', 'C4', 'C5', 'C6', 'D2', 'D5', 'D6', 'E-5', 'E-6', 'E2', 'E4', 'E5', 'E6', 'F#2', 'F#4', 'F#5', 'F#6', 'F2', 'F5', 'F6', 'G#2', 'G#4', 'G#5', 'G4', 'G5', 'G6']
Mapping of note types to IDs: {'0': 0, '0.5': 1, '2.7': 2, '3.7': 3, '3.9': 4, '5.10': 5, '5.8': 6, '7.0': 7, '8.0': 8, '9.0': 9, 'A2': 10, 'A4': 11, 'A5': 12, 'A6': 13, 'B-4': 14, 'B-5': 15, 'B1': 16, 'B2': 17, 'B3': 18, 'B4': 19, 'B5': 20, 'C#2': 21, 'C#4': 22, 'C#5': 23, 'C1': 24, 'C3': 25, 'C4': 26, 'C5': 27, 'C6': 28, 'D2': 29, 'D5': 30, 'D6': 31, 'E-5': 32, 'E-6': 33, 'E2': 34, 'E4': 35, 'E5': 36, 'E6': 37, 'F#2': 38, 'F#4': 39, 'F#5': 40, 'F#6': 41, 'F2': 42, '

### Create the structure of a neural network 
### LSTM

In [39]:
"""
Create the structure of the neural network using LSTM layers.

This model is designed for sequence prediction tasks, leveraging the strengths of
LSTM layers for processing sequential data like music notes and chords.
"""

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Activation, BatchNormalization
from tensorflow.keras.optimizers import RMSprop

# Define the model
model = Sequential()

# Declare the input shape with an explicit Input layer. Passing `input_shape=`
# to the first layer of a Sequential model is deprecated in Keras 3.
# Shape is (timesteps, features) taken from the prepared training array.
model.add(Input(shape=(normalized_input.shape[1], normalized_input.shape[2])))

# First LSTM layer; return_sequences=True so the next LSTM receives the full sequence
model.add(LSTM(
    512,  # Number of units in the LSTM layer
    recurrent_dropout=0.1,  # Dropout for recurrent connections
    return_sequences=True  # Return sequences for stacking LSTM layers
))

# Second LSTM layer
model.add(LSTM(512, return_sequences=True, recurrent_dropout=0.1))

# Third LSTM layer (no return_sequences since it's the last LSTM layer)
model.add(LSTM(512))

# Batch Normalization
model.add(BatchNormalization())

# Dropout layer to reduce overfitting
model.add(Dropout(0.1))

# Dense layer with 256 units and ReLU activation
model.add(Dense(256))
model.add(Activation('relu'))

# Batch Normalization
model.add(BatchNormalization())

# Another Dropout layer
model.add(Dropout(0.1))

# Output layer: one unit per unique note/chord, softmax over the vocabulary
model.add(Dense(n_vocab))  # n_vocab is the number of unique notes/chords
model.add(Activation('softmax'))

# Compile the model with categorical crossentropy loss and RMSprop optimizer
model.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(learning_rate=0.001)  # Specify learning rate
)

# Display the model summary
model.summary()


### Train a neural network

In [40]:
"""
Train the neural network for generating music sequences.

This process adjusts the weights of the model based on the provided input
and output, enabling it to learn patterns in the musical dataset. Only the best model
based on training loss will be saved.
"""

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Checkpoint: write the model to disk whenever the training loss reaches a new minimum.
# (No save_weights_only is passed, so the whole model is saved, not only weights.)
checkpoint_cb = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='loss',
    save_best_only=True,
    mode='min',
    verbose=1,
)

# Early stopping: give up after 10 epochs without a lower training loss,
# then roll the in-memory model back to its best weights.
early_stop_cb = EarlyStopping(
    monitor='loss',
    patience=10,
    restore_best_weights=True,
)

callbacks = [checkpoint_cb, early_stop_cb]

# Fit on the prepared windows (inputs) and one-hot next-note targets (outputs)
history = model.fit(
    normalized_input,
    network_output,
    epochs=50,
    batch_size=128,
    callbacks=callbacks,
    verbose=1,
)

print("Training complete! The best model has been saved as 'best_model.keras'.")


Epoch 1/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step - loss: 4.3516
Epoch 1: loss improved from inf to 4.18886, saving model to best_model.keras
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 188ms/step - loss: 4.3312
Epoch 2/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - loss: 4.0272
Epoch 2: loss improved from 4.18886 to 3.85156, saving model to best_model.keras
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 193ms/step - loss: 4.0052
Epoch 3/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step - loss: 3.6239
Epoch 3: loss improved from 3.85156 to 3.56712, saving model to best_model.keras
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 197ms/step - loss: 3.6168
Epoch 4/50
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - loss: 3.5192
Epoch 4: loss improved from 3.56712 to 3.46361, saving model to best_model.keras
[1m7/7[0m [32m━━━

Starting from a randomly selected seed sequence, repeatedly predict the next note with the neural network to generate the score

In [41]:
"""
Generate music based on a starting sequence using the trained neural network.

This process uses the model to predict the next notes and constructs
a new sequence that can be converted into a MIDI file.
"""

import random

# Choose a random sequence from network_input as the starting point for generation.
# numpy.random.randint excludes the upper bound, so passing len(network_input)
# makes every sequence (including the last one) eligible.
start = numpy.random.randint(0, len(network_input))

# Work on a copy: the window is appended to below, and mutating the original
# list would leave network_input[start] one element longer than the others.
pattern = list(network_input[start])

# Map integers back to their corresponding notes/chords
int_to_note = {number: note for number, note in enumerate(pitchnames)}

# Store the generated sequence
prediction_output = []

print("Generating notes...")

# Generate a sequence of notes (adjust the range for sequence length)
for note_index in range(200):  # Generate more notes for richer output
    # Shape the window as (1, timesteps, 1) and scale it like the training data
    prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)  # Normalize input

    # Predict probabilities for the next note
    prediction = model.predict(prediction_input, verbose=0)

    # Sample among the 3 most probable notes, weighted by their probabilities,
    # instead of always taking the single most likely one
    top_indices = numpy.argsort(prediction[0])[-3:]
    # Cast to a plain int so the window holds the same element type as the seed
    index = int(random.choices(top_indices, weights=prediction[0][top_indices])[0])

    # Map the predicted index to the corresponding note
    result = int_to_note[index]
    prediction_output.append(result)

    print(f"Note {note_index}: {result}")

    # Slide the window: append the new note and drop the oldest one
    pattern.append(index)
    pattern = pattern[1:]

print("Note generation complete!")


Generating notes...
Note 0: F#4
Note 1: G#4
Note 2: E4
Note 3: E4
Note 4: F#4
Note 5: E4
Note 6: F#4
Note 7: E4
Note 8: G#4
Note 9: E4
Note 10: F#4
Note 11: F#4
Note 12: G#4
Note 13: E4
Note 14: G#4
Note 15: E4
Note 16: F#4
Note 17: E4
Note 18: G#4
Note 19: E4
Note 20: F#4
Note 21: F#4
Note 22: F#4
Note 23: E4
Note 24: E4
Note 25: G#4
Note 26: F#4
Note 27: E4
Note 28: G#4
Note 29: G#4
Note 30: F#4
Note 31: G#4
Note 32: E4
Note 33: G#4
Note 34: F#4
Note 35: G#4
Note 36: F#4
Note 37: E4
Note 38: G#4
Note 39: G#4
Note 40: E4
Note 41: F#4
Note 42: F#4
Note 43: E4
Note 44: G#4
Note 45: F#4
Note 46: E4
Note 47: E4
Note 48: F#4
Note 49: E4
Note 50: F#4
Note 51: G#4
Note 52: G#4
Note 53: G#4
Note 54: F#4
Note 55: F#4
Note 56: F#4
Note 57: E4
Note 58: F#4
Note 59: E4
Note 60: F#4
Note 61: F#4
Note 62: G#4
Note 63: G#4
Note 64: F#4
Note 65: E4
Note 66: F#4
Note 67: E4
Note 68: E4
Note 69: G#4
Note 70: F#4
Note 71: F#4
Note 72: G#4
Note 73: F#4
Note 74: G#4
Note 75: F#4
Note 76: G#4
Note 77: E4
N

Convert predicted output to notes and create a MIDI file from the notes

In [42]:
"""
Convert the predicted output into a MIDI file.

This script takes the generated sequence of notes and chords, creates
corresponding MIDI objects, and saves them as a MIDI file.
"""

from music21 import stream, note, chord, instrument

# Initialize variables for MIDI creation
offset = 0  # Start position of the next note/chord within the stream
output_notes = []

# Convert the predicted tokens into notes and chords.
# The loop variable and the per-chord list use their own names so they do not
# overwrite the notebook-level `pattern` and `notes` variables from earlier cells.
for token in prediction_output:
    # A token containing '.' or made only of digits encodes a chord
    if ('.' in token) or token.isdigit():
        notes_in_chord = token.split('.')
        chord_notes = []
        for current_note in notes_in_chord:
            try:
                # Convert the note number into a Note object
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                chord_notes.append(new_note)
            except ValueError:
                print(f"Skipped invalid note: {current_note}")
        # Create a Chord object only if at least one member note was valid
        if chord_notes:
            new_chord = chord.Chord(chord_notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
    # Otherwise the token is a single pitch name
    else:
        try:
            # Convert the token into a Note object
            new_note = note.Note(token)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)
        except Exception as e:
            print(f"Error creating note '{token}': {e}")

    # Advance by a fixed step so consecutive notes do not stack on each other
    offset += 0.5

# Create a music21 stream from the generated notes and chords
midi_stream = stream.Stream(output_notes)

# Save the stream as a MIDI file
output_filename = 'generated_music.mid'
try:
    midi_stream.write('midi', fp=output_filename)
    print(f"MIDI file successfully created: {output_filename}")
except Exception as e:
    print(f"Error writing MIDI file: {e}")


MIDI file successfully created: generated_music.mid
