This program is adapted from:
> https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5  

Reference article explaining how to improve the program:
> https://david-exiga.medium.com/music-generation-using-lstm-neural-networks-44f6780a4c5  

Additional explanation of the program (in Chinese):
> https://github.com/xitu/gold-miner/blob/master/TODO1/how-to-generate-music-using-a-lstm-neural-network-in-keras.md

In [1]:
# Install dependency 
# music21 Introduction: https://juejin.cn/post/7063827463058489352
! pip install music21 keras tensorflow[and-cuda] 

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
# For reading files
import glob
# array processing
import numpy
from matplotlib import pyplot
# keras for building deep learning model
import keras
from keras.models import Sequential
from keras.layers import Dense, TimeDistributed
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import BatchNormalization as BatchNorm
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

# 使用music21來進行midi檔案的操作
from music21 import converter, instrument, note, chord, stream, duration
# music21 介紹: https://juejin.cn/post/7063827463058489352


2024-12-02 14:48:38.716375: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1733150918.730050  208020 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1733150918.734204  208020 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-02 14:48:38.750595: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### 從目錄下的 midi 文件中獲取所有的音符和和弦

In [3]:
"""
Extract all notes and chords from MIDI files in a directory
"""

import glob
from music21 import converter, instrument, note, chord

# Initialize lists to store notes, rests, and other data
notes = []
rests = []
notes_without_rests = []
rest_to_duration = {}

# Use glob to read MIDI files from the specified path:
# "./"               - Current directory
# "midi_songs/"      - Folder named midi_songs
# "*.mid"            - All files with a .mid extension
# Notes are the musical pitches; this script reads notes from MIDI files into a list

for file in glob.glob("./midi_songs/*.mid"):  # Read all MIDI files from the folder
    # Parse the MIDI file using music21
    midi = converter.parse(file)

    print(f"Parsing {file}")

    notes_to_parse = None

    try:  # If the MIDI has instrument parts, select the first one
        s2 = instrument.partitionByInstrument(midi)
        notes_to_parse = s2.parts[0].recurse()
    except:  # If no instrument parts, get notes directly
        notes_to_parse = midi.flat.notes

    for element in notes_to_parse:
        # If the element is a Note, get its pitch
        if isinstance(element, note.Note):
            notes.append(str(element.pitch))
            notes_without_rests.append(str(element.pitch))
        # If the element is a Rest, get its name and duration
        elif isinstance(element, note.Rest):
            notes.append(str(element.fullName))
            rests.append(str(element.fullName))
            rest_to_duration[element.fullName] = element.duration.quarterLength
        # If the element is a Chord, get its pitches as integers for easier processing
        elif isinstance(element, chord.Chord):
            pitches = '.'.join(str(n) for n in element.normalOrder)
            notes.append(pitches)
            notes_without_rests.append(pitches)


Parsing ./midi_songs/Boys.mid
Parsing ./midi_songs/Because.mid
Parsing ./midi_songs/Bungalow Bill.mid
Parsing ./midi_songs/All Together Now.mid
Parsing ./midi_songs/All I've Got To Do.mid
Parsing ./midi_songs/A Hard Days Night.mid
Parsing ./midi_songs/And I Love Her.mid
Parsing ./midi_songs/Ask Me Why.mid
Parsing ./midi_songs/Anna (Go To Him).mid
Parsing ./midi_songs/Act Naturally.mid
Parsing ./midi_songs/Across The Universe.mid
Parsing ./midi_songs/Call Your Name.mid
Parsing ./midi_songs/Chains.mid
Parsing ./midi_songs/Bad Boy.mid
Parsing ./midi_songs/Doctor Robert.mid
Parsing ./midi_songs/Devil In Her Heart.mid
Parsing ./midi_songs/Don't Bother Me.mid
Parsing ./midi_songs/Baby You're A Rich Man.mid
Parsing ./midi_songs/Can't Buy Me Love.mid
Parsing ./midi_songs/Baby It's You.mid
Parsing ./midi_songs/All My Loving.mid
Parsing ./midi_songs/Any Time At All.mid
Parsing ./midi_songs/Carry That Weight.mid
Parsing ./midi_songs/Baby's In Black.mid
Parsing ./midi_songs/And Your Bird Can Sing.



Parsing ./midi_songs/It's All Too Much.mid
Parsing ./midi_songs/Sgt Pepper Reprise.mid
Parsing ./midi_songs/Maggie Mae.mid
Parsing ./midi_songs/It's Only Love.mid
Parsing ./midi_songs/If I Needed Someone.mid
Parsing ./midi_songs/Love Me Do.mid
Parsing ./midi_songs/Lovely Rita.mid
Parsing ./midi_songs/Mean Mr Mustard.mid
Parsing ./midi_songs/Let It Be.mid
Parsing ./midi_songs/Lady Madonna.mid
Parsing ./midi_songs/I've Got A Feeling.mid
Parsing ./midi_songs/Long And Winding Road.mid
Parsing ./midi_songs/Sexy Sadie.mid
Parsing ./midi_songs/Long Long Long.mid
Parsing ./midi_songs/Magical Mystery Tour.mid
Parsing ./midi_songs/Maxwell's Silver Hammer.mid
Parsing ./midi_songs/Sun King.mid
Parsing ./midi_songs/Wild Honey_Pie.mid
Parsing ./midi_songs/Sgt Pepper.mid
Parsing ./midi_songs/The Inner Light.mid
Parsing ./midi_songs/Nowhere Man.mid
Parsing ./midi_songs/Penny Lane.mid
Parsing ./midi_songs/PS I Love You.mid
Parsing ./midi_songs/Matchbox.mid
Parsing ./midi_songs/No Reply.mid
Parsing ./mi



Parsing ./midi_songs/Ob La_Di_Ob_La_Da.mid
Parsing ./midi_songs/Slow Down.mid
Parsing ./midi_songs/Tell Me What You See.mid
Parsing ./midi_songs/Till There Was You.mid
Parsing ./midi_songs/Yellow Submarine.mid
Parsing ./midi_songs/Michelle.mid
Parsing ./midi_songs/Oh Darling.mid
Parsing ./midi_songs/Misery.mid
Parsing ./midi_songs/She Loves You.mid
Parsing ./midi_songs/She Said She Said.mid
Parsing ./midi_songs/You've Got To Hide Your Love Away.mid
Parsing ./midi_songs/Yes It Is.mid
Parsing ./midi_songs/Roll Over Beethoven.mid
Parsing ./midi_songs/Twist And Shout.mid
Parsing ./midi_songs/You Can't Do That.mid
Parsing ./midi_songs/Yesterday.mid
Parsing ./midi_songs/When I Get Home.mid
Parsing ./midi_songs/We Can Work It Out.mid
Parsing ./midi_songs/You Never Give Me Your Money.mid
Parsing ./midi_songs/The Fool On The Hill.mid
Parsing ./midi_songs/Long Tall Sally.mid
Parsing ./midi_songs/Thank You_Girl.mid
Parsing ./midi_songs/The Night Before.mid
Parsing ./midi_songs/Your Mother Should 

### 準備神經網絡使用的輸入輸出

In [4]:
"""
Prepare input and output for the neural network
"""

import numpy
from tensorflow.keras.utils import to_categorical

# Get the number of unique note names (including rests)
n_vocab = len(set(notes))
# Sorted list of unique note names (including rests)
pitch_and_rest = sorted(set(item for item in notes))
# Sorted list of unique note names (excluding rests)
pitchnames = sorted(set(item for item in notes_without_rests))
# Sorted list of unique rest names
restnames = sorted(set(item for item in rests))
# Create a dictionary mapping each note/rest to an integer for training
note_to_int = {note: number for number, note in enumerate(pitch_and_rest)}

"""
# Alternative: Map notes and rests separately
note_to_int = {note: number for number, note in enumerate(pitchnames)}
note_to_int.update({rest: number + len(pitchnames) for number, rest in enumerate(restnames)})
"""

print("\n===== Explanation of Variables =====\n")
print("notes: A list storing all notes in the sheet music as strings.")
print(f"Total number of notes (including rests): {len(notes)}")
print(f"Total number of notes (excluding rests): {len(pitchnames)}")
print(f"Total number of unique note/rest names: {n_vocab}")
print(f"Total number of unique rest names: {len(set(rests))}")
print(f"Unique note names: {pitchnames}")
print(f"Mapping of note names to integers: {note_to_int}")

# Length of the input sequence for training (number of notes per sequence)
sequence_length = 100

# Initialize input and output sequences for training
network_input = []
network_output = []

# ===== Create input sequences and corresponding outputs from the notes =====
for i in range(0, len(notes) - sequence_length, 1):
    sequence_in = notes[i:i + sequence_length]  # Input sequence of notes
    sequence_out = notes[i + sequence_length]  # The next note as output
    
    # Convert notes in the input sequence to their corresponding integer values
    network_input.append([note_to_int[char] for char in sequence_in])
    # Convert the output note to its corresponding integer value
    network_output.append(note_to_int[sequence_out])

print("\n===================\n")
print(f"Total notes in 'notes': {len(notes)}")
print(f"Each {sequence_length} notes form one training sample.")
print(f"Total training samples (network_input): {len(network_input)}, each with {len(network_input[0])} integers.")
print(f"Total output samples (network_output): {len(network_output)}, each representing the next note's integer.")
print("\n===================\n")
print("Notes from position sequence_length-10 to sequence_length:", notes[sequence_length-10:sequence_length])
print("Their corresponding integers:", [note_to_int[char] for char in notes[sequence_length-10:sequence_length]])
print("")
print(f"Last 10 integers of network_input[0]: {network_input[0][sequence_length-10:sequence_length]}")
print(f"Last 10 integers of network_input[1]: {network_input[1][sequence_length-10:sequence_length]}")
print(f"Last 10 integers of network_input[2]: {network_input[2][sequence_length-10:sequence_length]}")
print("First three integers in network_output:", network_output[0:3])

# Total number of training patterns
n_patterns = len(network_input)

# ===== Reshape input to be compatible with LSTM layers =====
normalized_input = numpy.reshape(network_input, (n_patterns, sequence_length, 1))

# Normalize input values
normalized_input = normalized_input / float(n_vocab)

# Convert output to a one-hot encoded format suitable for categorical_crossentropy
network_output = to_categorical(network_output, n_vocab)

print("\n===== Reshaped Data =====\n")
print("Shape of normalized_input:", normalized_input.shape)
print("Shape of network_output:", network_output.shape)



===== Explanation of Variables =====

notes: A list storing all notes in the sheet music as strings.
Total number of notes (including rests): 63844
Total number of notes (excluding rests): 234
Total number of unique note/rest names: 285
Total number of unique rest names: 51
Unique note names: ['0', '0.1', '0.2', '0.2.4.7', '0.2.5', '0.2.7', '0.3', '0.3.5', '0.4', '0.4.7', '0.5', '1', '1.2', '1.2.4', '1.2.6', '1.3', '1.3.7', '1.4', '1.4.6.9', '1.4.7', '1.4.7.9', '1.4.8', '1.5', '1.5.7', '1.5.8', '1.6', '1.7', '10', '10.0.2.5', '10.0.4', '10.0.4.5', '10.0.5', '10.1', '10.1.2.6', '10.1.4', '10.1.5', '10.11', '10.2', '10.2.5', '10.3', '11', '11.0', '11.0.3', '11.0.4.7', '11.1', '11.2', '11.2.4', '11.2.4.6', '11.2.4.7', '11.2.5', '11.2.5.7', '11.2.6', '11.3', '11.3.6', '11.4', '2', '2.3', '2.3.4', '2.4', '2.4.6.8.11', '2.4.6.9', '2.4.7.10', '2.5', '2.5.7', '2.5.7.10', '2.5.7.9', '2.5.8', '2.5.9', '2.6', '2.6.10', '2.6.9', '2.7', '2.8', '3', '3.4', '3.4.7.11', '3.5', '3.5.7', '3.5.7.9', '3.

### 創建神經網絡的結構 
### LSTM

In [5]:
"""
Define the Neural Network Architecture
"""
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, BatchNormalization as BatchNorm, Dropout, Dense, Activation

# Reference for LSTM parameters:
# https://huhuhang.com/post/machine-learning/lstm-return-sequences-state

# Initialize the model
model = Sequential()

# Add the first LSTM layer with 512 units
model.add(LSTM(
    512,
    input_shape=(normalized_input.shape[1], normalized_input.shape[2]),
    recurrent_dropout=0.1,  # Dropout applied to recurrent connections
    return_sequences=True   # Ensure the output is a sequence for stacking
))

# Add the second LSTM layer with 512 units
model.add(LSTM(
    512,
    return_sequences=True,  # Continue outputting sequences
    recurrent_dropout=0.1
))

# Add the third LSTM layer with 512 units (no sequences returned)
model.add(LSTM(512))

# Add a Batch Normalization layer to normalize activations
model.add(BatchNorm())

# Add a Dropout layer to prevent overfitting
model.add(Dropout(0.1))

# Add a Dense (fully connected) layer with 256 units and ReLU activation
model.add(Dense(256))
model.add(Activation('relu'))

# Add another Batch Normalization layer
model.add(BatchNorm())

# Add another Dropout layer
model.add(Dropout(0.1))

# Add the final Dense layer with `n_vocab` units and softmax activation
# This maps the output to probabilities for each note
model.add(Dense(n_vocab))
model.add(Activation('softmax'))

# Compile the model with categorical cross-entropy loss and RMSprop optimizer
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Display a summary of the model architecture
model.summary()


I0000 00:00:1733151509.720012  208020 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46872 MB memory:  -> device: 0, name: NVIDIA RTX A6000, pci bus id: 0000:c9:00.0, compute capability: 8.6
  super().__init__(**kwargs)


### 訓練神經網絡

In [6]:
"""
Train the neural network for generating music sequences.

This process adjusts the weights of the model based on the provided input
and output, enabling it to learn patterns in the musical dataset. Only the best model
based on training loss will be saved.
"""

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define a callback to save the best model based on training loss
callbacks = [
    # Save only the best model based on minimum loss
    ModelCheckpoint(
        filepath='best_model.keras',  # Filepath to save the best model in .keras format
        monitor='loss',               # Monitor training loss for improvement
        save_best_only=True,          # Save only the best model weights
        mode='min',                   # Minimize the monitored value (loss)
        verbose=1
    ),
    # Stop training early if the loss stagnates
    EarlyStopping(
        monitor='loss', 
        patience=10,                  # Wait for 10 epochs of no improvement
        restore_best_weights=True     # Load the best weights when stopping
    )
]

# Train the model
history = model.fit(
    normalized_input,  # Input data
    network_output,    # Expected output
    epochs=50,         # Total number of training epochs
    batch_size=128,    # Size of each training batch
    callbacks=callbacks,  # Attach callbacks
    verbose=1          # Print progress during training
)

print("Training complete! The best model has been saved as 'best_model.keras'.")


Epoch 1/50


I0000 00:00:1733151514.403303  208434 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - loss: 4.1141
Epoch 1: loss improved from inf to 3.84187, saving model to best_model.keras
[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 124ms/step - loss: 4.1136
Epoch 2/50
[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step - loss: 3.4879
Epoch 2: loss improved from 3.84187 to 3.45177, saving model to best_model.keras
[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 128ms/step - loss: 3.4878
Epoch 3/50
[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step - loss: 3.3172
Epoch 3: loss improved from 3.45177 to 3.26850, saving model to best_model.keras
[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 125ms/step - loss: 3.3171
Epoch 4/50
[1m498/498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - loss: 3.0970
Epoch 4: loss improved from 3.26850 to 3.05645, saving model to best_model.keras


根據選定的音符起始點，從神經網絡預測下一個音符並生成樂譜

In [9]:
"""
Generate Notes Using the Neural Network
"""
import numpy as np

# Randomly select a starting sequence from the training data
start = np.random.randint(0, len(network_input) - 1)
pattern = network_input[start]  # A sequence of length `sequence_length` as the prediction starting point

# Create a dictionary to map integers back to notes
int_to_note = {number: note for number, note in enumerate(pitch_and_rest)}

# Store the generated notes
prediction_output = []

print("Generated notes:")

# Generate 100 notes (adjust range to generate more or fewer notes)
for note_index in range(400):
    # Reshape the pattern to the format expected by the model
    prediction_input = np.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)  # Normalize input data

    # Predict the probabilities of the next note
    prediction = model.predict(prediction_input, verbose=0)

    # Select the note with the highest probability
    index = np.argmax(prediction)

    # Map the integer back to the corresponding note
    result = int_to_note[index]
    print(result)

    # Append the generated note to the output
    prediction_output.append(result)

    # Slide the prediction window: append the new note and drop the oldest one
    pattern.append(index)
    pattern = pattern[1:len(pattern)]


Generated notes:
B2
B2
A2
G2
G2
G2
G2
A2
B2
B2
B2
B2
A2
G2
G2
G2
G2
A2
B2
B2
B2
B2
E2
F#2
G2
E2
A2
G2
F#2
E2
D3
D3
A2
B2
D3
B2
G2
A2
Double Dotted Half Rest
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3
F#3
E3
C#3
D3
A2
B2
D3
B2
G2
A2
A2
A2
C#3
E3

將預測的輸出轉換為音符，並從音符中創建一個MIDI文件 

In [8]:
"""
Convert Predicted Outputs into Notes and Create a MIDI File
"""

from music21 import stream, note, chord, instrument

offset = 0  # Time offset between notes
output_notes = []  # List to store the generated musical elements

# Create note, chord, or rest objects based on the model's generated output
for element in prediction_output:
    # If the element represents a chord (e.g., "60.64.67")
    if ('.' in element) or element.isdigit():
        notes_in_chord = element.split('.')
        notes = []
        for current_note in notes_in_chord:
            try:
                new_note = note.Note(int(current_note))  # Convert to a note object
                new_note.storedInstrument = instrument.Piano()  # Assign an instrument
                notes.append(new_note)
            except ValueError:
                pass  # Skip invalid notes
        new_chord = chord.Chord(notes)  # Create a chord from the notes
        new_chord.offset = offset  # Set the time offset
        output_notes.append(new_chord)

    # If the element represents a single note (e.g., "C4")
    elif element in pitchnames:
        new_note = note.Note(element)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        output_notes.append(new_note)

    # If the element represents a rest
    elif element in restnames:
        # Convert rest representation to duration if necessary
        new_rest = note.Rest()
        new_rest.quarterLength = rest_to_duration[element]  # Map rest name to duration
        new_rest.offset = offset
        new_rest.storedInstrument = instrument.Piano()
        output_notes.append(new_rest)

    # Increment the offset for the next musical element
    offset += 0.5

# Create a music21 Stream object to hold the notes, chords, and rests
midi_stream = stream.Stream(output_notes)

# Write the Stream to a MIDI file
midi_stream.write('midi', fp='LSTM_generated_music_with_rests.mid')


'LSTM_generated_music_with_rests.mid'