Data Preparation

In [None]:
import music21
import numpy as np

def midi_to_notes(midi_file):
    """Parse a MIDI file into a list of note/chord tokens.

    Args:
        midi_file: Path (or any other input accepted by
            ``music21.converter.parse``) of the file to read.

    Returns:
        A list of strings, in the order music21 yields the elements of the
        flattened score. A single note becomes ``str(element.pitch)``; a
        chord becomes the integers of its ``normalOrder`` joined with ``"."``.
        Elements that are neither ``Note`` nor ``Chord`` are skipped.
    """
    score = music21.converter.parse(midi_file)
    # ``Stream.flat`` is deprecated since music21 v7 in favour of
    # ``Stream.flatten()``; fall back to the property on older releases
    # that do not provide the method.
    flat_score = score.flatten() if hasattr(score, "flatten") else score.flat

    notes = []
    for element in flat_score.notes:
        if isinstance(element, music21.note.Note):
            notes.append(str(element.pitch))
        elif isinstance(element, music21.chord.Chord):
            # normalOrder is a list of pitch-class integers; encode the
            # chord as a single dot-separated token.
            notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes

# Example usage:
# notes = midi_to_notes('path/to/midi/file.mid')


Model Design

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras import Input

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one pre-normalised Transformer encoder block to ``inputs``.

    The block consists of a self-attention sub-layer followed by a two-layer
    feed-forward sub-layer. Each sub-layer is preceded by layer normalisation
    and wrapped in a residual connection.

    Args:
        inputs: Keras tensor to transform; its last dimension fixes the
            width of the block's output.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden feed-forward ``Dense`` layer.
        dropout: Dropout rate used inside the attention layer and after
            each sub-layer's main transformation.

    Returns:
        A tensor whose last dimension equals ``inputs.shape[-1]``.
    """
    feature_dim = inputs.shape[-1]

    # --- Self-attention sub-layer (query and value are the same tensor) ---
    normed_inputs = LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = Dropout(dropout)(attended)
    residual = attended + inputs

    # --- Position-wise feed-forward sub-layer ---
    hidden = LayerNormalization(epsilon=1e-6)(residual)
    hidden = Dense(ff_dim, activation="relu")(hidden)
    hidden = Dropout(dropout)(hidden)
    # Project back to the input width so the residual addition is valid.
    projected = Dense(feature_dim)(hidden)
    return projected + residual

def build_transformer_model(input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0, n_classes=None):
    """Build a Transformer-encoder classifier as a Keras ``Model``.

    The network stacks ``num_transformer_blocks`` encoder blocks, applies a
    final layer normalisation, flattens the result and feeds it through an
    MLP head that ends in a softmax layer.

    Args:
        input_shape: Shape of one input sample (without the batch axis).
        head_size: ``key_dim`` of every attention layer.
        num_heads: Number of attention heads per block.
        ff_dim: Hidden width of each block's feed-forward part.
        num_transformer_blocks: Number of encoder blocks to stack.
        mlp_units: Iterable with the width of each hidden ``Dense`` layer of
            the MLP head.
        dropout: Dropout rate used inside the encoder blocks.
        mlp_dropout: Dropout rate applied after each MLP hidden layer.
        n_classes: Number of units in the softmax output layer. When
            ``None`` (the default) the historical value ``input_shape[0]``
            is used so existing callers keep their output shape. Note that
            ``input_shape[0]`` is the first (sequence) axis of the input, so
            callers predicting a class index, such as a note id trained with
            ``sparse_categorical_crossentropy``, should pass the number of
            classes explicitly.

    Returns:
        An uncompiled ``tensorflow.keras.Model``.
    """
    if n_classes is None:
        # Backward-compatible default: the original code tied the output
        # width to the first axis of the input.
        n_classes = input_shape[0]

    inputs = Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = LayerNormalization(epsilon=1e-6)(x)
    # Collapse all non-batch axes into one feature vector for the MLP head.
    x = tf.keras.layers.Flatten()(x)
    for dim in mlp_units:
        x = Dense(dim, activation="relu")(x)
        x = Dropout(mlp_dropout)(x)
    outputs = Dense(n_classes, activation="softmax")(x)
    return Model(inputs, outputs)

# Hyper-parameters shared by the cells below
VOCAB_SIZE = 128  # Number of unique notes
SEQ_LENGTH = 100  # Sequence length

# Instantiate the network: four encoder blocks followed by a single
# 256-unit MLP layer, with 10% dropout throughout.
model = build_transformer_model(
    (SEQ_LENGTH, VOCAB_SIZE),
    num_transformer_blocks=4,
    num_heads=4,
    head_size=256,
    ff_dim=256,
    mlp_units=[256],
    mlp_dropout=0.1,
    dropout=0.1,
)

# Targets are expected as integer class indices (sparse labels).
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

# Print the layer-by-layer overview of the network.
model.summary()


Training

In [None]:
def prepare_sequences(notes, seq_length):
    """Turn a token list into sliding-window training pairs.

    Every distinct token in ``notes`` is mapped to its index in the sorted
    set of tokens. Each window of ``seq_length`` consecutive tokens becomes
    one input sample, and the token immediately after the window becomes its
    target.

    Args:
        notes: Sequence of hashable, sortable tokens (e.g. note names).
        seq_length: Number of tokens per input window.

    Returns:
        A tuple ``(network_input, network_output)`` where ``network_input``
        is an array of shape ``(n_patterns, seq_length, 1)`` holding token
        indices and ``network_output`` is a 1-D array with the index of the
        token following each window. ``n_patterns`` is
        ``len(notes) - seq_length`` when that is positive, otherwise 0.
    """
    vocabulary = sorted(set(notes))
    note_to_int = dict(zip(vocabulary, range(len(vocabulary))))

    # Encode the whole piece once, then slice windows out of the encoding.
    encoded = [note_to_int[token] for token in notes]
    starts = range(len(notes) - seq_length)

    windows = [encoded[start:start + seq_length] for start in starts]
    targets = [encoded[start + seq_length] for start in starts]

    # Trailing axis of size 1: one feature (the token index) per time step.
    network_input = np.reshape(windows, (len(windows), seq_length, 1))
    network_output = np.array(targets)
    return network_input, network_output

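# Prepare data
# notes = midi_to_notes('path/to/midi/file.mid')
# X, y = prepare_sequences(notes, SEQ_LENGTH)

# NOTE: X has shape (n_patterns, SEQ_LENGTH, 1) and holds integer note
# indices, whereas the model above was built with
# input_shape=(SEQ_LENGTH, VOCAB_SIZE). Reconcile the two before fitting,
# e.g. by one-hot encoding the indices (this requires the number of distinct
# notes to be at most VOCAB_SIZE):
# X = tf.keras.utils.to_categorical(X[..., 0], num_classes=VOCAB_SIZE)

# Train the model
# model.fit(X, y, epochs=100, batch_size=64)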


Generation

In [None]:
def generate_notes(model, start_sequence, num_generate, pitch_names):
    """Greedily generate tokens by repeatedly querying ``model``.

    The seed sequence is encoded with the index of each token in
    ``pitch_names``. On every step the current window is fed to
    ``model.predict``, the arg-max of the prediction is taken as the next
    token, and the window slides forward by one position.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``.
        start_sequence: Iterable of seed tokens; each must occur in
            ``pitch_names``.
        num_generate: Number of tokens to produce.
        pitch_names: Ordered vocabulary; a token's position is its index.

    Returns:
        A list with the ``num_generate`` generated tokens.

    Raises:
        KeyError: If a seed token is missing from ``pitch_names`` or the
            predicted index has no entry in the vocabulary.
    """
    int_to_note = dict(enumerate(pitch_names))
    note_to_int = {name: idx for idx, name in enumerate(pitch_names)}

    window = [note_to_int[token] for token in start_sequence]
    generated = []

    for _ in range(num_generate):
        # Shape (batch=1, time steps, 1 feature) as passed to predict().
        batch = np.reshape(window, (1, len(window), 1))
        scores = model.predict(batch, verbose=0)
        best = np.argmax(scores)
        generated.append(int_to_note[best])
        # Slide the window: add the new index, then drop the oldest one.
        window = [*window, best][1:]

    return generated

# Example usage:
# pitch_names = sorted(set(notes))  # same vocabulary ordering as prepare_sequences
# start_sequence = notes[:SEQ_LENGTH]
# generated_notes = generate_notes(model, start_sequence, 500, pitch_names)
