In [1]:
import os

import numpy as np
from music21 import converter, instrument, note, chord, stream
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation, Input
from tensorflow.keras.utils import to_categorical
     
# Load and preprocess MIDI files
def get_notes(data_folder):
    """Collect note and chord tokens from every MIDI file in a folder.

    Each ``note.Note`` becomes the string form of its pitch; each
    ``chord.Chord`` becomes its normal-order pitch classes joined by ``'.'``.
    Tokens from all files are concatenated into one flat list.

    Args:
        data_folder: Directory to scan (not recursive).

    Returns:
        A list of string tokens, in file-name order and then in the order
        the elements are yielded by music21.
    """
    notes = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # resulting token sequence reproducible between runs and machines.
    for file_name in sorted(os.listdir(data_folder)):
        # Accept both common MIDI extensions regardless of case
        # (".mid", ".MID", ".midi", ...), not only lowercase ".mid".
        if not file_name.lower().endswith((".mid", ".midi")):
            continue
        midi = converter.parse(os.path.join(data_folder, file_name))
        parts = instrument.partitionByInstrument(midi)
        if parts:  # File has instrument parts: use the first one only
            notes_to_parse = parts.parts[0].recurse()
        else:  # File has no instrument parts
            # NOTE(review): recent music21 releases deprecate `.flat` in
            # favour of `.flatten()` - confirm against the installed version.
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes

# Prepare input sequences for the model
def prepare_sequences(notes, n_vocab, sequence_length=100):
    """Turn a flat token list into sliding-window training data.

    Every distinct token is mapped to an integer by its position in the
    sorted set of tokens. Each window of ``sequence_length`` consecutive
    tokens becomes one input pattern and the token that follows it becomes
    the target.

    Args:
        notes: Sequence of string tokens.
        n_vocab: Number of classes; used both to scale the inputs and as
            ``num_classes`` for the one-hot targets.
        sequence_length: Number of tokens per input window (default 100).

    Returns:
        A ``(network_input, network_output)`` tuple. ``network_input`` has
        shape ``(n_patterns, sequence_length, 1)`` with the integer codes
        divided by ``n_vocab``; ``network_output`` is the result of
        ``to_categorical`` on the integer targets.
    """
    pitchnames = sorted(set(notes))
    note_to_int = {name: index for index, name in enumerate(pitchnames)}

    # Encode the whole sequence once instead of re-encoding every window.
    encoded = [note_to_int[name] for name in notes]

    network_input = []
    network_output = []
    for start in range(len(encoded) - sequence_length):
        stop = start + sequence_length
        network_input.append(encoded[start:stop])
        network_output.append(encoded[stop])

    n_patterns = len(network_input)
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # Scale the integer codes by the vocabulary size.
    network_input = network_input / float(n_vocab)
    network_output = to_categorical(network_output, num_classes=n_vocab)
    return network_input, network_output
     
# Build the LSTM model
def build_model(network_input, n_vocab):
    """Build and compile the stacked-LSTM next-token classifier.

    The network is three 512-unit LSTM layers (the first two returning full
    sequences) with 0.3 dropout, a 256-unit dense layer, and a softmax over
    ``n_vocab`` classes. It is compiled with categorical cross-entropy and
    the Adam optimizer.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` give the
            per-sample input shape (time steps, features).
        n_vocab: Size of the output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    timesteps, features = network_input.shape[1], network_input.shape[2]

    model = Sequential()
    # Declare the input shape with an explicit Input layer rather than the
    # `input_shape=` keyword on the first LSTM; Keras 3 emits a UserWarning
    # for the keyword form in Sequential models.
    model.add(Input(shape=(timesteps, features)))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model

# Generate new music
def generate_notes(model, network_input, pitchnames, n_vocab, n_notes=500):
    """Generate a token sequence by repeatedly predicting the next token.

    A random pattern from ``network_input`` seeds a sliding window. At each
    step the model predicts from the window, the arg-max class is emitted as
    a token, and the window advances by one position.

    ``network_input`` is expected to be already scaled by ``n_vocab`` (as
    returned by ``prepare_sequences``). The window is therefore kept in that
    scaled space: it is fed to the model as-is, and each predicted index is
    divided by ``n_vocab`` before being appended. Dividing the window again,
    or appending raw indices, would feed the model values on a different
    scale from the one it was trained on.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns class
            scores for a ``(1, window, 1)`` input.
        network_input: Scaled input patterns; one is picked as the seed.
        pitchnames: Tokens ordered so that position ``i`` is class ``i``.
        n_vocab: Vocabulary size used to scale predicted indices.
        n_notes: Number of tokens to generate (default 500).

    Returns:
        A list of ``n_notes`` generated tokens.

    Raises:
        ValueError: If ``network_input`` contains no patterns.
    """
    if len(network_input) == 0:
        raise ValueError("network_input must contain at least one seed pattern")

    int_to_note = dict(enumerate(pitchnames))

    # randint's upper bound is exclusive, so this covers every pattern and
    # also works when there is exactly one.
    start = np.random.randint(0, len(network_input))
    pattern = np.asarray(network_input[start], dtype=float).ravel()

    prediction_output = []
    for _ in range(n_notes):
        prediction_input = pattern.reshape(1, len(pattern), 1)
        prediction = model.predict(prediction_input, verbose=0)
        index = int(np.argmax(prediction))
        prediction_output.append(int_to_note[index])
        # Slide the window: drop the oldest value, append the new one in the
        # same scaled space as the rest of the window.
        pattern = np.append(pattern[1:], index / float(n_vocab))
    return prediction_output

# Convert generated notes to MIDI
def create_midi(prediction_output, output_file="output.mid"):
    """Write a list of generated tokens to a MIDI file.

    A token that contains ``'.'`` or consists only of digits is treated as a
    chord: it is split on ``'.'`` and each piece is converted with ``int``
    and turned into a ``note.Note``. Any other token is passed directly to
    ``note.Note``. Every note gets a ``Piano`` stored instrument, and
    consecutive elements are placed 0.5 offset units apart, starting at 0.

    Args:
        prediction_output: Iterable of string tokens.
        output_file: Path of the MIDI file to write.
    """
    step = 0.5
    position = 0
    elements = []
    for token in prediction_output:
        looks_like_chord = token.isdigit() or '.' in token
        if not looks_like_chord:
            element = note.Note(token)
            element.storedInstrument = instrument.Piano()
        else:
            members = []
            for pitch_number in token.split('.'):
                member = note.Note(int(pitch_number))
                member.storedInstrument = instrument.Piano()
                members.append(member)
            element = chord.Chord(members)
        element.offset = position
        elements.append(element)
        position += step
    stream.Stream(elements).write('midi', fp=output_file)

# Main execution
# Folder that is scanned for training MIDI files.
DATA_FOLDER = "lofi-music"

# Extract tokens, derive the vocabulary, and build the training arrays.
notes = get_notes(DATA_FOLDER)
pitchnames = sorted(set(notes))
n_vocab = len(pitchnames)
network_input, network_output = prepare_sequences(notes, n_vocab)

# Train the network on the prepared sequences.
model = build_model(network_input, n_vocab)
model.fit(x=network_input, y=network_output, batch_size=64, epochs=100)

# Generate a new sequence and write it with create_midi's default file name.
prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
create_midi(prediction_output)
print("Music generated and saved as output.mid!")

  super().__init__(**kwargs)


Epoch 1/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 171s 2s/step - loss: 4.6001
Epoch 2/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 195s 3s/step - loss: 4.1217
Epoch 3/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 245s 3s/step - loss: 4.0423
Epoch 4/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 250s 3s/step - loss: 4.0047
Epoch 5/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 250s 3s/step - loss: 4.0074
Epoch 6/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 250s 3s/step - loss: 4.0000
Epoch 7/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 251s 3s/step - loss: 4.0797
Epoch 8/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 254s 3s/step - loss: 3.8965
Epoch 9/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 261s 3s/step - loss: 3.8460
Epoch 10/100
77/77 ━━━━━━━━━━━━━━━━━━━━ 265s 3s/step - lo