# Get the data

In [3]:
from get_notes import NotesParser
import pickle
import os

In [4]:
# Corpus directory, and the pickled cache of parsed notes kept alongside it
DATA_PATH = '../../data/Chopin'
notes_file_path = os.path.join(DATA_PATH, 'notes')
notes = []

# Only run the parser when no cache file exists yet; afterwards the cached
# result is reused so the corpus does not have to be parsed again.
if not os.path.exists(notes_file_path):
    parser = NotesParser(DATA_PATH)
    notes = parser.get_notes_from_files()

    # Persist the freshly parsed notes for later runs
    with open(notes_file_path, 'wb') as cache_out:
        pickle.dump(notes, cache_out)
else:
    # NOTE(review): pickle.load can execute arbitrary code from a tampered file;
    # only load a cache that this notebook produced itself.
    with open(notes_file_path, 'rb') as cache_in:
        notes = pickle.load(cache_in)

Parsing 111 files from Chopin
1/111 Chopin, Frédéric, Nocturnes, Op.55, e3yrEEM5j_s.mid
2/111 Chopin, Frédéric, Variations in A major, B.37, asJtkFFWHzs.mid
3/111 Chopin, Frédéric, Waltzes, Op.64, hOcryGEw1NY.mid
4/111 Chopin, Frédéric, Variations brillantes, Op.12, m7Sxe1y0U7M.mid
5/111 Chopin, Frédéric, Casta Diva, 4aojKDKm1BE.mid
6/111 Chopin, Frédéric, Bolero, Op.19, 8XQXE7SeYkc.mid
7/111 Chopin, Frédéric, Ballade No.3, Op.47, BkPLDoZXlHQ.mid
8/111 Lysenko, Mykola, Impromptu in the Style of Chopin, Op.38, FRbprtd1WfU.mid
9/111 Chopin, Frédéric, Mazurkas, Op.68, -2wScu6HtcY.mid
10/111 Chopin, Frédéric, Piano Sonata No.2, Op.35, hZY5DBmgC_A.mid
11/111 Chopin, Frédéric, Mazurka in C major, B.82, lhPMZyfdQbQ.mid
12/111 Chopin, Frédéric, Largo in E-flat major, B.109, ZuayOEDAKKc.mid
13/111 Chopin, Frédéric, Waltz in A-flat major, Op.42, 4b9vilZx4dc.mid
14/111 Chopin, Frédéric, Mazurkas, Op.63, 8wa4w6lfrpw.mid
15/111 Chopin, Frédéric, Mazurkas, Op.30, H3s-NCcC8sM.mid
16/111 Chopin, Frédé

# Set up the model

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Activation
from keras.utils import to_categorical
from keras.layers import BatchNormalization
from keras.callbacks import ModelCheckpoint
from music21 import converter, note, chord
import numpy as np

### Define hyperparameters

In [None]:
# Number of consecutive notes in each input window fed to the network
SEQUENCE_LENGTH = 100
# Number of passes over the training data given to model.fit (tune as needed)
EPOCHS = 1
# Number of samples per gradient update given to model.fit (tune as needed)
BATCH_SIZE = 128

### Prepare sequences for LSTM

In [None]:
# Vocabulary: every distinct note symbol, in sorted order, mapped to an integer id
# (and the inverse mapping for decoding predictions back into symbols).
unique_notes = sorted(set(notes))
note_to_int = {symbol: index for index, symbol in enumerate(unique_notes)}
int_to_note = {index: symbol for index, symbol in enumerate(unique_notes)}

# Slide a SEQUENCE_LENGTH-wide window over the corpus. Each window, encoded as
# integer ids, is one training input ...
input_sequences = [
    [note_to_int[symbol] for symbol in notes[start:start + SEQUENCE_LENGTH]]
    for start in range(len(notes) - SEQUENCE_LENGTH)
]

# ... and the note immediately following that window is its training target.
output_sequences = [
    note_to_int[notes[start + SEQUENCE_LENGTH]]
    for start in range(len(notes) - SEQUENCE_LENGTH)
]

In [None]:
# Report how many distinct note symbols the vocabulary contains
vocabulary_size = len(unique_notes)
print(vocabulary_size)

### Configure the model

In [None]:
# Vocabulary size: scales the inputs and fixes the width of the one-hot targets
# and of the output layer.
n_vocab = len(unique_notes)

# Arrange the integer-encoded windows as (samples, SEQUENCE_LENGTH, 1) and scale
# the ids (0 .. n_vocab - 1) into the range [0, 1).
x = np.reshape(input_sequences, (len(input_sequences), SEQUENCE_LENGTH, 1))
x = x / float(n_vocab)

# One-hot encode the targets. num_classes is pinned to the vocabulary size:
# left unset, to_categorical infers the width from the largest id present in
# output_sequences, so the targets (and the softmax layer sized from them) would
# be narrower than the vocabulary whenever the highest-numbered note never
# occurs as a target.
y = to_categorical(output_sequences, num_classes=n_vocab)

# Three stacked 512-unit LSTM layers. The first two return the full sequence so
# the next LSTM receives one vector per time step; the last returns only its
# final output.
model = Sequential()
model.add(LSTM(512, input_shape=(x.shape[1], x.shape[2]), return_sequences=True))
model.add(LSTM(512, return_sequences=True))
model.add(LSTM(512))

# Dense head: 256 ReLU units, batch normalisation, 30% dropout
model.add(Dense(256))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))

# Output layer: one softmax probability per note in the vocabulary
model.add(Dense(n_vocab, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Checkpoint at the end of an epoch, but only when the training loss improved.
# The file name carries the zero-padded epoch number.
check_point_path = "model/cp_{epoch:04d}.keras"
check_point = ModelCheckpoint(filepath=check_point_path, save_best_only=True, monitor='loss', save_freq='epoch')


### Train the model

In [None]:
# Train on the prepared windows; the checkpoint callback is passed in so it runs
# as part of training.
model.fit(
    x,
    y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[check_point],
)

In [None]:
# Reload the generate module so edits made to it on disk take effect without
# restarting the kernel, then run generation with the trained model.
import importlib
import generate

generate = importlib.reload(generate)

# NOTE(review): "out.mid" is presumably the output file and 200 the number of
# notes to generate -- confirm against generate.generate's signature.
generate.generate(
    model,
    input_sequences,
    int_to_note,
    unique_notes,
    "out.mid",
    200,
)