In [1]:
import logging
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse 
from pathlib import Path


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()

%config Completer.use_jedi = False # make autocompletion works in jupyter

args = argparse.Namespace()
args.data_folder = './data/bach-next-note/'
args.train_folder = args.data_folder + 'train/'
args.val_folder = args.data_folder + 'valid/'
args.test_folder = args.data_folder + 'test/'
# args.train_fraction = 0.8
args.seed = 101
args.batch_size = 32
args.epochs = 7

paths = Path(args.train_folder).glob('**/chorale_*.csv')
train_np_list = [pd.read_csv(p).values.tolist() for p in paths]
# print(len(train_np_list[0]))

paths = Path(args.val_folder).glob('**/chorale_*.csv')
val_np_list = [pd.read_csv(p).values.tolist() for p in paths]

paths = Path(args.test_folder).glob('**/chorale_*.csv')
test_np_list = [pd.read_csv(p).values.tolist() for p in paths]

print(train_np_list[0])

[[66, 61, 57, 54], [66, 61, 57, 54], [68, 61, 59, 54], [68, 61, 59, 54], [69, 66, 61, 54], [69, 66, 61, 56], [69, 66, 61, 57], [69, 66, 61, 59], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 59, 49], [68, 65, 59, 49], [66, 66, 57, 50], [66, 66, 57, 50], [66, 66, 57, 50], [66, 66, 57, 50], [66, 66, 59, 50], [66, 66, 59, 50], [68, 66, 59, 50], [68, 66, 59, 50], [69, 66, 61, 49], [69, 66, 61, 49], [69, 66, 61, 47], [69, 66, 61, 47], [68, 65, 61, 49], [68, 65, 61, 49], [68, 65, 59, 49], [68, 65, 59, 49], [66, 61, 57, 42], [66, 61, 57, 42], [66, 61, 57, 42], [66, 61, 57, 42], [73, 66, 57, 54], [73, 66, 57, 54], [73, 66, 57, 54], [73, 66, 57, 54], [71, 66, 59, 56], [71, 66, 59, 56], [71, 65, 59, 56], [71, 65, 59, 56], [69, 66, 61, 57], [69, 66, 61, 57], [69, 66, 61, 59], [69, 66, 61, 59], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 61, 49], [68, 65, 61, 49], [68, 65, 61, 49], [68, 65, 61, 49], [69, 66, 61, 54], [69, 66, 61, 54], [69, 66, 61, 52], [69, 66, 

 ### Baseline

In [2]:
def baseline():
    def build_dataset_baseline(chorales):
        min_len = len(min(chorales, key=len))
        print(min_len)

        def reshape(chorale):
            chorale = tf.reshape(chorale.to_tensor(), [-1, 1])
            return chorale
        def trunc(chorale):
            return chorale[:min_len+1]

        def target(chorale):
            X = chorale[:-1]
            Y = chorale[-1]
            return X, Y

        ragged_chorales = tf.ragged.constant(chorales)
        dataset = tf.data.Dataset.from_tensor_slices(ragged_chorales)
        dataset = dataset.map(reshape)
        dataset = dataset.map(trunc)
        dataset = dataset.map(target)
        dataset = dataset.batch(32)
    #     for d in dataset:
    #         tf.print(d)
    #         break
        return dataset


    train_dataset = build_dataset_baseline(train_np_list)
    val_dataset = build_dataset_baseline(val_np_list)
    test_dataset = build_dataset_baseline(test_np_list)
    model = keras.models.Sequential([
        keras.layers.SimpleRNN(1, input_shape=[None, 1])
    ])
    model.compile(loss='mse',
                 optimizer=keras.optimizers.Nadam(learning_rate=0.01),
                 metrics=[keras.metrics.mean_squared_error])
    model.fit(train_dataset, epochs=args.epochs, validation_data=val_dataset)
    
    model = keras.models.Sequential([
        keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
        keras.layers.SimpleRNN(20, return_sequences=True),
        keras.layers.SimpleRNN(1),
    ])
    model.compile(loss='mse',
                 optimizer=keras.optimizers.Nadam(learning_rate=0.01),
                 metrics=[keras.metrics.mean_squared_error])
    model.fit(train_dataset, epochs=args.epochs, validation_data=val_dataset)
    
baseline()

100


2021-10-04 22:20:43.366175: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


128
128


2021-10-04 22:20:44.023725: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


### Sparse categorical entropy

In [3]:
import sys

def min_max_notes(all_dataset_chorales=()):
    min_note = None
    max_note = None
    def _min_max_notes():
        nonlocal min_note
        nonlocal max_note
        if min_note:
            return min_note, max_note
        min_note = sys.maxsize
        max_note = -sys.maxsize - 1
        for chorales in all_dataset_chorales:
            for ch in chorales:
                ch = np.array(ch)
                min_note = ch[ch > 0].min() if ch[ch > 0].min() < min_note else min_note
                max_note = ch.max() if ch.max() > max_note else max_note
            
        return min_note, max_note

    return _min_max_notes()

min_note, max_note = min_max_notes((train_np_list, val_np_list, test_np_list))
(min_note, max_note)

(36, 81)

In [9]:
def sparse_categorical_entropy():
    def build_dataset_sce(chorales):

        min_len = len(min(chorales, key=len))
        print(min_len)

        def reshape(chorale):
            chorale = tf.reshape(chorale.to_tensor(), [-1, 1])
            return chorale
        def trunc(chorale):
            return chorale[:min_len+1]
        def transform(chorale):
            return tf.where(chorale == 0, 0, chorale - min_note)
        def target(chorale):
            X = chorale[:-1]
            Y = chorale[-1]
            return X, Y

        ragged_chorales = tf.ragged.constant(chorales)
        dataset = tf.data.Dataset.from_tensor_slices(ragged_chorales)
        dataset = dataset.map(reshape)
        dataset = dataset.map(trunc)
        dataset = dataset.map(transform)
        dataset = dataset.map(target)
        dataset = dataset.batch(32)
        for d in dataset:
            tf.print(d[0].shape)
            tf.print(d[1].shape)
            break
        return dataset


    train_dataset = build_dataset_sce(train_np_list)
    val_dataset = build_dataset_sce(val_np_list)
    test_dataset = build_dataset_sce(test_np_list)
    
#     model = keras.models.Sequential([
#         keras.layers.SimpleRNN(1, input_shape=[None, 1])
#     ])
#     model.compile(loss='sparse_categorical_entropy',
#                  optimizer=keras.optimizers.Nadam(learning_rate=0.01),
#                  metrics=['accuracy'])
#     model.fit(train_dataset, epochs=args.epochs, validation_data=val_dataset)
    
    model = keras.models.Sequential([
        keras.layers.SimpleRNN(20, return_sequences=True, input_shape=[None, 1]),
        keras.layers.SimpleRNN(20, return_sequences=True),
        keras.layers.Dense(1, activation="softmax")
    ])
    model.compile(loss='sparse_categorical_crossentropy',
                 optimizer=keras.optimizers.Nadam(learning_rate=0.01),
                 metrics=['accuracy'])
    model.fit(train_dataset, epochs=50, validation_data=val_dataset)
    
sparse_categorical_entropy()

100
TensorShape([32, 100, 1])
TensorShape([32, 1])
128
TensorShape([32, 128, 1])
TensorShape([32, 1])
128
TensorShape([32, 128, 1])
TensorShape([32, 1])
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
