In [3]:
import logging
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import argparse 
from pathlib import Path


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()

%config Completer.use_jedi = False # make autocompletion works in jupyter

args = argparse.Namespace()
args.data_folder = './data/bach-next-note/'
args.train_folder = args.data_folder + 'train/'
args.val_folder = args.data_folder + 'valid/'
args.test_folder = args.data_folder + 'test/'
# args.train_fraction = 0.8
args.seed = 101
args.batch_size = 32
args.epochs = 7

paths = Path(args.train_folder).glob('**/chorale_*.csv')
train_np_list = [pd.read_csv(p).values.tolist() for p in paths]
# print(len(train_np_list[0]))

paths = Path(args.val_folder).glob('**/chorale_*.csv')
val_np_list = [pd.read_csv(p).values.tolist() for p in paths]

paths = Path(args.test_folder).glob('**/chorale_*.csv')
test_np_list = [pd.read_csv(p).values.tolist() for p in paths]

print(train_np_list[0])

[[66, 61, 57, 54], [66, 61, 57, 54], [68, 61, 59, 54], [68, 61, 59, 54], [69, 66, 61, 54], [69, 66, 61, 56], [69, 66, 61, 57], [69, 66, 61, 59], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 59, 49], [68, 65, 59, 49], [66, 66, 57, 50], [66, 66, 57, 50], [66, 66, 57, 50], [66, 66, 57, 50], [66, 66, 59, 50], [66, 66, 59, 50], [68, 66, 59, 50], [68, 66, 59, 50], [69, 66, 61, 49], [69, 66, 61, 49], [69, 66, 61, 47], [69, 66, 61, 47], [68, 65, 61, 49], [68, 65, 61, 49], [68, 65, 59, 49], [68, 65, 59, 49], [66, 61, 57, 42], [66, 61, 57, 42], [66, 61, 57, 42], [66, 61, 57, 42], [73, 66, 57, 54], [73, 66, 57, 54], [73, 66, 57, 54], [73, 66, 57, 54], [71, 66, 59, 56], [71, 66, 59, 56], [71, 65, 59, 56], [71, 65, 59, 56], [69, 66, 61, 57], [69, 66, 61, 57], [69, 66, 61, 59], [69, 66, 61, 59], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 61, 61], [68, 65, 61, 49], [68, 65, 61, 49], [68, 65, 61, 49], [68, 65, 61, 49], [69, 66, 61, 54], [69, 66, 61, 54], [69, 66, 61, 52], [69, 66, 

In [11]:
def build_dataset(chorales):
    min_len = len(min(chorales, key=len))
    print(min_len)
    
    def reshape(chorale):
        chorale = tf.reshape(chorale.to_tensor(), [-1])
        return chorale
    def trunc(chorale):
        return chorale[:min_len+1]
        
    def target(chorale):
        X = chorale[:-1]
        Y = chorale[-1]
        return X, Y
    
    ragged_chorales = tf.ragged.constant(chorales)
    dataset = tf.data.Dataset.from_tensor_slices(ragged_chorales)
    dataset = dataset.map(reshape)
    dataset = dataset.map(trunc)
    dataset = dataset.map(target)
#     dataset = dataset.batch(3)
    for d in dataset:
        tf.print(d)
    return dataset

        
# train_dataset = build_dataset(train_np_list)
# val_dataset = build_dataset(val_np_list)
# test_dataset = build_dataset(test_np_list)
build_dataset(train_np_list)

100
([66 61 57 ... 65 61 49], 68)
([64 59 55 ... 67 64 49], 69)
([69 65 62 ... 65 60 60], 67)
([62 57 53 ... 68 64 52], 72)
([67 62 59 ... 64 60 48], 69)
([74 71 67 ... 67 62 55], 74)
([74 69 65 ... 67 60 60], 76)
([69 64 61 ... 66 59 51], 69)
([69 64 61 ... 68 59 52], 76)
([71 67 64 ... 71 61 52], 73)
([63 58 55 ... 63 57 42], 69)
([71 66 62 ... 67 64 52], 71)
([69 64 60 ... 64 57 52], 71)
([73 69 64 ... 64 56 52], 74)
([71 64 56 ... 64 64 48], 67)
([67 62 58 ... 62 55 50], 69)
([71 66 62 ... 68 65 61], 73)
([67 62 58 ... 62 54 50], 69)
([69 66 62 ... 66 57 50], 74)
([64 60 57 ... 57 59 53], 62)
([62 57 53 ... 69 65 57], 74)
([66 62 59 ... 69 64 45], 73)
([67 62 59 ... 67 59 43], 74)
([65 60 57 ... 70 65 50], 74)
([71 66 62 ... 71 66 59], 74)
([70 65 62 ... 65 60 60], 67)
([67 62 59 ... 67 57 45], 73)
([74 65 58 ... 69 65 53], 72)
([69 64 60 ... 67 55 55], 71)
([67 62 58 ... 62 55 50], 69)
([72 67 64 ... 67 59 55], 74)
([66 61 57 ... 68 59 56], 76)
([67 62 58 ... 62 55 46], 62)
([65 6

<MapDataset shapes: ((None,), ()), types: (tf.int32, tf.int32)>

In [25]:
model = keras.models.Sequential([
    keras.layers.SimpleRNN(1, input_shape=[None, 1])
])
model.compile(loss='mse',
             optimizer=keras.optimizers.Nadam(learning_rate=0.01),
             metrics=[keras.metrics.mean_squared_error])
model.fit(train_dataset, epochs=1, validation_data=val_dataset)

ValueError: in user code:

    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:855 train_function  *
        return step_function(self, iterator)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:845 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:838 run_step  **
        outputs = model.train_step(data)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 train_step
        y_pred = self(x, training=True)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1013 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /Users/mkhokhlush/github/ml-experiments/.venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:215 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_1 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 1)
