In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
from pathlib import Path

2023-12-14 12:04:19.057457: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-14 12:04:19.093779: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-14 12:04:19.093818: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-14 12:04:19.094559: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-14 12:04:19.099895: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-14 12:04:19.100942: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [2]:
DOWNLOAD_ROOT = "https://github.com/ageron/handson-ml2/raw/master/datasets/jsb_chorales/"
FILENAME = "jsb_chorales.tgz"
filepath = keras.utils.get_file(FILENAME, DOWNLOAD_ROOT + FILENAME, 
                     cache_subdir="datasets/jsb_chorales", 
                     extract=True)

In [3]:
jsb_chorales_dir = Path(filepath).parent

In [4]:
train_files = sorted([str(file) for file in jsb_chorales_dir.glob("train/chorale_*")])
test_files = sorted([str(file) for file in jsb_chorales_dir.glob("test/chorale_*")])
val_files = sorted([str(file) for file in jsb_chorales_dir.glob("valid/chorale_*")])

In [5]:
import pandas as pd

In [6]:
train_files

['/home/nick/.keras/datasets/jsb_chorales/train/chorale_000.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_001.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_002.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_003.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_004.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_005.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_006.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_007.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_008.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_009.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_010.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_011.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_012.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_013.csv',
 '/home/nick/.keras/datasets/jsb_chorales/train/chorale_014.csv',
 '/home/ni

In [7]:
def load_chorales(filepaths):
    return [pd.read_csv(filepath).values.tolist() for filepath in filepaths]

train_chorales = load_chorales(train_files)
test_chorales = load_chorales(test_files)
val_chorales = load_chorales(val_files)

In [8]:
notes = set()
for chorales in (train_chorales, test_chorales, val_chorales):
    for chorale in chorales:
        for chord in chorale:
            notes = notes | set(chord)
            
n_notes = len(notes)
min_note = min(notes - {0})
max_note = max(notes)

assert min_note == 36
assert max_note == 81

In [9]:
def create_target(batch):
    X = batch[:, :-1]
    Y = batch[:, 1:] # predict next note in each arpegio, at each step
    return X, Y

def preprocess(window):
    """
    Converts the values of the notes from 36-81 to 1-46
    """
    window = tf.where(window == 0, window, window - min_note + 1) # every note that is not 0 reduce by 36 + 1(for 0 note)
    return tf.reshape(window, [-1]) # convert to arpegio

def bach_dataset(chorales, batch_size=32, shuffle_buffer_size=None,
                 window_size=32, window_shift=16, cache=True):
    
    def batch_window(window):
        return window.batch(window_size + 1)

    def to_windows(chorale):
        dataset = tf.data.Dataset.from_tensor_slices(chorale)
        dataset = dataset.window(window_size + 1, window_shift, drop_remainder=True)
        return dataset.flat_map(batch_window)

    chorales = tf.ragged.constant(chorales, ragged_rank=1)
    dataset = tf.data.Dataset.from_tensor_slices(chorales)
    dataset = dataset.flat_map(to_windows).map(preprocess)
    if cache:
        dataset = dataset.cache()
    if shuffle_buffer_size:
        dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(create_target)
    return dataset.prefetch(1)

In [12]:
train_set = bach_dataset(train_chorales, shuffle_buffer_size=1000)
valid_set = bach_dataset(val_chorales)
test_set = bach_dataset(test_chorales)
for i in test_set.take(1):
    print(i)

(<tf.Tensor: shape=(32, 131), dtype=int32, numpy=
array([[30, 25, 22, ..., 37, 32, 25],
       [37, 30, 25, ..., 41, 32, 29],
       [37, 32, 25, ..., 34, 30, 25],
       ...,
       [39, 34, 31, ..., 41, 31, 26],
       [38, 33, 21, ..., 38, 29, 22],
       [41, 31, 26, ..., 38, 29, 21]], dtype=int32)>, <tf.Tensor: shape=(32, 131), dtype=int32, numpy=
array([[25, 22, 18, ..., 32, 25, 17],
       [30, 25, 10, ..., 32, 29, 25],
       [32, 25, 17, ..., 30, 25, 18],
       ...,
       [34, 31, 15, ..., 31, 26, 23],
       [33, 21, 18, ..., 29, 22, 22],
       [31, 26, 23, ..., 29, 21, 17]], dtype=int32)>)


2023-12-14 12:04:43.329359: W tensorflow/core/kernels/data/cache_dataset_ops.cc:858] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [11]:
n_dims = 5
model = keras.models.Sequential([
    keras.layers.Embedding(input_dim=n_notes, output_dim=n_dims, input_shape=[None]),
    keras.layers.Conv1D(32, kernel_size=2, padding="causal", activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(48, kernel_size=2, padding="causal", activation="relu", dilation_rate=2),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(64, kernel_size=2, padding="causal",activation="relu", dilation_rate=4),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(96, kernel_size=2, padding="causal", activation="relu", dilation_rate=8),
    keras.layers.BatchNormalization(),
    keras.layers.LSTM(256, return_sequences=True),
    keras.layers.Dense(n_notes, activation="softmax")
])

In [None]:
optimizer = keras.optimizers.Nadam(learning_rate=1e-3)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
              metrics=["accuracy"])
model.fit(train_set, epochs=20, validation_data=valid_set)

Epoch 1/20


In [None]:
def generate_chorale_v2(model, seed_chords, length, temperature=1):
    arpegio = preprocess(tf.constant(seed_chords, dtype=tf.int64))
    arpegio = tf.reshape(arpegio, [1, -1])
    for chord in range(length):
        for note in range(4):
            next_note_probas = model.predict(arpegio)[0, -1:]
            rescaled_logits = tf.math.log(next_note_probas) / temperature
            next_note = tf.random.categorical(rescaled_logits, num_samples=1)
            arpegio = tf.concat([arpegio, next_note], axis=1)
    arpegio = tf.where(arpegio == 0, arpegio, arpegio + min_note - 1)
    return tf.reshape(arpegio, shape=[-1, 4])