In [4]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras

def dsprint(ds, num=1):
  # print(len(ds))
  for item in ds.take(num):
    print(item)

In [263]:
# Exercise 10
from IPython.display import Audio

def notes_to_frequencies(notes):
  # Frequency doubles when you go up one octave; there are 12 semi-tones
  # per octave; Note A on octave 4 is 440 Hz, and it is note number 69.
  return 2 ** ((np.array(notes) - 69) / 12) * 440

def frequencies_to_samples(frequencies, tempo, sample_rate):
  note_duration = 60 / tempo # the tempo is measured in beats per minutes
  # To reduce click sound at every beat, we round the frequencies to try to
  # get the samples close to zero at the end of each note.
  frequencies = np.round(note_duration * frequencies) / note_duration
  n_samples = int(note_duration * sample_rate)
  time = np.linspace(0, note_duration, n_samples)
  sine_waves = np.sin(2 * np.pi * frequencies.reshape(-1, 1) * time)
  # Removing all notes with frequencies ≤ 9 Hz (includes note 0 = silence)
  sine_waves *= (frequencies > 9.).reshape(-1, 1)
  return sine_waves.reshape(-1)

def chords_to_samples(chords, tempo, sample_rate):
  freqs = notes_to_frequencies(chords)
  freqs = np.r_[freqs, freqs[-1:]] # make last note a bit longer
  merged = np.mean([frequencies_to_samples(melody, tempo, sample_rate)
                    for melody in freqs.T], axis=0)
  n_fade_out_samples = sample_rate * 60 // tempo # fade out last note
  fade_out = np.linspace(1., 0., n_fade_out_samples)**2
  merged[-n_fade_out_samples:] *= fade_out
  return merged

def play_chords(chords, tempo=160, amplitude=0.1, sample_rate=44100, filepath=None):
  samples = amplitude * chords_to_samples(chords, tempo, sample_rate)
  if filepath:
    from scipy.io import wavfile
    samples = (2**15 * samples).astype(np.int16)
    wavfile.write(filepath, sample_rate, samples)
    return display(Audio(filepath))
  else:
    return display(Audio(samples, rate=sample_rate))

from pandas import read_csv

min_note = 36
max_note = 82
category_count = (max_note - min_note) + 2

def read_file_to_ds(filename, number_steps=128, batch_size=32):
  X = tf.convert_to_tensor(read_csv(str(filename)).values, tf.int8)
  X = tf.reshape(X, (len(X) * 4, 1))
  X = tf.where(X == 0, X, (X - min_note) + 2)
  Y = X[1:]
  Y = tf.convert_to_tensor(np.append(Y, [[0]], axis=0))
  X = [X[i:number_steps + i] for i in range(len(X) - number_steps)]
  Y = [Y[i:number_steps + i] for i in range(len(Y) - number_steps)]
  return tf.data.Dataset.from_tensor_slices((X, Y))

def load_set(dir, batch_size=32):
  filenames = [os.path.join(dir, filename) for filename in os.listdir(dir)]
  ds_list = [read_file_to_ds(file) for file in filenames]
  ds = tf.data.Dataset.from_tensor_slices(ds_list)
  ds = ds.interleave(
    lambda x: x,
    cycle_length=len(filenames),
    block_length=1,
  ).shuffle(len(filenames) * 10)
  return ds.batch(batch_size).prefetch(1)

In [264]:
train = load_set('../data/jsb_chorales/train/')
val = load_set('../data/jsb_chorales/valid/')
test = load_set('../data/jsb_chorales/test/')

dsprint(train)

(<tf.Tensor: shape=(32, 128, 1), dtype=int8, numpy=
array([[[16],
        [35],
        [28],
        ...,
        [35],
        [28],
        [31]],

       [[31],
        [28],
        [16],
        ...,
        [23],
        [19],
        [35]],

       [[35],
        [32],
        [28],
        ...,
        [20],
        [13],
        [40]],

       ...,

       [[35],
        [27],
        [23],
        ...,
        [29],
        [25],
        [13]],

       [[31],
        [28],
        [24],
        ...,
        [24],
        [19],
        [38]],

       [[37],
        [33],
        [30],
        ...,
        [32],
        [25],
        [17]]], dtype=int8)>, <tf.Tensor: shape=(32, 128, 1), dtype=int64, numpy=
array([[[35],
        [28],
        [19],
        ...,
        [28],
        [31],
        [16]],

       [[28],
        [16],
        [35],
        ...,
        [19],
        [35],
        [35]],

       [[32],
        [28],
        [40],
        ...,
        [13],
        

In [206]:
for item in train.take(1):
  play_chords(item[0])

In [265]:
def last_time_step_mse(Y_true, Y_pred):
  return keras.metrics.mean_squared_error(Y_true[:, -1], Y_pred[:, -1])

# dsprint(train)

model = keras.models.Sequential()
# model.add(keras.layers.InputLayer(input_shape=[None, 1]))
model.add(keras.layers.Embedding(input_dim=category_count, output_dim=5))
model.add(keras.layers.BatchNormalization())
for dilation in [1, 2, 4, 8, 16] * 2:
  model.add(keras.layers.Conv1D(filters=dilation * 10, kernel_size=2, padding='causal', activation='relu', dilation_rate=dilation))
  model.add(keras.layers.BatchNormalization())
model.add(keras.layers.LSTM(100, return_sequences=True))
model.add((keras.layers.Dense(category_count, activation='softmax')))

model.compile(
  loss=keras.losses.sparse_categorical_crossentropy,
  metrics=[keras.metrics.sparse_categorical_accuracy],
  optimizer=keras.optimizers.Nadam()
)

# print(tf.argmax(model.predict(batch), axis=1))

model.fit(train, validation_data=val, epochs=8, callbacks=[
  keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
  keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=1),
])                                                        

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
   8/5988 [..............................] - ETA: 10:02 - loss: 0.9352 - sparse_categorical_accuracy: 0.7948

KeyboardInterrupt: 

In [266]:
model.evaluate(test)



[1.2584731578826904, 0.74320387840271]

In [None]:
model.fit(train, validation_data=val, epochs=8, callbacks=[
  keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
  keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=1),
])

In [267]:
pred = tf.cast(tf.math.round(model.predict(test)), tf.int8)
print(pred)

    345/Unknown - 11s 31ms/step

KeyboardInterrupt: 

In [268]:
for item in test.take(1):
  preds = model.predict(item[0])
  print(item[1])
  print(tf.cast(tf.math.round(preds[0]), tf.int8))

tf.Tensor(
[[[14]
  [33]
  [29]
  ...
  [33]
  [28]
  [25]]

 [[35]
  [31]
  [28]
  ...
  [30]
  [23]
  [14]]

 [[26]
  [23]
  [38]
  ...
  [23]
  [38]
  [32]]

 ...

 [[35]
  [32]
  [23]
  ...
  [30]
  [25]
  [21]]

 [[37]
  [32]
  [28]
  ...
  [32]
  [24]
  [18]]

 [[27]
  [20]
  [35]
  ...
  [16]
  [37]
  [34]]], shape=(32, 128, 1), dtype=int64)
tf.Tensor(
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]], shape=(128, 48), dtype=int8)


In [328]:
def convert_preds(preds):
  conv = preds
  conv = preds + min_note - 2
  conv = tf.reshape(conv, (conv.shape[1] // 4, 4))
  return conv

def predict_note(prev_notes):
  return tf.cast(tf.math.round(model.predict(prev_notes, verbose=0)), tf.int8)

In [331]:
total_notes = 640
first_note_count = 4 * 64

first_note = None
for item in test.take(1):
  first_note = item[0][0][:first_note_count]

current = first_note
new_notes = first_note
for i in range(total_notes - first_note_count):
  current = tf.argmax(predict_note(tf.convert_to_tensor([new_notes]))[-1][-1])
  new_notes = np.append(new_notes, [[current]], axis=0)
  print(i + 1, end='\r')

song = convert_preds(tf.convert_to_tensor([new_notes]))
print(song)
  

tf.Tensor(
[[67 62 55 70]
 [67 62 55 70]
 [67 62 55 70]
 [67 62 55 70]
 [65 62 50 70]
 [65 62 50 70]
 [65 60 50 70]
 [65 60 50 67]
 [63 58 51 67]
 [63 58 51 69]
 [63 58 53 69]
 [63 58 53 70]
 [62 58 55 70]
 [62 58 55 72]
 [62 58 57 72]
 [62 58 57 74]
 [62 53 58 74]
 [62 53 58 74]
 [62 53 58 74]
 [62 53 58 72]
 [63 55 51 72]
 [63 55 51 72]
 [63 55 51 72]
 [63 55 51 72]
 [63 57 53 72]
 [62 57 53 72]
 [63 57 53 72]
 [63 57 53 70]
 [62 53 46 70]
 [62 53 46 70]
 [62 53 46 70]
 [62 53 46 70]
 [62 53 46 70]
 [62 53 46 70]
 [62 53 46 70]
 [62 53 46 34]
 [67 58 34 75]
 [67 58 46 75]
 [67 60 46 75]
 [67 60 46 75]
 [66 60 45 75]
 [66 60 45 75]
 [67 60 45 75]
 [67 60 45 75]
 [67 60 45 75]
 [67 60 45 75]
 [67 60 45 75]
 [67 60 45 75]
 [67 60 45 75]
 [67 34 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]
 [67 58 46 75]

In [332]:
play_chords(song, tempo=320)