In [8]:
import tensorflow as tf
import os
from os.path import isfile, join
import numpy as np
import shutil
from tensorflow import keras
from pathlib import Path
from IPython.display import display, Audio
import subprocess
from tqdm import tqdm

In [2]:
DATASET_ROOT = os.path.join( "dataset/16000_pcm_speeches")

my_audios_folder = 'audio'
my_noises_folder = 'noise'

DATASET_AUDIOS_PATH = os.path.join(DATASET_ROOT, my_audios_folder)
DATASET_NOISES_PATH = os.path.join(DATASET_ROOT, my_noises_folder)


# Percentage of samples to use for validation
VALID_SPLIT = 0.1

# Seed to use when shuffling the dataset and the noise
SHUFFLE_SEED = 43

# The sampling rate to use.
# This is the one used in all of the audio samples.
# We will resample all of the noise to this sampling rate.
# This will also be the output size of the audio wave samples
# (since all samples are of 1 second long)
SAMPLING_RATE = 16000

# The factor to multiply the noise with according to:
#   noisy_sample = sample + noise * prop * scale
#      where prop = sample_amplitude / noise_amplitude
SCALE = 0.5

BATCH_SIZE = 128
EPOCHS = 30

In [4]:
# If folder `audio`, does not exist, create it, otherwise do nothing
if os.path.exists(DATASET_AUDIOS_PATH) is False:
    os.makedirs(DATASET_AUDIOS_PATH)

# If folder `noise`, does not exist, create it, otherwise do nothing
# MY OWN NOTE : LET's SEE tf.io.gfile
if tf.io.gfile.exists(DATASET_NOISES_PATH) is False:
    tf.io.gfile.makedirs(DATASET_NOISES_PATH)

for folder in os.listdir(DATASET_ROOT):
    if os.path.isdir(os.path.join(DATASET_ROOT, folder)):
        if folder in [my_audios_folder, my_noises_folder]:
            # If folder is `audio` or `noise`, do nothing
            continue
        elif folder in ["other", "_background_noise_"]:
            # If folder is one of the folders that contains noise samples,
            # move it to the `noise` folder
            shutil.move(
                os.path.join(DATASET_ROOT, folder),
                os.path.join(DATASET_NOISES_PATH, folder),
            )
        else:
            # Otherwise, it should be a speaker folder, then move it to
            # `audio` folder
            shutil.move(
                os.path.join(DATASET_ROOT, folder),
                os.path.join(DATASET_AUDIOS_PATH, folder),
            )

In [6]:
noise_paths = []
for subdir in tf.io.gfile.listdir(DATASET_NOISES_PATH):
    subdir_path = Path(DATASET_NOISES_PATH) / subdir
    if os.path.isdir(subdir_path):
        noise_paths += [
            os.path.join(subdir_path, filepath)
            for filepath in os.listdir(subdir_path)
            if filepath.endswith(".wav")
        ]

print(
    "Found {} files belonging to {} directories".format(
        len(noise_paths), len(os.listdir(DATASET_NOISES_PATH))
    )
)

Found 6 files belonging to 2 directories


In [10]:
DATASET_NOISES_PATH

'dataset/16000_pcm_speeches\\noise'

In [12]:
from pydub import AudioSegment as am
for paths in noise_paths:
    resample = am.from_file(paths, format="wav")
    resample = resample.set_frame_rate(16000)
    resample.export(paths, format="wav")

In [13]:
command = (
    "for dir in `ls -1 " + DATASET_NOISES_PATH + "`; do "
    "for file in `ls -1 " + DATASET_NOISES_PATH + "/$dir/*.wav`; do "
    "sample_rate=`ffprobe -hide_banner -loglevel panic -show_streams "
    "$file | grep sample_rate | cut -f2 -d=`; "
    "if [ $sample_rate -ne 16000 ]; then "
    "ffmpeg -hide_banner -loglevel panic -y "
    "-i $file -ar 16000 temp.wav; "
    "mv temp.wav $file; "
    "fi; done; done"
)

os.system(command)

# Split noise into chunks of 16000 each
def load_noise_sample(path):
    sample, sampling_rate = tf.audio.decode_wav(
        tf.io.read_file(path), desired_channels=1
    )
    print(sampling_rate)
    if sampling_rate == SAMPLING_RATE:
        # Number of slices of 16000 each that can be generated from the noise sample
        slices = int(sample.shape[0] / SAMPLING_RATE)
        sample = tf.split(sample[: slices * SAMPLING_RATE], slices)
        return sample
    else:
        print("Sampling rate for {} is incorrect. Ignoring it".format(path))
        return None


noises = []
for path in tqdm(noise_paths):
    sample = load_noise_sample(path)
    if sample:
        noises.extend(sample)
noises = tf.stack(noises)

print(
    "{} noise files were split into {} noise samples where each is {} sec. long".format(
        len(noise_paths), noises.shape[0], noises.shape[1] // SAMPLING_RATE
    )
)

  0%|          | 0/6 [00:00<?, ?it/s]

 67%|██████▋   | 4/6 [00:00<00:00, 27.33it/s]

tf.Tensor(16000, shape=(), dtype=int32)
tf.Tensor(16000, shape=(), dtype=int32)
tf.Tensor(16000, shape=(), dtype=int32)
tf.Tensor(16000, shape=(), dtype=int32)
tf.Tensor(16000, shape=(), dtype=int32)


100%|██████████| 6/6 [00:00<00:00, 28.92it/s]

tf.Tensor(16000, shape=(), dtype=int32)
6 noise files were split into 354 noise samples where each is 1 sec. long





In [14]:
def paths_and_labels_to_dataset(audio_paths, labels):
    """Constructs a dataset of audios and labels."""
    # giving an array so we can create tf.data.Dataset.from_tensor_slice(list)
    path_ds = tf.data.Dataset.from_tensor_slices(audio_paths)
    audio_ds = path_ds.map(lambda x : path_to_audio(x))
    label_ds = tf.data.Dataset.from_tensor_slices(labels)
    return tf.data.Dataset.zip((audio_ds, label_ds))


def path_to_audio(path):
    """Reads and decodes an audio file."""
      # tf.audio.decode_wav(
#     contents, desired_channels=-1, desired_samples=-1, name=None
# )
    audio = tf.io.read_file(path)
    audio, _ = tf.audio.decode_wav(audio, 1, SAMPLING_RATE)
    return audio


#TODO. Need to print each part for one sample
def add_noise(audio, noises=None, scale=0.5):
    if noises is not None:
        # Create a random tensor of the same size as audio ranging from
        # 0 to the number of noise stream samples that we have.
        tf_rnd = tf.random.uniform(
            (tf.shape(audio)[0],), 0, noises.shape[0], dtype=tf.int32
        )
        # https://www.geeksforgeeks.org/python-tensorflow-gather/ این ایندکس هایی که میگمو از این ارایه به من بده
        noise = tf.gather(noises, tf_rnd, axis=0)

        # Get the amplitude proportion between the audio and the noise
        prop = tf.math.reduce_max(audio, axis=1) / tf.math.reduce_max(noise, axis=1)
        prop = tf.repeat(tf.expand_dims(prop, axis=1), tf.shape(audio)[1], axis=1)

        # Adding the rescaled noise to audio
        audio = audio + noise * prop * scale

    return audio

# TODO. shape of output.   each line output
def audio_to_fft(audio):
    # Since tf.signal.fft applies FFT on the innermost dimension,
    # we need to squeeze the dimensions and then expand them again
    # after FFT
    audio = tf.squeeze(audio, axis=-1)
    # https://blog.faradars.org/complex-numbers/   برای محاسبه فوریه باید ورودی بصورت اعداد مختلط باشن
    fft = tf.signal.fft(
        tf.cast(tf.complex(real=audio, imag=tf.zeros_like(audio)), tf.complex64)
    )
    fft = tf.expand_dims(fft, axis=-1)

    # Return the absolute value of the first half of the FFT
    # which represents the positive frequencies
    return tf.math.abs(fft[:, : (audio.shape[1] // 2), :])


In [16]:
audio = tf.io.read_file('dataset/16000_pcm_speeches/audio/Benjamin_Netanyau/0.wav')
audio, _ = tf.audio.decode_wav(audio, 1, SAMPLING_RATE)

# what is this fft doing ? 
# https://stackoverflow.com/questions/31662854/why-should-i-discard-half-of-what-a-fft-returns
# https://stackoverflow.com/questions/6740545/understanding-fft-output


# https://www.google.com/search?q=what+is+first+half+of+FFT+result+site:stackoverflow.com&sxsrf=ALeKk03HyI7BG_P0_4dnCO3H5v5namISfg:1612258300719&sa=X&ved=2ahUKEwjdqLOa8sruAhVkoFwKHSQFA1cQrQIoBHoECAMQBQ&biw=1345&bih=667
# https://www.google.com/search?q=what+is+first+half+of+FFT+result+site:dsp.stackexchange.com&sxsrf=ALeKk03HyI7BG_P0_4dnCO3H5v5namISfg:1612258300719&sa=X&ved=2ahUKEwjdqLOa8sruAhVkoFwKHSQFA1cQrQIoBHoECBYQBQ

def audio_to_fft_example(audio):

    audio = tf.squeeze(audio, axis=-1)
    fft = tf.signal.fft(
        tf.cast(tf.complex(real=audio, imag=tf.zeros_like(audio)), tf.complex64)
    )
    print(fft[2])
    print(fft[15998])

    # print(fft)
    # print(audio.shape[0] // 2)
    
    fft = tf.expand_dims(fft, axis=-1)

    return tf.math.abs(fft[:, : (audio.shape[0] // 2)])



# test_audio_to_fft_example = audio_to_fft_example(audio)



# How does this add noise works?
def add_noise_example(audio, noises=None, scale=0.5):
    if noises is not None:
        # Create a random tensor of the same size as audio ranging from
        # 0 to the number of noise stream samples that we have.

        tf_rnd = tf.random.uniform(
            (tf.shape(audio)[0],), 0, noises.shape[0], dtype=tf.int32
        )
        # https://www.geeksforgeeks.org/python-tensorflow-gather/ این ایندکس هایی که میگمو از این ارایه به من بده
        noise = tf.gather(noises, tf_rnd, axis=0)

        # Get the amplitude proportion between the audio and the noise
        prop = tf.math.reduce_max(audio, axis=1) / tf.math.reduce_max(noise, axis=1)
        prop = tf.repeat(tf.expand_dims(prop, axis=1), tf.shape(audio)[1], axis=1)

        # Adding the rescaled noise to audio
        audio = audio + noise * prop * scale

    return audio

In [17]:
print(audio.shape) # (16000, 1)
print(noises.shape) # (354, 16000, 1)

'''
tf.random.uniform(
    shape, minval=0, maxval=None, dtype=tf.dtypes.float32, seed=None, name=None
)
'''
# Note it will create random 16000 numbers between 0 and 354   max(random_values_created) >> 353
random_values_created = tf.random.uniform((tf.shape(audio)[0],), 0, noises.shape[0], dtype=tf.int32)  # 16000 , 1


'''
tf.gather(
    params, indices, validate_indices=None, axis=None, batch_dims=0, name=None
)
axis = 0 means row (if array is 1D, means index ), 1 means column
'''
# https://www.geeksforgeeks.org/python-tensorflow-gather/ این ایندکس هایی که میگمو از این ارایه به من بده
noise = tf.gather(noises, random_values_created, axis=0) # 16000, 16000, 1


# tf.math.reduce_max([[1],[2],[3],[4]], axis=1)  # <tf.Tensor: shape=(4,), dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>
# tf.math.reduce_max([[1],[2],[3],[4]], axis=0)  # <tf.Tensor: shape=(1,), dtype=int32, numpy=array([4], dtype=int32)>

"""
 tf.math.reduce_max(audio, axis=1).shape, tf.math.reduce_max(noise, axis=1).shape

 TensorShape([16000])), (TensorShape([16000, 1])
"""
# این خط نسبت هر دامنه از صوت به تمام دامنه های نویز
tf.math.reduce_max(audio, axis=1) / tf.math.reduce_max(noise, axis=1) # 16000, 16000

(16000, 1)
(354, 16000, 1)


<tf.Tensor: shape=(16000, 16000), dtype=float32, numpy=
array([[ 0.0000000e+00,  0.0000000e+00, -5.8986610e-05, ...,
         2.3594644e-04,  7.1373796e-03,  7.0783934e-03],
       [ 0.0000000e+00,  0.0000000e+00, -1.8385732e-04, ...,
         7.3542929e-04,  2.2246737e-02,  2.2062879e-02],
       [ 0.0000000e+00,  0.0000000e+00, -1.9723865e-04, ...,
         7.8895461e-04,  2.3865879e-02,  2.3668639e-02],
       ...,
       [ 0.0000000e+00,  0.0000000e+00, -1.2755102e-03, ...,
         5.1020407e-03,  1.5433674e-01,  1.5306123e-01],
       [ 0.0000000e+00,  0.0000000e+00, -7.3195726e-05, ...,
         2.9278290e-04,  8.8566830e-03,  8.7834867e-03],
       [ 0.0000000e+00,  0.0000000e+00, -3.5556819e-05, ...,
         1.4222728e-04,  4.3023750e-03,  4.2668185e-03]], dtype=float32)>

In [18]:
class_names = tf.io.gfile.listdir(DATASET_AUDIOS_PATH)
print("Our class names: {}".format(class_names,))

audio_paths = []
labels = []

for label, name in enumerate(class_names):
    print("Processing speaker {}".format(name,))
    dir_path = Path(DATASET_AUDIOS_PATH) / name
    speaker_sample_paths = [
        os.path.join(dir_path, filepath)
        for filepath in os.listdir(dir_path)
        if filepath.endswith(".wav")
    ]
    audio_paths += speaker_sample_paths
    labels += [label] * len(speaker_sample_paths)

print(labels)

print(
    "Found {} files belonging to {} classes.".format(len(audio_paths), len(class_names))
)

# Shuffle

'''
why does it work? 
arr = [1,2,3,4,5,6,7,8,9]
arr2 = [11,22,33,44,55,66,77,88,99]

rng = np.random.RandomState(SHUFFLE_SEED)
rng.shuffle(arr)
rng = np.random.RandomState(SHUFFLE_SEED)
rng.shuffle(arr2)

arr, arr2
>> ([4, 9, 7, 8, 3, 6, 2, 1, 5], [44, 99, 77, 88, 33, 66, 22, 11, 55])
'''
rng = np.random.RandomState(SHUFFLE_SEED)
rng.shuffle(audio_paths)
rng = np.random.RandomState(SHUFFLE_SEED)
rng.shuffle(labels)


# Split into training and validation
num_val_samples = int(VALID_SPLIT * len(audio_paths))
print("Using {} files for training.".format(len(audio_paths) - num_val_samples))
train_audio_paths = audio_paths[:-num_val_samples]
train_labels = labels[:-num_val_samples]

print("Using {} files for validation.".format(num_val_samples))
valid_audio_paths = audio_paths[-num_val_samples:]
valid_labels = labels[-num_val_samples:]

'''
This dataset fills a buffer with buffer_size elements, then randomly samples elements from this buffer, 
replacing the selected elements with new elements. For perfect shuffling, a buffer size greater than or equal to the full size of the dataset is required.

For instance, if your dataset contains 10,000 elements but buffer_size is set to 1,000, 
then shuffle will initially select a random element from only the first 1,000 elements in the buffer.
 Once an element is selected, its space in the buffer is replaced by the next (i.e. 1,001-st) element, maintaining the 1,000 element buffer.

reshuffle_each_iteration controls whether the shuffle order should be different for each epoch. In TF 1.X, 
the idiomatic way to create epochs was through the repeat transformation:
'''

# Create 2 datasets, one for training and the other for validation
train_ds = paths_and_labels_to_dataset(train_audio_paths, train_labels)
train_ds = train_ds.shuffle(buffer_size=BATCH_SIZE * 8, seed=SHUFFLE_SEED).batch(
    BATCH_SIZE
)

valid_ds = paths_and_labels_to_dataset(valid_audio_paths, valid_labels)
valid_ds = valid_ds.shuffle(buffer_size=32 * 8, seed=SHUFFLE_SEED).batch(32)

Our class names: ['Benjamin_Netanyau', 'Jens_Stoltenberg', 'Julia_Gillard', 'Magaret_Tarcher', 'Nelson_Mandela']
Processing speaker Benjamin_Netanyau
Processing speaker Jens_Stoltenberg
Processing speaker Julia_Gillard
Processing speaker Magaret_Tarcher
Processing speaker Nelson_Mandela
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

In [19]:
# Add noise to the training set
train_ds = train_ds.map(
    lambda x, y: (add_noise(x, noises, scale=SCALE), y),
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

# Transform audio wave to the frequency domain using `audio_to_fft`
train_ds = train_ds.map(
    lambda x, y: (audio_to_fft(x), y), num_parallel_calls=tf.data.experimental.AUTOTUNE
)
'''
Creates a Dataset that prefetches elements from this dataset.

Most dataset input pipelines should end with a call to prefetch. This allows later elements to be prepared while the current
 element is being processed. This often improves latency and throughput, at the cost of using additional memory to store prefetched elements.

Note: Like other Dataset methods, prefetch operates on the elements of the input dataset. It has no concept of examples vs.
 batches. examples.prefetch(2) will prefetch two elements (2 examples), while examples.batch(20).prefetch(2) will prefetch 2
  elements (2 batches, of 20 examples each).
'''


'''
The tf.data API provides a software pipelining mechanism through the tf.data.Dataset.prefetch transformation, 
which can be used to decouple the time when data is produced from the time when data is consumed. In particular,
 the transformation uses a background thread and an internal buffer to prefetch elements from the input dataset ahead of
  the time they are requested. The number of elements to prefetch should be equal to (or possibly greater than) the number of
   batches consumed by a single training step. You could either manually tune this value, or set it 
   to tf.data.experimental.AUTOTUNE which will prompt the tf.data runtime to tune the value dynamically at runtime.
'''

train_ds = train_ds.prefetch(tf.data.experimental.AUTOTUNE)

valid_ds = valid_ds.map(
    lambda x, y: (audio_to_fft(x), y), num_parallel_calls=tf.data.experimental.AUTOTUNE
)
valid_ds = valid_ds.prefetch(tf.data.experimental.AUTOTUNE)

In [20]:
# TODO : write this in model subclassing model.

# TODO : 1D Convolutions
# TODO : نقاشی این 
def residual_block(x, filters, conv_num=3, activation='relu'):
  #shortcut
  s = keras.layers.Conv1D(filters, 1, padding='same', )(x)
  for i in range(conv_num -1):
    x = keras.layers.Conv1D(filters, 3, padding="same")(x)
    x = keras.layers.Activation(activation)(x)
  x = keras.layers.Conv1D(filters, 3, padding='same')(x)
  x = keras.layers.Add()([x, s])
  x = keras.layers.Activation(activation)(x)
  return keras.layers.MaxPool1D(pool_size=2, strides=2)(x)


def build_model(input_shape, num_classes):
  inputs = keras.layers.Input(shape=input_shape, name='audio_input')
  x = residual_block(inputs, 16, 2)
  x = residual_block(x, 32, 2)
  x = residual_block(x, 64, 3)
  x = residual_block(x, 128, 3)
  x = residual_block(x, 128, 3)

  x = keras.layers.AveragePooling1D(pool_size=3, strides=3)(x)
  # Flatten. I removed Flatten Layer. And saw next line error : 

  '''
  assertion failed: [Condition x == y did not hold element-wise:]
   [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [128 1]
    [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [128 83]
	 [[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert
    (defined at <ipython-input-39-90680b89c031>:5) ]] [Op:__inference_train_function_13032]
  
  '''
  x = keras.layers.Flatten()(x)
  x = keras.layers.Dense(256, activation="relu")(x)
  x = keras.layers.Dense(128, activation="relu")(x)
  outputs = keras.layers.Dense(num_classes, activation="softmax", name="output")(x)

  return keras.models.Model(inputs=inputs, outputs=outputs)


model = build_model((SAMPLING_RATE // 2, 1), len(class_names))

model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 audio_input (InputLayer)       [(None, 8000, 1)]    0           []                               
                                                                                                  
 conv1d_1 (Conv1D)              (None, 8000, 16)     64          ['audio_input[0][0]']            
                                                                                                  
 activation (Activation)        (None, 8000, 16)     0           ['conv1d_1[0][0]']               
                                                                                                  
 conv1d_2 (Conv1D)              (None, 8000, 16)     784         ['activation[0][0]']             
                                                                                              

In [23]:
dot_img_file = './train/model_1.png'
tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=False)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [24]:
# TODO : USE Categorical_crossentropy. for this type of loss 
'''
source : https://stats.stackexchange.com/questions/326065/cross-entropy-vs-sparse-cross-entropy-when-to-use-one-over-the-other
Both, categorical cross entropy and sparse categorical cross entropy have the same loss function which you have mentioned above.
 The only difference is the format in which you mention Yi (i,e true labels).

If your Yi's are one-hot encoded, use categorical_crossentropy. Examples (for a 3-class classification): [1,0,0] , [0,1,0], [0,0,1]

But if your Yi's are integers, use sparse_categorical_crossentropy. Examples for above 3-class classification problem: [1] , [2], [3]

The usage entirely depends on how you load your dataset. One advantage of using sparse categorical cross entropy is it saves
 time in memory as well as computation because it simply uses a single integer for a class, rather than a whole vector.

 https://keras.io/api/losses/probabilistic_losses/#sparse_categorical_crossentropy-function
 https://keras.io/api/losses/probabilistic_losses/#categorical_crossentropy-function
'''

y_true = [[0, 1, 0], [0, 0, 1]]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
cce = keras.losses.CategoricalCrossentropy()
cce_output = cce(y_true, y_pred).numpy()


y_true = [1, 2]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] 
scce = tf.keras.losses.SparseCategoricalCrossentropy()
scce_output = scce(y_true, y_pred).numpy()

cce_output, scce_output

(1.1769392, 1.1769392)

In [25]:
#TODO Metric and GradientTape : https://keras.io/api/metrics/
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# CallBacks 
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="./logs")  # https://keras.io/api/callbacks/tensorboard/
model_save_filename = "model.h5"
earlystopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
mdlcheckpoint_cb = keras.callbacks.ModelCheckpoint( model_save_filename, monitor='val_accuracy', save_best_only=True )

In [27]:
train_ds

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 8000, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))>

In [26]:
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=valid_ds,
    callbacks=[earlystopping_cb, mdlcheckpoint_cb, tensorboard_callback],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
