To connect VSCode to a Colab Jupyter notebook, open a notebook in Colab and execute the following code:
# Install jupyterlab and ngrok
!pip install jupyterlab pyngrok -q

# Run jupyterlab in background
!nohup jupyter lab --ip=0.0.0.0 &

# Make jupyterlab accessible via ngrok
from pyngrok import ngrok
print(ngrok.connect(8888))

After that, the output should be something similar to: NgrokTunnel: "http://<uri_for_remote>". Copy the URL ("http://<uri_for_remote>")
and, in the Command Palette of VSCode, type ">jupyter:Specify local or remote Jupyter server for connections".
Select "Existing, Specify the URI of an existing server" and enter the previously copied URI. After that, reload VSCode (as suggested)
and open a notebook. If you need to change the runtime (CPU, TPU, GPU), repeat the whole procedure.


In [1]:
!nvidia-smi

Thu Nov 26 16:52:46 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.38       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8     9W /  70W |      0MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import tensorflow as tf
import numpy as np
import os

class WindowGenerator:
    """Build windowed (inputs, labels) datasets from a two-feature time series.

    Every window spans ``input_width + 1`` consecutive samples: all samples
    but the last are the model inputs (normalized), the last sample supplies
    the labels (left un-normalized).

    Args:
        input_width: Number of time steps fed to the model.
        label_options: When smaller than 2, the index of the single feature
            used as label; otherwise both features are used as labels.
        mean: Per-feature mean (2 values) used to normalize the inputs.
        std: Per-feature standard deviation (2 values) used to normalize
            the inputs.
    """

    def __init__(self, input_width, label_options, mean, std):
        self.input_width = input_width
        self.label_options = label_options
        # Reshape to [1, 1, 2] so the statistics broadcast over the
        # (batch, time, feature) layout of the windows.
        self.mean = tf.reshape(tf.convert_to_tensor(mean), [1, 1, 2])
        self.std = tf.reshape(tf.convert_to_tensor(std), [1, 1, 2])

    def split_window(self, features):
        """Split a batch of windows into inputs and labels.

        Args:
            features: Batch of windows; indexed as (batch, time, feature).

        Returns:
            A tuple ``(inputs, labels)`` where ``inputs`` is every time step
            but the last and ``labels`` comes from the last time step, with
            static shapes ``[None, input_width, 2]`` and
            ``[None, num_labels]`` respectively.
        """
        inputs = features[:, :-1, :]

        if self.label_options < 2:
            # Single target: keep a trailing axis so labels are (batch, 1).
            labels = features[:, -1, self.label_options]
            labels = tf.expand_dims(labels, -1)
            num_labels = 1
        else:
            labels = features[:, -1, :]
            num_labels = 2

        # Slicing loses the static shape information; restore it.
        inputs.set_shape([None, self.input_width, 2])
        labels.set_shape([None, num_labels])

        return inputs, labels

    def normalize(self, features):
        """Standardize ``features`` with the stored mean and std."""
        # Adding a small number to the std so that a zero std does not
        # cause a division by zero.
        features = (features - self.mean) / (self.std + 1.e-6)

        return features

    def preprocess(self, features):
        """Split a batch of windows and normalize the inputs only."""
        inputs, labels = self.split_window(features)
        inputs = self.normalize(inputs)

        return inputs, labels

    def make_dataset(self, data, train):
        """Create a batched, cached dataset of (inputs, labels) pairs.

        Args:
            data: Array of samples passed to
                ``timeseries_dataset_from_array``.
            train: When exactly ``True``, batches are shuffled (buffer of
                100 batches, reshuffled at every iteration).

        Returns:
            A ``tf.data.Dataset`` yielding batches of 32 windows.
        """
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
                data=data,
                targets=None,
                sequence_length=self.input_width+1,
                sequence_stride=1,
                batch_size=32)
        ds = ds.map(self.preprocess)
        ds = ds.cache()
        if train is True:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds


# To get the separate values, store the output of the evaluate method in 2 variables,
# one for the loss and one for the error; the error will then hold 2 separate values:
# loss, error = model.evaluate(...), and they can be printed manually
class thMAE(tf.keras.metrics.Metric):
    """Mean absolute error reported separately for each of the 2 outputs.

    For every batch the absolute error is averaged over the batch axis and
    accumulated; ``result()`` divides the accumulated error by the number of
    batches seen, so each batch weighs the same regardless of its size.
    """

    def __init__(self, name='thMAE', **kwargs):
        super().__init__(name=name, **kwargs)
        # We need at least 2 tensors: one to store the MAE accumulated so far
        # (one value per output) and another to store the number of batches
        # processed so far, so that the final error can be averaged.
        # shape=[2] is equivalent to shape=(2, )
        self.total = self.add_weight(name='total', initializer='zeros', shape=(2, ))
        # Shape not needed because it's just a scalar value
        self.count = self.add_weight(name='count', initializer='zeros')

    def reset_state(self):
        """Zero the accumulators (hook used by recent tf.keras versions)."""
        self.count.assign(tf.zeros_like(self.count))
        self.total.assign(tf.zeros_like(self.total))

    def reset_states(self):
        """Zero the accumulators (hook used by older tf.keras versions)."""
        # Kept so that Keras releases that still call the old name work;
        # the actual logic lives in reset_state().
        self.reset_state()

    # sample_weight is accepted for API compatibility but is not used
    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the per-output MAE of one batch."""
        error = tf.abs(y_pred - y_true)
        # Average over the batch axis, keeping one value per output
        error = tf.reduce_mean(error, axis=0)
        self.total.assign_add(error)
        self.count.assign_add(1)

    def result(self):
        """Return the accumulated error divided by the number of batches."""
        # Safe divide: returns 0 when no batch has been processed yet.
        return tf.math.divide_no_nan(self.total, self.count)


class SignalGenerator:
    """Turn audio file paths into (features, label id) datasets.

    Depending on ``mfcc`` the features are either a magnitude STFT
    spectrogram resized to 32x32 or MFCCs; the chosen pipeline is exposed
    as ``self.preprocess``.

    Args:
        labels: Array of label names; a file's label id is the position of
            its parent directory name inside this array.
        sampling_rate: Number of samples every clip is padded/cut to; also
            passed as the sample rate of the mel filter bank.
        frame_length: STFT window length (also used as FFT length).
        frame_step: STFT hop size.
        num_mel_bins: Number of mel bins (MFCC pipeline only).
        lower_frequency: Lower edge of the mel filter bank (MFCC only).
        upper_frequency: Upper edge of the mel filter bank (MFCC only).
        num_coefficients: Number of MFCCs kept (MFCC pipeline only).
        mfcc: Truthy to use the MFCC pipeline, falsy for the STFT one.
    """

    def __init__(self, labels, sampling_rate, frame_length, frame_step,
        num_mel_bins=None, lower_frequency=None, upper_frequency=None,
        num_coefficients=None, mfcc=False):
        self.labels = labels
        self.sampling_rate = sampling_rate
        self.frame_length = frame_length
        self.frame_step = frame_step
        self.num_mel_bins = num_mel_bins
        self.lower_frequency = lower_frequency
        self.upper_frequency = upper_frequency
        self.num_coefficients = num_coefficients
        # Number of frequency bins produced by an FFT of size frame_length
        num_spectrogram_bins = frame_length // 2 + 1

        if mfcc:
            # The mel projection matrix is constant: compute it only once.
            self.linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
                    self.num_mel_bins, num_spectrogram_bins, self.sampling_rate,
                    self.lower_frequency, self.upper_frequency)
            self.preprocess = self.preprocess_with_mfcc
        else:
            self.preprocess = self.preprocess_with_stft

    def read(self, file_path):
        """Load a WAV file and derive its label id from the parent folder.

        Returns:
            A tuple ``(audio, label_id)``.
        """
        parts = tf.strings.split(file_path, os.path.sep)
        # The label is the name of the directory containing the file
        label = parts[-2]
        label_id = tf.argmax(label == self.labels)
        audio_binary = tf.io.read_file(file_path)
        audio, _ = tf.audio.decode_wav(audio_binary)
        # Drop the channel axis (presumably mono clips -- TODO confirm)
        audio = tf.squeeze(audio, axis=1)

        return audio, label_id

    def pad(self, audio):
        """Return ``audio`` with exactly ``sampling_rate`` samples.

        Shorter clips are zero-padded at the end; longer clips are cut,
        which avoids requesting a negative padding size.
        """
        audio = audio[:self.sampling_rate]
        zero_padding = tf.zeros([self.sampling_rate] - tf.shape(audio), dtype=tf.float32)
        audio = tf.concat([audio, zero_padding], 0)
        # Restore the static shape lost by the dynamic concat
        audio.set_shape([self.sampling_rate])

        return audio

    def get_spectrogram(self, audio):
        """Return the magnitude of the STFT of ``audio``."""
        stft = tf.signal.stft(audio, frame_length=self.frame_length,
                    frame_step=self.frame_step, fft_length=self.frame_length)
        spectrogram = tf.abs(stft)

        return spectrogram

    def get_mfccs(self, spectrogram):
        """Convert a magnitude spectrogram into its first MFCCs.

        Requires the instance to have been built with the MFCC pipeline
        enabled, since it uses the precomputed mel weight matrix.
        """
        mel_spectrogram = tf.tensordot(spectrogram, self.linear_to_mel_weight_matrix, 1)
        # Small offset so the log of an empty bin stays finite
        log_mel_spectrogram = tf.math.log(mel_spectrogram + 1e-6)
        mfccs = tf.signal.mfccs_from_log_mel_spectrograms(log_mel_spectrogram)
        mfccs = mfccs[..., :self.num_coefficients]

        return mfccs

    def preprocess_with_stft(self, file_path):
        """Map a file path to ``(spectrogram resized to 32x32, label id)``."""
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        # Add a channel axis, required by tf.image.resize
        spectrogram = tf.expand_dims(spectrogram, -1)
        spectrogram = tf.image.resize(spectrogram, [32, 32])

        return spectrogram, label

    def preprocess_with_mfcc(self, file_path):
        """Map a file path to ``(MFCCs with a channel axis, label id)``."""
        audio, label = self.read(file_path)
        audio = self.pad(audio)
        spectrogram = self.get_spectrogram(audio)
        mfccs = self.get_mfccs(spectrogram)
        # Expand the MFCCs themselves (not the spectrogram) so the MFCC
        # pipeline really returns MFCC features.
        mfccs = tf.expand_dims(mfccs, -1)

        return mfccs, label

    def make_dataset(self, files, train):
        """Create a batched, cached dataset from a collection of file paths.

        Args:
            files: File paths to load.
            train: Truthy to shuffle the batches (buffer of 100 batches,
                reshuffled at every iteration).

        Returns:
            A ``tf.data.Dataset`` yielding batches of 32 examples.
        """
        ds = tf.data.Dataset.from_tensor_slices(files)
        ds = ds.map(self.preprocess, num_parallel_calls=4)
        ds = ds.batch(32)
        ds = ds.cache()

        if train:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds


In [7]:
import argparse
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
#from utils import *
import sys


#parser = argparse.ArgumentParser()
#parser.add_argument('--model', type=str, default='CNN')
#parser.add_argument('--mfcc', action='store_true')
#args = parser.parse_args()

# --- Experiment configuration -------------------------------------------
MFCC = False
MODEL = "MLP"
print(MFCC)

# Fix the seeds so that the file shuffle (and hence the splits) is repeatable
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# --- Data download and split --------------------------------------------
zip_path = tf.keras.utils.get_file(
    origin='http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip',
    fname='mini_speech_commands.zip',
    extract=True,
    cache_dir='.',
    cache_subdir='data',
)

data_dir = os.path.join('.', 'data', 'mini_speech_commands')
# Shuffle the file list so that each split mixes all the classes
filenames = tf.random.shuffle(tf.io.gfile.glob(str(data_dir) + '/*/*'))
n = len(filenames)

# 80% training / 10% validation / 10% test
train_end = int(n*0.8)
val_end = int(n*0.9)
train_file = filenames[:train_end]
val_files = filenames[train_end:val_end]
test_files = filenames[val_end:]

# One label per sub-directory; the README is not a class
LABELS = np.array(tf.io.gfile.listdir(str(data_dir)))
LABELS = LABELS[LABELS != "README.md"]

# --- Feature extraction options -----------------------------------------
# check the frequency
# frequency = 16000
# frame_length = 16
# frame_step = 8

STFT_OPTIONS = dict(frame_length=16, frame_step=8, mfcc=False)
MFCC_OPTIONS = dict(
    frame_length=40,
    frame_step=20,
    mfcc=True,
    lower_frequency=20,
    upper_frequency=4000,
    num_mel_bins=40,
    num_coefficients=10,
)

# The first-convolution strides depend on the kind of features used
if MFCC:
    options, strides = MFCC_OPTIONS, [2, 1]
else:
    options, strides = STFT_OPTIONS, [2, 2]

# --- Datasets -------------------------------------------------------------
signal = SignalGenerator(LABELS, 16000, **options)
train_ds = signal.make_dataset(train_file, True)
val_ds = signal.make_dataset(val_files, False)
test_ds = signal.make_dataset(test_files, False)


# print(train_file)

# ds = tf.data.Dataset.from_tensor_slices(train_file)
# for element in ds:
#     parts = tf.strings.split(element, os.path.sep)
#     print(parts)
#     label = parts[-2]
#     print(label)
#     label_id = tf.argmax(label == LABELS)
#     print(label_id)
#     audio_binary = tf.io.read_file(element)
#     #print(audio_binary)
#     audio, _ = tf.audio.decode_wav(audio_binary)
#     print(audio)
#     audio = tf.squeeze(audio, axis=1)
#     print(audio)

#     zero_padding = tf.zeros([16000] - tf.shape(audio), dtype=tf.float32)
#     print(zero_padding)
#     audio = tf.concat([audio, zero_padding], 0)
#     print(audio)
#     audio.set_shape([16000])
#     print(audio)    
#     break



    

def _bn_relu():
    """Return the BatchNorm + ReLU pair that follows every convolution."""
    return [
        keras.layers.BatchNormalization(momentum=0.1),
        keras.layers.Activation('relu'),
    ]


def _build_mlp():
    """Three hidden dense layers of 256 units and an 8-way classifier."""
    hidden = [
        keras.layers.Dense(256, activation='relu', name=layer_name)
        for layer_name in ('first_dense', 'second_dense', 'third_dense')
    ]
    return keras.Sequential([
        keras.layers.Flatten(),
        *hidden,
        keras.layers.Dense(8, name='classifier'),
    ])


def _build_cnn():
    """Three 3x3 convolutions (128 filters) and an 8-way classifier."""
    return keras.Sequential([
        keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides=strides, use_bias=False, name='first_conv'),
        *_bn_relu(),
        keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides=[1, 1], use_bias=False, name='second_conv'),
        *_bn_relu(),
        keras.layers.Conv2D(filters=128, kernel_size=[3,3], strides=[1, 1], use_bias=False, name='third_conv'),
        *_bn_relu(),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(8, name='classifier'),
    ])


def _build_ds_cnn():
    """One convolution followed by two depthwise-separable convolutions."""
    return keras.Sequential([
        keras.layers.Conv2D(filters=256, kernel_size=[3, 3], strides=strides, use_bias=False),
        *_bn_relu(),
        keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
        keras.layers.Conv2D(filters=256, kernel_size=[1, 1], strides=[1, 1], use_bias=False),
        *_bn_relu(),
        keras.layers.DepthwiseConv2D(kernel_size=[3, 3], strides=[1, 1], use_bias=False),
        keras.layers.Conv2D(filters=256, kernel_size=[1, 1], strides=[1, 1], use_bias=False),
        *_bn_relu(),
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(8),
    ])


# Map every supported architecture name to the function building it
_MODEL_BUILDERS = {
    "MLP": _build_mlp,
    "CNN": _build_cnn,
    "DS-CNN": _build_ds_cnn,
}

if MODEL not in _MODEL_BUILDERS:
    print("Model not defined")
    sys.exit()

model = _MODEL_BUILDERS[MODEL]()



# The classifier outputs raw logits, hence from_logits=True
model.compile(optimizer='adam',
              loss=[tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)],
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

# Validation metric that drives the checkpointing
metric = 'val_sparse_categorical_accuracy'

# Keep only the weights of the epoch with the best validation accuracy
checkpoint_filepath = './checkpoint/kws_{}_{}/weights'.format(MODEL, MFCC)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor=metric,
    mode='max',
    save_best_only=True)


print("Fit model on training data")

# No batch_size here: the datasets are already batched, and Keras documents
# that batch_size must not be specified when the input is a tf.data.Dataset.
history = model.fit(
    train_ds,
    epochs=20,
    # We pass some validation data for
    # monitoring validation loss and metrics
    # at the end of each epoch
    validation_data=val_ds,
    callbacks=[model_checkpoint_callback]
)

print("Evaluate on test data")
results = model.evaluate(test_ds, verbose=2)

model.summary()

# exist_ok avoids the check-then-create race of a separate os.path.exists()
save_model_dir = './models/kws_{}_{}'.format(MODEL, MFCC)
os.makedirs(save_model_dir, exist_ok=True)

# for key in history.history:
#     print(key)

# for element in train_ds:
#     print(element)
#     break 

# y_true = [2, 1]
# y_pred = [[0], [0]]
# m = tf.keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
# print(m)





False
Fit model on training data
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Evaluate on test data
25/25 - 1s - loss: 1.8647 - sparse_categorical_accuracy: 0.5850
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
first_dense (Dense)          (None, 256)               262400    
_________________________________________________________________
second_dense (Dense)         (None, 256)               65792     
_________________________________________________________________
third_dense (Dense)          (None, 256)               65792     
__________________________________