In [1]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

Sat Sep 18 05:08:31 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   64C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Directory under the mounted Google Drive that holds the per-model weight
# files and receives the exported TFLite model. It must already exist.
KERAS_MODELS_ABSOLUTE_PATH = '/content/gdrive/My Drive/ecnn47'

In [3]:
from google.colab import drive
# Mount Google Drive so that KERAS_MODELS_ABSOLUTE_PATH becomes reachable.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
import matplotlib.pyplot as plt
import numpy as np
from tensorflow import keras
import tensorflow as tf
# Record the TensorFlow version this run used.
print(tf.__version__)
# Default figure size for the loss plots drawn after training.
plt.rc('figure', figsize=(13, 4))

2.6.0


In [5]:
# Fail fast when the output directory is missing. An explicit exception
# carries the instruction in its message instead of relying on a separate
# print followed by `assert False`.
if not tf.io.gfile.exists(KERAS_MODELS_ABSOLUTE_PATH):
  raise FileNotFoundError(
      'You should create %s directory manually' % KERAS_MODELS_ABSOLUTE_PATH)

In [6]:
# Dataset source:
# https://github.com/42io/dataset/tree/master/google_speech_commands
# Class names (presumably in label-index order, TODO confirm):
# zero one two three four five six seven eight nine #unk# #pub#
# Google Drive file id of the dataset; it doubles as the local file name.
DATASET_FILE = '1NgIw6JrdjDcRhnAxbheqtAcV5Fr-wUJu'

In [7]:
!pip install -qq gdown
!test -f "$DATASET_FILE" || gdown --id "$DATASET_FILE" -O "$DATASET_FILE"

Downloading...
From: https://drive.google.com/uc?id=1NgIw6JrdjDcRhnAxbheqtAcV5Fr-wUJu
To: /content/1NgIw6JrdjDcRhnAxbheqtAcV5Fr-wUJu
2.59GB [00:31, 81.7MB/s]


In [8]:
# Integrity check: compare the downloaded file's MD5 with the expected digest.
md5 = !md5sum "$DATASET_FILE"
# The first whitespace-separated token of the first output line is the digest.
assert '54821a74684e7bbb9b3acd853960d9cd' == md5[0].split()[0]

In [9]:
# Load the dataset container; its arrays are looked up by name below
# ('x_train' here, 'x_<split>' / 'y_<split>' in create_ds).
dset = np.load(DATASET_FILE)
# Sanity check of the training features' shape (rows, flat feature length).
print(dset['x_train'].shape)

(1095480, 637)


In [10]:
def create_ds(t):
  """Build a cached tf.data pipeline for one split of `dset`.

  Args:
    t: split suffix selecting the `x_<t>` / `y_<t>` arrays of `dset`
      (the notebook uses 'train', 'test' and 'valid').
  Returns:
    A cached dataset of (features, label) pairs, where features are float32
    with shape (47, 13) and labels are int8.
  """
  features_key = 'x_' + t
  labels_key = 'y_' + t

  def prepare(features, label):
    # View the flat row as a 49 x 13 grid and drop its first and last rows,
    # which leaves 47 rows per example.
    grid = tf.reshape(tf.cast(features, tf.float32), (49, 13))
    return grid[1:-1], tf.cast(label, tf.int8)

  pairs = tf.data.Dataset.from_tensor_slices(
      (dset[features_key], dset[labels_key]))
  return pairs.map(prepare).cache()

In [11]:
# Build the three splits (same creation order as before: train, test, valid).
train_dataset, test_dataset, valid_dataset = (
    create_ds(split) for split in ('train', 'test', 'valid'))
# Display the number of training examples.
train_dataset.cardinality().numpy()

1095480

In [12]:
def spectrogram_masking(spectrogram, dim=1, masks_number=2, mask_max_size=5):
  """Zero out random stripes of a spectrogram along time or frequency.

  Each mask has shape [1, time, frequency], so one stripe position is drawn
  per call and broadcast over the whole batch.

  Args:
    spectrogram: Input spectrum [batch, time, frequency]
    dim: dimension on which masking will be applied: 1 - time; 2 - frequency
    masks_number: number of masks, applied one after another
    mask_max_size: exclusive upper bound of the randomly drawn mask width
  Returns:
    masked spectrogram
  Raises:
    ValueError: if dim is neither 1 nor 2.
  """
  if not (dim == 1 or dim == 2):
    raise ValueError('Wrong dim value: %d' % dim)

  full_shape = spectrogram.shape
  time_size, frequency_size = full_shape[1:3]
  axis_len = full_shape[dim]  # length of the axis being masked
  dtype = spectrogram.dtype

  def stripe(fill, length):
    # A [1, time, frequency] block whose extent along `dim` is `length`.
    dims = [1, time_size, frequency_size]
    dims[dim] = length
    return fill(dims, dtype)

  for _ in range(masks_number):
    # Draw the width first, then an offset that keeps the stripe in range.
    width = tf.random.uniform([], 0, mask_max_size, tf.int32)
    offset = tf.random.uniform([], 0, axis_len - width, tf.int32)

    # ones | zeros | ones, concatenated along the masked axis
    mask = tf.concat((
        stripe(tf.ones, axis_len - offset - width),
        stripe(tf.zeros, width),
        stripe(tf.ones, offset),
    ), dim)
    spectrogram = spectrogram * mask
  return spectrogram

In [13]:
def train_model(model, train_dataset, valid_dataset,
                epochs=500, patience=100, batch_size=128):
  """Train `model` with spectrogram masking and early stopping.

  Args:
    model: compiled keras model; trained in place.
    train_dataset: unbatched dataset of (features, label) pairs. It is
      shuffled with a buffer as large as its cardinality, batched and
      masked along time and frequency.
    valid_dataset: unbatched dataset of (features, label) pairs used for
      validation (batched by 512, no augmentation).
    epochs: maximum number of epochs.
    patience: epochs without `val_loss` improvement before stopping.
    batch_size: training batch size.
  Returns:
    The keras History object returned by model.fit.
  """
  early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        verbose=1,
        patience=patience,
        restore_best_weights=True)

  train_batch = train_dataset.shuffle(train_dataset.cardinality())
  train_batch = train_batch.batch(batch_size)
  # Augmentation runs on whole batches: 3 time masks, then 2 frequency masks.
  train_batch = train_batch.map(lambda x, y: (spectrogram_masking(x, 1, 3, 3), y))
  train_batch = train_batch.map(lambda x, y: (spectrogram_masking(x, 2, 2, 2), y))
  train_batch = train_batch.prefetch(tf.data.AUTOTUNE)

  valid_batch = valid_dataset.batch(512)
  valid_batch = valid_batch.prefetch(tf.data.AUTOTUNE)

  # plt.imshow(list(train_batch.take(1))[0][0][0].numpy().T)
  # plt.show()

  history = model.fit(train_batch,
                      validation_data=valid_batch,
                      callbacks=[early_stopping],
                      verbose=2,
                      epochs=epochs) # play with google colab time limit

  # Apply the best snapshot explicitly, so it is used even when training ran
  # through all epochs without early stopping firing. The callback keeps
  # `best_weights` as None when it never recorded an improvement (for
  # example if val_loss was never a finite number), so guard against that.
  best_weights = early_stopping.best_weights
  if best_weights is not None:
    model.set_weights(best_weights)
  return history

In [14]:
def build_first_ensemble(in_shape, activation, pooling):
  """Build the residual Conv1D ensemble member.

  A kernel-size-1 convolution lifts the input to 128 channels. Four residual
  blocks follow, each ending in a pooling layer (pool size 2 for the first
  three, the whole remaining time axis for the last). Two Dense blocks and a
  final 12-unit Dense layer without activation produce the outputs.

  Args:
    in_shape: shape of one example, passed to keras.Input.
    activation: zero-argument callable returning an activation layer
      (e.g. keras.layers.ReLU).
    pooling: pooling layer class, called as pooling(pool_size,
      padding='same').
  Returns:
    keras.Model named 'first_ensemble'.
  """
  # resetting the layer name generation counter
  keras.backend.clear_session()

  def norm_act(tensor):
    # BatchNormalization followed by a fresh activation layer.
    tensor = keras.layers.BatchNormalization()(tensor)
    return activation()(tensor)

  inputs = keras.Input(shape=in_shape)

  net = keras.layers.Conv1D(128, 1, use_bias=False)(inputs)
  net = norm_act(net)
  net = keras.layers.SpatialDropout1D(0.2)(net)

  for blocks_left in reversed(range(4)):
    channels = net.shape[-1]
    branch = keras.layers.Conv1D(
        channels, 3, padding='same', use_bias=False)(net)
    branch = norm_act(branch)
    branch = keras.layers.SpatialDropout1D(0.2)(branch)
    branch = keras.layers.Conv1D(
        channels, 2, padding='same', use_bias=False)(branch)
    branch = keras.layers.BatchNormalization()(branch)
    net = keras.layers.add([branch, net])
    net = activation()(net)
    net = keras.layers.SpatialDropout1D(0.2)(net)
    # The last block pools over everything that is left of the time axis.
    pool_size = net.shape[1] if blocks_left == 0 else 2
    net = pooling(pool_size, padding='same')(net)

  net = keras.layers.Flatten()(net)

  flat_width = net.shape[-1]
  for units in (2 * flat_width, flat_width):
    net = keras.layers.Dense(units, use_bias=False)(net)
    net = norm_act(net)
    net = keras.layers.Dropout(0.2)(net)

  outputs = keras.layers.Dense(12)(net)
  return keras.Model(inputs=inputs, outputs=outputs, name='first_ensemble')

In [15]:
def build_second_ensemble(in_shape, activation, pooling, *in_steps):
  """Build the dilated Conv1D ensemble member.

  Optional kernel-size-1 convolutions (one per entry of `in_steps`) are
  followed by four kernel-size-2 convolutions with dilation rates 1, 2, 4
  and 8, a pooling layer spanning the whole remaining time axis, two Dense
  blocks and a final 12-unit Dense layer without activation.

  Args:
    in_shape: shape of one example, passed to keras.Input.
    activation: zero-argument callable returning an activation layer.
    pooling: pooling layer class, called as pooling(pool_size).
    *in_steps: filter counts of the leading kernel-size-1 convolutions; each
      one also sets that stage's SpatialDropout1D rate to `filters / 640`.
  Returns:
    keras.Model named 'second_ensemble'.
  """
  # resetting the layer name generation counter
  keras.backend.clear_session()

  def norm_act(tensor):
    # BatchNormalization followed by a fresh activation layer.
    tensor = keras.layers.BatchNormalization()(tensor)
    return activation()(tensor)

  inputs = keras.Input(shape=in_shape)
  net = inputs

  for filters in in_steps:
    net = keras.layers.Conv1D(filters, 1, use_bias=False)(net)
    net = norm_act(net)
    net = keras.layers.SpatialDropout1D(filters / 640)(net)

  for dilation in (1, 2, 4, 8):
    net = keras.layers.Conv1D(
        net.shape[-1], 2, dilation_rate=dilation, use_bias=False)(net)
    net = norm_act(net)
    net = keras.layers.SpatialDropout1D(0.2)(net)

  # Pool over everything that is left of the time axis.
  net = pooling(net.shape[1])(net)
  net = keras.layers.Flatten()(net)

  flat_width = net.shape[-1]
  for units in (2 * flat_width, flat_width):
    net = keras.layers.Dense(units, use_bias=False)(net)
    net = norm_act(net)
    net = keras.layers.Dropout(0.2)(net)

  outputs = keras.layers.Dense(12)(net)
  return keras.Model(inputs=inputs, outputs=outputs, name='second_ensemble')

In [16]:
def build_ensemble(builders=None, models=None, in_shape=(47, 13)):
  """Build the ensemble members, or combine trained members into one model.

  Exactly one of `builders` / `models` is expected; a non-empty `models`
  takes precedence.

  Args:
    builders: iterable of `(builder, *args)` entries. Each builder is called
      as `builder(in_shape, *args)`.
    models: sequence of keras models that map an `in_shape` input to logits.
    in_shape: shape of one input example.
  Returns:
    With `models`: a keras.Model that averages the members' softmax outputs.
    Otherwise: a tuple with one built model per `builders` entry.
  Raises:
    TypeError: if neither `builders` nor a non-empty `models` is given.
  """
  if models:
    x_in = keras.Input(shape=in_shape)
    # Each member is re-wrapped in a fresh keras.Model before being called
    # on the shared input (presumably so members that share a name do not
    # clash inside the combined model, TODO confirm).
    wrapped = [keras.Model(m.inputs, m.outputs) for m in models]
    logits = [member(x_in) for member in wrapped]
    probabilities = [keras.layers.Softmax()(out) for out in logits]
    averaged = keras.layers.Average()(probabilities)
    return keras.Model(x_in, averaged)
  if builders is None:
    raise TypeError(
        'build_ensemble() needs either `builders` or a non-empty `models`')
  return tuple(build(in_shape, *args) for build, *args in builders)

In [17]:
# One entry per ensemble member:
# (builder, activation layer class, pooling layer class, *extra builder args)
ensemble_specs = (
  (build_first_ensemble,  keras.layers.LeakyReLU, keras.layers.MaxPool1D),
  (build_first_ensemble,  keras.layers.ReLU,      keras.layers.AvgPool1D),
  (build_second_ensemble, keras.layers.PReLU,     keras.layers.MaxPool1D, 128),
  (build_second_ensemble, keras.layers.ELU,       keras.layers.AvgPool1D, 64, 128),
)
# Tuple of built (still untrained) member models, in the order listed above.
ensemble_models = build_ensemble(builders=ensemble_specs)

In [18]:
# Print every member's architecture, with a gap between consecutive ones.
for position, model in enumerate(ensemble_models):
  if position > 0:
    print("\n")
  model.summary()

Model: "first_ensemble"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 47, 13)]     0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 47, 128)      1664        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 47, 128)      512         conv1d[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 47, 128)      0           batch_normalization[0][0]        
_____________________________________________________________________________________

In [19]:
# Compile every member, then either restore its cached weights or train it.
for model in ensemble_models:
  # The final Dense layers have no activation, hence from_logits=True.
  model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy']
  )
  # Cache key: MD5 of the stringified model config, computed by the shell
  # (echo | md5sum). A config that prints differently gives a different
  # key, so no cached file is found and the model is retrained.
  md5 = str(model.get_config())
  md5 = !echo "$md5" | md5sum
  md5 = md5[0].split()[0]
  weights_file = "%s/%s.h5" % (KERAS_MODELS_ABSOLUTE_PATH, md5)
  if tf.io.gfile.exists(weights_file):
    print('Restoring model weights from', md5)
    model.load_weights(weights_file)
  else:
    history = train_model(model, train_dataset, valid_dataset)
    model.save_weights(weights_file)
    # Loss curves of the freshly trained model.
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Valid'], loc='upper right')
    plt.show()



Restoring model weights from e79cdaab005e8e3f5d4b279da0303ad0
Restoring model weights from c1f32401a2158eff6a5df2f16224b6e9
Restoring model weights from 95d3760c8cd90013bd074f2847cd1c80
Restoring model weights from 949a46be51a14342e6eef429cde2179f


In [20]:
# (printed label, dataset, evaluation batch size) for every split.
evaluation_plan = (
  ('train loss, train acc:', train_dataset, 8192),
  ('test loss, test acc:', test_dataset, 512),
  ('valid loss, valid acc:', valid_dataset, 512),
)
# Report [loss, accuracy] of each member on each split.
for position, model in enumerate(ensemble_models):
  if position > 0:
    print("")
  for label, split, batch_size in evaluation_plan:
    print(label, model.evaluate(split.batch(batch_size), verbose=0))

train loss, train acc: [0.012060952372848988, 0.9961350560188293]
test loss, test acc: [0.04674791917204857, 0.985687792301178]
valid loss, valid acc: [0.02573339268565178, 0.9910926222801208]

train loss, train acc: [0.010657472535967827, 0.9966891407966614]
test loss, test acc: [0.05282493680715561, 0.9859527945518494]
valid loss, valid acc: [0.024905672296881676, 0.9931710362434387]

train loss, train acc: [0.010551303625106812, 0.9966690540313721]
test loss, test acc: [0.05545267090201378, 0.983567476272583]
valid loss, valid acc: [0.026950230821967125, 0.9904988408088684]

train loss, train acc: [0.010406920686364174, 0.9966307282447815]
test loss, test acc: [0.04353712499141693, 0.9867479205131531]
valid loss, valid acc: [0.02554340846836567, 0.9937648177146912]


In [21]:
# Combine the trained members into a single model through the `models`
# branch of build_ensemble (average of the members' softmax outputs).
ensemble = build_ensemble(models=ensemble_models)

In [22]:
# The ground-truth labels do not depend on the model, so they are extracted
# once per split here instead of re-iterating the dataset for every model.
error_splits = [
  (split.batch(512),
   np.array(list(split.map(lambda x, y: y).as_numpy_iterator())))
  for split in (test_dataset, valid_dataset)
]
# For each member and for the combined ensemble (last), print
# "<misclassified> <total>" for the test split and then the valid split.
for position, model in enumerate(ensemble_models + (ensemble,)):
  if position > 0:
    print("")
  for batches, labels in error_splits:
    pred = model.predict(batches).argmax(axis=-1)
    print(np.sum(pred != labels), len(pred))

54 3773
30 3368

53 3773
23 3368

62 3773
32 3368

50 3773
21 3368

45 3773
18 3368


In [23]:
# Row-normalised confusion matrix of the ensemble on the test split.
test_labels = list(test_dataset.map(lambda x, y: y))
test_predictions = ensemble.predict(test_dataset.batch(4096)).argmax(axis=-1)
matrix = tf.math.confusion_matrix(test_labels, test_predictions).numpy()
for row in matrix:
  row_total = np.sum(row)
  # Fractions that round to 0.00 are shown as a dot to keep the table readable.
  cells = [('%.2f' % (count / row_total)).replace('0.00', ' .  ')
           for count in row]
  print(*cells, end = ' ')
  print("|", row_total)

0.99  .   0.01  .    .    .    .    .    .    .    .    .   | 315
 .   0.98  .    .    .    .    .    .    .   0.01  .    .   | 309
 .    .   0.99  .    .    .    .    .    .    .    .    .   | 304
 .    .    .   0.98  .    .    .    .    .    .   0.01  .   | 304
 .    .    .    .   0.99  .    .    .    .    .   0.01  .   | 310
 .    .    .    .    .   0.99  .    .    .    .    .    .   | 336
 .    .    .    .    .    .   1.00  .    .    .    .    .   | 249
 .    .    .    .    .    .    .   1.00  .    .    .    .   | 306
 .    .    .    .    .    .    .    .   0.99  .    .    .   | 298
 .    .    .    .    .   0.01  .    .    .   0.99  .    .   | 312
 .   0.01  .   0.01 0.01  .    .    .    .   0.01 0.96  .   | 365
 .    .    .    .    .    .    .    .    .    .    .   0.99 | 365


In [24]:
# Convert the combined ensemble to a TFLite flatbuffer and write it to the
# current working directory.
converter = tf.lite.TFLiteConverter.from_keras_model(ensemble)
tflite_model = converter.convert()
with open("ecnn47.tflite", "wb") as f:
  f.write(tflite_model)

INFO:tensorflow:Assets written to: /tmp/tmpx8__l2xy/assets




In [25]:
# Print the exported model's checksum, then move the file into the models
# directory on Google Drive.
!md5sum 'ecnn47.tflite'
!mv 'ecnn47.tflite' "$KERAS_MODELS_ABSOLUTE_PATH"

7c58aa40956b4945c3ed4c13ee7232ab  ecnn47.tflite
