In [1]:
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Activation
from tensorflow.keras.layers import LSTM,GRU
from tensorflow.keras.optimizers import Adam
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import urllib3
urllib3.disable_warnings()

import tensorflow_datasets as tfds
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.models import load_model
from IPython.display import clear_output
from tensorflow.keras.preprocessing import sequence

2.2.0


In [2]:
!pip install pydub



In [0]:
train = tfds.load('speech_commands',split = 'train')
test = tfds.load('speech_commands',split = 'test')
validation = tfds.load('speech_commands',split = 'validation')

In [0]:
BUFFER_SIZE = 10000
BATCH_SIZE = 32

In [5]:
shapes = []
cnt = 0
for sample in train.take(-1):
  if cnt%50 == 0:
    print(cnt/85511)
    clear_output()
  cnt += 1
  shapes.append(sample['audio'].shape)
shapes = np.array(shapes)
np.unique(shapes)

array([ 4096,  4458,  4778,  5201,  5461,  5803,  5944,  5945,  6144,
        6315,  6687,  6688,  6826,  6827,  7168,  7339,  7430,  7431,
        7509,  7510,  7680,  7851,  8022,  8173,  8174,  8192,  8363,
        8533,  8534,  8874,  8875,  8916,  8917,  9046,  9216,  9387,
        9557,  9558,  9659,  9660,  9728,  9899, 10032, 10069, 10070,
       10240, 10402, 10403, 10411, 10581, 10582, 10752, 10922, 10923,
       11093, 11094, 11145, 11146, 11264, 11435, 11605, 11606, 11776,
       11888, 11889, 11947, 11972, 11976, 12052, 12056, 12117, 12118,
       12132, 12136, 12212, 12216, 12261, 12288, 12292, 12296, 12376,
       12456, 12459, 12480, 12536, 12616, 12629, 12630, 12631, 12632,
       12696, 12800, 12852, 12970, 12971, 13004, 13141, 13142, 13312,
       13332, 13375, 13483, 13653, 13654, 13824, 13995, 14118, 14165,
       14166, 14336, 14372, 14490, 14507, 14677, 14678, 14692, 14772,
       14848, 14861, 15018, 15019, 15092, 15172, 15189, 15190, 15233,
       15360, 15412,

In [0]:
def audio_proc(audio):
  audio = tf.cast(audio,tf.float32)
  audio = tf.pad(audio,[[0,16000]])
  audio = tf.slice(audio,[0],[16000])

  def _tf_log10(x):
    numerator = tf.math.log(x)
    denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))
    return numerator / denominator

  def power_to_db(magnitude, amin=1e-16, top_db=80.0):
    ref_value = tf.reduce_max(magnitude)
    log_spec = 10.0 * _tf_log10(tf.maximum(amin, magnitude))
    log_spec -= 10.0 * _tf_log10(tf.maximum(amin, ref_value))
    log_spec = tf.maximum(log_spec, tf.reduce_max(log_spec) - top_db)

    return log_spec

  sample_rate = 16000
  fft_size = 1024
  hop_size = 512
  n_mels = 128
  f_min = 0
  f_max = sample_rate / 2
  spectrograms = tf.signal.stft(audio,
                                frame_length=fft_size,
                                frame_step=hop_size,
                                pad_end=False)

  magnitude_spectrograms = tf.abs(spectrograms)
  mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=n_mels,
            num_spectrogram_bins=fft_size // 2 + 1,
            sample_rate=sample_rate,
            lower_edge_hertz=f_min,
            upper_edge_hertz=f_max)
  
  mel_spectrograms = tf.matmul(tf.square(magnitude_spectrograms),
                              mel_filterbank)

  log_mel_spectrograms = power_to_db(mel_spectrograms)

  return log_mel_spectrograms

def prepare(ds):
  a = ds['audio']
  b = ds['label']
  label = tf.one_hot(tf.cast(b,tf.uint8),12)
  a = audio_proc(a)
  return a,label

train_set = train.map(prepare).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
test_set = test.map(prepare).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
validation_set = validation.map(prepare).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

In [0]:
model = Sequential(name = 'model')

model.add( LSTM(250,input_shape = (30, 128), return_sequences=True) )
model.add( LSTM(300) )
model.add(Flatten())
model.add( Dense(12, activation='softmax') )

In [8]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 250)           379000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 300)               661200    
_________________________________________________________________
flatten (Flatten)            (None, 300)               0         
_________________________________________________________________
dense (Dense)                (None, 12)                3612      
Total params: 1,043,812
Trainable params: 1,043,812
Non-trainable params: 0
_________________________________________________________________


In [9]:
adam_fine = Adam(lr = 0.0001,beta_1 = 0.9, beta_2 = 0.999,epsilon =1e-08,decay = 0.0)

model.compile(optimizer= adam_fine,
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', verbose=1, patience=5)
tensorboard = TensorBoard('logs/mlp-model')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', verbose=1, patience=2) 
model_checkpoint = ModelCheckpoint('model.h5', save_best_only=True)

model.fit(
    x = train_set, epochs=30, validation_data=validation_set,
    callbacks=[early_stopping, tensorboard, reduce_lr, model_checkpoint]
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 00013: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 00018: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 19/30
Epoch 20/30
Epoch 00020: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
Epoch 21/30
Epoch 00021: early stopping


<tensorflow.python.keras.callbacks.History at 0x7f927b15a908>

In [10]:
model.evaluate(test_set)



[0.3369446396827698, 0.8975459933280945]

In [11]:
from google.colab import files
!zip -r /content/file2.zip /content/logs

  adding: content/logs/ (stored 0%)
  adding: content/logs/mlp-model1/ (stored 0%)
  adding: content/logs/mlp-model1/train/ (stored 0%)
  adding: content/logs/mlp-model1/train/events.out.tfevents.1591460426.b1c363ddd4e5.121.267050.v2 (deflated 74%)
  adding: content/logs/mlp-model1/train/events.out.tfevents.1591460846.b1c363ddd4e5.121.271668.v2 (deflated 74%)
  adding: content/logs/mlp-model/ (stored 0%)
  adding: content/logs/mlp-model/train/ (stored 0%)
  adding: content/logs/mlp-model/train/plugins/ (stored 0%)
  adding: content/logs/mlp-model/train/plugins/profile/ (stored 0%)
  adding: content/logs/mlp-model/train/plugins/profile/2020_06_06_16_58_14/ (stored 0%)
  adding: content/logs/mlp-model/train/plugins/profile/2020_06_06_16_58_14/b1c363ddd4e5.trace.json.gz (deflated 0%)
  adding: content/logs/mlp-model/train/plugins/profile/2020_06_06_16_58_14/b1c363ddd4e5.input_pipeline.pb (deflated 69%)
  adding: content/logs/mlp-model/train/plugins/profile/2020_06_06_16_58_14/b1c363ddd4e5

In [0]:
files.download("/content/file2.zip")