In [1]:
# Colab-only: mount Google Drive at /content/drive so files stored under
# MyDrive can be read from the notebook's filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Colab-only: extract the train and test archives from the mounted Drive into /content/data/.
# NOTE(review): the archive paths are relative, so this presumably relies on the
# working directory being /content — confirm before running elsewhere.
!unzip "drive/MyDrive/train.zip" -d "/content/data/"
!unzip "drive/MyDrive/test.zip" -d "/content/data/"

In [33]:
import os 
import imageio
from IPython.display import display, Image
from sklearn.preprocessing import OneHotEncoder
import numpy as np 
import random
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [34]:
# Directories holding the spectrogram images, relative to the working directory.
train_path = 'data/train/images/'
test_path = 'data/test/images/'

# One sub-directory per class. Hidden entries (e.g. .ipynb_checkpoints) and
# stray files are ignored so that every entry can safely be listed as a folder;
# sorting makes the order independent of the filesystem's listing order.
classes = sorted(
    f for f in os.listdir(train_path)
    if not f.startswith('.') and os.path.isdir(os.path.join(train_path, f))
)
# Label order used for training: the index of a name in this list is its class id.
# Folders not named here are mapped to 'unknown'.
targets = ['yes','no','up','down','left','right','on','off','stop','go','_background_noise_','unknown']
# Same order as `targets`, but background noise is reported as silence in predictions.
prediction_classes = ['yes','no','up','down','left','right','on','off','stop','go','silence','unknown']

# Images are 128x87 with 1 channel; the dimensions are flipped so that time
# is the first axis of the model input.
image_shape = (87,128)
num_classes = len(targets)

In [35]:
def get_image_names():
  """Collect the training image paths, grouped by class folder.

  Returns:
    A list with one entry per folder in `classes`. Each entry is a list of
    `[path, label]` pairs, where `label` is the folder's index in `targets`,
    or the index of 'unknown' when the folder is not one of the targets.
  """
  grouped = []
  for class_name in classes:
    # Folders that are not explicit targets all share the 'unknown' label.
    label = targets.index(class_name if class_name in targets else 'unknown')
    folder = train_path + class_name + '/'
    grouped.append([[folder + file_name, label] for file_name in os.listdir(folder)])
  return grouped

In [36]:
def split_names(image_list, num_batches=2):
  """Randomly split each class's items across `num_batches` chunks.

  Every item is assigned to exactly one chunk. When a class does not divide
  evenly, the first chunks receive one extra item each, so no item is dropped.
  The input lists are left untouched; shuffling is done on copies.

  Args:
    image_list: list of per-class lists (e.g. the output of get_image_names).
    num_batches: number of chunks to produce; must be at least 1.

  Returns:
    A list of `num_batches` chunks. Each chunk holds one list per class, in
    the same class order as `image_list`.

  Raises:
    ValueError: if `num_batches` is smaller than 1.
  """
  if num_batches < 1:
    raise ValueError("num_batches must be at least 1")

  batches = [[] for _ in range(num_batches)]
  for class_items in image_list:
    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(class_items)
    random.shuffle(shuffled)
    # `extra` chunks get one more item than `base` so the remainder is kept.
    base, extra = divmod(len(shuffled), num_batches)
    start = 0
    for k in range(num_batches):
      end = start + base + (1 if k < extra else 0)
      batches[k].append(shuffled[start:end])
      start = end
  return batches

In [37]:
def make_training_batch(image_names):
  """Load a chunk of labeled images into arrays ready for training.

  Args:
    image_names: list of per-class lists of `[path, label]` pairs, where
      `label` is an integer class id in `range(num_classes)`.

  Returns:
    A tuple `(train_x, train_y)`: `train_x` stacks the loaded images, each
    shaped `image_shape`; `train_y` is a float one-hot matrix with exactly
    `num_classes` columns, where column j corresponds to class id j.
  """
  train_x, train_y = [], []
  # training images are grouped by class
  for class_images in image_names:
    for path, label in class_images:
      # load the image as a np array
      x = np.array(imageio.imread(path))
      # flip the image so the first axis is timesteps
      x = np.transpose(x)
      # force a uniform dimension for the model
      # NOTE(review): np.resize repeats/truncates the flattened data rather
      # than interpolating, so it is only a no-op for images that already
      # have image_shape after the transpose — confirm all inputs do.
      x = np.resize(x,image_shape)
      train_x.append(x)
      train_y.append(label)
  # convert outer list to a np array
  train_x = np.array(train_x)
  # One-hot encode against the fixed class count so the width and column
  # order never depend on which labels happen to appear in this chunk
  # (also yields a (0, num_classes) array for an empty chunk).
  labels = np.asarray(train_y, dtype=int)
  train_y = np.eye(num_classes)[labels]
  return (train_x, train_y)


In [38]:
def training_loop(num_times=1, num_batches=1, num_epochs=50, model=None):
  """Train a model on the training images, one chunk at a time.

  Args:
    num_times: how many times to re-split and go over the whole training set.
    num_batches: number of chunks the training set is divided into per pass;
      only one chunk is loaded into memory at a time.
    num_epochs: maximum epochs per `fit` call (early stopping may end sooner).
    model: a compiled Keras model. If None, a new model is built and compiled
      with Adam (learning rate 0.0001), categorical cross-entropy and accuracy.

  Returns:
    The trained model (the one passed in, or the one built here).
  """
  # build the model if needed; it must be compiled before fit() can be called,
  # and the callbacks below monitor 'val_accuracy', so accuracy is tracked
  if model is None:
    model = build_model()
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        loss="categorical_crossentropy",
        metrics=['accuracy'])

  # callbacks used in training: stop when validation accuracy stalls and keep
  # the best-scoring weights on disk
  early_stop = EarlyStopping(monitor='val_accuracy',min_delta=.001, patience=10, restore_best_weights=True)
  checkpoint = ModelCheckpoint('best_model.h5',monitor='val_accuracy', save_best_only=True,mode='max')

  # get filenames of training images, labeled with their classes
  name_list = get_image_names()
  for e in range(num_times):
    print("Loop {}".format(e))
    # divide training data into smaller chunks so that only one chunk of
    # images has to be held in RAM at a time
    name_batches = split_names(name_list, num_batches)

    for k in range(num_batches):
      print("Batch {}".format(k))
      train_x, train_y = make_training_batch(name_batches[k])
      # images are loaded class by class; shuffle so the validation split
      # (taken from the end of the arrays) contains a mix of classes
      shuffler = np.random.permutation(len(train_x))
      train_x = train_x[shuffler]
      train_y = train_y[shuffler]
      model.fit(train_x,train_y,batch_size=50,epochs=num_epochs, callbacks=[early_stop,checkpoint], validation_split = 0.2)

  return model

In [39]:
def build_model():
  """Assemble the (uncompiled) CNN + bidirectional-LSTM classifier.

  The network takes inputs of shape `image_shape`, applies two strided 1D
  convolutions with batch normalization, then three bidirectional LSTM
  layers, and ends with a softmax over `num_classes` classes.

  Returns:
    A `keras.Sequential` model that has not been compiled.
  """
  def bi_lstm(return_sequences):
    # One bidirectional LSTM stage; only the last stage collapses the sequence.
    return layers.Bidirectional(
        layers.LSTM(128, activation='relu', return_sequences=return_sequences, dropout=0.3))

  stack = [
      layers.Input(shape=image_shape),
      # feature extraction: 1D convolutions slide along the first (time) axis
      layers.Conv1D(filters=128, kernel_size=7, strides=2),
      layers.BatchNormalization(),
      layers.Conv1D(filters=128, kernel_size=7, strides=2),
      # normalization followed by relu activation
      layers.BatchNormalization(),
      layers.Activation('relu'),
      # sequence modelling: three stacked bidirectional LSTM layers
      bi_lstm(True),
      bi_lstm(True),
      bi_lstm(False),
      # classification head: one probability per class
      layers.Dense(units=num_classes, activation='softmax'),
  ]

  model = keras.Sequential()
  for layer in stack:
    model.add(layer)
  return model

In [40]:
# Build the network and configure it for training: Adam with a small learning
# rate, categorical cross-entropy on the one-hot labels, accuracy as the metric.
model = build_model()
opt = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_8 (Conv1D)            (None, 41, 128)           114816    
_________________________________________________________________
batch_normalization_6 (Batch (None, 41, 128)           512       
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 18, 128)           114816    
_________________________________________________________________
batch_normalization_7 (Batch (None, 18, 128)           512       
_________________________________________________________________
activation_13 (Activation)   (None, 18, 128)           0         
_________________________________________________________________
bidirectional_23 (Bidirectio (None, 18, 256)           263168    
_________________________________________________________________
bidirectional_24 (Bidirectio (None, 18, 256)          

In [None]:
# Train the compiled model: one pass over the training set as a single chunk,
# for at most 50 epochs.
training_loop(
    num_times=1,
    num_batches=1,
    num_epochs=50,
    model=model,
)

Loop 0
Batch 0
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50

In [None]:
def read_test_images(num=2000,image_names=()):
  """Load up to `num` test images, preprocessed like the training images.

  Only the first `num` entries of `image_names` are considered. Empty names,
  hidden entries (names starting with '.') and names that are not regular
  files under `test_path` are skipped.

  Args:
    num: maximum number of names to look at.
    image_names: sequence of file names relative to `test_path`.

  Returns:
    A tuple `(names, images)`: `names` lists the files that were actually
    loaded, and `images` is an array with the matching images in the same
    order, each shaped `image_shape`.
  """
  images = []
  names = []
  for name in image_names[:max(num, 0)]:
    # skip blanks and hidden entries such as .ipynb_checkpoints
    if not name or name.startswith("."):
      continue
    path = os.path.join(test_path, name)
    # skip anything that is not a readable file (missing entries, directories)
    if not os.path.isfile(path):
      continue
    names.append(name)
    x = np.array(imageio.imread(path))
    # same preprocessing as training: time axis first, then force image_shape
    x = np.transpose(x)
    x = np.resize(x,image_shape)
    images.append(x)
  return (names,np.array(images))

def predict_labels(model, test_x):
  """Predict a class name for every input in `test_x`.

  Args:
    model: object whose `predict(test_x)` returns one score vector per input.
    test_x: batch of inputs passed straight to `model.predict`.

  Returns:
    A list with one entry of `prediction_classes` per input, chosen as the
    position of the highest score in that input's score vector.
  """
  scores = model.predict(test_x)
  return [prediction_classes[np.argmax(row)] for row in scores]

def evaluate_model(model):
  """Predict a label for every test image and write them to predictions.csv.

  The test directory is processed in chunks of 2000 file names so that only
  one chunk of images is held in memory at a time. The output file has a
  `fname,label` header followed by one row per image that could be loaded,
  with a `.png` extension rewritten to `.wav`.

  Args:
    model: trained model passed on to `predict_labels`.
  """
  test_images = os.listdir(test_path)
  test_num = len(test_images)
  batch_size = 2000
  # the context manager closes the file even if loading or prediction fails
  with open("predictions.csv","w") as pred_file:
    pred_file.write("fname,label\n")
    for n in range(0,test_num,batch_size):
      # Slice by position: entries skipped by read_test_images must still
      # count as consumed, otherwise later chunks would repeat some files
      # and the tail of the listing would never be reached.
      chunk = test_images[n:n + batch_size]
      image_names, image_batch = read_test_images(batch_size,chunk)
      print(n)
      # nothing loadable in this chunk, so there is nothing to predict
      if len(image_names) == 0:
        continue
      label_batch = predict_labels(model,image_batch)
      for name, label in zip(image_names, label_batch):
        # swap only the file extension, not every "png" inside the name
        stem, ext = os.path.splitext(name)
        im = stem + ".wav" if ext == ".png" else name
        pred_file.write("{},{}\n".format(im,label))

In [None]:
# Run the trained model over the test set and write predictions.csv.
evaluate_model(model=model)