<a href="https://colab.research.google.com/github/mcui5/dl-final/blob/main/LSTM_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf 
from tensorflow.keras import Model
import numpy as np
import pickle 
import os
import pandas as pd 


from google.colab import drive 
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
def preprocess(filepath, max_frames=173):
  """
    Load the pickled dataset and separate it into three parallel arrays.

    1. Unpickle file
    2. Separate every row into its features (row[0]), label (row[1]) and folder (row[2])
    3. Keep only the first `max_frames` entries of each row's features

    Labels are returned exactly as stored in the pickle; they are NOT one-hot
    encoded here (callers do that themselves).

    :inputs:
    filepath: filepath to the pickle file in Drive
    max_frames: how many leading entries of each feature array to keep
                (defaults to 173, the value that used to be hard-coded)

    :returns:
    (inputs, labels, folders), numpy arrays with one entry per pickled row
  """
  # NOTE(review): pickle.load can execute arbitrary code -- only point this at
  # files we produced ourselves.
  with open(filepath, 'rb') as fo:
    pickle_output = pickle.load(fo, encoding='bytes')

  # Pull each column out row by row. Converting the whole ragged structure
  # with np.array(pickle_output) (a feature matrix next to two scalars) raises
  # ValueError on recent NumPy versions and copies the dataset needlessly.
  inputs = np.array([row[0][:max_frames] for row in pickle_output])
  labels = np.array([row[1] for row in pickle_output])
  folders = np.array([row[2] for row in pickle_output])

  return (inputs, labels, folders)

def split(inputs, labels, folders, test_folder_idx):
  """
    Hold one folder out for testing and train on everything else.

    :inputs:
    inputs, labels, folders: the outputs from preprocess
    test_folder_idx: folder value whose rows become the test set; every row
                     whose folder differs from it goes to the training set

    :return:
    one quadruple, (train_inputs, train_labels, test_inputs, test_labels)
  """
  # positions of the held-out rows, and of all remaining rows
  held_out_rows = np.nonzero(folders == test_folder_idx)
  remaining_rows = np.nonzero(folders != test_folder_idx)

  train_inputs, train_labels = inputs[remaining_rows], labels[remaining_rows]
  test_inputs, test_labels = inputs[held_out_rows], labels[held_out_rows]

  return (train_inputs, train_labels, test_inputs, test_labels)

def shuffle(inputs, labels, test_fraction):
  '''
  Randomly reorder all samples (inputs and labels together), then use the
  leading `test_fraction` of them for testing and the rest for training.

  :inputs: 
    inputs, labels: the outputs from preprocess
    test_fraction: fraction of the samples that will be used for testing
                   (e.g. 0.15 gives a 15%:85% test/train split)
  
  :return: 
    one quadruple, (train_inputs, train_labels, test_inputs, test_labels)
  '''
  # one random ordering, applied to both arrays so pairs stay aligned
  order = np.arange(labels.shape[0])
  np.random.shuffle(order)
  shuffled_inputs = np.take(inputs, order, axis=0)
  shuffled_labels = np.take(labels, order, axis=0)

  # the first `cut` shuffled samples are the test set (fraction is rounded down)
  cut = int(test_fraction * shuffled_labels.shape[0])

  return (shuffled_inputs[cut:], shuffled_labels[cut:],
          shuffled_inputs[:cut], shuffled_labels[:cut])

In [None]:
class LSTM3(tf.keras.Model):
  """
    Model based on LSTM3 in Table II of the paper: three stacked LSTM layers
    (256 -> 128 -> 64 units, each with input dropout) followed by a softmax
    dense layer over the 10 classes.
  """
  # class ids counted as dangerous:
  # car_horn, dog_bark, drilling, gun_shot, jackhammer, siren
  DANGEROUS_LABELS = (1, 3, 4, 6, 7, 8)

  def __init__(self, batch_size): 
    """
      :inputs:
      batch_size: number of samples per batch; stored on the model so that
                  code batching data for it can read model.batch_size
    """
    super().__init__()

    self.num_classes = 10 
    self.lstm1_units = 256
    self.lstm2_units = 128
    self.lstm3_units = 64 
    self.dropout_rate = 0.2 # Paper default is 0.2
    self.dense_size = 10 

    self.learning_rate = 1e-4
    self.batch_size = batch_size 

    # the first two LSTMs return the full sequence so the next LSTM can consume it;
    # the last one returns only its final output
    self.lstm1_layer = tf.keras.layers.LSTM(self.lstm1_units, return_sequences=True, dropout=self.dropout_rate)
    self.lstm2_layer = tf.keras.layers.LSTM(self.lstm2_units, return_sequences=True, dropout=self.dropout_rate)
    self.lstm3_layer = tf.keras.layers.LSTM(self.lstm3_units, dropout=self.dropout_rate)

    self.dense_layer = tf.keras.layers.Dense(self.num_classes, activation='softmax')
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)

  def call(self, inputs, training=None): 
    """
      Forward pass.

      :inputs:
      inputs: batch of sequences (presumably [batch, time, features] -- confirm against caller)
      training: forwarded to the LSTM layers. Their dropout is only applied
                when this is True; the default None leaves the decision to
                Keras, which means inference (no dropout) when call() is
                invoked directly.

      :returns:
      softmax probabilities over the num_classes classes, one row per sample
    """
    layer1_output = self.lstm1_layer(inputs, training=training)
    layer2_output = self.lstm2_layer(layer1_output, training=training)
    layer3_output = self.lstm3_layer(layer2_output, training=training) 
    probabilities = self.dense_layer(layer3_output) 

    return probabilities 

  def loss(self, probabilities, labels):
    """
      returns the mean categorical cross-entropy over the batch
      (labels are one-hot, probabilities are already softmaxed)
    """
    losses = tf.keras.losses.categorical_crossentropy(labels, probabilities, from_logits=False)
    return tf.reduce_mean(losses)

  def accuracy(self, probabilities, labels):
    """
      returns TOTAL NUMBER correct over a batch (does not average)
    """ 
    correct_predictions = tf.equal(tf.argmax(probabilities, 1), tf.argmax(labels, 1))    
    return tf.reduce_sum(tf.cast(correct_predictions, tf.float32))

  def _is_dangerous(self, class_ids):
    """
      maps a 1-D tensor of class ids to a boolean tensor of the same length,
      True where the id is one of DANGEROUS_LABELS
    """
    dangerous = tf.constant(self.DANGEROUS_LABELS, dtype=class_ids.dtype)
    # compare every id against every dangerous id in one shot:
    # [batch, 1] == [num_dangerous] broadcasts to [batch, num_dangerous]
    matches = tf.equal(tf.expand_dims(class_ids, -1), dangerous)
    return tf.reduce_any(matches, axis=-1)
  
  def accuracy_2(self, probabilities, labels): 
    """
      returns the TOTAL NUMBER correct for our binary classification 
      (dangerous versus non-dangerous sounds)

      A prediction counts as correct when the predicted class and the true
      class fall on the same side of the dangerous / non-dangerous divide,
      even if the exact class differs.
    """
    # predicted class -> dangerous or not
    classified_binary = self._is_dangerous(tf.argmax(probabilities, 1))
    # true class -> dangerous or not
    labels_binary = self._is_dangerous(tf.argmax(labels, 1))

    # count the overlap and return the number correct in the given batch 
    correct_predictions = tf.equal(classified_binary, labels_binary)
    return tf.reduce_sum(tf.cast(correct_predictions, tf.float32))


def train(model, train_inputs, train_labels):
  """
    trains model for one pass over train_inputs, batch by batch, updating the
    weights from the loss of each batch

    Only full batches of model.batch_size are used; a trailing partial batch
    is skipped.

    :inputs:
    model: the model to train (uses model.batch_size, model.loss, model.optimizer)
    train_inputs: training samples, indexable along the first axis
    train_labels: labels aligned with train_inputs
  """
  num_batches = len(train_inputs) // model.batch_size
  for batch_idx in range(num_batches):
    # getting the proper batch 
    start = batch_idx * model.batch_size 
    inputs = train_inputs[start : start + model.batch_size]
    labels = train_labels[start : start + model.batch_size]

    with tf.GradientTape() as tape:
      # forward pass in training mode: going through model(...) with
      # training=True (instead of calling model.call directly) is what
      # switches the LSTM layers' dropout on
      probabilities = model(inputs, training=True)
      loss = model.loss(probabilities, labels)

    # backprop 
    gradients = tape.gradient(loss, model.trainable_variables)
    model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

def test(model, test_inputs, test_labels, binary=False): 
  """
    returns the TOTAL number of correct predictions for a single FOLDER 
    (a count, not a ratio -- the caller divides by the number tested)

    Only full batches of model.batch_size are evaluated; samples in a
    trailing partial batch are not counted.

    :inputs:
    model: the model to evaluate
    test_inputs, test_labels: the held-out samples and their labels
    binary: if True score with model.accuracy_2 (dangerous vs non-dangerous),
            otherwise with model.accuracy (10-class)
  """
  batch = model.batch_size
  total_right = 0

  for batch_idx in range(len(test_inputs) // batch):
    # getting the proper batch 
    lo = batch_idx * batch
    hi = lo + batch

    # calling the model to get our probabilities 
    probabilities = model.call(test_inputs[lo:hi])
    if not binary:
      total_right += model.accuracy(probabilities, test_labels[lo:hi])
    else:
      total_right += model.accuracy_2(probabilities, test_labels[lo:hi])

  return total_right

In [None]:
# 10-fold cross validation: each folder is held out once for testing while a
# fresh model is trained on the other folders.
pickled_path = '/content/gdrive/Shared drives/CS1470-Final/mfccs.pkl'
inputs, labels, folders = preprocess(pickled_path)

batch_size = 50
num_epochs = 500

# running totals over all folds (correct counts and number of samples scored)
accuracy = 0
binary_accuracy = 0
total_tested = 0

for i in range(10):
  print("Split/test folder: ", i + 1) 

  # folders are numbered from 1, hence i + 1
  tr_in, tr_lb, te_in, te_lb = split(inputs, labels, folders, i + 1)
  tr_in = tf.convert_to_tensor(tr_in, dtype=tf.float32)
  tr_lb = tf.one_hot(tr_lb, 10, dtype=tf.int64)
  te_in = tf.convert_to_tensor(te_in, dtype=tf.float32)
  te_lb = tf.one_hot(te_lb, 10, dtype=tf.int64)

  # new, untrained model for every fold
  model = LSTM3(batch_size)
  for _ in range(num_epochs): 
    train(model, tr_in, tr_lb)

  per_fold_acc = test(model, te_in, te_lb)
  binary_fold_acc = test(model, te_in, te_lb, True)
  accuracy += per_fold_acc
  binary_accuracy += binary_fold_acc

  # test() only scores full batches, so divide by that count, not len(te_lb)
  per_fold_tested = (len(te_lb) // batch_size) * batch_size
  total_tested += per_fold_tested
  print('per-fold acc (10-class): ' + str(per_fold_acc / per_fold_tested))
  print('per-fold acc (BINARY):   ' + str(binary_fold_acc / per_fold_tested))

print("Total Average Accuracy (10-class): ", accuracy / total_tested)
print("Total Average Accuracy (BINARY):   ", binary_accuracy / total_tested)

Split/test folder:  1
per-fold acc (10-class): tf.Tensor(0.48307693, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.8261539, shape=(), dtype=float32)
Split/test folder:  2
per-fold acc (10-class): tf.Tensor(0.5030769, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.7876923, shape=(), dtype=float32)
Split/test folder:  3
per-fold acc (10-class): tf.Tensor(0.4757143, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.63714284, shape=(), dtype=float32)
Split/test folder:  4
per-fold acc (10-class): tf.Tensor(0.56857145, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.73142856, shape=(), dtype=float32)
Split/test folder:  5
per-fold acc (10-class): tf.Tensor(0.6742857, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.8642857, shape=(), dtype=float32)
Split/test folder:  6
per-fold acc (10-class): tf.Tensor(0.505, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.805, shape=(), dtype=float32)
Split/test folder:  

In [None]:
'''
Shuffled version: extracted 15% for testing (follows paper) - Inflated scores
'''
# Single random train/test split over all samples, ignoring folder boundaries.
pickled_path = '/content/gdrive/Shared drives/CS1470-Final/mfccs.pkl'
inputs, labels, folders = preprocess(pickled_path)

batch_size = 50
num_epochs = 250

model = LSTM3(batch_size)
print(np.shape(inputs))

# 15% of the shuffled samples are held out for testing
tr_in, tr_lb, te_in, te_lb = shuffle(inputs, labels, 0.15)
tr_in = tf.convert_to_tensor(tr_in, dtype=tf.float32)
tr_lb = tf.one_hot(tr_lb, 10, dtype=tf.int64)
te_in = tf.convert_to_tensor(te_in, dtype=tf.float32)
te_lb = tf.one_hot(te_lb, 10, dtype=tf.int64)

for _ in range(num_epochs): 
  train(model, tr_in, tr_lb)

acc = test(model, te_in, te_lb)
binary_acc = test(model, te_in, te_lb, True)
print(np.shape(te_in))
print(np.shape(te_lb))

# test() only scores full batches, so divide by that count, not len(te_lb)
tested = (len(te_lb) // batch_size) * batch_size
print("10 class Accuracy: ", acc / tested)
print("BINARY Accuracy: ", binary_acc / tested)

(6573, 173, 40)
(985, 173, 40)
(985, 10)
10 class Accuracy:  tf.Tensor(0.91473687, shape=(), dtype=float32)
BINARY Accuracy:  tf.Tensor(0.9631579, shape=(), dtype=float32)
