<a href="https://colab.research.google.com/github/mcui5/dl-final/blob/main/Ensemble_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import tensorflow as tf 
from tensorflow.keras import Model
import numpy as np
import pickle 
import os

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
'''
Functions for preprocessing and splitting/shuffling the data for the different
measures of accuracy 
'''
def preprocess(filepath): 
  """
    1. Unpickle file
    2. Separate 
    3. One-hot encode labels 

    :inputs: 
    filepath: filepath to the pickle file in Drive 

    :returns: 
    (inputs, labels, folders)
  """
  
  with open(filepath, 'rb') as fo:
    pickle_output = pickle.load(fo, encoding='bytes')
  
  inputs = [row[0] for row in pickle_output]
  inputs = [inputs[i][:173] for i in range(len(inputs))]
  inputs = np.array(inputs)
  labels = np.array(pickle_output)[:, 1]
  folders = np.array(pickle_output)[:, 2]

  return (inputs, labels, folders)

'''
Moved to LSTM_3 file
'''
def split(inputs, labels, folders, test_folder_idx):
  """
    Split data into training and testing data 

    :inputs: 
    the outputs from preprocess 
    test_folder_idx: index of the folder that will be used for testing

    :return: 
    one quadruple, (train_inputs, train_labels, test_inputs, test_labels)
  """
  test_indices = np.nonzero(folders == test_folder_idx)
  train_indices = np.nonzero(folders != test_folder_idx)

  return (inputs[train_indices], labels[train_indices], inputs[test_indices], labels[test_indices])

  '''
Move into model files 
'''
def shuffle(inputs, labels, test_fraction):
  '''
  shuffle collection of all data, and split into testing and training, 15%:85%

  :inputs: 
    the outputs from preprocess (inputs and labels)
    test_fraction: percentage of inputs that will be used for testing
  
  :return: 
    one quadruple, (train_inputs, train_labels, test_inputs, test_labels)
  '''
  indices = np.arange(labels.shape[0])
  np.random.shuffle(indices)
  inputs = np.take(inputs, indices, axis=0)
  labels = np.take(labels, indices, axis=0)

  num_test = int(test_fraction * labels.shape[0])
  test_inputs = inputs[:num_test]
  test_labels = labels[:num_test]
  train_inputs = inputs[num_test:]
  train_labels = labels[num_test:]

  return (train_inputs, train_labels, test_inputs, test_labels)

In [4]:
# LSTM3 MODEL 
class LSTM3(tf.keras.Model):
  """
    Model based on LSTM3 in Table II of the paper
  """
  def __init__(self, batch_size): 

    super(LSTM3, self).__init__()

    self.num_classes = 10 
    self.lstm1_units = 256
    self.lstm2_units = 128
    self.lstm3_units = 64 
    self.dropout_rate = 0.2
    self.dense_size = 10 

    self.learning_rate = 1e-4
    self.batch_size = batch_size 

    self.lstm1_layer = tf.keras.layers.LSTM(self.lstm1_units, return_sequences=True, dropout=self.dropout_rate)
    self.lstm2_layer = tf.keras.layers.LSTM(self.lstm2_units, return_sequences=True, dropout=self.dropout_rate)
    self.lstm3_layer = tf.keras.layers.LSTM(self.lstm3_units, dropout=self.dropout_rate)

    self.dense_layer = tf.keras.layers.Dense(self.num_classes, activation='softmax')
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)

  '''
  Calls all layers in model
  Function returns probabilities
  '''
  def call(self, inputs): 
    layer1_output = self.lstm1_layer(inputs, None)
    layer2_output = self.lstm2_layer(layer1_output, None)
    layer3_output = self.lstm3_layer(layer2_output, None) 
    probabilities = self.dense_layer(layer3_output) 

    return probabilities 
  
  '''
  Computes categorical cross-entropy loss (per the paper)
  Returns the average loss of a batch
  '''
  def loss(self, probabilities, labels):
    losses = tf.keras.losses.categorical_crossentropy(labels, probabilities)
    return tf.reduce_mean(losses)

In [7]:

class CNNLSTMModel(tf.keras.Model):
  """
    Model based on CNN-LSTM in Table III of the paper
  """
  def __init__(self, num_batches):
    super(CNNLSTMModel, self).__init__()

    self.lstm_dropout = 0.2
    self.dropout_rate = 0.25
    self.lstm_size = 256
    self.batch_size = num_batches

    #adam optimizer
    self.optimizer = tf.keras.optimizers.Adam(lr = 1e-4)

    #initialize layers
    self.lstm1 = tf.keras.layers.LSTM(self.lstm_size, dropout=self.lstm_dropout)
    self.dense1 = tf.keras.layers.Dense(10, activation='softmax')

    self.conv1 = tf.keras.layers.Conv2D(filters=4, kernel_size=(5,5), strides=(4,1), activation='relu')
    self.conv2 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3,3), strides=(2,1), activation='relu')
    self.conv3 = tf.keras.layers.Conv2D(filters=64, kernel_size=(2,2), strides=(2,1), activation='relu')
   
    self.conv4 = tf.keras.layers.Conv2D(filters=300, kernel_size=(2,2), strides=(1,1), activation='relu')

    self.maxpool1 = tf.keras.layers.MaxPooling2D(pool_size=(3, 1), strides=(2,1))
    self.maxpool2 = tf.keras.layers.MaxPooling2D(pool_size=(3, 1), strides=(2,1))

    self.dropout1 = tf.keras.layers.Dropout(self.dropout_rate)
    self.dropout2 = tf.keras.layers.Dropout(self.dropout_rate)
    self.dropout3 = tf.keras.layers.Dropout(self.dropout_rate)
  
  '''
  Calls all layers in model
  Function returns probabilities
  '''
  def call(self, inputs):
    inputs = tf.expand_dims(inputs, axis=3)

    convlayer1 = self.conv1(inputs)
    maxpool1 = self.maxpool1(convlayer1)

    drop1 = self.dropout1(maxpool1)
    convlayer3 = self.conv3(drop1)
    maxpool2 = self.maxpool2(convlayer3)

    drop2 = self.dropout2(maxpool2)
    convlayer4 = self.conv4(drop2)
    drop3 = self.dropout3(convlayer4)

    reshape = tf.reshape(drop3, (self.batch_size,300,-1))


    lstm = self.lstm1(reshape)
    dense = self.dense1(lstm)

    return dense

  def loss(self, probabilities, labels):
    """
    Calculates average categorical cross entropy loss of the prediction

    :param probabilities: a matrix of logits as a tensor
    :param labels: matrix of labels containing the labels
    :return: the loss of the model as a tensor of size 1

    As cited in the paper, Table IV Experimental Results, authors used Categorical
  cross entropy loss to measure their CNN+LSTM models
    """

    losses = tf.keras.losses.categorical_crossentropy(labels, probabilities, from_logits=False)
    return tf.reduce_mean(losses)

In [8]:
# CNN MODEL
class CNNModel(tf.keras.Model):

  def __init__(self, num_batches):
    super(CNNModel, self).__init__()

    self.dropout_rate = 0.25
    self.batch_size = num_batches

    #adam optimizer
    self.optimizer = tf.keras.optimizers.Adam(lr = 1e-4)

    #initialize layers
    self.dense1 = tf.keras.layers.Dense(10, activation='softmax')

    self.conv1 = tf.keras.layers.Conv2D(filters=4, kernel_size=(5,5), strides=(4,1), activation='relu')
    self.conv2 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3,3), strides=(2,1), activation='relu')
    self.conv3 = tf.keras.layers.Conv2D(filters=64, kernel_size=(2,2), strides=(2,1), activation='relu')
   
    self.conv4 = tf.keras.layers.Conv2D(filters=300, kernel_size=(2,2), strides=(1,1), activation='relu')

    self.maxpool1 = tf.keras.layers.MaxPooling2D(pool_size=(3, 1), strides=(2,1))
    self.maxpool2 = tf.keras.layers.MaxPooling2D(pool_size=(3, 1), strides=(2,1))

    self.dropout1 = tf.keras.layers.Dropout(self.dropout_rate)
    self.dropout2 = tf.keras.layers.Dropout(self.dropout_rate)
    self.dropout3 = tf.keras.layers.Dropout(self.dropout_rate)
  
  '''
  Calls all layers in model
  Function returns probabilities
  '''
  def call(self, inputs):
    inputs = tf.expand_dims(inputs, axis=3)

    convlayer1 = self.conv1(inputs)
    convlayer2 = self.conv2(convlayer1)
    maxpool1 = self.maxpool1(convlayer2)

    drop1 = self.dropout1(maxpool1)
    convlayer3 = self.conv3(drop1)
    maxpool2 = self.maxpool2(convlayer3)

    drop2 = self.dropout2(maxpool2)
    convlayer4 = self.conv4(drop2)
    drop3 = self.dropout3(convlayer4)
    drop3 = tf.reshape(drop3, [self.batch_size, -1])
    dense = self.dense1(drop3)

    return dense

  def loss(self, probabilities, labels):
    """
    Calculates average cross entropy loss of the prediction

    :param probabilities: a matrix of logits as a tensor
    :param labels: matrix of labels containing the labels
    :return: the loss of the model as a tensor of size 1

    As cited in the paper, Table IV Experimental Results, authors used Categorical
    cross entropy loss to measure their CNN+LSTM models
    """

    losses = tf.keras.losses.categorical_crossentropy(labels, probabilities, from_logits=False)
    return tf.reduce_mean(losses)

In [9]:
# GENERAL TRAIN FUNCTION FOR ALL THREE OF OUR MODELS 
def train(model, train_inputs, train_labels):
  """
    trains model by batching train_inputs and updates weights based on loss
  """
  for i in range(len(train_inputs) // model.batch_size):
      # getting the proper batch 
      start = i * model.batch_size 
      inputs = train_inputs[start : start + model.batch_size]
      labels = train_labels[start : start + model.batch_size]

      with tf.GradientTape() as tape:
          # forward pass 
          logits = model.call(inputs)
          loss = model.loss(logits, labels)
                  
      # backprop 
      gradients = tape.gradient(loss, model.trainable_variables)
      model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

In [10]:
def accuracy_prediction_integration(model, probabilities, oh_labels, noh_labels): 
  """
    :inputs: 
    probabilities (batch_sz, num_classes) -> (100, 10)
    for the ten classes, 
    oh-labels for each batch -> (100, 10) (one hotted)
    noh-labels for each batch -> (100) (indices)

    :returns: 
    a list of 10 accuracies, 0-9 represent the accuracies for each class -> (10,)
  """
  num_right_per_class = np.zeros(10)
  class_predictions = tf.argmax(probabilities, 1) # 100 predictions
  # for each of the actual labels, count the number of each class 
  number_per_class = tf.reduce_sum(oh_labels, axis=0) 

  for i in range(model.batch_size): 
    # check if predictions is same as label 
    correct_class = noh_labels[i]
    if class_predictions[i] == correct_class: 
      num_right_per_class[correct_class] += 1 

  broadcasted_acc= num_right_per_class / tf.reshape(number_per_class, (1, 10))
  broadcasted_acc = tf.cast(broadcasted_acc, dtype=tf.float32)

  # removed nans, which may occur if there has been no samples from specific class
  # for a given batch
  nans_removed = np.where(tf.math.is_nan(broadcasted_acc), tf.zeros(broadcasted_acc.shape, dtype=tf.float32), broadcasted_acc)
  return nans_removed * probabilities # (100, 10)


In [11]:
def test(models, test_inputs, test_oh_labels, test_noh_labels, is_binary=False): 
  """
    returns NUMBER CORRECT PREDICTIONS for a single FOLDER 
  """
  DANGEROUS_CLASSES = tf.convert_to_tensor([1, 3, 4, 6, 7, 8], dtype=tf.int64)
  NONDANGEROUS_CLASSES = tf.convert_to_tensor([0, 2, 5, 9], dtype=tf.int64)
  BATCH_SIZE = models[0].batch_size
  num_batches = len(test_inputs) // BATCH_SIZE
  total_right = 0
  num_right = 0 
  for i in range(num_batches):
      # getting the proper batch 
      start = i * BATCH_SIZE
      inputs = test_inputs[start : start + BATCH_SIZE]
      oh_labels = test_oh_labels[start : start + BATCH_SIZE]
      noh_labels = test_noh_labels[start : start + BATCH_SIZE]

      # calling the model to get our probabilities
      ensemble_output = ensemble(models, inputs, oh_labels, noh_labels, is_binary)

      if is_binary: 
        noh_labels = tf.map_fn(lambda x: x in DANGEROUS_CLASSES, noh_labels)

      correct_predictions = tf.equal(ensemble_output, noh_labels)    
      num_right += tf.reduce_sum(tf.cast(correct_predictions, tf.float32)) 

  return num_right 

In [12]:
def ensemble(models, inputs, oh_labels, noh_labels, is_binary=False): 
  """
    :inputs: 
      list of models to be included in the ensemble + the labels 

    :return: 
      Accuracy-Prediction Integration Method ensemble will return predictions for a batch 
  """
  if is_binary: 
    BATCH_SIZE, NUM_CLASSES = inputs.shape[0], 2
  else: 
    BATCH_SIZE, NUM_CLASSES = inputs.shape[0], oh_labels.shape[1]

  model_integration_products = np.zeros((BATCH_SIZE, NUM_CLASSES))

  DANGEROUS_CLASSES = tf.convert_to_tensor([1, 3, 4, 6, 7, 8], dtype=tf.int64)
  NONDANGEROUS_CLASSES = tf.convert_to_tensor([0, 2, 5, 9], dtype=tf.int64)

  for model in models: 
    probabilities = model.call(inputs)
    accuracy_by_class = accuracy_prediction_integration(model, probabilities, oh_labels, noh_labels)

    if is_binary: 
      dangerous_accuracy = tf.reduce_sum(tf.gather(accuracy_by_class, DANGEROUS_CLASSES, axis=1), axis=1)
      
      nondangerous_accuracy = tf.reduce_sum(tf.gather(accuracy_by_class, NONDANGEROUS_CLASSES, axis=1), axis=1)
      
      combined = tf.stack([nondangerous_accuracy, dangerous_accuracy], axis=1)
      model_integration_products += combined
    else: 
      model_integration_products += accuracy_by_class
      
    return tf.argmax(model_integration_products, axis=1)   

In [13]:
'''
main function running all 3 models with 10-fold cross-validation
'''
pickled_path = '/content/gdrive/Shared drives/CS1470-Final/mfccs.pkl'
inputs, labels, folders = preprocess(pickled_path)

batch_size, num_epochs = 50, 100

accuracy, binary_accuracy = 0, 0 
total_tested = 0 
for i in range(10): 
  print("Split/test folder: ", i + 1) 
  models = [LSTM3(batch_size), CNNLSTMModel(batch_size), CNNModel(batch_size)] 

  tr_in, tr_lb, te_in, te_lb = split(inputs, labels, folders, i + 1)
  tr_in = tf.convert_to_tensor(tr_in, dtype=tf.float32)
  te_in = tf.convert_to_tensor(te_in, dtype=tf.float32)
  te_lb_noh = tf.convert_to_tensor(te_lb, dtype=tf.int64)
  tr_lb = tf.one_hot(tr_lb, 10, dtype=tf.int64)
  te_lb_oh = tf.one_hot(te_lb, 10, dtype=tf.int64)

  for model in models: 
    for _ in range(num_epochs): 
      train(model, tr_in, tr_lb)

  per_fold_acc = test(models, te_in, te_lb_oh, te_lb_noh)
  accuracy += per_fold_acc
  binary_fold_acc = test(models, te_in, te_lb_oh, te_lb_noh, is_binary=True)
  binary_accuracy += binary_fold_acc
     
  per_fold_tested = (len(te_lb) - (len(te_lb) % batch_size))
  total_tested += per_fold_tested
  print('per-fold acc (10-class): ' + str(per_fold_acc / per_fold_tested))
  print('per-fold acc (BINARY):   ' + str(binary_fold_acc / per_fold_tested))
  
print("Total Average Accuracy (10-class): ", accuracy / total_tested)
print("Total Average Accuracy (BINARY):   ", binary_accuracy / total_tested)

Split/test folder:  1
per-fold acc (10-class): tf.Tensor(0.52615386, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.7784615, shape=(), dtype=float32)
Split/test folder:  2
per-fold acc (10-class): tf.Tensor(0.6476923, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.83692306, shape=(), dtype=float32)
Split/test folder:  3
per-fold acc (10-class): tf.Tensor(0.6, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.82, shape=(), dtype=float32)
Split/test folder:  4
per-fold acc (10-class): tf.Tensor(0.60571426, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.71428573, shape=(), dtype=float32)
Split/test folder:  5
per-fold acc (10-class): tf.Tensor(0.62142855, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.85571426, shape=(), dtype=float32)
Split/test folder:  6
per-fold acc (10-class): tf.Tensor(0.595, shape=(), dtype=float32)
per-fold acc (BINARY):   tf.Tensor(0.775, shape=(), dtype=float32)
Split/test folder:  7
per-fol

In [None]:
'''
Running shuffled computations for all 3 models:
extracted 15% for testing (follows paper) - Inflated scores
'''
pickled_path = '/content/gdrive/Shared drives/CS1470-Final/mfccs.pkl'
inputs, labels, folders = preprocess(pickled_path)

batch_size, num_epochs = 50, 50

models = [LSTM3(batch_size), CNNLSTMModel(batch_size), CNNModel(batch_size)] 
print(np.shape(inputs))
tr_in, tr_lb, te_in, te_lb = shuffle(inputs, labels, 0.15)

tr_in = tf.convert_to_tensor(tr_in, dtype=tf.float32)
te_in = tf.convert_to_tensor(te_in, dtype=tf.float32)
te_lb_noh = tf.convert_to_tensor(te_lb, dtype=tf.int64)
tr_lb = tf.one_hot(tr_lb, 10, dtype=tf.int64)
te_lb_oh = tf.one_hot(te_lb, 10, dtype=tf.int64)

for model in models: 
    for _ in range(num_epochs): 
      train(model, tr_in, tr_lb)

acc, binary_acc = test(models, te_in, te_lb_oh, te_lb_noh), test(models, te_in, te_lb_oh, te_lb_noh, is_binary=True)
print(np.shape(te_in))
print(np.shape(te_lb))
tested = (len(te_lb) - (len(te_lb) % batch_size))
print("10 class Accuracy: ", acc / tested)
print("BINARY Accuracy: ", binary_acc / tested)

(6573, 173, 40)
(985, 173, 40)
(985,)
10 class Accuracy:  tf.Tensor(0.8736842, shape=(), dtype=float32)
BINARY Accuracy:  tf.Tensor(0.9442105, shape=(), dtype=float32)
