In [None]:
!pip install python_speech_features

In [None]:
import os
import pathlib
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import os
from scipy.io import wavfile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from keras.layers import Conv2D,MaxPooling2D,Flatten,LSTM,BatchNormalization,GlobalAveragePooling2D
from keras.layers import Dropout,Dense,TimeDistributed
from keras.models import Sequential
from keras.applications.resnet import ResNet50
from keras.utils.np_utils import to_categorical
from sklearn.utils.class_weight import compute_class_weight
from tqdm import tqdm
from python_speech_features import mfcc
import pickle
from keras.callbacks import ModelCheckpoint
 
import librosa as lr

In [None]:
import tensorflow as tf
# Show the installed TensorFlow version as the cell output.
tf.__version__


In [None]:
# Seed TensorFlow and NumPy before shuffling so that the file order -- and
# therefore the train/validation/test split taken from it -- is the same on
# every fresh kernel run.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Download and extract the dataset once; later runs reuse the local copy.
data_dir = pathlib.Path('data/mini_speech_commands')
if not data_dir.exists():
  tf.keras.utils.get_file(
      'mini_speech_commands.zip',
      origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
      extract=True,
      cache_dir='.', cache_subdir='data')

# Every entry of the dataset directory except the README is a command label.
commands = np.array(tf.io.gfile.listdir(str(data_dir)))
commands = commands[commands != 'README.md']
print('Commands:', commands)

# Collect every file found one level below a label directory and shuffle them.
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
filenames = tf.random.shuffle(filenames)
num_samples = len(filenames)
print('Number of total examples:', num_samples)
print('Number of examples per label:',
      len(tf.io.gfile.listdir(str(data_dir/commands[0]))))
print('Example file tensor:', filenames[0])

In [None]:
# Fixed split sizes taken from the shuffled file list: the first block is used
# for training, the next one for validation, and the tail for testing.
n_train = 6400
n_val = 1000
n_test = 600

train_files = filenames[:n_train]
val_files = filenames[n_train: n_train + n_val]
test_files = filenames[-n_test:]

print('Training set size', len(train_files))
print('Validation set size', len(val_files))
print('Test set size', len(test_files))
 
 
def decode_audio(audio_binary):
  """Decode WAV bytes and return the samples with the last axis squeezed out."""
  decoded = tf.audio.decode_wav(audio_binary)
  # decode_wav returns (audio, sample_rate); only the audio samples are kept.
  samples = decoded[0]
  return tf.squeeze(samples, axis=-1)
 
def get_label(file_path):
  """Return the second-to-last path component of `file_path` (the label dir)."""
  path_components = tf.strings.split(file_path, os.path.sep)

  # Index instead of tuple-unpacking so this also works inside a TensorFlow
  # graph.
  label = path_components[-2]
  return label

In [None]:
def get_waveform_and_label(file_path):
  """Load one audio file and return `(waveform, label)`.

  Args:
    file_path: path to the audio file (string tensor).

  Returns:
    A tuple of the waveform produced by `decode_audio` and the label
    produced by `get_label` for the same path.
  """
  # No print() calls here: this function is passed to `Dataset.map`, so
  # prints would only show symbolic tensors while the function is traced.
  label = get_label(file_path)
  audio_binary = tf.io.read_file(file_path)
  waveform = decode_audio(audio_binary)
  return waveform, label
 
 
 
# Build the (waveform, label) dataset from the training files.
AUTOTUNE = tf.data.AUTOTUNE
files_ds = tf.data.Dataset.from_tensor_slices(train_files)
waveform_ds = files_ds.map(get_waveform_and_label, num_parallel_calls=AUTOTUNE)

# Plot the first rows*cols waveforms on a grid, one subplot per clip.
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))
# axes.flat walks the grid row by row, matching the dataset order.
for ax, (audio, label) in zip(axes.flat, waveform_ds.take(n)):
  ax.plot(audio.numpy())
  ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
  label = label.numpy().decode('utf-8')
  ax.set_title(label)

plt.show()
 
 
 
def get_spectrogram(waveform):
  """Convert a 1-D waveform into a magnitude spectrogram of fixed length.

  The waveform is truncated or zero-padded to exactly 16000 samples before a
  short-time Fourier transform (frame_length=255, frame_step=128) is applied.

  Args:
    waveform: 1-D tensor of audio samples.

  Returns:
    The absolute value of the STFT of the fixed-length waveform.
  """
  input_len = 16000
  # Truncate first: without this, a clip longer than `input_len` would make
  # the padding size below negative and `tf.zeros` would fail.
  waveform = waveform[:input_len]

  # Padding for files with less than 16000 samples
  zero_padding = tf.zeros([input_len] - tf.shape(waveform), dtype=tf.float32)

  # Concatenate audio with padding so that all audio clips will be of the
  # same length
  waveform = tf.cast(waveform, tf.float32)
  equal_length = tf.concat([waveform, zero_padding], 0)
  spectrogram = tf.signal.stft(
      equal_length, frame_length=255, frame_step=128)

  # Keep only the magnitude of the complex STFT output.
  spectrogram = tf.abs(spectrogram)

  return spectrogram
 
 
# Pull a single (waveform, label) pair and compute its spectrogram.
waveform, label = next(iter(waveform_ds.take(1)))
label = label.numpy().decode('utf-8')
spectrogram = get_spectrogram(waveform)

print('Label:', label)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
audio_widget = display.Audio(waveform, rate=16000)
display.display(audio_widget)
 
 
def plot_spectrogram(spectrogram, ax):
  """Draw a log-scaled spectrogram on `ax` with time along the x-axis.

  Args:
    spectrogram: 2-D array of magnitudes indexed as (time, frequency). A 3-D
      array whose last axis has size 1 is also accepted and squeezed.
    ax: object exposing `pcolormesh(X, Y, C)` (e.g. a matplotlib Axes).
  """
  spectrogram = np.asarray(spectrogram)
  if spectrogram.ndim == 3 and spectrogram.shape[-1] == 1:
    # Drop a trailing channel axis so the transpose below stays 2-D.
    spectrogram = spectrogram[..., 0]

  # Convert the magnitudes to log scale and transpose so that the time is
  # represented in the x-axis (columns). The epsilon keeps zero-magnitude
  # bins (e.g. from zero padding) finite instead of -inf.
  log_spec = np.log(spectrogram.T + np.finfo(float).eps)
  height = log_spec.shape[0]
  width = log_spec.shape[1]
  X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
  Y = range(height)
  ax.pcolormesh(X, Y, log_spec)
 
 
# Two stacked panels: the raw waveform on top, its spectrogram below.
fig, axes = plt.subplots(2, figsize=(12, 8))
waveform_ax, spectrogram_ax = axes

timescale = np.arange(waveform.shape[0])
waveform_ax.plot(timescale, waveform.numpy())
waveform_ax.set_title('Waveform')
waveform_ax.set_xlim([0, 16000])

plot_spectrogram(spectrogram.numpy(), spectrogram_ax)
spectrogram_ax.set_title('Spectrogram')
plt.show()
 
 
def get_spectrogram_and_label_id(audio, label):
  """Map `(audio, label)` to `(spectrogram, label_id)`.

  The spectrogram gets an extra trailing axis, and the label id is the
  position at which `label` equals an entry of the global `commands` array.
  """
  spectrogram = tf.expand_dims(get_spectrogram(audio), -1)
  is_match = label == commands
  return spectrogram, tf.argmax(is_match)
 
 
# Spectrogram/label-id view of the training waveforms.
spectrogram_ds = waveform_ds.map(
    get_spectrogram_and_label_id, num_parallel_calls=AUTOTUNE)

# Show the first rows*cols spectrograms on a grid titled with their command.
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 10))
# axes.flat walks the grid row by row, matching the dataset order.
for ax, (spectrogram, label_id) in zip(axes.flat, spectrogram_ds.take(n)):
  plot_spectrogram(np.squeeze(spectrogram.numpy()), ax)
  ax.set_title(commands[label_id.numpy()])
  ax.axis('off')

plt.show()
 
 
def preprocess_dataset(files):
  """Turn a collection of file paths into a (spectrogram, label_id) dataset."""
  return (
      tf.data.Dataset.from_tensor_slices(files)
      .map(get_waveform_and_label, num_parallel_calls=AUTOTUNE)
      .map(get_spectrogram_and_label_id, num_parallel_calls=AUTOTUNE)
  )
 
 
# The training split reuses the spectrogram dataset already built from
# `train_files`; validation and test files go through the same pipeline.
train_ds = spectrogram_ds
val_ds = preprocess_dataset(val_files)
test_ds = preprocess_dataset(test_files)
# The printed label now names the object whose type is shown.
print("train_ds")
print(type(train_ds))

# Batch each split, then cache it and prefetch.
batch_size = 64
train_ds = train_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)
test_ds = test_ds.batch(batch_size)

train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)
test_ds = test_ds.cache().prefetch(AUTOTUNE)

In [None]:
# Fetch the first training batch and show the input shape and the label ids.
iterator = iter(train_ds)
next_element = next(iterator)
pt, en = next_element
print(pt.numpy().shape)
print(en.numpy())

In [None]:
# Fetch the first validation batch and show the input and label shapes.
iterator1 = iter(val_ds)
next_element1 = next(iterator1)
pt1, en1 = next_element1
print(pt1.numpy().shape)
print(en1.numpy().shape)

In [None]:
# The model input shape is the shape of one (unbatched) spectrogram.
spectrogram, _ = next(iter(spectrogram_ds.take(1)))
input_shape = spectrogram.shape
print('Input shape:', input_shape)
num_labels = len(commands)

# Fit the normalization layer on the training spectrograms only (labels are
# dropped by the map).
norm_layer = preprocessing.Normalization()
norm_layer.adapt(spectrogram_ds.map(lambda spec, _label: spec))

In [None]:
from keras import layers
from keras import models
from keras.callbacks import EarlyStopping

In [None]:
def CNN_model(f1, f2, f3, f4, k, a1, a2, d1, d2, op, ep, fitness):
  """Build, compile and train one CNN candidate and record its fitness.

  Reads the notebook globals `input_shape`, `norm_layer`, `num_labels`,
  `train_ds` and `val_ds`.

  Args:
    f1, f2, f3: filter counts of the convolutional stacks (f1: stack 1,
      f2: stacks 2 and 4, f3: stacks 3 and 5).
    f4: number of units in each of the two hidden Dense layers.
    k: side length of the square convolution kernel.
    a1: activation for stacks 1, 4, 5 and the hidden Dense layers.
    a2: activation for stacks 2 and 3.
    d1: dropout rate after stacks 1, 4 and 5.
    d2: dropout rate after stacks 2 and 3.
    op: optimizer passed to `model.compile`.
    ep: maximum number of training epochs.
    fitness: list that is mutated in place; a tuple
      `(val_accuracy, accuracy)` from the last trained epoch is appended.

  Returns:
    `(model, history)`: the trained model and the object returned by
    `model.fit`.
  """
  model = models.Sequential([
    layers.Input(shape=input_shape),
    preprocessing.Resizing(32, 32),
    norm_layer,
  ])

  def add_conv_stack(filters, activation, dropout_rate, n_convs):
    # n_convs same-padded convolutions, then batch norm, a stride-1 max pool
    # and dropout.
    for _ in range(n_convs):
      model.add(Conv2D(filters=filters, kernel_size=(k, k), padding="same",
                       activation=activation))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
    model.add(Dropout(dropout_rate))

  # The first convolution no longer passes `input_shape`: the model already
  # starts with an Input layer, so the argument was redundant.
  add_conv_stack(f1, a1, d1, 2)
  add_conv_stack(f2, a2, d2, 2)
  add_conv_stack(f3, a2, d2, 3)
  add_conv_stack(f2, a1, d1, 3)
  add_conv_stack(f3, a1, d1, 3)

  model.add(Flatten())
  model.add(BatchNormalization())
  model.add(Dense(units=f4, activation=a1))
  model.add(BatchNormalization())
  model.add(Dense(units=f4, activation=a1))
  model.add(Dense(units=num_labels, activation="softmax"))

  model.compile(
    optimizer=op,
    # The last layer already applies softmax, so the loss receives
    # probabilities, not logits.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
  )

  EPOCHS = ep

  history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    # `callbacks` is documented as a list of callback instances.
    callbacks=[tf.keras.callbacks.EarlyStopping(verbose=1, patience=10)],
  )
  # Fitness is taken from the last epoch that actually ran.
  fitness.append((history.history["val_accuracy"][-1], history.history["accuracy"][-1]))
  return model, history

In [None]:
# Search space, one entry per hyperparameter, in the positional order that
# CNN_model expects (f1, f2, f3, f4, k, a1, a2, d1, d2, op, ep).
# The population initializer treats a list as a set of discrete choices and a
# tuple as a (low, high) range sampled uniformly and rounded to one decimal.
bounds = [
    [16, 32, 64],                               # f1: filters, conv stack 1
    [32, 64, 128],                              # f2: filters, conv stacks 2 and 4
    [32, 64, 128],                              # f3: filters, conv stacks 3 and 5
    [128, 256, 512],                            # f4: units of the hidden Dense layers
    [3, 5],                                      # k: convolution kernel side length
    ["relu", "selu", "elu"],                    # a1: activation, stacks 1/4/5 and Dense
    ["relu", "selu", "elu"],                    # a2: activation, stacks 2 and 3
    (0.1, 0.5),                                 # d1: dropout rate, stacks 1/4/5
    (0.1, 0.5),                                 # d2: dropout rate, stacks 2 and 3
    ["adamax", "adadelta", "adam", "adagrad"],  # op: optimizer name
    [50, 60, 70, 80, 90, 100]                   # ep: maximum training epochs
]

In [None]:
import random

pop_size = 15


def _sample_gene(spec):
    """Draw one hyperparameter value from a single `bounds` entry."""
    if type(spec) is list:
        # Discrete choices.
        return random.choice(spec)
    if type(spec) is tuple:
        # Continuous (low, high) range, rounded to one decimal.
        return round(random.uniform(spec[0], spec[1]), 1)
    return spec


# Initial population: pop_size individuals, each with one sampled value per
# entry of `bounds`.
population = [[_sample_gene(spec) for spec in bounds] for _ in range(pop_size)]

print("Population:")
for set_number, hyperparameters in enumerate(population, start=1):
    print("Hyperparameters set", set_number, ":", hyperparameters)

In [None]:
# Display the current population as the cell output.
population

In [None]:
def mutation(individual, population, bounds, mutation_factor=0.8):
    """Build a differential-evolution mutant for `individual`.

    Three distinct members `a`, `b`, `c` are drawn from `population`
    (excluding one entry equal to `individual`). Each numeric gene becomes
    `a_i + mutation_factor * (b_i - c_i)`; each non-numeric gene is redrawn
    with `random.choice(bounds[i])`.

    Every numeric gene is clipped to `[min(bounds[i]), max(bounds[i])]`.
    Previously the limits were hard-coded (32..256 for all four filter/unit
    genes) and disagreed with `bounds`, and kernel size and epochs had no
    upper limit, so candidates could drift outside the declared search space.

    Args:
        individual: the target individual; it must be present in `population`.
        population: list of individuals (lists of genes).
        bounds: per-gene search space; a list of choices or a (low, high) tuple.
        mutation_factor: scale applied to the difference `b - c`.

    Returns:
        A new list of genes; `population` itself is not modified.

    Raises:
        ValueError: if `individual` is not in `population`, or fewer than
            three other individuals are available to sample from.
    """
    # Positions of the genes that must stay integers: f1, f2, f3, f4, k, ep.
    integer_genes = (0, 1, 2, 3, 4, 10)

    # Work on a shallow copy so the caller's list keeps all its members.
    candidates = population.copy()
    candidates.remove(individual)
    a, b, c = random.sample(candidates, 3)

    mut_individual = []
    for i, (a_i, b_i, c_i) in enumerate(zip(a, b, c)):
        if not isinstance(a_i, (int, float)):
            # Categorical gene: no arithmetic is possible, so resample it.
            mut_individual.append(random.choice(bounds[i]))
            continue

        diff = round(b_i - c_i, 1)
        value = a_i + mutation_factor * diff
        # Integer genes are truncated; the others are rounded to one decimal.
        value = int(value) if i in integer_genes else round(value, 1)

        # Clip the gene back into the search space declared in `bounds`.
        lower, upper = min(bounds[i]), max(bounds[i])
        mut_individual.append(max(lower, min(upper, value)))

    return mut_individual

In [None]:
def recombination(individual, population, bounds, CR=0.9):
    """Cross `individual` with its mutant and keep the better of the two.

    A mutant is produced with `mutation`. Each gene of the trial vector `y`
    comes from the mutant with probability `CR`; one randomly chosen gene
    always comes from the mutant. Both `y` and `individual` are then trained
    with `CNN_model`, and the one with the higher (or equal, for `y`)
    validation accuracy is returned.

    Args:
        individual: the current individual (list of genes).
        population: the full population, forwarded to `mutation`.
        bounds: per-gene search space; its length is the number of genes.
        CR: crossover probability in [0, 1].

    Returns:
        The trial vector `y` if its validation accuracy is at least that of
        `individual`, otherwise `individual`.
    """
    # Mutate the individual first
    new_individual = mutation(individual, population, bounds)

    n_genes = len(bounds)
    # Index of the gene that is always taken from the mutant. It must be a
    # valid 0-based index: the previous `randint(1, n)` could never pick
    # gene 0 and could return n, which matches no gene at all.
    R = random.randrange(n_genes)

    # Compute the agent's potentially new position; one uniform draw per gene.
    y = [
        new_individual[i] if (random.uniform(0, 1) < CR or i == R) else individual[i]
        for i in range(n_genes)
    ]

    # If f(y) >= f(x), the candidate y replaces the agent x.
    fitness_y = []
    fitness_x = []
    print(y)
    print(individual)
    CNN_model(*y, fitness_y)
    CNN_model(*individual, fitness_x)

    # Comparing based on validation accuracy (first element of the tuple).
    if fitness_y[0][0] >= fitness_x[0][0]:
        return y
    return individual

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Create a TF1-compat interactive session with `gpu_options.allow_growth`
# enabled.
# NOTE(review): presumably meant to make GPU memory grow on demand; this cell
# runs after the datasets above were built, so confirm it still takes effect.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [None]:
iterations = 10

# Run the differential-evolution search and log the best individual of every
# generation to generation_info.txt.
# NOTE: each recombination call trains two models and the per-generation
# report retrains the whole population, so this cell is very expensive.
with open("generation_info.txt", "w") as f:
    # `generation` (not `iterator`) so the dataset iterator created earlier
    # in the notebook is not overwritten.
    for generation in range(iterations):
        for i in range(len(population)):
            # The below call to recombination also has the mutation call within it so it mutates
            new_individual = recombination(population[i], population, bounds)
            population[i] = new_individual
        print("Iteration", generation + 1, "over")
        print("Current population: ", population)

        # Evaluate the population of this generation once more for the report.
        fitness_gen = []
        for item in population:
            CNN_model(*item, fitness_gen)
        # Fitness tuples compare by validation accuracy first; the index is
        # computed once, after all evaluations.
        max_valaccuracy_index = fitness_gen.index(max(fitness_gen))

        f.write("Generation: " + str(generation + 1) + "\n")
        f.write(str(fitness_gen[max_valaccuracy_index]) + "\n")
        f.write(str(population[max_valaccuracy_index]) + "\n")
        # Flush so the log survives an interrupted multi-hour run.
        f.flush()

In [None]:
# Hyperparameter set in CNN_model's positional order
# (f1, f2, f3, f4, k, a1, a2, d1, d2, op, ep).
# NOTE(review): presumably copied by hand from a previous search run
# (generation_info.txt) -- verify before relying on it.
best = [36, 183, 256, 56, 6, 'elu', 'relu', 0.1, 0.2, 'adagrad', 114]

In [None]:
# Train a model with the `best` hyperparameters; CNN_model appends its
# (val_accuracy, accuracy) tuple to `fitnessbest`.
fitnessbest = []
best_model, history = CNN_model(*best, fitnessbest)
# Evaluate the trained model on the held-out test batches.
best_model.evaluate(test_ds)