# Training
(Common for both tasks)

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
import tensorflow as tf 
try:
  import utils
except:
  !pip3 install python-Levenshtein
  import utils

print(tf.__version__) 

Collecting python-Levenshtein
[?25l  Downloading https://files.pythonhosted.org/packages/42/a9/d1785c85ebf9b7dfacd08938dd028209c34a0ea3b1bcdb895208bd40a67d/python-Levenshtein-0.12.0.tar.gz (48kB)
[K     |██████▊                         | 10kB 28.0MB/s eta 0:00:01[K     |█████████████▌                  | 20kB 26.4MB/s eta 0:00:01[K     |████████████████████▏           | 30kB 12.9MB/s eta 0:00:01[K     |███████████████████████████     | 40kB 5.7MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 4.3MB/s 
Building wheels for collected packages: python-Levenshtein
  Building wheel for python-Levenshtein (setup.py) ... [?25l[?25hdone
  Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.0-cp36-cp36m-linux_x86_64.whl size=144789 sha256=f3905edc9961dc3a7d5c5a910f7c4d6cbf3a1f58ccf8141a3fefe0f717f5d8fb
  Stored in directory: /root/.cache/pip/wheels/de/c2/93/660fd5f7559049268ad2dc6d81c4e39e9e36518766eaf7e342
Successfully built python-Levenshtein
Install

In [None]:
# Load the training label table and preview its first rows.
train_dat = pd.read_csv("MLSP_AED/labels_train.csv")
train_dat.head()

Unnamed: 0,slice_file_name,class
0,100652-3-0-0.wav,dog_bark
1,100652-3-0-1.wav,dog_bark
2,100652-3-0-2.wav,dog_bark
3,100652-3-0-3.wav,dog_bark
4,101415-3-0-2.wav,dog_bark


In [None]:
# Convert the label table to a plain array so columns can be taken by position.
a = np.array(train_dat)
print(a.shape)
print(a[:5])
audio_files = a[:, 0]   # first column: wav file names
audio_labels = a[:, 1]  # second column: class names

(1761, 2)
[['100652-3-0-0.wav' 'dog_bark']
 ['100652-3-0-1.wav' 'dog_bark']
 ['100652-3-0-2.wav' 'dog_bark']
 ['100652-3-0-3.wav' 'dog_bark']
 ['101415-3-0-2.wav' 'dog_bark']]


In [None]:
# Embed an audio player for one randomly chosen training clip.
from IPython.display import Audio
Audio('MLSP_AED/audio_train_1ch/' + np.random.choice(audio_files) )

In [None]:
# Count and list the distinct class names found in the training labels.
print(len(np.unique(audio_labels)))
print(np.unique(audio_labels))

10
['air_conditioner' 'car_horn' 'children_playing' 'dog_bark' 'drilling'
 'engine_idling' 'gun_shot' 'jackhammer' 'siren' 'street_music']


In [None]:
# Load the padded spectrograms from the Drive cache, or rebuild and cache them.
try:
  X_specs_padded = np.load('drive/MyDrive/pad_data.npz')['arr_0']
except Exception as err:
  # Best effort: any failure to read the cache falls back to a rebuild, but the
  # reason is reported and KeyboardInterrupt is no longer swallowed.
  print(f"Could not load cached spectrograms ({err!r}); rebuilding.")
  # None selects every file. The previous value of -1 silently dropped the last
  # file because audio_files[:-1] excludes it. Use a small int for a quick dev run.
  dev_size = None
  selected_files = audio_files[:dev_size]
  n_files = len(selected_files)
  # Each spectrogram is cropped to at most 513 x 400 and written into a
  # zero-filled slot, so shorter clips end up zero-padded. Filling the final
  # array directly avoids holding a list of raw spectrograms alongside it.
  X_specs_padded = np.zeros((n_files, 513, 400, 1))
  for i, fname in enumerate(selected_files):
    spec = utils.wav2feat('MLSP_AED/audio_train_1ch/'+fname)[:513, :400]
    X_specs_padded[i, :spec.shape[0], :spec.shape[1], 0] = spec
    print(f"\rProgress: {(i+1)*100/n_files:0.2f}%", end = "")
  print()
  print(X_specs_padded.shape)
  np.savez_compressed('drive/MyDrive/pad_data', X_specs_padded)
  print("Saved training data for future use.")
X_specs_padded.shape

(1760, 513, 400, 1)

In [None]:
# Data augmentation on the spectrograms - inspired by SpecAugment for ASR
def frequency_mask(spectrogram, F=27):
  """Zero out one randomly placed band of rows (axis 0) of a spectrogram.

  Args:
    spectrogram: rank-3 array; the mask is applied along its first axis.
    F: exclusive upper bound on the height of the masked band.

  Returns:
    A float64 tensor in which the chosen band of rows is replaced by zeros.
    The band start is drawn below `rows // 2 - height`, so the mask always
    falls in the lower-index half of the first axis.
  """
  band_height = tf.random.uniform([], minval=0, maxval=F, dtype=tf.int32)
  n_rows, _, _ = spectrogram.shape
  band_start = tf.random.uniform(
      [], minval=0, maxval=n_rows // 2 - band_height, dtype=tf.int32)
  band_end = band_start + band_height
  # Rebuild the spectrogram as: rows before the band, zeros, rows after it.
  pieces = [
      spectrogram[:band_start, :],
      tf.zeros_like(spectrogram[band_start:band_end, :]),
      spectrogram[band_end:, :],
  ]
  return tf.cast(tf.concat(pieces, axis=0), dtype=tf.float64)


def time_mask(spectrogram, T=30):
  """Zero out one randomly placed band of columns (axis 1) of a spectrogram.

  Args:
    spectrogram: rank-3 array; the mask is applied along its second axis.
    T: exclusive upper bound on the width of the masked band.

  Returns:
    A float64 tensor in which the chosen band of columns is replaced by zeros.
  """
  band_width = tf.random.uniform([], minval=0, maxval=T, dtype=tf.int32)
  _, n_cols, _ = spectrogram.shape
  band_start = tf.random.uniform(
      [], minval=0, maxval=n_cols - band_width, dtype=tf.int32)
  band_end = band_start + band_width
  # Rebuild the spectrogram as: columns before the band, zeros, columns after it.
  pieces = [
      spectrogram[:, :band_start],
      tf.zeros_like(spectrogram[:, band_start:band_end]),
      spectrogram[:, band_end:],
  ]
  return tf.cast(tf.concat(pieces, axis=1), dtype=tf.float64)


In [None]:
def one_hot(vals, classes):
  """Convert integer class indices to one-hot rows.

  Args:
    vals: iterable of integer class indices; each must be a valid index into
      a length-`classes` array.
    classes: number of classes, i.e. the width of every output row.

  Returns:
    Float array of shape (number of values, classes) with a 1 at each index.
    An empty `vals` gives shape (0, classes), so the result is always 2-D.
  """
  rows = []
  for val in vals:
    row = np.zeros((classes))
    row[val] = 1
    rows.append(row)
  # The explicit reshape keeps the (n, classes) layout when there are no rows;
  # np.array([]) on its own would collapse to shape (0,).
  return np.array(rows).reshape(len(rows), classes)

one_hot([0, 1, 2, 3], 4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [None]:
# Defining labels: map each class name to an integer id, then one-hot encode.
labels = np.unique(audio_labels)
label2idx = {label:i for i, label in enumerate(labels)}
print("Label to Index :", label2idx)
audio_label_ids = np.array([label2idx[label] for label in audio_labels])
print(audio_label_ids[:10])
# The row width follows the number of classes actually found, rather than a
# hard-coded 10, so the encoding stays valid if the label set changes.
audio_labels_idx = one_hot(audio_label_ids, len(labels))
audio_labels_idx[:5]

Label to Index : {'air_conditioner': 0, 'car_horn': 1, 'children_playing': 2, 'dog_bark': 3, 'drilling': 4, 'engine_idling': 5, 'gun_shot': 6, 'jackhammer': 7, 'siren': 8, 'street_music': 9}
[3 3 3 3 3 3 3 3 3 6]


array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]])

In [None]:
from sklearn.model_selection import train_test_split
# A fixed random_state keeps the same samples in the validation set on every
# run. The model is reloaded from Drive and trained further across sessions, so
# an unseeded split could move earlier training samples into validation and
# inflate val_accuracy.
# Labels are sliced to the number of spectrograms actually available.
X_train, X_val, y_train, y_val = train_test_split(
    X_specs_padded,
    audio_labels_idx[:len(X_specs_padded)],
    shuffle=True,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Drop the reference to the full padded array; later visible cells use only
# the train/validation splits.
# NOTE(review): after this runs, re-running any cell that reads X_specs_padded
# fails until the loading cell is executed again.
del X_specs_padded

In [None]:
# Building the neural network for classification

from tensorflow.keras.models import Sequential, load_model 
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling3D

In [None]:
# Resume from the model saved on Drive if it can be loaded; otherwise build and
# compile a fresh network.
try:
  event_model = load_model("drive/MyDrive/aed_model/event_model")
  print("Loaded model from memory.")
except Exception as err:
  # Best effort: any load failure falls back to a new model, but the reason is
  # shown and KeyboardInterrupt/SystemExit are no longer swallowed.
  print(f"Could not load a saved model ({err!r}).")
  print("Defining a new model.")
  # Three unpooled 3x3 convolutions followed by a dense classifier head.
  # NOTE(review): with no pooling, Flatten emits 1,598,064 features and the
  # first Dense layer alone holds ~319.6M parameters (see the summary output).
  event_model = Sequential([
                            Conv2D(32, (3, 3), activation='relu', input_shape  = (513, 400, 1)),
                            Conv2D(16, (3, 3), activation = 'relu'),
                            Conv2D(8  , (3, 3), activation = 'relu'),
                            Flatten(),
                            Dropout(0.5),
                            Dense(200, activation = 'relu'),
                            Dropout(0.4),
                            Dense(100, activation = 'relu'),
                            Dropout(0.5),
                            Dense(10, activation = 'softmax')
                           ],
                            name = 'Event_Model')

  # One-hot targets, hence categorical cross-entropy.
  event_model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
event_model.summary()

Loaded model from memory.
Model: "Event_Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 511, 398, 32)      320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 509, 396, 16)      4624      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 507, 394, 8)       1160      
_________________________________________________________________
flatten (Flatten)            (None, 1598064)           0         
_________________________________________________________________
dropout (Dropout)            (None, 1598064)           0         
_________________________________________________________________
dense (Dense)                (None, 200)               319613000 
_________________________________________________________________
dropout_1 (Dropout)          

In [None]:
def plot_hist(acc, loss, vacc, vloss):
  """Plot training/validation accuracy and loss curves in two side-by-side panels.

  Args:
    acc: sequence of training accuracy values.
    loss: sequence of training loss values.
    vacc: sequence of validation accuracy values.
    vloss: sequence of validation loss values.
  """
  fig, axes = plt.subplots(1, 2)

  # Left panel: accuracy curves; right panel: loss curves.
  panels = (
      ((acc, 'acc'), (vacc, 'val_acc')),
      ((loss, 'loss'), (vloss, 'val_loss')),
  )
  for axis, curves in zip(axes, panels):
    for values, name in curves:
      axis.plot(values, label=name)
    axis.legend()

  plt.suptitle("Training History")
  plt.show()

# plot_hist(acc, loss, vacc, vloss)

In [None]:
# Per-epoch metrics accumulated over every training step, for plot_hist.
acc = []
loss = []
vacc = []
vloss = []

# Validation accuracy the latest epoch must exceed before the model is saved.
SAVE_THRESHOLD = 0.7102

# Augmented samples go into a scratch buffer. Writing them back into X_train
# stacked a new mask on top of the previous ones at every step (and on every
# re-run of this cell), progressively blanking the training data.
# NOTE: this needs a second array the size of X_train.
X_train_aug = np.empty_like(X_train)
n_train = len(X_train)

for step in range(3):
  print(f"Step {step} initiated.")
  # One mask type per step; each sample gets its own random mask position.
  augment = np.random.choice([time_mask, frequency_mask])
  for i in range(n_train):
    X_train_aug[i] = augment(X_train[i])
    print(f'\rDataPrep Progress : {(i+1)*100/n_train:0.2f}%', end = '')
  print()
  print("Data prepped.")
  hist = event_model.fit(X_train_aug, y=y_train, validation_data=(X_val, y_val), epochs=7)
  acc += hist.history['accuracy']
  loss += hist.history['loss']
  vacc += hist.history['val_accuracy']
  vloss += hist.history['val_loss']
  plot_hist(acc, loss, vacc, vloss)
  if vacc[-1] > SAVE_THRESHOLD:
    try:
      event_model.save('drive/MyDrive/aed_model/event_model')
    except Exception as err:
      # Fall back to weights only, but show why the full save failed.
      print(f"Full model save failed: {err!r}")
      event_model.save_weights('drive/MyDrive/aed_model/event_model')
      print("Saved weights only.")

In [None]:
# List the saved model path, presumably to confirm the checkpoint was written.
# NOTE(review): `-r` only reverses the sort order; `-R` lists recursively - confirm which was intended.
!ls -r drive/MyDrive/aed_model/event_model

# Prediction Script

In [4]:
try:
    import tensorflow as tf
    from tensorflow.keras.models import Sequential, load_model
    from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D
except ImportError as err:
    # Stop here with the install hint. Printing and continuing only postponed
    # the failure to a NameError at the first load_model call.
    raise ImportError(
        "Can't load tensorflow. Are you sure you have downloaded the pip package?"
    ) from err
import numpy as np
import os
import glob
import pandas as pd

# Global Variables - Required file/folder paths (edit these before running)
MODEL_PATH = "drive/MyDrive/aed_model/event_model"  # saved model passed to load_model
TASK1_PATH = "feats"  # folder searched for the task 1 *.npy test features
TASK1_SUBMISSION_PATH = "190016_task1_labels_test.csv"  # CSV file the task 1 predictions are written to

# (Pre)Processing Functions
def pad_spectrogram(spec, pad_shape=(513, 400)):
    """Crop and/or zero-pad a 2-D spectrogram to exactly `pad_shape`.

    Args:
        spec: 2-D array to fit.
        pad_shape: target (rows, columns).

    Returns:
        `spec` itself when it already has the target shape; otherwise a new
        zero-filled float array of `pad_shape` holding the top-left part of
        `spec` that fits.
    """
    n_rows, n_cols = pad_shape
    if spec.shape == pad_shape:
        return spec
    # Crop anything beyond the target, then place it in the top-left corner.
    cropped = spec[:n_rows, :n_cols]
    canvas = np.zeros((n_rows, n_cols))
    kept_rows, kept_cols = cropped.shape[0], cropped.shape[1]
    canvas[:kept_rows, :kept_cols] = cropped
    return canvas

# Variables: the trained model plus the label <-> index lookup tables.
event_model = load_model(MODEL_PATH)
event_model.summary()
# Class names listed in index order, so position i maps to index i.
label2idx = {
    name: position
    for position, name in enumerate(
        [
            "air_conditioner",
            "car_horn",
            "children_playing",
            "dog_bark",
            "drilling",
            "engine_idling",
            "gun_shot",
            "jackhammer",
            "siren",
            "street_music",
        ]
    )
}
# Inverse lookup, used to turn predicted indices back into class names.
idx2label = {position: name for name, position in label2idx.items()}

# Prediction Pipeline for Task 1
## Reading the Test Cases
# glob returns files in arbitrary order; sorting makes the row order of the
# submission reproducible between runs and machines.
task1_files = sorted(glob.glob(TASK1_PATH + "/*.npy"))
if not task1_files:
    # Fail with a clear message instead of an obscure shape error in predict().
    raise FileNotFoundError(f"No .npy feature files found in {TASK1_PATH!r}")
# Pad/crop every spectrogram to the model's input size and add a channel axis.
x_test = np.array(
    [pad_spectrogram(np.load(file))[:, :, np.newaxis] for file in task1_files]
)

## Getting the Predictions
y_pred = event_model.predict(x_test)
# For each row of class scores, take the highest-scoring index and map it back
# to its class name.
predictions = np.array([idx2label[np.argmax(scores)] for scores in y_pred])

##  Saving the Predicted Labels in the required format
# os.path.basename strips the directory with whatever separator the platform
# uses; splitting on '/' left the folder in the name on other separators.
# As before, the name is cut at its first dot.
submission_task1_files = [
    os.path.basename(filename).split('.')[0] for filename in task1_files
]
submission = {"file": submission_task1_files, "labels": predictions}
submission_frame = pd.DataFrame(submission)
# Written without header or index: one "file,label" row per test case.
submission_frame.to_csv(TASK1_SUBMISSION_PATH, header=False, index=False)
print(f"Submission saved to {TASK1_SUBMISSION_PATH}")

Model: "Event_Model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 511, 398, 32)      320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 509, 396, 16)      4624      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 507, 394, 8)       1160      
_________________________________________________________________
flatten (Flatten)            (None, 1598064)           0         
_________________________________________________________________
dropout (Dropout)            (None, 1598064)           0         
_________________________________________________________________
dense (Dense)                (None, 200)               319613000 
_________________________________________________________________
dropout_1 (Dropout)          (None, 200)               