In [None]:
import pickle
from pathlib import Path

import librosa
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import BatchNormalization, Conv1D, Dense, Dropout, Flatten, LeakyReLU, MaxPooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers, optimizers

In [None]:
# Dataset locations, relative to the notebook's working directory.
DATA_DIR = Path("data") / "UrbanSound8K"
AUDIO_DIR = DATA_DIR / "audio"
META_CSV = DATA_DIR / "metadata" / "UrbanSound8K.csv"

# Label / audio / chunking parameters.
NUM_CLASSES = 10  # width of the one-hot label vectors
SAMPLING_RATE = 16_000  # sample rate requested from librosa.load
INPUT_SIZE = 16_000  # number of samples in each chunk fed to the network
STRIDE = 8_000  # offset, in samples, between consecutive chunk starts

WEIGHT_DECAY = 1e-4  # l2 regularization hyperparameter

In [None]:
# Load the dataset metadata CSV; later cells filter it by the "fold" column.
meta = pd.read_csv(META_CSV)
# Preview the first rows only instead of rendering the whole table.
meta.head()

In [None]:
def to_chunks(X, y, chunk_size, stride):
    """Split a 1-D signal into fixed-size chunks and repeat its label per chunk.

    Chunks start every ``stride`` samples. Full-length chunks are kept as-is;
    the first chunk that runs past the end of ``X`` is zero-padded at the end
    up to ``chunk_size`` and ends the split, so at most one padded chunk is
    produced.

    Args:
        X: 1-D sequence of samples (e.g. a numpy array).
        y: label attached to every chunk of ``X``.
        chunk_size: number of samples in each returned chunk (>= 1).
        stride: offset between consecutive chunk starts (>= 1).

    Returns:
        A tuple ``(chunks, labels, n_chunks)``: ``chunks`` has shape
        ``(n_chunks, chunk_size)``, ``labels`` is ``y`` repeated ``n_chunks``
        times, and ``n_chunks`` is the number of chunks. For an empty ``X``
        ``chunks`` has shape ``(0, chunk_size)`` so it can still be
        concatenated with the chunks of non-empty signals.

    Raises:
        ValueError: if ``chunk_size`` or ``stride`` is smaller than 1.
    """
    if chunk_size < 1 or stride < 1:
        raise ValueError("chunk_size and stride must be positive integers")

    chunks = []
    for start in range(0, len(X), stride):
        chunk = X[start : start + chunk_size]
        if len(chunk) < chunk_size:
            # Trailing partial chunk: pad with zeros and stop, since every
            # later start would only yield an even shorter remainder.
            chunks.append(np.pad(chunk, (0, chunk_size - len(chunk))))
            break
        chunks.append(chunk)

    if chunks:
        stacked = np.array(chunks)
    else:
        # np.array([]) would have shape (0,), which cannot be concatenated
        # with (n, chunk_size) arrays coming from other signals.
        stacked = np.empty((0, chunk_size), dtype=np.asarray(X).dtype)
    return stacked, np.repeat(y, len(chunks)), len(chunks)

In [None]:
def load_fold_data(meta, fold):
    """Load one fold's audio, chunk every clip and build the model inputs.

    Args:
        meta: metadata frame with "fold", "slice_file_name" and "classID" columns.
        fold: fold number; audio is read from ``AUDIO_DIR / f"fold{fold}"``.

    Returns:
        ``(X, y, chunk_lens)``: ``X`` holds all chunks with a trailing axis of
        size 1, ``y`` the one-hot labels of those chunks, and ``chunk_lens``
        the number of chunks produced for each clip, in clip order.
    """
    fold_rows = meta[meta["fold"] == fold]
    fold_dir = AUDIO_DIR / f"fold{fold}"

    # Decode every clip of the fold, resampled to SAMPLING_RATE.
    waveforms = []
    for slice_name in fold_rows["slice_file_name"]:
        waveform, _ = librosa.load(fold_dir / slice_name, sr=SAMPLING_RATE)
        waveforms.append(waveform)

    # Chunk each clip; every chunk inherits the class id of its clip.
    per_clip = [
        to_chunks(waveform, class_id, INPUT_SIZE, STRIDE)
        for waveform, class_id in zip(waveforms, fold_rows["classID"])
    ]
    chunk_arrays, label_arrays, chunk_lens = zip(*per_clip)

    # add new axis required by tensorflow
    X = np.concatenate(chunk_arrays)[..., np.newaxis]
    # convert to one-hot encoding
    y = to_categorical(np.concatenate(label_arrays), num_classes=NUM_CLASSES)
    return X, y, np.array(chunk_lens)

In [None]:
%%time

# Uncomment and run this cell only to regenerate foldData.pickle, e.g. when the pickled data is missing or in some way incorrect

#data = zip(*[load_fold_data(meta, fold) for fold in range(1, 11)])
#with (DATA_DIR / "foldData.pickle").open("bw") as f:
#    pickle.dump(data, f)

In [None]:
# Load the cached per-fold arrays: inputs, one-hot labels and chunks-per-clip counts.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load a foldData.pickle that was generated locally by this notebook.
fold_data_path = DATA_DIR / "foldData.pickle"
with fold_data_path.open("rb") as fold_file:
    fold_Xs, fold_ys, fold_chunk_lens = pickle.load(fold_file)

In [None]:
# List the GPUs TensorFlow can see; the cell output shows the detected devices.
gpus = tf.config.list_physical_devices(device_type="GPU")
gpus

In [None]:
# Hide the first GPU from TensorFlow, but only when another one is available:
# slicing off gpus[0] unconditionally would hide the only GPU on a single-GPU
# machine and silently push all training onto the CPU.
if len(gpus) > 1:
    tf.config.set_visible_devices(gpus[1:], "GPU")

In [None]:
def create_model(input_shape):
    """Build a four-convolution 1-D CNN classifier (CONV1 has no explicit stride).

    NOTE(review): later cells of this notebook define `create_model` again, so
    on a top-to-bottom run this definition is shadowed.

    Args:
        input_shape: shape of one input sample, without the batch axis.

    Returns:
        An uncompiled `Sequential` model ending in a `NUM_CLASSES`-way softmax.
    """
    return Sequential(
        [
            # CONV1
            Conv1D(16, kernel_size=64, activation="relu", input_shape=input_shape),
            BatchNormalization(),
            MaxPooling1D(pool_size=8, strides=8),
            # CONV2
            Conv1D(32, kernel_size=32, strides=2, activation="relu"),
            BatchNormalization(),
            MaxPooling1D(pool_size=8, strides=8),
            # CONV3
            Conv1D(64, kernel_size=16, strides=2, activation="relu"),
            BatchNormalization(),
            # CONV4
            Conv1D(128, kernel_size=8, strides=2, activation="relu"),
            BatchNormalization(),
            # FC
            Flatten(),
            Dense(128, activation="relu"),
            Dropout(0.25),
            Dense(64, activation="relu"),
            Dropout(0.25),
            # Output width follows NUM_CLASSES so it matches the one-hot labels.
            Dense(NUM_CLASSES, activation="softmax"),
        ]
    )

In [None]:
def create_model(input_shape, weight_decay=None):
    """Build a four-convolution 1-D CNN classifier with stride-2 convolutions.

    NOTE(review): a later cell of this notebook defines `create_model` again,
    so on a top-to-bottom run this definition is shadowed.

    Args:
        input_shape: shape of one input sample, without the batch axis.
        weight_decay: optional L2 factor applied to the kernel of every
            convolution (e.g. `WEIGHT_DECAY`). `None`, the default, applies no
            kernel regularization, which is what this function did before the
            parameter existed.

    Returns:
        An uncompiled `Sequential` model ending in a `NUM_CLASSES`-way softmax.
    """
    # One regularizer object is shared by all convolutions.
    kernel_reg = regularizers.l2(weight_decay) if weight_decay is not None else None

    return Sequential(
        [
            # CONV1
            Conv1D(16, kernel_size=64, strides=2, activation="relu",
                   kernel_regularizer=kernel_reg, input_shape=input_shape),
            BatchNormalization(),
            MaxPooling1D(pool_size=8, strides=8),
            # CONV2
            Conv1D(32, kernel_size=32, strides=2, activation="relu",
                   kernel_regularizer=kernel_reg),
            BatchNormalization(),
            MaxPooling1D(pool_size=8, strides=8),
            # CONV3
            Conv1D(64, kernel_size=16, strides=2, activation="relu",
                   kernel_regularizer=kernel_reg),
            BatchNormalization(),
            # CONV4
            Conv1D(128, kernel_size=8, strides=2, activation="relu",
                   kernel_regularizer=kernel_reg),
            BatchNormalization(),
            # FC
            Flatten(),
            Dense(128, activation="relu"),
            Dropout(0.25),
            Dense(64, activation="relu"),
            Dropout(0.25),
            # Output width follows NUM_CLASSES so it matches the one-hot labels.
            Dense(NUM_CLASSES, activation="softmax"),
        ]
    )

In [None]:
def create_model_mlp(input_shape):
    """Build a dense-only classifier: the input is flattened and fed to two
    512-unit ReLU layers with dropout, with no convolutional layers.

    Args:
        input_shape: shape of one input sample, without the batch axis.

    Returns:
        An uncompiled `Sequential` model ending in a `NUM_CLASSES`-way softmax.
    """
    return Sequential(
        [
            Flatten(input_shape=input_shape),
            Dense(512, activation="relu"),
            Dropout(0.25),
            Dense(512, activation="relu"),
            Dropout(0.25),
            # Output width follows NUM_CLASSES so it matches the one-hot labels.
            Dense(NUM_CLASSES, activation="softmax"),
        ]
    )

In [None]:
def create_model(input_shape):
    """Build the six-convolution 1-D CNN with LeakyReLU activations.

    Args:
        input_shape: shape of one input sample, without the batch axis.

    Returns:
        An uncompiled `Sequential` model ending in a `NUM_CLASSES`-way softmax.
    """
    layers = [
        # CONV1 (no activation layer follows this convolution)
        Conv1D(16, kernel_size=16, strides=8, input_shape=input_shape),
        BatchNormalization(),
        # CONV2
        Conv1D(16, kernel_size=6, strides=1),
        LeakyReLU(),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        # CONV3
        Conv1D(32, kernel_size=6, strides=2),
        LeakyReLU(),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        # CONV4
        Conv1D(64, kernel_size=6, strides=2),
        LeakyReLU(),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        # CONV5
        Conv1D(128, kernel_size=6, strides=2),
        LeakyReLU(),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        # CONV6 (batch normalization comes before the activation here)
        Conv1D(128, kernel_size=3, strides=2),
        BatchNormalization(),
        LeakyReLU(),
        # FC
        Flatten(),
        Dense(512),
        LeakyReLU(),
        Dense(NUM_CLASSES, activation="softmax"),
    ]
    return Sequential(layers)

In [None]:
# Train on the first nine folds, hold out the last one (index 9) for testing.
X_train = np.concatenate(fold_Xs[:9])
y_train = np.concatenate(fold_ys[:9])
X_test = fold_Xs[9]
y_test = fold_ys[9]

In [None]:
#model = Sequential()
## CONV1
#model.add(
#    Conv1D(16, kernel_size=64, strides=2, activation="relu", input_shape=X_train.shape[1:])
#)
## POOL
#model.add(MaxPooling1D(pool_size=8))
## CONV2
#model.add(
#    Conv1D(
#        16, kernel_size=64, strides=2, activation="relu", input_shape=(None, INPUT_SIZE)
#    )
#)
## POOL
#model.add(MaxPooling1D(pool_size=8))
## FC7
#model.add(Flatten())
#model.add(Dense(10, activation="softmax"))
#
#model.summary()
## compile with categorical crossentropy since
## this is multi-class classification
#model.compile(
#    loss="categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"]
#)

In [None]:
# Build the network for the shape of one training sample (batch axis dropped).
model = create_model(input_shape=X_train.shape[1:])

In [None]:
# Print the layer-by-layer summary of the current model.
model.summary()

In [None]:
# Rebuild the model from the current `create_model` definition and print its summary.
model = create_model(input_shape=X_train.shape[1:])
model.summary()

In [None]:
# Small two-convolution network built inline (replaces the current `model`)
# and compiled with Adam and categorical cross-entropy.
model = Sequential(
    [
        Conv1D(filters=64, kernel_size=64, activation="relu", input_shape=(X_train.shape[1:])),
        Conv1D(filters=64, kernel_size=3, activation="relu"),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation="relu"),
        Dense(10, activation="softmax"),
    ]
)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Four-convolution network built inline (replaces the current `model`)
# and compiled with Adam and categorical cross-entropy.
model = Sequential(
    [
        # CONV1
        Conv1D(16, kernel_size=64, strides=2, activation="relu", input_shape=X_train.shape[1:]),
        BatchNormalization(),
        MaxPooling1D(pool_size=8, strides=8),
        # CONV2
        Conv1D(32, kernel_size=32, strides=2, activation="relu"),
        BatchNormalization(),
        MaxPooling1D(pool_size=8, strides=8),
        # CONV3
        Conv1D(64, kernel_size=16, strides=2, activation="relu"),
        BatchNormalization(),
        # CONV4
        Conv1D(128, kernel_size=8, strides=2, activation="relu"),
        BatchNormalization(),
        # FC
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.25),
        Dense(64, activation="relu"),
        Dropout(0.25),
        Dense(10, activation="softmax"),
    ]
)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [None]:
# Print the layer-by-layer summary of the model built in the previous cell.
model.summary()

In [None]:
# (Re)compile the current model with Adam at a 5e-4 learning rate and
# categorical cross-entropy. Alternatives left over from earlier experiments:
# Adadelta(learning_rate=1) as optimizer, MeanSquaredLogarithmicError() as loss.
model.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    metrics=["accuracy"],
)

In [None]:
# Callbacks: keep only the best checkpoint on disk, and stop training once
# val_loss has not improved for 50 consecutive epochs.
checkpointer = ModelCheckpoint(
    filepath="model.weights.best.hdf5",
    verbose=1,
    save_best_only=True,
)
escallback = EarlyStopping(
    monitor="val_loss",
    patience=50,
    min_delta=0,
    verbose=1,
)

In [None]:
# Train for up to 20 epochs in mini-batches of 32, holding out 1/9 of the
# training data for validation; the checkpoint callback writes weights to disk.
hist = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    shuffle=True,
    validation_split=1 / 9,
    callbacks=[checkpointer, escallback],
    verbose=1,
)

In [None]:
from matplotlib import pyplot

# Per-epoch loss curves of the last `model.fit` run.
pyplot.plot(hist.history["loss"], label="train")
# val_loss is measured on the validation_split hold-out, not on the test fold,
# so the curve is labelled accordingly.
pyplot.plot(hist.history["val_loss"], label="validation")
pyplot.xlabel("epoch")
pyplot.ylabel("loss")
pyplot.title("Training and validation loss")
pyplot.legend()
pyplot.show()

In [None]:
# Call fit again on the same `model` object: up to 100 epochs in mini-batches
# of 100, again holding out 1/9 of the training data for validation.
hist = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=100,
    shuffle=True,
    validation_split=1 / 9,
    callbacks=[checkpointer, escallback],
    verbose=1,
)

In [None]:
# Display the History object returned by the last `model.fit` call.
hist

In [None]:
# Restore the weights from the file path the ModelCheckpoint callback writes to.
model.load_weights("model.weights.best.hdf5")

In [None]:
# Per-chunk softmax outputs of the model for the held-out test fold.
y_pred = model.predict(X_test)

In [None]:
# Sanity check: inputs, predictions and labels should agree on their first dimension.
X_test.shape, y_pred.shape, y_test.shape

In [None]:
# Fraction of test chunks per class (column sums of the one-hot labels over their total).
y_test.sum(axis=0)/y_test.sum()

In [None]:
# Clip-level accuracy on fold index 9: split the chunk-level rows back into
# clips, average each clip's rows and take the arg-max class.
clip_bounds = fold_chunk_lens[9].cumsum()[:-1]
y_pred_final = np.array([clip.mean(axis=0).argmax() for clip in np.split(y_pred, clip_bounds)])
y_test_final = np.array([clip.mean(axis=0).argmax() for clip in np.split(y_test, clip_bounds)])
(y_pred_final == y_test_final).sum() / len(y_pred_final)

In [None]:
# Evaluate on the test set at chunk level; score[1] is printed as the accuracy
# (the model was compiled with metrics=["accuracy"]).
score = model.evaluate(X_test, y_test, verbose=1)
print("\n", "Test accuracy:", score[1])

In [None]:
# Cross-validation results, filled by the loop below and keyed by held-out fold index.
scores = {}

In [None]:
def _clip_level_classes(chunk_rows, chunk_lens):
    """Average the chunk-level rows of each clip and return each clip's arg-max class.

    `chunk_rows` holds one row per chunk (model outputs or one-hot labels) and
    `chunk_lens` the number of consecutive rows belonging to each clip.
    """
    clip_bounds = chunk_lens.cumsum()[:-1]
    return np.array([clip.mean(axis=0).argmax() for clip in np.split(chunk_rows, clip_bounds)])


# Cross-validation: every fold is held out once as the test set.
n_folds = len(fold_Xs)
for val_idx in range(n_folds):
    # Reset Keras' global state so models from earlier folds do not pile up.
    tf.keras.backend.clear_session()

    X_test, y_test = fold_Xs[val_idx], fold_ys[val_idx]
    X_train = np.concatenate([fold_Xs[i] for i in range(n_folds) if i != val_idx])
    y_train = np.concatenate([fold_ys[i] for i in range(n_folds) if i != val_idx])

    model = create_model(input_shape=X_train.shape[1:])
    # NOTE(review): this loop trains with Adadelta + MSLE while the single-run
    # cell above uses Adam + categorical cross-entropy; confirm this is intended.
    model.compile(optimizer=tf.keras.optimizers.Adadelta(learning_rate=1),
                  loss=tf.keras.losses.MeanSquaredLogarithmicError(),
                  metrics=["accuracy"])
    # callbacks to save and stop early
    checkpointer = ModelCheckpoint(filepath=f"model.weights.best{val_idx}.hdf5", save_best_only=True, verbose=1)
    escallback = EarlyStopping(monitor="val_loss", min_delta=0, patience=30, verbose=1)
    # do training and save weights to disk
    hist = model.fit(
        X_train,
        y_train,
        batch_size=100,
        epochs=100,
        validation_split=1/9,
        callbacks=[checkpointer, escallback],
        verbose=1,
        shuffle=True,
    )
    # NOTE(review): the best checkpoint is written but not reloaded here, so the
    # weights left after the last epoch are what gets saved and evaluated.
    model.save(f"model{val_idx}")

    # evaluate on test set (chunk level)
    score = model.evaluate(X_test, y_test, verbose=1)[1]
    print("\n", "Test accuracy:", score)

    # predictions on test set, aggregated per clip
    y_pred = model.predict(X_test)
    y_pred_final = _clip_level_classes(y_pred, fold_chunk_lens[val_idx])
    y_test_final = _clip_level_classes(y_test, fold_chunk_lens[val_idx])
    score_final = (y_pred_final == y_test_final).sum()/len(y_pred_final)
    print("\n", "Final Test accuracy:", score_final)

    # Keep everything needed for later inspection; both accuracies were already
    # printed above, so the growing dict is not dumped on every fold.
    scores[val_idx] = (hist, score, score_final, y_pred_final, y_test_final)

In [None]:
# Display the collected per-fold results.
scores

In [None]:
# Reload each fold's saved model and re-evaluate it on its held-out fold,
# without retraining.
for val_idx in range(10):
    X_test = fold_Xs[val_idx]
    y_test = fold_ys[val_idx]

    model = keras.models.load_model(f"model{val_idx}")

    # evaluate on test set (chunk level)
    score = model.evaluate(X_test, y_test, verbose=1)[1]
    print("\n", val_idx, "Test accuracy:", score)

    # predictions on test set, aggregated per clip
    y_pred = model.predict(X_test)
    clip_bounds = fold_chunk_lens[val_idx].cumsum()[:-1]
    y_pred_final = np.array([clip.mean(axis=0).argmax() for clip in np.split(y_pred, clip_bounds)])
    y_test_final = np.array([clip.mean(axis=0).argmax() for clip in np.split(y_test, clip_bounds)])
    score_final = (y_pred_final == y_test_final).sum() / len(y_pred_final)
    print("\n", "Final Test accuracy:", score_final)

In [None]:
# Clip-level predictions and accuracy for the current `model`, `X_test`,
# `y_test` and `val_idx` (the values left behind by the loop above).
y_pred = model.predict(X_test)
clip_bounds = fold_chunk_lens[val_idx].cumsum()[:-1]
y_pred_final = np.array([clip.mean(axis=0).argmax() for clip in np.split(y_pred, clip_bounds)])
y_test_final = np.array([clip.mean(axis=0).argmax() for clip in np.split(y_test, clip_bounds)])
score_final = (y_pred_final == y_test_final).sum() / len(y_pred_final)

In [None]:
# Display the clip-level accuracy computed in the previous cell.
score_final

In [None]:
# Clip-level confusion matrix (true labels passed first, predictions second).
tf.math.confusion_matrix(y_test_final, y_pred_final, num_classes=NUM_CLASSES)

In [None]:
from matplotlib import pyplot

# Per-epoch accuracy curves of the last `model.fit` run.
pyplot.plot(hist.history["accuracy"], label="train")
# val_accuracy is measured on the validation_split hold-out, not on the test
# fold, so the curve is labelled accordingly.
pyplot.plot(hist.history["val_accuracy"], label="validation")
pyplot.xlabel("epoch")
pyplot.ylabel("accuracy")
pyplot.title("Training and validation accuracy")
pyplot.legend()
pyplot.show()

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# Clip-level confusion matrix as a labelled frame, indexed by class id on both
# axes (true labels are passed first, predictions second).
class_ids = list(range(NUM_CLASSES))
array = tf.math.confusion_matrix(y_test_final, y_pred_final, num_classes=NUM_CLASSES).numpy()
df_cm = pd.DataFrame(array, index=class_ids, columns=class_ids)

plt.figure(figsize=(10, 7))
# fmt="d" prints the integer counts as-is; the default format switches to
# scientific notation for counts with three or more digits.
ax = sn.heatmap(df_cm, annot=True, fmt="d")
ax.set_xlabel("predicted class")
ax.set_ylabel("true class")
ax.set_title("Clip-level confusion matrix")
plt.show()

In [None]:
# Clip-level confusion matrix as a plain numpy array, for further manipulation.
res = tf.math.confusion_matrix(y_test_final, y_pred_final, num_classes=NUM_CLASSES).numpy()
res

In [None]:
# Drop index 2 from the confusion matrix: its row first, then its column.
# NOTE: `res` is overwritten, so re-running this cell removes a further row/column.
res = np.delete(np.delete(res, 2, axis=0), 2, axis=1)
res

In [None]:
# Accuracy from the reduced confusion matrix: diagonal (correct) counts over all counts.
res.diagonal().sum()/res.sum()