In [None]:
from collections import Counter

import numpy as np
import seaborn as sns
from scipy.linalg import svd
from sklearn import model_selection
from sklearn.metrics import (
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
)
import tensorflow as tf
from tensorflow.keras import (
    models,
    layers,
)

Import data

In [None]:
# Load both classes from Drive. The trailing notes are the author's recorded
# array shapes (samples x 30 x 79); they are not checked here.
music_data = np.load('drive/MyDrive/collab_data/music_data.npy') # 10500x30x79
other_data = np.load('drive/MyDrive/collab_data/other_data.npy') # 10500x30x79

# Sample count (axis 0) and size of the last axis (axis 2) of the music set.
num_samples = len(music_data)
time_frames = music_data.shape[2]

Add labels and combine the data

In [None]:
# Stack the two classes along the sample axis: music first, then other.
X = np.concatenate((music_data, other_data), axis=0)

# Labels follow the same order: 1 for every music sample, 0 for every other
# sample. Each class is sized from its own array so the labels stay aligned
# with X even when the two sets do not hold the same number of samples.
y = np.array([1]*len(music_data) + [0]*len(other_data))
assert len(y) == len(X), 'labels and samples are misaligned'

# Truncated variants keeping only the first 30 / 10 / 20 entries of the last axis.
X30 = X[..., :30]
X10 = X[..., :10]
X20 = X[..., :20]

def accuracy_score(true, pred):
  """Return the fraction of predictions that equal the true labels.

  Both inputs are flattened before comparison, so a column of predictions
  with shape (N, 1) (as returned by a single-output Keras model) is compared
  element-by-element with labels of shape (N,) instead of being broadcast
  into an (N, N) comparison matrix.

  Args:
    true: array-like of ground-truth labels.
    pred: array-like of predicted labels holding the same number of elements.

  Returns:
    Mean of the element-wise equality, as a float in [0, 1].

  Raises:
    ValueError: if the two inputs hold a different number of elements.
  """
  true = np.asarray(true).ravel()
  pred = np.asarray(pred).ravel()
  if true.size != pred.size:
    raise ValueError(
        f'true and pred must have the same number of elements, '
        f'got {true.size} and {pred.size}')
  return np.mean(true == pred)


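PCA-style reduction: project each sample onto its first 10 left singular vectors (SVD, without mean-centering)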

In [None]:
# Number of leading left singular vectors each sample is projected onto.
N_COMPONENTS = 10

# One reduced (N_COMPONENTS x last-axis) matrix per sample. The last-axis size
# is taken from X itself rather than hard-coded.
XPCA = np.empty((X.shape[0], N_COMPONENTS, X.shape[2]))
for i, x in enumerate(X):
  # Economy-size SVD of a single 2-D sample; only U is used below.
  U, S, Vh = np.linalg.svd(x, full_matrices=False)
  # Project the sample onto its leading left singular vectors, then transpose
  # so the reduced axis comes first. No mean-centering is applied.
  XPCA[i] = (x.T @ U[:, :N_COMPONENTS]).T

Create a convolutional neural network (CNN) for sample classification

In [None]:
# Metric history: one entry per dataset variant evaluated by the loop below.
cf_matrices = []
accuracies = []
precisions = []
recalls = []
f1_scores = []
for x in [X]:#[X, X30, X20, X10, XPCA]:
  print('='*20 + f' Training model for {x.shape[2]} timeframes per sample ' + '='*20)

  # Small CNN: two conv/pool/dropout stages followed by a dense head that
  # emits one sigmoid probability (label 1 = music, label 0 = other).
  ann = models.Sequential()
  # Only the first layer declares the input shape: the per-sample shape with a
  # trailing channel axis of size 1.
  ann.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(*x[0].shape, 1)))
  ann.add(layers.MaxPooling2D((2, 2)))
  ann.add(layers.Dropout(0.2))
  ann.add(layers.Conv2D(32, (3, 3), activation='relu'))
  ann.add(layers.MaxPooling2D((2, 2)))
  ann.add(layers.Dropout(0.2))
  ann.add(layers.Flatten())
  ann.add(layers.Dense(64, activation='relu'))
  ann.add(layers.Dropout(0.2))
  ann.add(layers.Dense(32, activation='relu'))
  ann.add(layers.Dense(1, activation='sigmoid'))

  # Stop once the validation loss has not improved for 3 consecutive epochs.
  callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

  ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  # 80/20 train/test split, then 80/20 train/validation split of the remainder.
  # Both splits are stratified so every subset keeps the class balance.
  X_train, X_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.2, stratify=y, random_state=123)
  X_train, X_val, y_train, y_val = model_selection.train_test_split(X_train, y_train, test_size=0.2, stratify=y_train, random_state=123)

  # NOTE(review): the arrays are passed without an explicit channel axis, so
  # this relies on Keras reconciling them with the declared input shape --
  # confirm on the installed Keras version.
  history = ann.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), callbacks=[callback])

  # Threshold the sigmoid output at 0.5 and flatten the (N, 1) column into a
  # 1-D vector of 0/1 labels so it lines up with y_test for every metric.
  y_pred = (ann.predict(X_test) > 0.5).astype(int).ravel()

  # scikit-learn expects (y_true, y_pred): rows are true classes and columns
  # are predicted classes.
  cf_matrices.append(confusion_matrix(y_test, y_pred))
  accuracy = accuracy_score(y_test, y_pred)
  precision = precision_score(y_test, y_pred)
  recall = recall_score(y_test, y_pred)
  f1 = f1_score(y_test, y_pred)
  print(f'Accuracy: {accuracy:3.3f}')
  print(f'Precision: {precision:3.3f}')
  print(f'Recall: {recall:3.3f}')
  print(f'F1 Score: {f1:3.3f}')

  # Record every metric, not only accuracy, so the history lists stay in sync.
  accuracies.append(accuracy)
  precisions.append(precision)
  recalls.append(recall)
  f1_scores.append(f1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Accuracy: 0.983
Precision: 0.997
Recall: 0.969
F1 Score: 0.983


In [None]:
 # Sanity check: report the validation-set shape, then run the model on the
 # first test sample alone (a leading batch axis of size 1 is added).
 print(X_val.shape)
 single_sample = X_test[0][np.newaxis, ...]
 ann.predict(single_sample)

(3360, 30, 79)


array([[0.981402]], dtype=float32)

In [None]:
# Print the confusion matrix stored for each trained configuration,
# followed by the list of accuracies gathered in the same order.
for matrix in cf_matrices:
  print(matrix)

print(accuracies)

[[2094   65]
 [   6 2035]]
[0.9830952380952381]


In [None]:
from time import perf_counter

import pandas as pd
from pandas.io.pickle import to_pickle

# Number of repeated predict() calls the timing below is averaged over.
N_TIMING_RUNS = 100

test_data = np.load('drive/MyDrive/collab_data/test_data.npy')

# Measure the mean wall-clock time of one full predict() pass over test_data.
tic = perf_counter()
for _ in range(N_TIMING_RUNS):
  test_predict = ann.predict(test_data)
# Optional export of the last predictions (left disabled by the author):
# df = pd.DataFrame(test_predict, columns=['prediction'])
# df.to_csv('sound_classified.csv')
toc = perf_counter()
print((toc-tic)/N_TIMING_RUNS)

# Persist the trained model in HDF5 format.
ann.save('cnn.h5', save_format='h5')

0.5483554661500011
