In [1]:
%matplotlib inline

import os
import numpy as np
import tensorflow as tf
import data_loader
import config
import audio_processing

from tqdm import tqdm

In [2]:
# Single target feature for this experiment; the name is reused below as the
# key into each example's annotation record.
f = "bright_vs_dark"

# Start from the project defaults, restrict to that one feature, and turn off
# the loader's own validation split.
conf = config.get_config()
conf.update({"features": [f], "valid_split": 0.0})

train, valid = data_loader.data_loader(conf)

2021-05-07 12:06:12.475 | INFO     | data_loader:data_loader:28 - Loading csv and checking audio files
2021-05-07 12:06:12.490 | INFO     | data_loader:data_loader:31 - Creating dataset
2021-05-07 12:06:12.491 | INFO     | data_loader:data_loader:34 - Validation split is 0.0
2021-05-07 12:06:12.670 | INFO     | data_loader:data_loader:47 - Creating train and valid splits


In [3]:
# Build the training arrays: one 3-class label and one (513, 3) feature matrix
# per example in `train`.
#
# Labels: a feature value below `min_level` maps to class 0, above `max_level`
# to class 2, and anything inside the band [min_level, max_level] stays at the
# default class 1.
# NOTE(review): 50 looks like the midpoint of the annotation scale -- confirm.
min_level = 50 - conf.get("threshold")
max_level = 50 + conf.get("threshold")

num_examples = len(train)

# NOTE(review): 513 must match the first dimension returned by
# audio_processing.get_hpr after averaging over its last axis (presumably
# n_fft // 2 + 1 frequency bins) -- confirm against the config.
x_train = np.zeros((num_examples, 513, 3))
y_train = np.zeros((num_examples, ))

# Wrap the iterable itself (not the enumerate object) and pass the total so
# the progress bar can show percentage and ETA instead of a bare counter.
for i, (key, value) in enumerate(tqdm(train.items(), total=num_examples)):
    current_class = 1
    file_path = os.path.join(conf.get("base_dir"), f"{key}.wav")

    # With several configured features, the last one that falls outside the
    # band decides the class; in-band features leave it unchanged.
    for feature in conf.get("features"):
        current_val = int(value[feature])
        if current_val < min_level:
            current_class = 0
        elif current_val > max_level:
            current_class = 2

    y_train[i] = current_class

    # Reduce each of the three components to its mean magnitude over the last
    # axis, and store them as the three channels of this example.
    h, p, r = audio_processing.get_hpr(file_path, conf)
    h = np.abs(h).mean(axis=-1)
    p = np.abs(p).mean(axis=-1)
    r = np.abs(r).mean(axis=-1)
    x_train[i, :, 0] = h
    x_train[i, :, 1] = p
    x_train[i, :, 2] = r

1807it [01:32, 19.59it/s]


In [4]:
# Relative frequency of each class label, in ascending label order.
c, u = np.unique(y_train, return_counts=True)

print(u / np.sum(u))

[0.59988932 0.15716657 0.24294411]


In [None]:
# Scale all features by the global peak so the largest value becomes 1.
# Guarded so that an empty array does not raise and an all-zero array is not
# silently turned into NaNs by a division by zero.
x_train_peak = x_train.max() if x_train.size else 0.0
if x_train_peak > 0:
    x_train = x_train / x_train_peak

In [20]:
# Small fully connected classifier: the (513, 3) input is flattened, passed
# through two ReLU layers with dropout, and ends in a 3-way softmax.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(513, 3)))
model.add(tf.keras.layers.Dense(512, activation="relu"))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(3, activation="softmax"))

# Labels are integer class ids (0/1/2), hence the sparse cross-entropy loss.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 1539)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 512)               788480    
_________________________________________________________________
dropout_8 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 64)                32832     
_________________________________________________________________
dropout_9 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 3)                 195       
Total params: 821,507
Trainable params: 821,507
Non-trainable params: 0
________________________________________________

In [22]:
# Train for 10 epochs, holding out 10% of the samples for validation.
# The returned History object is deliberately discarded.
_ = model.fit(
    x=x_train,
    y=y_train,
    batch_size=16,
    epochs=10,
    validation_split=0.1,
    verbose=2
)

Epoch 1/10
35/35 - 0s - loss: 0.8413 - accuracy: 0.6284 - val_loss: 0.8214 - val_accuracy: 0.6129
Epoch 2/10
35/35 - 0s - loss: 0.8365 - accuracy: 0.6373 - val_loss: 0.8177 - val_accuracy: 0.6129
Epoch 3/10
35/35 - 0s - loss: 0.8297 - accuracy: 0.6338 - val_loss: 0.8292 - val_accuracy: 0.5968
Epoch 4/10
35/35 - 0s - loss: 0.8174 - accuracy: 0.6463 - val_loss: 0.8344 - val_accuracy: 0.5806
Epoch 5/10
35/35 - 0s - loss: 0.8167 - accuracy: 0.6445 - val_loss: 0.8358 - val_accuracy: 0.5806
Epoch 6/10
35/35 - 0s - loss: 0.8202 - accuracy: 0.6409 - val_loss: 0.8408 - val_accuracy: 0.5806
Epoch 7/10
35/35 - 0s - loss: 0.8217 - accuracy: 0.6409 - val_loss: 0.8237 - val_accuracy: 0.5968
Epoch 8/10
35/35 - 0s - loss: 0.8078 - accuracy: 0.6463 - val_loss: 0.8214 - val_accuracy: 0.5968
Epoch 9/10
35/35 - 0s - loss: 0.8167 - accuracy: 0.6373 - val_loss: 0.8291 - val_accuracy: 0.5806
Epoch 10/10
35/35 - 0s - loss: 0.8075 - accuracy: 0.6517 - val_loss: 0.8197 - val_accuracy: 0.5968
