In [26]:
import os
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_io as tfio
import librosa, librosa.display

import IPython.display as ipd

In [118]:
data_path = os.path.join('..','..','data')


config = {
    "batch_size": 8
}

std = 0.22
mean = 0.08

In [119]:
def create_dataset(filename, sample_duration):
    df = pd.read_csv(filename, sep='\t').dropna()
    files = '../../data/multitracks/' + df['song'] + '/' + df['track']
    labels = df['label']

    dataset = tf.data.Dataset.from_tensors((files.values, tf.cast(labels.values, tf.int32))).unbatch()
    dataset = dataset.map(lambda filename, label: (tfio.IOTensor.graph(tf.int16).from_audio(filename).to_tensor(), label))
    dataset = dataset.map(lambda wav, label: (wav, label, tf.shape(wav)[0] / 44100))
    dataset = dataset.map(lambda wav, label, duration: (create_feature(wav, duration, sample_duration), create_label(label, duration, sample_duration)), num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return dataset.unbatch().filter(has_signal).prefetch(32)

@tf.function
def create_feature(wav, duration, sample_duration):
    wav = tf.cast(wav, tf.float32) / 32767
    #wav = tf.math.reduce_mean(wav, axis=1)  # make mono
    mono = tf.math.reduce_mean(wav, axis=1)  # make mono
    # spectrogram (100 steps/sec with 1025 channels)
    fft = tf.signal.stft(mono, 2048, 441, pad_end=True)
    fft = tf.abs(fft)
    spec = tf.math.log1p(fft)# * fft
    
    spec = scale(spec, std, mean)

    # split into timesteps (10ms * chunk_size)
    spec = tf.reshape(spec, [-1, 100 * sample_duration, 1025])
    return spec

@tf.function
def create_label(y, duration, sample_duration):
    y = tf.one_hot(y,3)
    y = tf.reshape(y, [-1, 3])
    return tf.repeat(y, int(duration / sample_duration), 0)

@tf.function
def has_signal(wav, label):
    return tf.math.reduce_max(tf.abs(wav)) > 0.1

@tf.function
def scale(x, std, mean):
    return (x - mean) / std

In [120]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense

In [121]:
model = Sequential()
model.add(GRU(1, return_sequences=False, input_shape=(None, 1025)))
model.add(Dense(3, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy','binary_accuracy'])
model.summary()

Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_15 (GRU)                 (None, 1)                 3084      
_________________________________________________________________
dense_13 (Dense)             (None, 3)                 6         
Total params: 3,090
Trainable params: 3,090
Non-trainable params: 0
_________________________________________________________________


In [117]:
train = create_dataset(os.path.join(data_path,'classification_al','train.csv'),10).take(50)
for x,y in train.as_numpy_iterator():
    #vals = x.reshape(-1)
    print(np.std(x))
    #plt.figure()
    #plt.hist(vals, bins=10, range=(0,0.1))
    #plt.show()
    #break

0.24307898
0.2526298
0.09056211
0.13342454
0.1626367
0.09940865
0.24771799
0.3643004
0.29029447
0.05667586
0.024456015
0.19681206
0.17358142
0.24179499
0.21352437
0.12536286
0.18132336
0.20680566
0.23869698
0.27562138
0.2166498
0.20339516
0.26985937
0.25677386
0.19807649
0.15130658
0.24211769
0.295714
0.32899982
0.08144931
0.10811776
0.27780747
0.27776062
0.32640076
0.22089773
0.15547466
0.2740978
0.12892884
0.2587987
0.27342868
0.33881405
0.34255624
0.30078632
0.002476396
0.22392088
0.25881702
0.3203019
0.28831795
0.27265388
0.34180203


## Baseline Runs
3 Run:
- without any normalization (`spec = fft^2`)
- use `log(fft)`
- standardize & `log(fft)`

as filter (remove silence) is applied afterwards samples might differ. 

In [70]:
train = create_dataset(os.path.join(data_path,'classification_al','train.csv'),1).batch(config["batch_size"])
dev = create_dataset(os.path.join(data_path,'classification_al','dev.csv'),1).batch(config["batch_size"])
model.fit(train, validation_data=dev)



<tensorflow.python.keras.callbacks.History at 0x7f6c2c3e9fd0>

In [108]:
train = create_dataset(os.path.join(data_path,'classification_al','train.csv'),1).batch(config["batch_size"])
dev = create_dataset(os.path.join(data_path,'classification_al','dev.csv'),1).batch(config["batch_size"])
model.fit(train, validation_data=dev)function



<tensorflow.python.keras.callbacks.History at 0x7f6c2c482f50>

In [122]:
train = create_dataset(os.path.join(data_path,'classification_al','train.csv'),1).batch(config["batch_size"])
dev = create_dataset(os.path.join(data_path,'classification_al','dev.csv'),1).batch(config["batch_size"])
model.fit(train, validation_data=dev)



<tensorflow.python.keras.callbacks.History at 0x7f6c05e1d4d0>