In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import keras
import tensorflow
from keras.optimizers import SGD
import collections

import sys
sys.path.append("..")
from utils import *
from funcs import *

from fcnn_att import model_fcnn
from DCASE_training_functions import *

from tensorflow import ConfigProto
from tensorflow import InteractiveSession

%load_ext autoreload
%autoreload 2

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)


# Please put your csv file for train and validation here.
# If you dont generate the extra augmented data, please use 
# ../evaluation_setup/fold1_train.csv and delete the aug_csv part
train_csv = '../evaluation_setup_v2/fold1_train_10.csv'
val_csv = '../evaluation_setup_v2/fold1_evaluate_10.csv'
#aug_csv = '../evaluation_setup_v2/fold1_train.csv'
#aug_csv = 'evaluation_setup/fold1_train_a_2003.csv'

feat_path = '../features/logmel128_scaled/'
#aug_path = 'features/logmel128_reverb_scaled/'

experiments = 'training_sample_run/'

if not os.path.exists(experiments):
    os.makedirs(experiments)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload




In [2]:
#train_aug_csv = generate_train_aug_csv(train_csv, aug_csv, feat_path, aug_path, experiments)
train_aug_csv = train_csv

num_audio_channels = 1
num_freq_bin = 128
num_classes = 10
max_lr = 0.1
batch_size = 32
num_epochs = 50
mixup_alpha = 0.4
crop_length = 400
sample_num = len(open(train_aug_csv, 'r').readlines()) - 1
print(f'Numbers of training samples: {sample_num}')


# compute delta and delta delta for validation data
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
print(f'{len(y_val)} validation samples with values {set(y_val)}')
print(f'{collections.Counter(y_val)}\n')
print(f'validation data shape: {data_val.shape}')


data_deltas_val = deltas(data_val)
print(f'validation data deltas shape: {data_deltas_val.shape}')

data_deltas_deltas_val = deltas(data_deltas_val)
print(f'validation data delta deltas shape: {data_deltas_val.shape}')

data_val = np.concatenate((data_val[:,:,4:-4,:],data_deltas_val[:,:,2:-2,:],data_deltas_deltas_val),axis=-1)
print(f'validation data concat shape: {data_val.shape}')

y_val = keras.utils.to_categorical(y_val, num_classes)

Numbers of training samples: 13962
2968 validation samples with values {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
Counter({1: 297, 2: 297, 3: 297, 4: 297, 5: 297, 6: 297, 7: 297, 8: 297, 0: 296, 9: 296})

validation data shape: (2968, 128, 431, 1)
validation data deltas shape: (2968, 128, 427, 1)
validation data delta deltas shape: (2968, 128, 427, 1)
validation data concat shape: (2968, 128, 423, 3)


In [6]:
model = model_fcnn(num_classes, input_shape=[num_freq_bin, None, 3*num_audio_channels], num_filters=[48, 96, 192], wd=0)

In [7]:
model.compile(loss='categorical_crossentropy',
              optimizer =SGD(lr=max_lr,decay=1e-6, momentum=0.9, nesterov=False),
              metrics=['accuracy'])

model.summary()

lr_scheduler = LR_WarmRestart(nbatch=np.ceil(sample_num/batch_size), Tmult=2,
                              initial_lr=max_lr, min_lr=max_lr*1e-4,
                              epochs_restart = [3.0, 7.0, 15.0, 31.0, 63.0,127.0]) 
save_path = experiments + "/model-{epoch:02d}-{val_acc:.4f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max')
callbacks = [lr_scheduler, checkpoint]


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 128, None, 3) 0                                            
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 128, None, 3) 12          input_2[0][0]                    
__________________________________________________________________________________________________
zero_padding2d_9 (ZeroPadding2D (None, 132, None, 3) 0           batch_normalization_12[0][0]     
__________________________________________________________________________________________________
conv2d_10 (Conv2D)              (None, 64, None, 144 10800       zero_padding2d_9[0][0]           
__________________________________________________________________________________________________
batch_nor

In [8]:
# Due to the memory limitation, in the training stage we split the training data
train_data_generator = Generator_timefreqmask_withdelta_splitted(feat_path, train_aug_csv, num_freq_bin,
                              batch_size=batch_size,
                              alpha=mixup_alpha,
                              crop_length=crop_length, splitted_num=20, classes=10)()

history = model.fit_generator(train_data_generator,
                              validation_data=(data_val, y_val),
                              epochs=num_epochs, 
                              verbose=1, 
                              workers=4,
                              max_queue_size = 100,
                              callbacks=callbacks,
                              steps_per_epoch=np.ceil(sample_num/batch_size)
                              ) 

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/50

LearningRate:0.050185

Epoch 00001: saving model to training_sample_run//model-01-0.2547.hdf5
Epoch 2/50

LearningRate:0.000010

Epoch 00002: saving model to training_sample_run//model-02-0.3982.hdf5
Epoch 3/50

LearningRate:0.085420

Epoch 00003: saving model to training_sample_run//model-03-0.2574.hdf5
Epoch 4/50

LearningRate:0.050095

Epoch 00004: saving model to training_sample_run//model-04-0.3373.hdf5
Epoch 5/50

LearningRate:0.014717

Epoch 00005: saving model to training_sample_run//model-05-0.5253.hdf5
Epoch 6/50

LearningRate:0.000010

Epoch 00006: saving model to training_sample_run//model-06-0.5263.hdf5
Epoch 7/50

LearningRate:0.096212

Epoch 00007: saving model to training_sample_run//model-07-0.4356.hdf5
Epoch 8/50

LearningRate:0.085389

Epoch 00008: saving model to training_sample_run//model-08-0.3686.hdf5
Epoch 9/50

LearningRate:0.069179

Epoch 00009: saving mod