In [12]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import keras
import tensorflow
from keras.optimizers import SGD
import collections

import sys
sys.path.append("..")
from utils import *
from funcs import *

from fcnn_att import model_fcnn
from DCASE_training_functions import *

from tensorflow import ConfigProto
from tensorflow import InteractiveSession

%load_ext autoreload
%autoreload 2

config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)


# # 2020 DCASE
# train_csv = '../evaluation_setup_v2/fold1_train_shuffled.csv'
# val_csv = '../evaluation_setup_v2/fold1_eval_shuffled.csv'
# feat_path = '../features/logmel128_scaled/'
# experiments = 'DCASE_2020/'

# # 2019 DCASE
# base_path = '/work/user_data/jtaylor/data/acoustic_scene_classification/RFR-CNN-2019/datasets/TAU-urban-acoustic-scenes-2019-development/'
# train_csv = f'{base_path}evaluation_setup/fold1_train.csv'
# val_csv = f'{base_path}evaluation_setup/fold1_evaluate.csv'
# feat_path = '../features_2019/logmel128_scaled/'
# experiments = 'DCASE_2019/'

# Cochlscene
# base_path = '/work/user_data/jtaylor/data/acoustic_scene_classification/data/CochlScene/'
# train_csv = f'{base_path}train_fold.tsv'
# val_csv = f'{base_path}val_fold.tsv'
# feat_path = f'{base_path}features/logmel128_scaled_v2/'
# experiments = 'cochlscene/'

#scenes
base_path = '/work/user_data/jtaylor/data/acoustic_scene_classification/data/scenes/'
train_csv = f'{base_path}train_fold.tsv'
val_csv = f'{base_path}val_fold.tsv'
feat_path = f'{base_path}features/logmel128_scaled_30/'
experiments = 'scenes/'

if not os.path.exists(experiments):
    os.makedirs(experiments)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload




In [22]:
#train_aug_csv = generate_train_aug_csv(train_csv, aug_csv, feat_path, aug_path, experiments)
train_aug_csv = train_csv

num_audio_channels = 1
num_freq_bin = 128
num_classes = 10
max_lr = 0.1
batch_size = 8
num_epochs = 300
mixup_alpha = 0.4
crop_length = 400
sample_num = len(open(train_aug_csv, 'r').readlines()) - 1
print(f'Numbers of training samples: {sample_num}')


# compute delta and delta delta for validation data
data_val, y_val = load_data_2020(feat_path, val_csv, num_freq_bin, 'logmel')
print(f'{len(y_val)} validation samples with values {set(y_val)}')
print(f'{collections.Counter(y_val)}\n')
print(f'validation data shape: {data_val.shape}')


data_deltas_val = deltas(data_val)
print(f'validation data deltas shape: {data_deltas_val.shape}')

data_deltas_deltas_val = deltas(data_deltas_val)
print(f'validation data delta deltas shape: {data_deltas_val.shape}')

data_val = np.concatenate((data_val[:,:,4:-4,:],data_deltas_val[:,:,2:-2,:],data_deltas_deltas_val),axis=-1)
print(f'validation data concat shape: {data_val.shape}')

y_val = keras.utils.to_categorical(y_val, num_classes)

Numbers of training samples: 169
100 validation samples with values {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
Counter({7: 10, 8: 10, 9: 10, 0: 10, 2: 10, 6: 10, 3: 10, 1: 10, 5: 10, 4: 10})

validation data shape: (100, 128, 1292, 1)
validation data deltas shape: (100, 128, 1288, 1)
validation data delta deltas shape: (100, 128, 1288, 1)
validation data concat shape: (100, 128, 1284, 3)


In [23]:
model = model_fcnn(num_classes, input_shape=[num_freq_bin, None, 3*num_audio_channels], num_filters=[48, 96, 192], wd=0)

In [24]:
model.compile(loss='categorical_crossentropy',
              optimizer =SGD(lr=max_lr,decay=1e-6, momentum=0.9, nesterov=False),
              metrics=['accuracy'])

model.summary()

lr_scheduler = LR_WarmRestart(nbatch=np.ceil(sample_num/batch_size), Tmult=2,
                              initial_lr=max_lr, min_lr=max_lr*1e-4,
                              epochs_restart = [3.0, 7.0, 15.0, 31.0, 63.0,127.0]) 
save_path = experiments + "/model-{epoch:02d}-{val_acc:.4f}.hdf5"
checkpoint = keras.callbacks.ModelCheckpoint(save_path, monitor='val_acc', verbose=1, save_best_only=False, mode='max')
callbacks = [lr_scheduler, checkpoint]

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 128, None, 3) 0                                            
__________________________________________________________________________________________________
batch_normalization_45 (BatchNo (None, 128, None, 3) 12          input_5[0][0]                    
__________________________________________________________________________________________________
zero_padding2d_33 (ZeroPadding2 (None, 132, None, 3) 0           batch_normalization_45[0][0]     
__________________________________________________________________________________________________
conv2d_37 (Conv2D)              (None, 64, None, 144 10800       zero_padding2d_33[0][0]          
__________________________________________________________________________________________________
batch_norm

In [25]:
# Due to the memory limitation, in the training stage we split the training data
train_data_generator = Generator_timefreqmask_withdelta_splitted(feat_path, train_aug_csv, num_freq_bin,
                              batch_size=batch_size,
                              alpha=mixup_alpha,
                              crop_length=crop_length, splitted_num=1, classes=10)()

history = model.fit_generator(train_data_generator,
                              validation_data=(data_val, y_val),
                              epochs=num_epochs, 
                              verbose=1, 
                              workers=4,
                              max_queue_size = 100,
                              callbacks=callbacks,
                              steps_per_epoch=np.ceil(sample_num/batch_size)
                              ) 

Epoch 1/300

LearningRate:0.053572

Epoch 00001: saving model to scenes//model-01-0.1700.hdf5
Epoch 2/300

LearningRate:0.000137

Epoch 00002: saving model to scenes//model-02-0.2500.hdf5
Epoch 3/300

LearningRate:0.086596

Epoch 00003: saving model to scenes//model-03-0.1800.hdf5
Epoch 4/300

LearningRate:0.051789

Epoch 00004: saving model to scenes//model-04-0.1100.hdf5
Epoch 5/300

LearningRate:0.015938

Epoch 00005: saving model to scenes//model-05-0.2100.hdf5
Epoch 6/300

LearningRate:0.000042

Epoch 00006: saving model to scenes//model-06-0.2200.hdf5
Epoch 7/300

LearningRate:0.096528

Epoch 00007: saving model to scenes//model-07-0.3000.hdf5
Epoch 8/300

LearningRate:0.085982

Epoch 00008: saving model to scenes//model-08-0.3400.hdf5
Epoch 9/300

LearningRate:0.069959

Epoch 00009: saving model to scenes//model-09-0.2800.hdf5
Epoch 10/300

LearningRate:0.050897

Epoch 00010: saving model to scenes//model-10-0.4400.hdf5
Epoch 11/300

LearningRate:0.031700

Epoch 00011: saving mo