# DCASE Challenge 2020 - Task 1 - Acoustic scene classification


## Parameters

In [1]:
# ---------------------------------------------------------------
# Feature extraction settings
# ---------------------------------------------------------------
# Length and hop of each analysed sequence (presumably seconds -- TODO confirm).
sequence_time = sequence_hop_time = 2.0
# STFT framing: the analysis window and the FFT size share the same value.
audio_win = n_fft = 2048
audio_hop = 1024
# Audio sampling rate (presumably Hz -- TODO confirm against the extractor).
sr = 44100
# Name of the feature extractor to use, and its extra keyword arguments.
features_name = 'MelSpectrogram'
features_kwargs = dict(mel_bands=40)
# Alternative configuration kept for reference (OpenL3 embeddings):
#features_name = 'Openl3'
#features_kwargs = {'content_type': 'music',
#                   'input_repr': 'mel256',
#                   'embedding_size': 512}

# ---------------------------------------------------------------
# Normalisation strategy handed to the Scaler
# ---------------------------------------------------------------
normalizer = 'minmax'

# ---------------------------------------------------------------
# Training settings (all forwarded as keyword arguments to train())
# ---------------------------------------------------------------
epochs = 200
# NOTE(review): presumably the early-stopping patience in epochs -- confirm.
early_stopping = 100
# NOTE(review): presumably the minimum gain counted as an improvement -- confirm.
considered_improvement = 0
optimizer = 'Adam'
learning_rate = 0.001
batch_size = 64
verbose = 1

# ---------------------------------------------------------------
# Dataset location
# ---------------------------------------------------------------
dataset_name = 'TAUUrbanAcousticScenes2020Mobile'
dataset_path = '../../../../data/pzinemanas/TAUUrbanAcousticScenes2020Mobile'
# Sub-folder names for raw audio and extracted features.
audio_folder = 'audio'
feature_folder = 'features'

## Imports

In [2]:
import sys
import os
import glob
import numpy as np
import argparse

sys.path.append('../')
from dcase_models.utils.files import load_json, mkdir_if_not_exists
from dcase_models.data.data_generator import *
from dcase_models.model.container import *
from dcase_models.model.models import *
from dcase_models.data.scaler import Scaler
from dcase_models.data.feature_extractor import *
from dcase_models.utils.misc import get_class_by_name

# Expose only the CUDA device with index 1 to this process.
# NOTE(review): this runs after the dcase_models imports above, which already
# load the Keras/TensorFlow backend (see the "Using TensorFlow backend." output);
# the usual convention is to set it before importing the framework -- confirm
# that it still takes effect here.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Define model

In [22]:
from autopool import AutoPool1D
class DCASE2020Task1Baseline(DCASEModelContainer):
    """Frame-wise dense network with AutoPool aggregation.

    The network applies a stack of time-distributed dense (ReLU) layers to
    every frame of the input, followed by a time-distributed softmax layer
    with ``n_classes`` units, and finally pools the frame-level outputs over
    axis 1 with ``AutoPool1D`` to obtain one prediction per sequence.

    Parameters
    ----------
    model : keras Model or None
        Already-built model to wrap. When given, it is passed to the base
        container unchanged instead of building a new network.
    folder : str or None
        Forwarded to the base container. When given, no network is built
        here (presumably the base class loads the model from this folder --
        TODO confirm against DCASEModelContainer).
    metrics : list of str
        Forwarded to the base container.
    n_frames_cnn : int
        Number of frames (first dimension) of each input sequence.
    n_freq_cnn : int
        Number of features per frame (second dimension) of each input.
    n_classes : int
        Number of units of the softmax output layer.
    hidden_layers_size : sequence of int
        Units of each hidden dense layer, in order. May be empty, in which
        case the softmax layer is applied directly to the input.

    Notes
    -----
    The list defaults of ``metrics`` and ``hidden_layers_size`` are shared
    between calls; they are only read here, never mutated.
    """
    def __init__(self, model=None, folder=None, metrics=['accuracy'], n_frames_cnn=96, 
                n_freq_cnn=64, n_classes=10, hidden_layers_size=[512, 128]):

        # Build a fresh network only when the caller supplied neither a model
        # nor a folder; a model passed in explicitly must not be discarded.
        if model is None and folder is None:
            # input
            inputs = Input(shape=(n_frames_cnn, n_freq_cnn), dtype='float32', name='input')

            # Start from the input so the graph is valid even when
            # hidden_layers_size is empty.
            y = inputs

            # Hidden layers (applied independently to every frame)
            for idx, layer_size in enumerate(hidden_layers_size):
                y = TimeDistributed(Dense(layer_size, activation='relu',
                                    name='dense_{}'.format(idx+1)))(y)

            # Output layer (frame-level class probabilities)
            y = TimeDistributed(Dense(n_classes, activation='softmax',
                                name='output_t'))(y)

            # Apply autopool over time dimension
            y = AutoPool1D(axis=1, name='output')(y)

            # Create model
            model = Model(inputs=inputs, outputs=y, name='model')

        # model_name matches this class (it previously said "Task5").
        super().__init__(model=model, folder=folder, model_name='DCASE2020Task1Baseline', metrics=metrics)

## Define feature extractor and data generator

In [4]:
# get feature extractor class
feature_extractor_class = get_class_by_name(globals(), features_name, FeatureExtractor)
# init feature extractor
feature_extractor = feature_extractor_class(sequence_time=sequence_time, 
                                            sequence_hop_time=sequence_hop_time, 
                                            audio_win=audio_win, 
                                            audio_hop=audio_hop, 
                                            n_fft=n_fft, 
                                            sr=sr, **features_kwargs)

# get dataset class
data_generator_class = get_class_by_name(globals(), dataset_name, DataGenerator)
# init data_generator
data_generator = data_generator_class(dataset_path, feature_folder, features_name, 
                                      audio_folder=audio_folder)

The dataset was not downloaded : download [y] or continue without downloading [n] : n


## Extract features if needed

In [5]:
# Each entry pairs an audio folder ('audio') with its features folder ('features').
folders_list = data_generator.get_folder_lists()
for folder_pair in folders_list:
    audio_dir = folder_pair['audio']
    print('Extracting features from folder: ', audio_dir)
    extraction_result = feature_extractor.extract(audio_dir, folder_pair['features'])
    # A None result is reported as "features already computed".
    if extraction_result is None:
        print('Features already were calculated, continue...')
    print('Done!')

Extracting features from folder:  ../../../../data/pzinemanas/TAUUrbanAcousticScenes2020Mobile/audio
Features already were calculated, continue...
Done!


## Load data

In [6]:
# Load the dataset through the data generator; the later cells read the
# training and test arrays from it.
print('Loading data... ')
data_generator.load_data()
print('Done!')

Loading data... 
fold: [############################################################] 2/2
Done!


## Get data for training and apply scaler

In [7]:
# Fetch the training and validation arrays from the data generator
X_train, Y_train, X_val, Y_val = data_generator.get_data_for_training()

# Fit the scaler on the training data only, then apply it to both splits
scaler = Scaler(normalizer=normalizer)
scaler.fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

## Create model

In [23]:
# Derive the network dimensions from the data itself
n_frames_cnn, n_freq_cnn = X_train.shape[1], X_train.shape[2]
n_classes = Y_train.shape[1]
print(n_frames_cnn, n_freq_cnn, n_classes)

# Build a new model (no pre-built model, no folder to load from)
model_container = DCASE2020Task1Baseline(
    model=None,
    folder=None,
    n_classes=n_classes,
    n_frames_cnn=n_frames_cnn,
    n_freq_cnn=n_freq_cnn
)

model_container.model.summary()

84 40 10
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 84, 40)            0         
_________________________________________________________________
time_distributed_10 (TimeDis (None, 84, 512)           20992     
_________________________________________________________________
time_distributed_11 (TimeDis (None, 84, 128)           65664     
_________________________________________________________________
time_distributed_12 (TimeDis (None, 84, 10)            1290      
_________________________________________________________________
output (AutoPool1D)          (None, 10)                10        
Total params: 87,956
Trainable params: 87,956
Non-trainable params: 0
_________________________________________________________________


## Set paths and save model json

In [24]:
# Experiment outputs go to <model_name>/<dataset_name>
model_name = 'DCASE2020Task1Baseline'
exp_folder = os.path.join(model_name, dataset_name)

# Create the parent folder first, then the experiment folder
for folder_to_create in (model_name, exp_folder):
    mkdir_if_not_exists(folder_to_create)

# Store the model architecture as JSON in the experiment folder
print('saving model to %s' % exp_folder)
model_container.save_model_json(exp_folder)

saving model to DCASE2020Task1Baseline/TAUUrbanAcousticScenes2020Mobile


## Train model

In [25]:
# Collect the training hyper-parameters defined in the parameters cell
train_arguments = dict(
    early_stopping=early_stopping,
    epochs=epochs,
    considered_improvement=considered_improvement,
    learning_rate=learning_rate,
    batch_size=batch_size,
    verbose=verbose,
    optimizer=optimizer,
)

# Train on the scaled training data, validating on the scaled validation data;
# exp_folder is passed as the weights path.
model_container.train(
    X_train, Y_train, X_val, Y_val,
    weights_path=exp_folder,
    **train_arguments
)


Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/200
Acc = 0.5032 -  Best val Acc: 0.5032 (IMPROVEMENT, saving)

Epoch 2/200
Acc = 0.5531 -  Best val Acc: 0.5531 (IMPROVEMENT, saving)

Epoch 3/200
Acc = 0.5809 -  Best val Acc: 0.5809 (IMPROVEMENT, saving)

Epoch 4/200
Acc = 0.5945 -  Best val Acc: 0.5945 (IMPROVEMENT, saving)

Epoch 5/200
Acc = 0.6324 -  Best val Acc: 0.6324 (IMPROVEMENT, saving)

Epoch 6/200
Acc = 0.6492 -  Best val Acc: 0.6492 (IMPROVEMENT, saving)

Epoch 7/200
Acc = 0.6490 - Best val Acc: 0.6492 (5)

Epoch 8/200
Acc = 0.6634 -  Best val Acc: 0.6634 (IMPROVEMENT, saving)

Epoch 9/200
Acc = 0.6739 -  Best val Acc: 0.6739 (IMPROVEMENT, saving)

Epoch 10/200
Acc = 0.7047 -  Best val Acc: 0.7047 (IMPROVEMENT, saving)

Epoch 11/200
Acc = 0.7109 -  Best val Acc: 0.7109 (IMPROVEMENT, saving)

Epoch 12/200
Acc = 0.7197 -  Best val Acc: 0.7197 (IMPROVEMENT, saving)

Epoch 13/200
Acc = 0.7182 

Acc = 0.8729 - Best val Acc: 0.8770 (53)

Epoch 57/200
Acc = 0.8852 -  Best val Acc: 0.8852 (IMPROVEMENT, saving)

Epoch 58/200
Acc = 0.8691 - Best val Acc: 0.8852 (56)

Epoch 59/200
Acc = 0.8780 - Best val Acc: 0.8852 (56)

Epoch 60/200
Acc = 0.8782 - Best val Acc: 0.8852 (56)

Epoch 61/200
Acc = 0.8722 - Best val Acc: 0.8852 (56)

Epoch 62/200
Acc = 0.8815 - Best val Acc: 0.8852 (56)

Epoch 63/200
Acc = 0.8886 -  Best val Acc: 0.8886 (IMPROVEMENT, saving)

Epoch 64/200
Acc = 0.8937 -  Best val Acc: 0.8937 (IMPROVEMENT, saving)

Epoch 65/200
Acc = 0.8804 - Best val Acc: 0.8937 (63)

Epoch 66/200
Acc = 0.8790 - Best val Acc: 0.8937 (63)

Epoch 67/200
Acc = 0.8820 - Best val Acc: 0.8937 (63)

Epoch 68/200
Acc = 0.8853 - Best val Acc: 0.8937 (63)

Epoch 69/200
Acc = 0.8876 - Best val Acc: 0.8937 (63)

Epoch 70/200
Acc = 0.8963 -  Best val Acc: 0.8963 (IMPROVEMENT, saving)

Epoch 71/200
Acc = 0.8881 - Best val Acc: 0.8963 (69)

Epoch 72/200
Acc = 0.8911 - Best val Acc: 0.8963 (69)

Epoch 

Acc = 0.9211 - Best val Acc: 0.9378 (174)

Epoch 179/200
Acc = 0.9316 - Best val Acc: 0.9378 (174)

Epoch 180/200
Acc = 0.9353 - Best val Acc: 0.9378 (174)

Epoch 181/200
Acc = 0.9249 - Best val Acc: 0.9378 (174)

Epoch 182/200
Acc = 0.9221 - Best val Acc: 0.9378 (174)

Epoch 183/200
Acc = 0.9287 - Best val Acc: 0.9378 (174)

Epoch 184/200
Acc = 0.9341 - Best val Acc: 0.9378 (174)

Epoch 185/200
Acc = 0.9323 - Best val Acc: 0.9378 (174)

Epoch 186/200
Acc = 0.9390 -  Best val Acc: 0.9390 (IMPROVEMENT, saving)

Epoch 187/200
Acc = 0.9351 - Best val Acc: 0.9390 (185)

Epoch 188/200
Acc = 0.9368 - Best val Acc: 0.9390 (185)

Epoch 189/200
Acc = 0.9294 - Best val Acc: 0.9390 (185)

Epoch 190/200
Acc = 0.9378 - Best val Acc: 0.9390 (185)

Epoch 191/200
Acc = 0.9276 - Best val Acc: 0.9390 (185)

Epoch 192/200
Acc = 0.9257 - Best val Acc: 0.9390 (185)

Epoch 193/200
Acc = 0.9385 - Best val Acc: 0.9390 (185)

Epoch 194/200
Acc = 0.9231 - Best val Acc: 0.9390 (185)

Epoch 195/200
Acc = 0.9336 -

## Test model

In [26]:
# load best_weights
model_container.load_model_weights(exp_folder)

# test model
X_test, Y_test = data_generator.get_data_for_testing()
X_test = scaler.transform(X_test)
results = model_container.evaluate(X_test, Y_test)

print(results['accuracy'])

0.409366576819407
