# DCASE Challenge 2020 - Task 1 - Acoustic scene classification


## Parameters

In [1]:
# ---------------------------------------------------------------------------
# Feature extraction
# ---------------------------------------------------------------------------
# Length and hop of each feature sequence handed to the extractor
# (presumably in seconds -- TODO confirm against the extractor documentation).
sequence_time = 2.0
sequence_hop_time = 2.0
# Analysis window, hop and FFT size (presumably in samples -- TODO confirm).
audio_win = 2048
audio_hop = 1024
n_fft = 2048
# Sampling rate forwarded to the extractor (presumably Hz).
sr = 44100
# `features_name` selects the extractor class; `features_kwargs` is forwarded
# to its constructor as extra keyword arguments.
features_name = 'MelSpectrogram'
features_kwargs = dict(mel_bands=40)
# Alternative extractor configuration, kept for reference (disabled):
#features_name = 'Openl3'
#features_kwargs = {'content_type': 'music', 
#                   'input_repr': 'mel256',
#                   'embedding_size': 512} 

# ---------------------------------------------------------------------------
# Normalisation
# ---------------------------------------------------------------------------
# Normalisation mode passed to Scaler(normalizer=...).
normalizer = 'minmax'

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
# All of these are forwarded verbatim to model_container.train(...).
epochs = 20
batch_size = 64
learning_rate = 1e-3
optimizer = 'Adam'
early_stopping = 10
considered_improvement = 0
verbose = 1

# ---------------------------------------------------------------------------
# Dataset
# ---------------------------------------------------------------------------
# `dataset_name` selects the dataset class; the path is relative to the
# notebook's working directory and is specific to the original machine.
dataset_name = 'TAUUrbanAcousticScenes2020Mobile'
dataset_path = '../../../../data/pzinemanas/TAUUrbanAcousticScenes2020Mobile'
audio_folder = 'audio'
feature_folder = 'features'

## Imports

In [2]:
import sys
import os
import glob
import numpy as np
import argparse

sys.path.append('../')
from dcase_models.utils.files import load_json, mkdir_if_not_exists
from dcase_models.data.data_generator import DataGenerator
from dcase_models.model.container import DCASEModelContainer
from dcase_models.data.datasets import get_available_datasets
from dcase_models.data.features import get_available_features
from dcase_models.model.models import get_available_models
from dcase_models.data.scaler import Scaler
from dcase_models.data.feature_extractor import FeatureExtractor

# Expose only GPU index 1 to CUDA-based libraries. The index is hard-coded for
# the machine this notebook was originally run on; adjust it when running elsewhere.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## Define model

In [12]:
from autopool import AutoPool1D
from keras.layers import Input, TimeDistributed, Dense
from keras.models import Model
class DCASE2020Task1Baseline(DCASEModelContainer):
    """Frame-wise dense baseline for DCASE 2020 Task 1.

    When neither ``model`` nor ``folder`` is given, a Keras model is built:
    the ``(n_frames_cnn, n_freq_cnn)`` input goes through a stack of
    time-distributed ReLU ``Dense`` layers, a time-distributed softmax layer
    with ``n_classes`` units, and finally ``AutoPool1D`` over axis 1 so that
    one prediction vector is produced per input sequence.

    Parameters
    ----------
    model : keras model or None
        Pre-built model handed to the base container. If given, it is used
        as-is and no new network is built.
    folder : str or None
        Forwarded to the base container (presumably a folder to load a saved
        model from -- confirm in ``DCASEModelContainer``). If given, no new
        network is built here.
    metrics : list
        Forwarded unchanged to the base container.
    n_frames_cnn : int
        Size of the first (time) axis of each input example.
    n_freq_cnn : int
        Size of the second (feature) axis of each input example.
    n_classes : int
        Number of units of the softmax output layer.
    hidden_layers_size : sequence of int
        Units of each hidden ``Dense`` layer, in order. May be empty, in
        which case the softmax layer is applied directly to the input.
    """
    def __init__(self, model=None, folder=None, metrics=['accuracy'], n_frames_cnn=96, 
                n_freq_cnn=64, n_classes=10, hidden_layers_size=[512, 128]):

        # Only build a fresh network when the caller supplied neither a model
        # nor a folder; previously a supplied `model` was silently replaced
        # whenever `folder` was None.
        if model is None and folder is None:
            # input
            inputs = Input(shape=(n_frames_cnn, n_freq_cnn), dtype='float32', name='input')

            # Start from the input so that an empty `hidden_layers_size`
            # still leaves `y` defined for the output layer.
            y = inputs

            # Hidden layers (named dense_1, dense_2, ...)
            for idx, n_units in enumerate(hidden_layers_size, start=1):
                y = TimeDistributed(Dense(n_units, activation='relu',
                                    name='dense_{}'.format(idx)))(y)

            # Output layer: per-frame class probabilities
            y = TimeDistributed(Dense(n_classes, activation='softmax',
                                name='output_t'))(y)

            # Apply autopool over time dimension
            y = AutoPool1D(axis=1, name='output')(y)

            # Create model
            model = Model(inputs=inputs, outputs=y, name='model')

        # The container name now matches this class (it previously said
        # 'DCASE2020Task5Baseline', a leftover from another task).
        super().__init__(model=model, folder=folder, model_name='DCASE2020Task1Baseline', metrics=metrics)

## Define feature extractor and data generator

In [4]:
# Look up the extractor class registered under `features_name` and build an
# instance from the framing/FFT settings defined in the parameters cell.
feature_extractor_class = get_available_features()[features_name]
feature_extractor = feature_extractor_class(
    sr=sr,
    n_fft=n_fft,
    audio_win=audio_win,
    audio_hop=audio_hop,
    sequence_time=sequence_time,
    sequence_hop_time=sequence_hop_time,
    **features_kwargs
)

# Same lookup-then-instantiate pattern for the dataset: the class registered
# under `dataset_name` receives the dataset location and folder names.
data_generator_class = get_available_datasets()[dataset_name]
data_generator = data_generator_class(
    dataset_path,
    feature_folder,
    features_name,
    audio_folder=audio_folder
)

## Extract features if needed

In [5]:
# Each entry returned by get_folder_lists() is indexed by 'audio' (source
# folder) and 'features' (destination folder).
folders_list = data_generator.get_folder_lists()
for folder_paths in folders_list:
    audio_dir = folder_paths['audio']
    print('Extracting features from folder: ', audio_dir)
    # A None result is reported as "already calculated"; any other value
    # is not inspected further.
    outcome = feature_extractor.extract(audio_dir, folder_paths['features'])
    if outcome is None:
        print('Features already were calculated, continue...')
    print('Done!')

Extracting features from folder:  ../../../../data/pzinemanas/TAUUrbanAcousticScenes2020Mobile/audio
../../../../data/pzinemanas/TAUUrbanAcousticScenes2020Mobile/features/MelSpectrogram/parameters.json
Features already were calculated, continue...
Done!


## Load data

In [6]:
# Ask the data generator to load its data into memory (presumably the
# features extracted in the previous step -- TODO confirm in the dataset class).
print('Loading data... ')
data_generator.load_data()
print('Done!')

Loading data... 
fold: [############################################################] 2/2
Done!


## Get data for training and apply scaler

In [9]:
# Training and validation splits as provided by the data generator.
X_train, Y_train, X_val, Y_val = data_generator.get_data_for_training()

# The scaler is fitted on the training features only and then applied to
# both splits, so validation data never influences the fitted scaler.
scaler = Scaler(normalizer=normalizer)
scaler.fit(X_train)
X_train, X_val = (scaler.transform(split) for split in (X_train, X_val))

## Create model

In [13]:
# Derive the network dimensions from the prepared arrays: axes 1 and 2 of
# X_train give the per-example input shape, axis 1 of Y_train the class count.
n_frames_cnn, n_freq_cnn = X_train.shape[1:3]
n_classes = Y_train.shape[1]
print(n_frames_cnn, n_freq_cnn, n_classes)

# No model and no folder are supplied, so the container builds a new network.
model_container = DCASE2020Task1Baseline(
    model=None,
    folder=None,
    n_frames_cnn=n_frames_cnn,
    n_freq_cnn=n_freq_cnn,
    n_classes=n_classes
)

model_container.model.summary()

84 40 10
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 84, 40)            0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 84, 512)           20992     
_________________________________________________________________
time_distributed_2 (TimeDist (None, 84, 128)           65664     
_________________________________________________________________
time_distributed_3 (TimeDist (None, 84, 10)            1290      
_________________________________________________________________
output (AutoPool1D)          (None, 10)                10        
Total params: 87,956
Trainable params: 87,956
Non-trainable params: 0
_________________________________________________________________


## Set paths and save model json

In [14]:
# Experiment outputs go to <model_name>/<dataset_name>, relative to the
# notebook's working directory. The parent is created before the child.
model_name = 'DCASE2020Task1Baseline'
exp_folder = os.path.join(model_name, dataset_name)
for directory in (model_name, exp_folder):
    mkdir_if_not_exists(directory)

# Store the model description as JSON inside the experiment folder.
print('saving model to %s' % exp_folder)
model_container.save_model_json(exp_folder)

saving model to DCASE2020Task1Baseline/TAUUrbanAcousticScenes2020Mobile


## Train model

In [15]:
# Gather the training hyper-parameters from the parameters cell; they are
# passed to train() as keyword arguments.
train_arguments = dict(
    early_stopping=early_stopping,
    epochs=epochs,
    considered_improvement=considered_improvement,
    learning_rate=learning_rate,
    batch_size=batch_size,
    verbose=verbose,
    optimizer=optimizer
)

# `weights_path` is set to the experiment folder, which the test cell later
# passes to load_model_weights().
model_container.train(
    X_train, Y_train, X_val, Y_val,
    weights_path=exp_folder,
    **train_arguments
)


Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/20
Acc = 0.4986 -  Best val Acc: 0.4986 (IMPROVEMENT, saving)

Epoch 2/20
Acc = 0.5445 -  Best val Acc: 0.5445 (IMPROVEMENT, saving)

Epoch 3/20
Acc = 0.5764 -  Best val Acc: 0.5764 (IMPROVEMENT, saving)

Epoch 4/20
Acc = 0.6151 -  Best val Acc: 0.6151 (IMPROVEMENT, saving)

Epoch 5/20
Acc = 0.6230 -  Best val Acc: 0.6230 (IMPROVEMENT, saving)

Epoch 6/20
Acc = 0.6325 -  Best val Acc: 0.6325 (IMPROVEMENT, saving)

Epoch 7/20
Acc = 0.6569 -  Best val Acc: 0.6569 (IMPROVEMENT, saving)

Epoch 8/20
Acc = 0.6725 -  Best val Acc: 0.6725 (IMPROVEMENT, saving)

Epoch 9/20
Acc = 0.6733 -  Best val Acc: 0.6733 (IMPROVEMENT, saving)

Epoch 10/20
Acc = 0.7007 -  Best val Acc: 0.7007 (IMPROVEMENT, saving)

Epoch 11/20
Acc = 0.7089 -  Best val Acc: 0.7089 (IMPROVEMENT, saving)

Epoch 12/20
Acc = 0.7028 - Best val Acc: 0.7089 (10)

Epoch 13/20
Acc = 0.7184 -  Best val 

## Test model

In [16]:
# Load weights from the experiment folder (the same path that was given to
# train() as `weights_path`; presumably the best-validation checkpoint).
model_container.load_model_weights(exp_folder)

# Evaluate on the test split, reusing the scaler that was fitted on the
# training data so test features get the same normalisation.
X_test, Y_test = data_generator.get_data_for_testing()
X_test = scaler.transform(X_test)
results = model_container.evaluate(X_test, Y_test)

# NOTE(review): the recorded run printed ~0.20 here, while validation accuracy
# during training reached ~0.72. A gap that large suggests a mismatch somewhere
# in the evaluation path (weights loaded, test data format, or how `evaluate`
# aggregates predictions) -- investigate before trusting this number.
print(results['accuracy'])

0.1997978436657682
