# SB_CNN Example

In [2]:
# Standard-library and numerical imports.
import sys
import os
import glob
import numpy as np
import argparse

# Add the parent directory to the module search path before importing
# dcase_models (presumably so the local checkout of the package is picked up
# when the notebook runs from its own folder -- confirm against the repo layout).
sys.path.append('../')
from dcase_models.utils.files import load_json, mkdir_if_not_exists
from dcase_models.data.datasets import UrbanSound8k
from dcase_models.data.features import MelSpectrogram
from dcase_models.model.models import SB_CNN
from dcase_models.data.scaler import Scaler

# Restrict CUDA-based libraries in this process to the GPU with index 1.
# NOTE(review): this is set after the dcase_models imports above -- confirm
# that no import has already initialised CUDA, otherwise it may have no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

## Define parameters

In [3]:
# features parameters (all forwarded to MelSpectrogram below)
sequence_time = 2.0        # presumably seconds per analysis sequence -- confirm
sequence_hop_time = 0.5    # presumably seconds between consecutive sequences -- confirm
audio_hop = 670
audio_win = 1024
n_fft = 1024
sr = 22050
mel_bands = 128

# normalizer (passed to Scaler)
normalizer = 'minmax'

# train parameters (bundled into `train_arguments` for model_container.train)
early_stopping = 10
epochs = 20
considered_improvement = 0
learning_rate = 0.001
batch_size = 64
verbose = 1
optimizer = 'Adam'

# dataset parameters
# The dataset location is machine-specific, so it can be overridden with the
# URBANSOUND8K_PATH environment variable. When the variable is not set, the
# original hard-coded location is used, so existing setups keep working.
dataset_path = os.environ.get('URBANSOUND8K_PATH',
                              '../../../../data/pzinemanas/UrbanSound8K')
audio_folder = 'audio22050'
feature_folder = 'features'

## Init Feature Extractor and Data Generator

In [4]:
# Feature extractor: gather the MelSpectrogram settings in one mapping and
# unpack them as keyword arguments.
mel_settings = {
    'sequence_time': sequence_time,
    'sequence_hop_time': sequence_hop_time,
    'audio_win': audio_win,
    'audio_hop': audio_hop,
    'n_fft': n_fft,
    'sr': sr,
    'mel_bands': mel_bands,
}
feature_extractor = MelSpectrogram(**mel_settings)

# Data generator for UrbanSound8K, pointed at the dataset root, the feature
# folder and the 'MelSpectrogram' feature name.
data_generator = UrbanSound8k(
    dataset_path,
    feature_folder,
    'MelSpectrogram',
    audio_folder=audio_folder
)

../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1
['../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/101415-3-0-2.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/101415-3-0-3.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/101415-3-0-8.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/102106-3-0-0.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/102305-6-0-0.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/102842-3-0-1.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/102842-3-1-0.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/102842-3-1-5.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/102842-3-1-6.npy', '../../../../data/pzinemanas/UrbanSound8K/features/MelSpectrogram/fold1/103074-7-0-0.npy', '../../../../data/

## Extract Features

In [5]:
# Hand the feature extractor to the data generator. The call is the last
# expression in the cell, so its return value is echoed as the cell output.
# NOTE(review): presumably features are written under `feature_folder` and
# already-extracted files are skipped -- confirm in dcase_models.
data_generator.extract_features(feature_extractor)

True

## Load data

In [6]:
# Ask the data generator to load its data. The two prints bracket the call so
# the per-fold progress bar it emits is easy to locate in the output.
print('Loading data... ')
data_generator.load_data()
print('Done!')

Loading data... 
fold: [############################################################] 10/10
Done!


## Fit scaler

In [7]:
# Fold held out for the final evaluation; the generator supplies the
# train/validation data for this choice.
fold_test = 'fold1'
(X_train, Y_train,
 X_val, Y_val) = data_generator.get_data_for_training(fold_test)

# Fit the scaler on the training data only, then apply the same transform to
# the training and validation inputs (in that order).
scaler = Scaler(normalizer=normalizer)
scaler.fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

## Init Model

In [8]:
# Network input size is read from axes 1 and 2 of the scaled training data;
# the number of classes is the width of the label matrix.
n_frames_cnn, n_freq_cnn = X_train.shape[1:3]
n_classes = Y_train.shape[1]
print(n_frames_cnn, n_freq_cnn, n_classes)

# No existing model or model path is supplied (both None).
cnn_settings = dict(
    n_classes=n_classes,
    n_frames_cnn=n_frames_cnn,
    n_freq_cnn=n_freq_cnn)
model_container = SB_CNN(model=None, model_path=None, **cnn_settings)

# Print the layer-by-layer summary of the wrapped model.
model_container.model.summary()

64 128 10
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 64, 128)           0         
_________________________________________________________________
lambda (Lambda)              (None, 64, 128, 1)        0         
_________________________________________________________________
conv1 (Conv2D)               (None, 60, 124, 24)       624       
_________________________________________________________________
maxpool1 (MaxPooling2D)      (None, 30, 62, 24)        0         
_________________________________________________________________
batchnorm1 (BatchNormalizati (None, 30, 62, 24)        96        
_________________________________________________________________
conv2 (Conv

## Train model

In [9]:
# Folder passed as `weights_path` to train() and later to load_model_weights().
exp_folder = './'

# Training options from the parameters cell, bundled for keyword unpacking.
train_arguments = dict(
    early_stopping=early_stopping,
    epochs=epochs,
    considered_improvement=considered_improvement,
    learning_rate=learning_rate,
    batch_size=batch_size,
    verbose=verbose,
    optimizer=optimizer)

model_container.train(
    X_train, Y_train, X_val, Y_val,
    weights_path=exp_folder,
    **train_arguments)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Acc = 0.6745 - Best val Acc: 0.6745 (IMPROVEMENT, saving)

Epoch 2/20
Acc = 0.6768 - Best val Acc: 0.6768 (IMPROVEMENT, saving)

Epoch 3/20
Acc = 0.6881 - Best val Acc: 0.6881 (IMPROVEMENT, saving)

Epoch 4/20
Acc = 0.7703 - Best val Acc: 0.7703 (IMPROVEMENT, saving)

Epoch 5/20
Acc = 0.6543 - Best val Acc: 0.7703 (3)

Epoch 6/20
Acc = 0.6554 - Best val Acc: 0.7703 (3)

Epoch 7/20
Acc = 0.7038 - Best val Acc: 0.7703 (3)

Epoch 8/20
Acc = 0.6464 - Best val Acc: 0.7703 (3)

Epoch 9/20
Acc = 0.6374 - Best val Acc: 0.7703 (3)

Epoch 10/20
Acc = 0.7083 - Best val Acc: 0.7703 (3)

Epoch 11/20
Acc = 0.6959 - Best val Acc: 0.7703 (3)

Epoch 12/20
Acc = 0.6644 - Best val Acc: 0.7703 (3)

Epoch 13/20
Acc = 0.6667 - Best val Acc: 0.7703 (3)

Not improvement for 10 epochs, stopping the training


## Evaluate Model

In [29]:
# Restore the best weights saved in the experiment folder during training.
model_container.load_model_weights(exp_folder)

# Held-out fold: fetch it, scale it with the scaler fitted on the training
# data, and score the model on it.
X_test, Y_test = data_generator.get_data_for_testing(fold_test)
X_test = scaler.transform(X_test)
results = model_container.evaluate(X_test, Y_test)

# Report the accuracy entry of the evaluation results.
test_accuracy = results['accuracy']
print(test_accuracy)

0.695303550973654
