In [1]:
%reload_ext autoreload
%autoreload 2
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.io.wavfile as wv
import scipy.signal as sig
import wave

from datetime import datetime
from keras.models import Sequential, load_model
from keras.layers import Dense

from util import util
from util import WavFileParts
from util.logUtil import LOG, LOG_HEADER
from util.confusionMatrix import ConfusionMatrix


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### globale settings

In [2]:
# Class labels. Their order defines the class index used for the one-hot
# targets and for the rows/columns of the confusion matrix.
classes = ['music', 'voice', 'environment']

# Dataset roots per platform; both end with a trailing slash.
macDir = '/Volumes/SAA_DATA/datasets/'
winDir = 'E:/SAA_DATA/'
osDir = macDir
# osDir already ends with '/', so no extra separator is inserted here
# (consistent with how baseSrcDir is built below).
recordingDir = osDir + 'localizationRecordings'

# Folder where stored test data (.hd5) and trained models are written.
if osDir == winDir:
    storageFolder = 'E:/SAA_DATA/storedData/'
else:
    storageFolder = '/Users/etto/Desktop/storedData/'

# Base directory holding the extracted wav files; the orgWavDirs entries are
# sub-directory names that get joined onto a base directory.
baseSrcDir = osDir + 'localizationFiles/20171025AllExtractionsMic4'
orgWavDirs1 = ['G428_0.0_1.4',
              'G527_0.5_1.4',
              'Studio_2.0_4.2'
              ]

orgWavDirs2 = ['G428_2.1_2.4',
              'G527_1.2_5.8',
              'Studio_3.0_2.0'
              ]

# Passed as `nfft` to scipy.signal.spectrogram wherever spectrograms are built.
NFFT = 1024

chunksBaseDir = 'chunks'
rooms = ['Studio', 'G428', 'G527']

### utility functies

In [3]:
def readSoundChunksDynamic(moduleString):
    """
    Import the module named ``moduleString`` and convert every entry of its
    ``soundChunks`` attribute with ``WavFileParts.WavFilePartFromJson``.

    :param moduleString: importable module name that exposes ``soundChunks``
    :return: list with one converted object per entry, in the original order
    """
    chunkModule = importlib.import_module(moduleString)
    return [WavFileParts.WavFilePartFromJson(entry) for entry in chunkModule.soundChunks]

def timeFunction(func):
    """
    Call ``func`` without arguments, print the wall-clock start time, end time
    and elapsed duration, and return whatever ``func`` returned.

    Usage, e.g.: fpc = timeFunction(lambda: getFilesPerCategory(srcDir))

    :param func: zero-argument callable to run and time
    :return: the return value of ``func()``
    """
    startTime = datetime.now()
    print('Start: ' + startTime.strftime('%H:%M:%S') + '\n=================')

    res = func()

    endTime = datetime.now()
    print('\n=================\nEnd: ' + endTime.strftime('%H:%M:%S'))
    # Label and duration are printed in one call so they share a line. The
    # former `print('Time taken: '),` was a Python 2 idiom; under Python 3 it
    # ended the line after the label and built a throwaway tuple.
    print('Time taken: {}'.format(endTime - startTime))
    print()

    return res
    
def storeTestData(allSpectros, allClasses, storageName, keyName):
    """
    Write spectrograms and their class labels to one HDF file.

    The file is ``storageFolder + storageName + '.hd5'``. The spectrograms are
    stored under the key ``'spectros_' + keyName`` and the labels under
    ``'classes_' + keyName``, each wrapped in a pandas DataFrame.

    :param allSpectros: data for the spectrogram frame
    :param allClasses: data for the class-label frame
    :param storageName: file name without folder or extension
    :param keyName: suffix shared by both HDF keys
    """
    hdfPath = storageFolder + storageName + '.hd5'
    # Spectrograms are written first, then the labels.
    for keyPrefix, payload in (('spectros_', allSpectros), ('classes_', allClasses)):
        pd.DataFrame(payload).to_hdf(path_or_buf=hdfPath, key=keyPrefix + keyName)

def retrieveTestData(storageName, keyName):
    """
    Read back the spectrograms and class labels from the HDF file
    ``storageFolder + storageName + '.hd5'``.

    :param storageName: file name without folder or extension
    :param keyName: suffix of the HDF keys ``'spectros_' + keyName`` and
        ``'classes_' + keyName``
    :return: tuple ``(spectros, classes)`` holding the ``.values`` of the two
        frames that were read
    """
    hdfPath = storageFolder + storageName + '.hd5'
    spectroFrame = pd.read_hdf(path_or_buf=hdfPath, key='spectros_' + keyName)
    labelFrame = pd.read_hdf(path_or_buf=hdfPath, key='classes_' + keyName)
    return spectroFrame.values, labelFrame.values
    

### functies tbv trainen

In [4]:
def getFilesPerCategory(srcDir):
    """
    Build a dictionary that maps each category found in ``srcDir`` to the
    full paths of the entries inside that category.

    ``srcDir`` is one of the orgWavDirs, for example
    localizationFiles/20171025AllExtractionsMic4/G428_0.0_1.4

    Every direct entry of ``srcDir`` is treated as a category; its name (the
    last path component) becomes the dictionary key. Paths are normalised to
    forward slashes.

    :param srcDir: directory whose direct entries are the category folders
    :return: dict ``{category name: sorted list of file paths}``
    """
    filesPerCategory = {}
    # glob gives no ordering guarantee; sorting makes the result (and anything
    # built from it) reproducible between runs and machines.
    for catDirLong in sorted(glob.glob(srcDir + '/*')):
        # Take the last path component instead of stripping the srcDir prefix,
        # so the key is right even when srcDir was spelled with backslashes.
        catName = catDirLong.replace('\\', '/').rsplit('/', 1)[-1]
        filesPerCategory[catName] = sorted(
            filename.replace('\\', '/') for filename in glob.glob(catDirLong + '/*'))
    return filesPerCategory

def getFilesPerCatFromMultipleDirs(srcDirs, srcDirsBase=''):
    """
    Collect the files per category from several source directories and merge
    them into one dictionary.

    :param srcDirs: iterable of directory names
    :param srcDirsBase: optional base directory that is prefixed (with a '/')
        to every entry of ``srcDirs``; when empty the names are used as given
    :return: dict ``{category: list of file paths}`` with the lists of all
        directories concatenated in the order of ``srcDirs``
    """
    filesPerCat = {}
    for dirName in srcDirs:
        # Only insert the separator when there is a base; an empty base must
        # not turn a relative name into a root-anchored '/name'.
        srcDir = srcDirsBase + '/' + dirName if srcDirsBase else dirName
        for category, files in getFilesPerCategory(srcDir).items():
            # setdefault keeps categories that first show up in a later
            # directory and tolerates directories that lack a category.
            filesPerCat.setdefault(category, []).extend(files)
    return filesPerCat
        

In [5]:
def getSpectrosFromFilesPerCategory(filesPerCategory):
    """
    Build a dictionary with, per class in ``classes``, the spectrograms of
    all files listed for that class.

    Each wav file is read with ``scipy.io.wavfile.read`` and turned into a
    spectrogram with ``nfft=NFFT`` and ``noverlap=0``; the transposed
    spectrogram is stored.

    :param filesPerCategory: dict ``{class name: list of wav file paths}``
    :return: dict ``{class name: list of transposed spectrogram arrays}``
    """
    def _transposedSpectrogram(wavPath):
        sampleRate, samples = wv.read(wavPath)
        _freqs, _times, spectro = sig.spectrogram(x=samples, fs=sampleRate, nfft=NFFT, noverlap=0)
        return spectro.T

    return {
        clz: [_transposedSpectrogram(wavPath) for wavPath in filesPerCategory[clz]]
        for clz in classes
    }

In [6]:
def getClassLengths(spectrosPerCat):
    """
    Count, per class in ``classes``, the total number of rows (first axis)
    over all spectrogram arrays of that class.

    :param spectrosPerCat: dict ``{class name: list of arrays}``
    :return: dict ``{class name: summed first-axis length}``
    """
    return {
        clz: sum(np.shape(spectro)[0] for spectro in spectrosPerCat[clz])
        for clz in classes
    }

In [7]:
def createTrainDataFromSpectros(spectrosPerCat, clzLengths):
    """
    Turn the per-class spectrograms into training data.

    Expects the output of getSpectrosFromFilesPerCategory plus the matching
    output of getClassLengths.

    :param spectrosPerCat: dict ``{class name: list of 2-D arrays}``; the
        arrays are stacked along axis 0, class after class, in the order of
        ``classes``
    :param clzLengths: dict ``{class name: number of rows of that class}``
    :return: tuple ``(X_train, Y_train)``; ``Y_train`` is the one-hot encoding
        with one column per entry of ``classes`` and one row per row of
        ``X_train``
    """
    X_train = np.concatenate(
        [np.concatenate(spectrosPerCat[clz], axis=0) for clz in classes], axis=0)

    # One-hot encoding for Y_train: row i of the identity matrix is the target
    # for class i, repeated once per training row of that class. This works
    # for any number of classes instead of exactly three.
    rowsPerClass = [clzLengths[clz] for clz in classes]
    Y_train = np.repeat(np.eye(len(classes)), rowsPerClass, axis=0)

    return X_train, Y_train

### deep learning model

In [8]:
def create_model(layersizes, inputDim=513, nrClasses=3):
    """
    Build and compile a fully connected classifier.

    :param layersizes: sizes of the hidden ReLU layers, first layer first;
        must contain at least one entry
    :param inputDim: number of input features per sample. The default 513
        presumably matches the number of frequency bins produced with
        NFFT = 1024 -- confirm when changing NFFT.
    :param nrClasses: number of units of the softmax output layer
    :return: the compiled keras ``Sequential`` model (categorical
        cross-entropy loss, adam optimizer, accuracy metric)
    """
    # create model
    model = Sequential()
    model.add(Dense(layersizes[0], input_dim=inputDim, activation='relu'))
    for lsize in layersizes[1:]:
        model.add(Dense(lsize, activation='relu'))
    model.add(Dense(nrClasses, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [9]:
def getModelFileName(modelFilePath, baseModelFilename, layers, nrEpochs):
    """
    Compose the file name under which a model is stored:
    ``<modelFilePath><baseModelFilename>_<size>..._<size>ep<nrEpochs>.hd5``.

    :param modelFilePath: folder prefix, concatenated as-is (no separator is
        inserted)
    :param baseModelFilename: base name of the model
    :param layers: hidden layer sizes; each adds a ``_<size>`` part
    :param nrEpochs: number of epochs, appended as ``ep<nrEpochs>``
    :return: the composed file name
    """
    layerSuffix = ''.join('_{}'.format(width) for width in layers)
    return modelFilePath + baseModelFilename + layerSuffix + 'ep{}.hd5'.format(nrEpochs)

In [10]:
def getModelName(baseModelFilename, layers, nrEpochs):
    """
    Compose the display name of a model:
    ``<baseModelFilename>_<size>..._<size>ep<nrEpochs>`` (no folder and no
    file extension).

    :param baseModelFilename: base name of the model
    :param layers: hidden layer sizes; each adds a ``_<size>`` part
    :param nrEpochs: number of epochs, appended as ``ep<nrEpochs>``
    :return: the composed model name
    """
    layerSuffix = ''.join('_{}'.format(width) for width in layers)
    return baseModelFilename + layerSuffix + 'ep{}'.format(nrEpochs)

In [11]:
def train_model(X_train, Y_train, layers, nrEpochs, modelFilePath, baseModelFilename):
    """
    Create a model with the given hidden layers, fit it for ``nrEpochs``
    epochs (timed via timeFunction), save it under the name produced by
    getModelFileName, and plot the loss history.

    :param X_train: training inputs
    :param Y_train: training targets
    :param layers: hidden layer sizes passed to create_model
    :param nrEpochs: number of epochs to fit
    :param modelFilePath: folder prefix for the saved model
    :param baseModelFilename: base name for the saved model
    """
    network = create_model(layers)

    def _fit():
        return network.fit(X_train, Y_train, epochs=nrEpochs, shuffle=True, verbose=1)

    fitHistory = timeFunction(_fit)

    savePath = getModelFileName(modelFilePath, baseModelFilename, layers, nrEpochs)
    network.save(savePath)

    lossPerEpoch = fitHistory.history['loss']
    plt.plot(lossPerEpoch)
    plt.show()

In [12]:
def evaluate_model(X_test, realClasses, layers, nrEpochs, modelFilePath, baseModelFilename):
    """
    Load a stored model, predict the classes of ``X_test`` and log the
    resulting confusion matrix and F1 scores.

    :param X_test: inputs to classify
    :param realClasses: true class indices, one per row of ``X_test``; may be
        a flat sequence or a single-column 2-D array (which is what
        retrieveTestData returns for the stored labels)
    :param layers: hidden layer sizes, used to locate the model file
    :param nrEpochs: epoch count, used to locate the model file
    :param modelFilePath: folder prefix of the model file
    :param baseModelFilename: base name of the model file
    :return: the filled ConfusionMatrix
    """
    soundModel = load_model(getModelFileName(modelFilePath, baseModelFilename, layers, nrEpochs))

    # predicted classes: index of the highest softmax output per row
    predictions = soundModel.predict(X_test)
    predClasses = predictions.argmax(axis=1)

    # Flatten the labels so each item is a scalar; int() on a 1-element array
    # (a row of an (N, 1) label array) is deprecated in NumPy.
    flatRealClasses = np.asarray(realClasses).ravel()

    matrix = ConfusionMatrix(classes)
    for realClz, predClz in zip(flatRealClasses, predClasses):
        matrix.add(int(realClz), int(predClz), 1)
    LOG(matrix.toString(),True)
    LOG('', True)
    LOG(matrix.toF1String(), True)
    return matrix

In [13]:
def train_and_evaluate_per_epoch(X_train, Y_train, realClasses, layers, nrEpochs, modelFilePath, baseModelFilename):
    """
    Train one model epoch by epoch. After every epoch the model is saved
    under its own per-epoch file name and then evaluated on the training
    inputs via evaluate_model (which reloads the file that was just saved).

    :param X_train: training inputs, also used as evaluation inputs
    :param Y_train: training targets
    :param realClasses: true class indices of the rows of ``X_train``
    :param layers: hidden layer sizes passed to create_model
    :param nrEpochs: total number of epochs to run
    :param modelFilePath: folder prefix for the saved models
    :param baseModelFilename: base name for the saved models
    """
    network = create_model(layers)
    for epochIdx in range(nrEpochs):
        epNr = epochIdx + 1  # epochs are numbered from 1 in logs and file names
        banner = '\n*****************\n* Epoch nr {}\n*****************\n'.format(epNr)
        LOG(banner, True)

        network.fit(X_train, Y_train, epochs=1, shuffle=True, verbose=1)

        epochFile = getModelFileName(modelFilePath, baseModelFilename, layers, epNr)
        network.save(epochFile)

        evaluate_model(X_train, realClasses, layers, epNr, modelFilePath, baseModelFilename)

### trainen van model : alleen orgWavDirs2

In [14]:
# Determine the training data from all directories in orgWavDirs2.
# (Single-directory alternative:
#    fpc = getFilesPerCategory(baseSrcDir + '/' + orgWavDirs1[0]))
fpc = getFilesPerCatFromMultipleDirs(orgWavDirs2, baseSrcDir)
spcs = getSpectrosFromFilesPerCategory(fpc)
clzLengths = getClassLengths(spcs)
X_train, Y_train = createTrainDataFromSpectros(spcs, clzLengths)

# Real (integer-valued) class index per training row, in the same class order
# in which createTrainDataFromSpectros stacks the rows.
realTrainClasses = np.concatenate((
    np.zeros(clzLengths[classes[0]]),
    np.ones(clzLengths[classes[1]]),
    np.full(clzLengths[classes[2]], 2.0),
))


In [15]:
# Base name shared by every model file trained and evaluated below.
baseModelFilename = '20180816_orgsHelft2'
# Models are saved to and loaded from the storage folder.
modelFilePath = storageFolder
# Prefix of the log headers written by the training cells.
logPrefix = 'Orgs helft 2'

In [16]:
layers = [100, 20]

# The header is derived from `layers` itself, so the logged architecture can
# never drift from the one that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

nrEpochs = 5
train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 2, lagen: [100, 20]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         174693    9147     187 |  0.95  0.97
voice           8411  171581    3654 |  0.93  0.95
environment      636    6936  178073 |  0.96  0.98
--------------------------------------
prec            0.95    0.91    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.92
F1 environment: 0.97


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         171424   12452     151 |  0.93  0.97
voice           2519  176583    4544 |  0.96  0.96
environment      391    3451  181803 |  0.98  0.98
--------------------------------------
prec            0.98    0.92    0.97

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.98


*****************
* Epoch nr 3
*****************

Epoch 1/1
     

In [17]:
layers = [400, 250, 100, 20]

# The header is derived from `layers` itself, so the logged architecture can
# never drift from the one that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

nrEpochs = 5
train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 2, lagen: [400, 250, 100, 20]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         175848    7877     302 |  0.96  0.98
voice           5287  173540    4819 |  0.94  0.96
environment      212    3025  182408 |  0.98  0.98
--------------------------------------
prec            0.97    0.94    0.97

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.98


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         177358    6477     192 |  0.96  0.98
voice           5025  174496    4125 |  0.95  0.97
environment      183    2705  182757 |  0.98  0.99
--------------------------------------
prec            0.97    0.95    0.98

F1 overall: 0.97
F1 music: 0.97
F1 voice: 0.95
F1 environment: 0.98


*****************
* Epoch nr 3
*****************

Epoch

In [18]:
layers = [400, 300, 200, 100, 50, 20, 10]

# The header is derived from `layers` itself, so the logged architecture can
# never drift from the one that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

nrEpochs = 5
train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 2, lagen: [400, 300, 200, 100, 50, 20, 10]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         171823   11718     486 |  0.93  0.97
voice           2716  179003    1927 |  0.97  0.96
environment      200    8385  177060 |  0.95  0.98
--------------------------------------
prec            0.98    0.90    0.99

F1 overall: 0.95
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.97


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         179030    4418     579 |  0.97  0.97
voice           9899  170759    2988 |  0.93  0.96
environment      110    4352  181183 |  0.98  0.99
--------------------------------------
prec            0.95    0.95    0.98

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.98


*****************
* Epoch nr 3
***********

In [19]:
layers = [450, 400, 350, 300, 250, 200, 150, 100, 50, 21]

# The header is derived from `layers` itself, so the logged architecture can
# never drift from the one that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

nrEpochs = 5
train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 2, lagen: [450, 400, 350, 300, 250, 200, 150, 100, 50, 21]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         173777    9844     406 |  0.94  0.97
voice           5727  170306    7613 |  0.93  0.95
environment      227    2092  183326 |  0.99  0.98
--------------------------------------
prec            0.97    0.93    0.96

F1 overall: 0.95
F1 music: 0.96
F1 voice: 0.93
F1 environment: 0.97


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         179924    3847     256 |  0.98  0.97
voice          11841  167733    4072 |  0.91  0.96
environment      280    3208  182157 |  0.98  0.99
--------------------------------------
prec            0.94    0.96    0.98

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.98


*****************
* Epoch 

### functies tbv testen

In [20]:
def createAndStoreTestData(wavFileParts, baseDir, fileDate, micNr, storeFilename, keyName):
    """
    Build the spectrograms and class labels for the sound chunks of the given
    wav file parts and store them via storeTestData.

    Parts whose sound type contains 'Gunshot' are skipped. For every other
    part the file ``<baseDir>/<fileDate>_<fileNr>_mono<micNr>.wav`` is read,
    and each sound chunk (start and end in seconds) is cut out and converted
    into a spectrogram with ``nfft=NFFT`` and ``noverlap=0``. The class label
    of a part is the index of its lower-cased sound type in ``classes``.

    :param wavFileParts: iterable of WavFilePart-like objects providing
        ``getSoundType()``, ``fileNr`` and ``getSoundChunks(micNr)``
    :param baseDir: directory that holds the mono wav files
    :param fileDate: integer date prefix of the wav file names
    :param micNr: integer microphone number
    :param storeFilename: storage name handed to storeTestData
    :param keyName: key name handed to storeTestData
    :raises ValueError: if a sound type is not present in ``classes``
    """
    # Collect the pieces and concatenate once at the end; growing an array
    # with np.append inside the loop copies all earlier data on every chunk.
    spectroParts = []
    classParts = []

    for wfPt in wavFileParts:  # each wfPt is expected to be a WavFilePart
        soundType = wfPt.getSoundType()
        if 'Gunshot' in soundType:
            continue

        filename = baseDir + '/{:d}_{:d}_mono{:d}.wav'.format(fileDate, wfPt.fileNr, micNr)
        fs, signal = wv.read(filename)

        classNr = classes.index(soundType.lower())
        for soundChunk in wfPt.getSoundChunks(micNr):
            # chunk boundaries are given in seconds; convert to sample indices
            startFrame = int(soundChunk[0] * fs)
            endFrame = int(soundChunk[1] * fs)

            sigChunk = signal[startFrame: endFrame]
            if len(sigChunk) == 0:
                # nothing to analyse; an empty chunk contributes no rows
                continue
            freq_array, segment_times, spectrogram = sig.spectrogram(x=sigChunk, fs=fs, nfft=NFFT, noverlap=0)
            spectroParts.append(spectrogram.T)
            # one label per spectrogram row (time segment)
            classParts.append(classNr * np.ones(len(segment_times)))

    allSpectros = np.concatenate(spectroParts, axis=0) if spectroParts else np.array([])
    allClasses = np.concatenate(classParts) if classParts else np.array([])

    storeTestData(allSpectros, allClasses, storeFilename, keyName)


## Voer tests uit bij modellen met alle orgs

In [21]:
# model parameters: every architecture / epoch combination trained above
modelFilePath = storageFolder
layerss = [[100, 20], [400, 250, 100, 20], [400, 300, 200, 100, 50, 20, 10], [450, 400, 350, 300, 250, 200, 150, 100, 50, 21]]
nrsEpochs = [1,2,3,4,5]

# test data
micNr = 4
testFileNames = ['testData_G428', 'testData_G527', 'testData_Studio']
keyname = 'mic{}'.format(micNr)

for testFileName in testFileNames:
    # The stored test set depends only on the file and the key, so it is read
    # once per file instead of once per (layers, epochs) combination.
    testSpecs, testClasses = retrieveTestData(testFileName, keyname)

    for layers in layerss:
        for nrEpochs in nrsEpochs:
            LOG_HEADER(getModelName(baseModelFilename, layers, nrEpochs) 
                       + '\n# ' + keyname
                       + '\n# ' + testFileName,
                       True)
            matrix = evaluate_model(testSpecs, testClasses, layers, nrEpochs, modelFilePath, baseModelFilename)


####################################
#
# 20180816_orgsHelft2_100_20ep1
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          19993     625      21 |  0.97  0.95
voice           1453   12772     211 |  0.88  0.95
environment       76      79   13070 |  0.99  0.99
--------------------------------------
prec            0.93    0.95    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.92
F1 environment: 0.99

####################################
#
# 20180816_orgsHelft2_100_20ep2
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          19680     938      21 |  0.95  0.97
voice            423   13797     216 |  0.96  0.97
environment       90      87   13048 |  0.99  0.99
--------------------------------------
prec            0.97    0.93    0.98

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.98

################################

                   m       v       e |  sens   acc
music          19860     745      34 |  0.96  0.96
voice           1006   13212     218 |  0.92  0.96
environment       56      74   13095 |  0.99  0.99
--------------------------------------
prec            0.95    0.94    0.98

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.93
F1 environment: 0.99

####################################
#
# 20180816_orgsHelft2_450_400_350_300_250_200_150_100_50_21ep4
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          19746     850      43 |  0.96  0.97
voice            373   13846     217 |  0.96  0.97
environment       69      63   13093 |  0.99  0.99
--------------------------------------
prec            0.98    0.94    0.98

F1 overall: 0.97
F1 music: 0.97
F1 voice: 0.95
F1 environment: 0.99

####################################
#
# 20180816_orgsHelft2_450_400_350_300_250_200_150_100_50_21ep5
# mic4
# testData_G428
#
########

                   m       v       e |  sens   acc
music           8735     176       6 |  0.98  0.97
voice            228    2638       0 |  0.92  0.97
environment       10       2    2479 |  1.00  1.00
--------------------------------------
prec            0.97    0.94    1.00

F1 overall: 0.97
F1 music: 0.98
F1 voice: 0.93
F1 environment: 1.00

####################################
#
# 20180816_orgsHelft2_450_400_350_300_250_200_150_100_50_21ep1
# mic4
# testData_G527
#
####################################

                   m       v       e |  sens   acc
music           8765     140      12 |  0.98  0.97
voice            212    2654       0 |  0.93  0.98
environment        3       1    2487 |  1.00  1.00
--------------------------------------
prec            0.98    0.95    1.00

F1 overall: 0.97
F1 music: 0.98
F1 voice: 0.94
F1 environment: 1.00

####################################
#
# 20180816_orgsHelft2_450_400_350_300_250_200_150_100_50_21ep2
# mic4
# testData_G527
#
########

####################################
#
# 20180816_orgsHelft2_400_300_200_100_50_20_10ep2
# mic4
# testData_Studio
#
####################################

                   m       v       e |  sens   acc
music          54151    7196    2248 |  0.85  0.92
voice           1778   35212     914 |  0.93  0.93
environment       18     512   41807 |  0.99  0.97
--------------------------------------
prec            0.97    0.82    0.93

F1 overall: 0.91
F1 music: 0.91
F1 voice: 0.87
F1 environment: 0.96

####################################
#
# 20180816_orgsHelft2_400_300_200_100_50_20_10ep3
# mic4
# testData_Studio
#
####################################

                   m       v       e |  sens   acc
music          54318    8138    1139 |  0.85  0.93
voice           1314   36163     427 |  0.95  0.93
environment       27     548   41762 |  0.99  0.99
--------------------------------------
prec            0.98    0.81    0.96

F1 overall: 0.92
F1 music: 0.91
F1 voice: 0.87
F1 environment