In [1]:
%reload_ext autoreload
%autoreload 2
import glob
import importlib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scipy.io.wavfile as wv
import scipy.signal as sig
import wave

from datetime import datetime
from keras.models import Sequential, load_model
from keras.layers import Dense

from util import util
from util import WavFileParts
from util.logUtil import LOG, LOG_HEADER
from util.confusionMatrix import ConfusionMatrix


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### globale settings

In [2]:
# Sound classes. The position in this list is used as the class index
# (one-hot column for training, integer label for the test data).
classes = ['music', 'voice', 'environment']

# Dataset roots per platform; both end with '/'. osDir selects the active one.
macDir = '/Volumes/SAA_DATA/datasets/'
winDir = 'E:/SAA_DATA/'
osDir = macDir
# osDir already ends with '/', so no extra separator is added here
# (the previous value contained a double slash).
recordingDir = osDir + 'localizationRecordings'

# Folder for the stored HDF test data and the saved models.
if osDir == winDir:
    storageFolder = 'E:/SAA_DATA/storedData/'
else:
    storageFolder = '/Users/etto/Desktop/storedData/'

# Root of the extracted wav files; each entry of orgWavDirs1/orgWavDirs2 is a
# sub-directory of it.
baseSrcDir = osDir + 'localizationFiles/20171025AllExtractionsMic4'
orgWavDirs1 = ['G428_0.0_1.4',
              'G527_0.5_1.4',
              'Studio_2.0_4.2'
              ]

orgWavDirs2 = ['G428_2.1_2.4',
              'G527_1.2_5.8',
              'Studio_3.0_2.0'
              ]

# FFT length passed to scipy.signal.spectrogram (nfft).
NFFT = 1024

# NOTE(review): chunksBaseDir and rooms are not referenced in the visible part
# of this notebook — confirm they are still needed.
chunksBaseDir = 'chunks'
rooms = ['Studio', 'G428', 'G527']

### utility functies

In [3]:
def readSoundChunksDynamic(moduleString):
    """Import the module named ``moduleString`` and convert its sound chunks.

    Every item of the module's ``soundChunks`` attribute is handed to
    ``WavFileParts.WavFilePartFromJson``; the converted objects are returned
    as a list, in the original order.
    """
    chunkModule = importlib.import_module(moduleString)
    return [WavFileParts.WavFilePartFromJson(entry) for entry in chunkModule.soundChunks]

def timeFunction(func):
    """Run ``func()`` and print its start time, end time and duration.

    Usage, e.g.: ``fpc = timeFunction(lambda: getFilesPerCategory(srcDir))``

    :param func: callable taking no arguments
    :return: whatever ``func()`` returns

    If ``func`` raises, the exception propagates and no end time is printed.
    """
    startTime = datetime.now()
    print('Start: ' + startTime.strftime('%H:%M:%S') + '\n=================')

    res = func()

    endTime = datetime.now()
    print('\n=================\nEnd: ' + endTime.strftime('%H:%M:%S'))
    # Label and duration on one line. The former `print(...),` trailing comma
    # is a Python 2 idiom that does not suppress the newline in Python 3.
    print('Time taken: {}'.format(endTime - startTime))
    print()

    return res
    
def storeTestData(allSpectros, allClasses, storageName, keyName):
    """Write spectrograms and class labels to one HDF file.

    The file is ``storageFolder + storageName + '.hd5'``. ``allSpectros`` is
    stored under key ``'spectros_' + keyName`` and ``allClasses`` under
    ``'classes_' + keyName``, each wrapped in a pandas DataFrame.
    """
    hdfPath = storageFolder + storageName + '.hd5'
    for keyPrefix, payload in (('spectros_', allSpectros), ('classes_', allClasses)):
        pd.DataFrame(payload).to_hdf(path_or_buf=hdfPath, key=keyPrefix + keyName)

def retrieveTestData(storageName, keyName):
    """Read back the arrays written by ``storeTestData``.

    Reads keys ``'spectros_' + keyName`` and ``'classes_' + keyName`` from
    ``storageFolder + storageName + '.hd5'``.

    :return: tuple ``(spectros, classes)`` holding the ``.values`` of the two
             stored DataFrames
    """
    hdfPath = storageFolder + storageName + '.hd5'
    spectroFrame = pd.read_hdf(path_or_buf=hdfPath, key='spectros_' + keyName)
    labelFrame = pd.read_hdf(path_or_buf=hdfPath, key='classes_' + keyName)
    spectroValues, labelValues = spectroFrame.values, labelFrame.values
    return spectroValues, labelValues
    

### functies tbv trainen

In [4]:
# Builds a dictionary: per category, all files (full path) found in srcDir.
# srcDir is one of the orgWavDirs, for example
#    localizationFiles/20171025AllExtractionsMic4/G428_0.0_1.4
def getFilesPerCategory(srcDir):
    """Map every category sub-directory of ``srcDir`` to the files it contains.

    :param srcDir: directory whose immediate sub-directories are the categories
    :return: dict ``{categoryName: [filePath, ...]}``; file paths use forward
             slashes and are sorted, a category without files maps to ``[]``
    """
    filesPerCategory = {}
    # sorted(): glob returns entries in arbitrary order; sorting keeps the
    # resulting data order reproducible between runs and platforms.
    for catDirLong in sorted(glob.glob(srcDir + '/*')):
        # Only directories are categories; skip stray files such as .DS_Store.
        if not os.path.isdir(catDirLong):
            continue
        # Take the last path component as the category name. Unlike stripping
        # the srcDir prefix, this also works when srcDir has a trailing slash
        # or contains backslashes.
        catDir = catDirLong.replace('\\', '/').rstrip('/').rsplit('/', 1)[-1]
        filesPerCategory[catDir] = [
            filename.replace('\\', '/')
            for filename in sorted(glob.glob(catDirLong + '/*'))
        ]
    return filesPerCategory

def getFilesPerCatFromMultipleDirs(srcDirs, srcDirsBase=''):
    """Collect the files per category over several source directories.

    :param srcDirs: directory names, each passed to ``getFilesPerCategory``
    :param srcDirsBase: optional base directory prepended to every name; when
                        empty the names are used as given
    :return: dict ``{categoryName: [filePath, ...]}`` with the file lists of
             all directories concatenated in the order of ``srcDirs``
    """
    filesPerCat = {}
    for dirName in srcDirs:
        if srcDirsBase:
            # Normalise to exactly one separator between base and name.
            srcDir = srcDirsBase.rstrip('/') + '/' + dirName
        else:
            # No base: use the name as is. Prefixing '/' here would turn it
            # into a path under the filesystem root.
            srcDir = dirName
        # Merge per key so a category missing from (or only present in) one
        # of the directories neither raises KeyError nor gets dropped.
        for key, files in getFilesPerCategory(srcDir).items():
            filesPerCat.setdefault(key, []).extend(files)
    return filesPerCat
        

In [5]:
# Builds a dictionary: per category, the spectrograms of its files.
def getSpectrosFromFilesPerCategory(filesPerCategory):
    """Compute one spectrogram per wav file for every class in ``classes``.

    :param filesPerCategory: dict ``{className: [wavPath, ...]}``; must hold
                             an entry for every name in ``classes``
    :return: dict ``{className: [spectrogram.T, ...]}`` in file order
    """
    def toSpectrogram(wavPath):
        # Read the file and return the transposed spectrogram array.
        sampleRate, samples = wv.read(wavPath)
        _, _, power = sig.spectrogram(x=samples, fs=sampleRate, nfft=NFFT, noverlap=0)
        return power.T

    return {
        clz: [toSpectrogram(wavPath) for wavPath in filesPerCategory[clz]]
        for clz in classes
    }

In [6]:
def getClassLengths(spectrosPerCat):
    """Return, per class in ``classes``, the summed first-axis length of its spectrograms.

    :param spectrosPerCat: dict ``{className: [array, ...]}``
    :return: dict ``{className: total number of rows over all arrays}``
    """
    return {
        clz: sum(np.shape(spectro)[0] for spectro in spectrosPerCat[clz])
        for clz in classes
    }

In [7]:
# Expects the output of getSpectrosFromFilesPerCategory.
# Produces the training data (X_train and Y_train).
def createTrainDataFromSpectros(spectrosPerCat, clzLengths):
    """Stack the spectrograms of all classes and build matching one-hot labels.

    :param spectrosPerCat: dict ``{className: [array, ...]}``; the arrays of
                           every class in ``classes`` are concatenated along axis 0
    :param clzLengths: dict ``{className: number of rows}``, e.g. the result
                       of ``getClassLengths(spectrosPerCat)``
    :return: ``(X_train, Y_train)``; ``Y_train`` has one column per entry of
             ``classes`` and one row per row of ``X_train``
    :raises ValueError: if the lengths in ``clzLengths`` do not add up to the
                        number of rows in ``X_train``
    """
    X_train = np.concatenate(
        [np.concatenate(spectrosPerCat[clz], axis=0) for clz in classes], axis=0)

    # One-hot encoding for Y_train, for any number of classes: build the class
    # index of every row, then select the matching rows of the identity matrix.
    nrClasses = len(classes)
    labels = np.repeat(np.arange(nrClasses), [clzLengths[clz] for clz in classes])
    Y_train = np.eye(nrClasses)[labels]

    if X_train.shape[0] != Y_train.shape[0]:
        raise ValueError('clzLengths describe {} rows but the spectrograms have {}'.format(
            Y_train.shape[0], X_train.shape[0]))

    return X_train, Y_train

### deep learning model

In [8]:
def create_model(layersizes, inputDim=None, nrClasses=None):
    """Build and compile a fully connected Keras classifier.

    :param layersizes: sizes of the hidden (relu) layers; needs at least one entry
    :param inputDim: number of input features; defaults to ``NFFT // 2 + 1``,
                     the number of frequency bins of a one-sided spectrogram
                     (513 for NFFT = 1024)
    :param nrClasses: size of the softmax output layer; defaults to ``len(classes)``
    :return: the compiled ``Sequential`` model
    """
    if inputDim is None:
        inputDim = NFFT // 2 + 1
    if nrClasses is None:
        nrClasses = len(classes)

    # create model
    model = Sequential()
    model.add(Dense(layersizes[0], input_dim=inputDim, activation='relu'))
    for lsize in layersizes[1:]:
        model.add(Dense(lsize, activation='relu'))
    model.add(Dense(nrClasses, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [9]:
def getModelFileName(modelFilePath, baseModelFilename, layers, nrEpochs):
    """Return the file name under which a model is stored.

    The result is ``modelFilePath + baseModelFilename``, followed by ``_<size>``
    for every entry of ``layers``, then ``ep<nrEpochs>`` and the extension
    ``.hd5``. No separator is inserted between path and base name.
    """
    stem = modelFilePath + baseModelFilename
    layerSuffix = ''.join('_{}'.format(lsize) for lsize in layers)
    return '{}{}ep{}.hd5'.format(stem, layerSuffix, nrEpochs)

In [10]:
def getModelName(baseModelFilename, layers, nrEpochs):
    """Return the model name without path and extension.

    The result is ``baseModelFilename``, followed by ``_<size>`` for every
    entry of ``layers`` and finally ``ep<nrEpochs>``.
    """
    nameParts = [str(baseModelFilename)]
    nameParts.extend('_{}'.format(lsize) for lsize in layers)
    nameParts.append('ep{}'.format(nrEpochs))
    return ''.join(nameParts)

In [11]:
def train_model(X_train, Y_train, layers, nrEpochs, modelFilePath, baseModelFilename):
    """Train a fresh model, save it and plot its training loss.

    The model comes from ``create_model(layers)`` and is fitted for
    ``nrEpochs`` epochs inside ``timeFunction``, so start/end times are
    printed. It is saved under the name given by ``getModelFileName``; the
    loss per epoch is then plotted. Nothing is returned.
    """
    net = create_model(layers)

    def runFit():
        return net.fit(X_train, Y_train, epochs=nrEpochs, shuffle=True, verbose=1)

    fitHistory = timeFunction(runFit)
    targetFile = getModelFileName(modelFilePath, baseModelFilename, layers, nrEpochs)
    net.save(targetFile)
    plt.plot(fitHistory.history['loss'])
    plt.show()

In [12]:
def evaluate_model(X_test, realClasses, layers, nrEpochs, modelFilePath, baseModelFilename):
    """Load a stored model, predict ``X_test`` and log the confusion matrix.

    :param X_test: input rows handed to the model's ``predict``
    :param realClasses: true class index per row; each value is converted with ``int``
    :param layers, nrEpochs, modelFilePath, baseModelFilename: identify the
           model file via ``getModelFileName``
    :return: the filled ``ConfusionMatrix``
    """
    modelFile = getModelFileName(modelFilePath, baseModelFilename, layers, nrEpochs)
    net = load_model(modelFile)

    # The predicted class is the index of the highest output per row.
    predictedIdx = net.predict(X_test).argmax(axis=1)

    confusion = ConfusionMatrix(classes)
    for actual, predicted in zip(realClasses, predictedIdx):
        confusion.add(int(actual), int(predicted), 1)

    LOG(confusion.toString(), True)
    LOG('', True)
    LOG(confusion.toF1String(), True)
    return confusion

In [13]:
def train_and_evaluate_per_epoch(X_train, Y_train, realClasses, layers, nrEpochs, modelFilePath, baseModelFilename):
    """Train one model epoch by epoch, saving and evaluating it after each epoch.

    After every single-epoch ``fit`` the model is saved under the file name
    for that epoch number and ``evaluate_model`` is run on ``X_train``
    against ``realClasses``, so the logged scores are on the training data.
    """
    net = create_model(layers)
    banner = '\n*****************\n* Epoch nr {}\n*****************\n'
    for epochIdx in range(nrEpochs):
        epNr = epochIdx + 1
        LOG(banner.format(epNr), True)
        net.fit(X_train, Y_train, epochs=1, shuffle=True, verbose=1)
        epochFile = getModelFileName(modelFilePath, baseModelFilename, layers, epNr)
        net.save(epochFile)
        evaluate_model(X_train, realClasses, layers, epNr, modelFilePath, baseModelFilename)

### trainen van model : alleen orgWavDirs1

In [14]:
# Determine the training data from all recording directories in orgWavDirs1.
fpc = getFilesPerCatFromMultipleDirs(orgWavDirs1, baseSrcDir)
spcs = getSpectrosFromFilesPerCategory(fpc)
clzLengths = getClassLengths(spcs)
X_train, Y_train = createTrainDataFromSpectros(spcs, clzLengths)
# Real class index per training row (0.0, 1.0, ... as floats), repeated per
# class in the order of `classes`; works for any number of classes.
realTrainClasses = np.repeat(np.arange(len(classes), dtype=float),
                             [clzLengths[clz] for clz in classes])


In [24]:
# NOTE(review): this cell's execution count (24) is out of sequence with the
# surrounding cells — confirm the notebook still runs top to bottom on a fresh kernel.
# Base name of the saved model files; getModelFileName appends layer sizes and epoch count.
baseModelFilename = '20180816_orgsHelft1'
# Models are saved to and loaded from the storage folder.
modelFilePath = storageFolder
# Prefix of the log headers written by the training cells.
logPrefix = 'Orgs helft 1'

In [16]:
# Train a network with hidden layers [100, 20], saving and evaluating it after every epoch.
layers = [100, 20]
nrEpochs = 5

# The header is built from `layers` itself so the logged text cannot drift
# from the configuration that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 1, lagen: [100, 20]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         166836   15013     660 |  0.91  0.96
voice           4306  173966    4805 |  0.95  0.94
environment      625    7509  181267 |  0.96  0.98
--------------------------------------
prec            0.97    0.89    0.97

F1 overall: 0.94
F1 music: 0.94
F1 voice: 0.92
F1 environment: 0.96


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         172183    9703     623 |  0.94  0.97
voice           5667  172425    4985 |  0.94  0.96
environment      374    3964  185063 |  0.98  0.98
--------------------------------------
prec            0.97    0.93    0.97

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.97


*****************
* Epoch nr 3
*****************

Epoch 1/1
     

In [17]:
# Train a network with hidden layers [400, 250, 100, 20], saving and evaluating it after every epoch.
layers = [400, 250, 100, 20]
nrEpochs = 5

# The header is built from `layers` itself so the logged text cannot drift
# from the configuration that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 1, lagen: [400, 250, 100, 20]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         169999   11888     622 |  0.93  0.97
voice           4388  176369    2320 |  0.96  0.95
environment      394    7813  181194 |  0.96  0.98
--------------------------------------
prec            0.97    0.90    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.97


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         172075   10077     357 |  0.94  0.97
voice           3647  174493    4937 |  0.95  0.96
environment      351    2628  186422 |  0.98  0.99
--------------------------------------
prec            0.98    0.93    0.97

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.98


*****************
* Epoch nr 3
*****************

Epoch

In [18]:
# Train a network with seven hidden layers, saving and evaluating it after every epoch.
layers = [400, 300, 200, 100, 50, 20, 10]
nrEpochs = 5

# The header is built from `layers` itself so the logged text cannot drift
# from the configuration that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 1, lagen: [400, 300, 200, 100, 50, 20, 10]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         171334   10815     360 |  0.94  0.97
voice           3928  174959    4190 |  0.96  0.96
environment      571    5285  183545 |  0.97  0.98
--------------------------------------
prec            0.97    0.92    0.98

F1 overall: 0.95
F1 music: 0.96
F1 voice: 0.94
F1 environment: 0.97


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         173546    8647     316 |  0.95  0.98
voice           4017  175382    3678 |  0.96  0.96
environment      539    3754  185108 |  0.98  0.99
--------------------------------------
prec            0.97    0.93    0.98

F1 overall: 0.96
F1 music: 0.96
F1 voice: 0.95
F1 environment: 0.98


*****************
* Epoch nr 3
***********

In [19]:
# Train a network with ten hidden layers, saving and evaluating it after every epoch.
layers = [450, 400, 350, 300, 250, 200, 150, 100, 50, 21]
nrEpochs = 5

# The header is built from `layers` itself so the logged text cannot drift
# from the configuration that is actually trained.
LOG_HEADER('{}, lagen: {}'.format(logPrefix, layers), True)

train_and_evaluate_per_epoch(X_train, Y_train, realTrainClasses, layers, nrEpochs, modelFilePath, baseModelFilename)

####################################
#
# Orgs helft 1, lagen: [450, 400, 350, 300, 250, 200, 150, 100, 50, 21]
#
####################################


*****************
* Epoch nr 1
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         168214   13868     427 |  0.92  0.97
voice           2212  177480    3385 |  0.97  0.95
environment      666    6905  181830 |  0.96  0.98
--------------------------------------
prec            0.98    0.90    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.97


*****************
* Epoch nr 2
*****************

Epoch 1/1
                   m       v       e |  sens   acc
music         173825    8122     562 |  0.95  0.97
voice           7344  172741    2992 |  0.94  0.95
environment      554    6560  182287 |  0.96  0.98
--------------------------------------
prec            0.96    0.92    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.97


*****************
* Epoch 

### functies tbv testen

In [20]:
def createAndStoreTestData(wavFileParts, baseDir, fileDate, micNr, storeFilename, keyName):
    """Compute spectrograms plus class labels for all sound chunks and store them.

    For every part whose sound type does not contain ``'Gunshot'`` the file
    ``<baseDir>/<fileDate>_<fileNr>_mono<micNr>.wav`` is read. Each chunk
    returned by ``getSoundChunks(micNr)`` is a (start, end) pair that is
    multiplied by the sample rate to get frame indices. The transposed
    spectrogram of each chunk and one class index per spectrogram segment are
    collected and passed to ``storeTestData``.

    :param wavFileParts: iterable of WavFilePart objects
    :param baseDir: directory holding the wav files
    :param fileDate: integer date prefix of the wav file names
    :param micNr: microphone number (selects the file and the chunks)
    :param storeFilename: storage name handed to ``storeTestData``
    :param keyName: key name handed to ``storeTestData``
    :raises ValueError: if a lower-cased sound type is not in ``classes``
    """
    # Collect the pieces in lists and concatenate once at the end; growing an
    # array with np.append inside the loop copies all earlier data every time.
    spectroParts = []
    classParts = []

    for wfPt in wavFileParts: #type: WavFilePart
        if 'Gunshot' in wfPt.getSoundType():
            continue

        filename = baseDir + '/{:d}_{:d}_mono{:d}.wav'.format(fileDate, wfPt.fileNr, micNr)
        fs, signal = wv.read(filename)

        classNr = classes.index(wfPt.getSoundType().lower())
        for soundChunk in wfPt.getSoundChunks(micNr):
            startFrame = int(soundChunk[0] * fs)
            endFrame = int(soundChunk[1] * fs)

            sigChunk = signal[startFrame: endFrame]
            _, segment_times, spectrogram = sig.spectrogram(x=sigChunk, fs=fs, nfft=NFFT, noverlap=0)
            spectroParts.append(spectrogram.T)
            classParts.append(classNr * np.ones(len(segment_times)))

    # With no chunks at all, store empty arrays.
    allSpectros = np.concatenate(spectroParts, axis=0) if spectroParts else np.array([])
    allClasses = np.concatenate(classParts) if classParts else np.array([])

    storeTestData(allSpectros, allClasses, storeFilename, keyName)


## maak spectra voor testdata

In [21]:
# Build and store the spectrogram test data of the Studio recordings, once per microphone.
# NOTE(review): the recording directory is dated 20171011 while fileDate is
# 170816 — confirm that this difference is intended.
LOG_HEADER('maak spectra voor testdata Studio', True)
baseDir = recordingDir + '/20171011'
fileDate = 170816
filename = 'testData_Studio'
wvPts = readSoundChunksDynamic('chunks.Studio.soundChunks')
for micNr in range(1, 5):
    keyname = 'mic' + str(micNr)
    createAndStoreTestData(wvPts, baseDir, fileDate, micNr, filename, keyname)

LOG('Klaar: spectra voor testdata Studio', True)

####################################
#
# maak spectra voor testdata Studio
#
####################################





Klaar: spectra voor testdata Studio


In [22]:
# Build and store the spectrogram test data of rooms G428 and G527, once per
# microphone. Both rooms share the recording directory and file date, so one
# loop replaces the two copy-pasted stanzas.
baseDir = recordingDir + '/20170221'
fileDate = 170221
for room in ['G428', 'G527']:
    LOG_HEADER('maak spectra voor testdata {}'.format(room), True)
    wvPts = readSoundChunksDynamic('chunks.{}.soundChunks'.format(room))
    filename = 'testData_{}'.format(room)
    for micNr in [1,2,3,4]:
        keyname = 'mic{}'.format(micNr)
        createAndStoreTestData(wvPts, baseDir, fileDate, micNr, filename, keyname)

    LOG('Klaar: spectra voor testdata {}'.format(room), True)


####################################
#
# maak spectra voor testdata G428
#
####################################





Klaar: spectra voor testdata G428
####################################
#
# maak spectra voor testdata G527
#
####################################

Klaar: spectra voor testdata G527


## Voer tests uit bij modellen getraind op orgs helft 1

In [25]:
# model parameters
modelFilePath = storageFolder
layerss = [[100, 20], [400, 250, 100, 20], [400, 300, 200, 100, 50, 20, 10], [450, 400, 350, 300, 250, 200, 150, 100, 50, 21]]
nrsEpochs = [1,2,3,4,5]

# test data
micNr = 4
testFileNames = ['testData_G428', 'testData_G527', 'testData_Studio']
keyname = 'mic{}'.format(micNr)

# Evaluate every stored model (layer configuration x epoch count) on every test set.
for testFileName in testFileNames:
    # The test data depends only on the file and the microphone, so read it
    # once per file instead of once per model.
    testSpecs, testClasses = retrieveTestData(testFileName, keyname)
    for layers in layerss:
        for nrEpochs in nrsEpochs:
            LOG_HEADER(getModelName(baseModelFilename, layers, nrEpochs) 
                       + '\n# ' + keyname
                       + '\n# ' + testFileName,
                       True)
            matrix = evaluate_model(testSpecs, testClasses, layers, nrEpochs, modelFilePath, baseModelFilename)


####################################
#
# 20180816_orgsHelft1_100_20ep1
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          19932     683      24 |  0.97  0.96
voice           1005   13175     256 |  0.91  0.96
environment      380      43   12802 |  0.97  0.99
--------------------------------------
prec            0.94    0.95    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.97

####################################
#
# 20180816_orgsHelft1_100_20ep2
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          19844     754      41 |  0.96  0.96
voice            953   13235     248 |  0.92  0.96
environment      232      16   12977 |  0.98  0.99
--------------------------------------
prec            0.94    0.95    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.98

################################

####################################
#
# 20180816_orgsHelft1_450_400_350_300_250_200_150_100_50_21ep3
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          19789     838      12 |  0.96  0.96
voice            780   13467     189 |  0.93  0.96
environment      368      66   12791 |  0.97  0.99
--------------------------------------
prec            0.95    0.94    0.98

F1 overall: 0.95
F1 music: 0.95
F1 voice: 0.93
F1 environment: 0.98

####################################
#
# 20180816_orgsHelft1_450_400_350_300_250_200_150_100_50_21ep4
# mic4
# testData_G428
#
####################################

                   m       v       e |  sens   acc
music          20096     536       7 |  0.97  0.93
voice           1582   12687     167 |  0.88  0.95
environment     1228      23   11974 |  0.91  0.97
--------------------------------------
prec            0.88    0.96    0.99

F1 overall: 0.93
F1 music: 0.92
F1 voic

                   m       v       e |  sens   acc
music           8796     114       7 |  0.99  0.98
voice            114    2751       1 |  0.96  0.98
environment        4       2    2485 |  1.00  1.00
--------------------------------------
prec            0.99    0.96    1.00

F1 overall: 0.98
F1 music: 0.99
F1 voice: 0.96
F1 environment: 1.00

####################################
#
# 20180816_orgsHelft1_450_400_350_300_250_200_150_100_50_21ep1
# mic4
# testData_G527
#
####################################

                   m       v       e |  sens   acc
music           8769     147       1 |  0.98  0.98
voice             71    2795       0 |  0.98  0.98
environment       12       2    2477 |  0.99  1.00
--------------------------------------
prec            0.99    0.95    1.00

F1 overall: 0.98
F1 music: 0.99
F1 voice: 0.96
F1 environment: 1.00

####################################
#
# 20180816_orgsHelft1_450_400_350_300_250_200_150_100_50_21ep2
# mic4
# testData_G527
#
########

####################################
#
# 20180816_orgsHelft1_400_300_200_100_50_20_10ep2
# mic4
# testData_Studio
#
####################################

                   m       v       e |  sens   acc
music          55970    6927     698 |  0.88  0.94
voice            657   36825     422 |  0.97  0.94
environment      173     743   41421 |  0.98  0.99
--------------------------------------
prec            0.99    0.83    0.97

F1 overall: 0.93
F1 music: 0.93
F1 voice: 0.89
F1 environment: 0.98

####################################
#
# 20180816_orgsHelft1_400_300_200_100_50_20_10ep3
# mic4
# testData_Studio
#
####################################

                   m       v       e |  sens   acc
music          57939    5035     621 |  0.91  0.94
voice           2759   34628     517 |  0.91  0.94
environment       81     494   41762 |  0.99  0.99
--------------------------------------
prec            0.95    0.86    0.97

F1 overall: 0.93
F1 music: 0.93
F1 voice: 0.89
F1 environment