In [1]:
import base64
import IPython
import os
import uuid
import glob
import re

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.layers import Dropout
from keras.preprocessing import sequence

Using CNTK backend


In [2]:
os.chdir("c:/tensorflow_speech/")

In [3]:
import librosa

import numpy as np

import librosa.core
import librosa.display
import librosa.effects



In [23]:
import math

In [4]:
import keras
import keras.utils
import random
import numpy.random
import pickle

In [5]:
import keras.preprocessing.text

In [29]:
labels = glob.glob('*')

In [30]:
labels = [x for x in labels if not x.startswith("_")]

In [32]:
tokenizer = keras.preprocessing.text.Tokenizer(num_words=len(labels))

In [33]:
tokenizer.fit_on_texts(labels)

In [36]:
tokenizer.texts_to_matrix(labels)

array([[ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,

In [25]:
math.floor(len(labels)/ 16)

4045

In [50]:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras Sequence dataset'
    
    def __init__(self, tokenizer, samples=10, batch_size=16, maxlen=5e5, n_classes=30, shuffle=True):
        'Initialize class'
        
        files = glob.glob('./*/*.wav')
        
        # ignore noises
        files = [x for x in files if not x.startswith("_")]
        
        self.batch_size = batch_size
        
        # labels
        self.file_to_label = {x:re.findall(r'\\(.*)\\(.*)$', x)[0][0] for x in files}
        
        # files
        self.files = files
        
        # params
        self.tokenizer = tokenizer
        self.maxlen = int(maxlen)
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.samples = len(files)
        

    def __len__(self):
        'Returns the number of batches per epoch'
        return math.floor(self.samples/ self.batch_size)
    

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        files = [self.files[random.randint(0, len(self.files)-1)] for x in range(self.batch_size)]
        X, y = self.__data_generation(files)

        return X, y


    def __data_generation(self, files):
        'Generates data containing batch_size samples' 
        
        X = np.empty((self.batch_size, self.maxlen))
        y = np.empty((self.batch_size), dtype=object)

        # Generate data
        for i, file in enumerate(files):
            # load file
            # MONO rate=8k auto trimming
            data, fs = librosa.load(file, mono=True, sr=8e3)
            trimed, index = librosa.effects.trim(data, top_db=29, frame_length=10)
            
            if self.shuffle:
                rs = random.randint(100,1000)
                rss = random.randint(5000,10000)

                smp = np.concatenate( (np.random.ranf(size=rs)/1e3 , trimed[rs:], np.random.ranf(size=rss)/1e3), axis=0)
            else:
                smp = trimed
            
            X[i,] = np.concatenate( (smp[: min(self.maxlen, len(smp))], np.zeros(( self.maxlen - min(self.maxlen, len(smp))))), axis=0)

            # Store class
            y[i] = self.file_to_label[file]
            # print(y)

        return X, tokenizer.texts_to_matrix(y) # Why? need + 1 ???

In [51]:
max_review_length = int(1e4)
embedding=100

In [52]:
train = DataGenerator(tokenizer, maxlen=max_review_length, batch_size=16, shuffle=True)
test = DataGenerator(tokenizer, maxlen=max_review_length, batch_size=16, shuffle=False)

In [53]:
classes = 30

In [54]:
model = Sequential()
# Embedding
model.add(Embedding(max_review_length, embedding, input_length=max_review_length))

# Convilution
model.add(Conv1D(filters=125, kernel_size=5, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=16, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))

# Dropout
model.add(Dropout(0.2))

# LSTM
model.add(LSTM(100))

# Classification
model.add(Dense(classes, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 10000, 100)        1000000   
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 10000, 125)        62625     
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 5000, 125)         0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 5000, 32)          12032     
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 2500, 32)          0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 2500, 16)          1552      
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, 1250, 16)          0         
__________

In [None]:
model.fit_generator(generator=train, steps_per_epoch=50, epochs=1, validation_data=test)

Epoch 1/1


  (sample.dtype, var.uid, str(var.dtype)))
  (sample.dtype, var.uid, str(var.dtype)))




In [197]:
model.save("nr_5e_22c.h5")

In [189]:
pickle.dump(tokenizer, open("tokenizer_22.pkl", "wb"))