In [24]:
from config import SEQ_LENGTH, FRAMERATE, CHUNK, FFT_SIZE
import generate_wav_samples as gen
from config import MORSE_CHR
import generator_test as gt

from tensorflow import keras
import librosa
import librosa.display
import scipy.io.wavfile
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import os
import itertools
from tqdm import tqdm
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from tensorflow.keras.layers import Input, Dense, Activation,TimeDistributed, GlobalMaxPooling1D
from tensorflow.keras.layers import Reshape, Lambda, Dropout, Bidirectional, Permute
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GRU, SimpleRNN,LSTM
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.preprocessing import image
import tensorflow.keras.callbacks
import pickle
import Levenshtein
import string
import pandas as pd
import bz2

In [2]:
# Root directory; VizCallback stores weight checkpoints in OUTPUT_DIR/<run_name>.
OUTPUT_DIR = 'rnn_output'

In [3]:
class VizCallback(tensorflow.keras.callbacks.Callback):
    """Keras callback that checkpoints weights and reports decoding quality.

    At the end of every epoch it saves the model weights under
    ``OUTPUT_DIR/<run_name>``, prints a few reference label sequences next to
    the greedy-decoded predictions and prints the mean character error rate.

    Args:
        run_name: Sub-directory of ``OUTPUT_DIR`` used for the weight files.
        test_func: Callable taking ``[batch]`` and returning a list whose first
            element holds the per-timestep class scores (as consumed by
            ``decode_batch``).
        X: Indexable where ``X[0]`` is the model input batch and ``X[1]`` the
            matching label index sequences.
    """

    def __init__(self, run_name, test_func, X):
        # Initialise the Keras base class so its own attributes are set up.
        super().__init__()
        self.test_func = test_func
        self.output_dir = os.path.join(OUTPUT_DIR, run_name)
        # Make sure the checkpoint directory exists before the first save.
        os.makedirs(self.output_dir, exist_ok=True)
        self.X = X

    def show_edit_distance(self, num):
        """Print an edit-distance value."""
        print('edit distance: ', num)

    def on_epoch_end(self, epoch, logs=None):
        """Save weights, print sample decodings and the mean CER.

        Args:
            epoch: Epoch index supplied by Keras; used in the weight file name.
            logs: Metrics dict supplied by Keras (unused).
        """
        self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))

        dec_len = 10               # number of samples shown verbatim
        len_for_cer_count = 5000   # upper bound on samples used for the CER

        labels = self.X[1][:len_for_cer_count]
        # Slicing (instead of indexing row by row) tolerates < dec_len samples.
        for label in labels[:dec_len]:
            print('labels: ', labels_to_text([int(e) for e in label]))

        # Decode once and reuse the result for both the preview and the CER;
        # the preview samples are a prefix of the CER samples.
        res = decode_batch(self.test_func, self.X[0][:len_for_cer_count])

        shown = res[:dec_len]
        print('result lens: ', len(shown))
        for e in shown:
            print(e)
        print()

        cers = [cer(labels_to_text(t), res[i]) for i, t in enumerate(labels)]
        print(np.mean(cers))
            
def cer(true, pred):
    """Character error rate between a reference and a predicted sequence.

    Both character sequences are joined into strings and stripped of
    surrounding whitespace.  The Levenshtein distance between them is divided
    by the reference length; for an empty reference the length of the stripped
    prediction is returned instead.
    """
    reference = ''.join(true).strip()
    hypothesis = ''.join(pred).strip()
    edits = Levenshtein.distance(reference, hypothesis)
    if not reference:
        return len(hypothesis)
    return edits / len(reference)

def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended for use inside a ``Lambda`` layer.

    ``args`` is ``(y_pred, labels, input_length, label_length)``.  Note that
    ``K.ctc_batch_cost`` expects the labels before the predictions.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)

def labels_to_text(i):
    """Map an iterable of label indices to their ``MORSE_CHR`` entries."""
    chars = []
    for index in i:
        chars.append(MORSE_CHR[index])
    return chars

def decode_batch2(test_func, word_batch):
    """Print and return the raw per-timestep argmax class indices for a batch.

    Unlike ``decode_batch`` no repeat collapsing or blank removal is applied.

    Args:
        test_func: Callable taking ``[word_batch]`` and returning a list whose
            first element holds the class scores (classes on the last axis).
        word_batch: Batch of model inputs forwarded to ``test_func``.

    Returns:
        Array of class indices with the last (class) axis reduced away.
    """
    out = test_func([word_batch])[0]
    # Compute the argmax once and reuse it for both the debug print and the
    # return value.
    best = np.argmax(out, axis=-1)
    print(best)
    return best


def decode_batch(test_func, word_batch):
    """Greedy CTC decoding of a batch into lists of characters.

    For every sample the most likely class per timestep is taken, consecutive
    duplicates are collapsed, the blank class (index ``len(MORSE_CHR) - 1``)
    is removed and the remaining indices are mapped through ``labels_to_text``.

    Args:
        test_func: Callable taking ``[word_batch]`` and returning a list whose
            first element holds the class scores (classes on the last axis).
        word_batch: Batch of model inputs forwarded to ``test_func``.

    Returns:
        One list of characters per sample.
    """
    blank = len(MORSE_CHR) - 1
    scores = test_func([word_batch])[0]
    best_paths = np.argmax(scores, axis=-1)

    decoded = []
    for path in best_paths:
        # Keep the first symbol of every run of identical predictions.
        collapsed = [
            current
            for pos, current in enumerate(path)
            if pos == 0 or current != path[pos - 1]
        ]
        without_blank = [idx for idx in collapsed if idx != blank]
        decoded.append(labels_to_text(without_blank))

    return decoded



In [4]:
# Alias of SEQ_LENGTH; not referenced by the other cells visible in this notebook.
sample_len = SEQ_LENGTH

# Number of samples requested from gt.read_data when dataset generation is enabled.
samples_count = 100000
# Passed as the 4th argument to seq_generator — presumably the sample rate in Hz; TODO confirm.
sr = 8000
# Number of output classes of the softmax layer (one per MORSE_CHR entry).
dict_len = len(MORSE_CHR)
# Maximum number of labels per sample; read_data skips longer samples.
max_seq_len = 5
# Size of the last feature axis of every sample.
mel_count = 1
# Number of steps per sample; the model input shape is (mel_len, mel_count).
mel_len = 161

In [5]:
# Sample generator consumed by the read_data calls in later cells.
# NOTE(review): the meaning of the positional arguments is defined in
# generate_wav_samples.py, which is not visible here — confirm before changing.
dg = gen.DataGenerator()
g = dg.seq_generator(SEQ_LENGTH, FRAMERATE, 1, sr, mel_count)

In [6]:
def read_data(set_len, g):
    """Collect up to ``set_len`` samples from ``g`` into CTC training arrays.

    Each item of ``g`` is unpacked as ``(wave, label_indexes, labels, c, mel)``;
    only ``labels``, ``c`` and ``mel`` are used.  Samples with more than
    ``max_seq_len`` labels are skipped.

    Args:
        set_len: Maximum number of samples to collect.
        g: Iterable yielding the 5-tuples described above.  ``mel`` must fit a
            ``(mel_len, mel_count)`` slot.

    Returns:
        ``([X, l, input_length, label_length], l)`` where ``X`` holds the
        features, ``l`` the zero-padded label indices, ``input_length`` the
        first dimension of every ``mel`` and ``label_length`` the value ``c``
        supplied by the generator (presumably the label count — confirm
        against the generator).  If ``g`` runs out before ``set_len`` samples
        were read, the arrays are trimmed to the samples actually read rather
        than being left with trailing all-zero rows.
    """
    l = np.zeros([set_len, max_seq_len], dtype=np.int32)
    X = np.zeros([set_len, mel_len, mel_count])
    input_length = np.zeros([set_len, 1], dtype=np.int32)
    label_length = np.zeros([set_len, 1], dtype=np.int32)

    i = 0
    # With set_len == 0 there is no row to write to, so do not touch g at all.
    if set_len > 0:
        for _wave, _label_indexes, labels, c, mel in tqdm(g):
            if len(labels) > max_seq_len:
                continue

            X[i, :, :] = mel
            l[i, :len(labels)] = labels
            input_length[i, :] = mel.shape[0]
            label_length[i, :1] = c

            i += 1
            if i == set_len:
                break

    if i < set_len:
        # Generator exhausted early: drop the unfilled rows, whose zero
        # input/label lengths are not valid CTC samples.
        X = X[:i]
        l = l[:i]
        input_length = input_length[:i]
        label_length = label_length[:i]

    return [X, l, input_length, label_length], l

In [23]:
dataset = 'dataset_100k_digits_15-16wpm'  # previously: 'dataset_100k_digits.pickle'

In [8]:
# Dataset generation is disabled; switch to True to rebuild X and l with
# generator_test.read_data instead of loading them from disk.
if False:
    X, l = gt.read_data(samples_count, g)

99991it [11:55, 151.65it/s]Process Process-3:
Process Process-2:
Process Process-4:
Process Process-1:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/Dropbox/projs/MorseNet/generate_wav_samples.py", line 218, in do_work
    time.sleep(0.0100)
  File "/home/user/Dropbox/projs/MorseNet/generate_wav_samples.py", line 218, i

In [22]:

# Cache the dataset on disk.  The stdlib ``pickle`` imported at the top of the
# notebook is used; the Python 2-only ``cPickle`` module does not exist on
# Python 3 and importing it would abort this cell.
if True:
    with open(dataset, 'wb') as f:
        pickle.dump([X, l], f, protocol=4)

In [10]:
# Load X and l from the pickle file named by `dataset`.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
if True:
    with open(dataset, 'rb') as f:
        X, l = pickle.load(f)

In [27]:
# Write X and l as a bz2-compressed pickle to '<dataset>.pbz2'.
if True:
    with bz2.BZ2File(f'{dataset}.pbz2', 'w') as f:
        pickle.dump([X, l], f)

In [28]:
# Load X and l back from the bz2-compressed pickle '<dataset>.pbz2'.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
if True:
    with bz2.BZ2File(f'{dataset}.pbz2', 'r') as f:
        X, l = pickle.load(f)

In [10]:
# Draw 200 fresh samples from the generator; get_model() hands X_val to
# VizCallback and the evaluation cells at the end use X_val / l_val.
X_val, l_val = read_data(200, g)


0it [00:00, ?it/s][A
29it [00:00, 244.30it/s][A
42it [00:00, 185.86it/s][A
55it [00:00, 164.13it/s][A
71it [00:00, 157.69it/s][A
87it [00:00, 154.28it/s][A
100it [00:00, 144.67it/s][A
114it [00:00, 141.87it/s][A
129it [00:00, 141.33it/s][A
143it [00:00, 138.87it/s][A
158it [00:01, 138.99it/s][A
175it [00:01, 144.64it/s][A
190it [00:01, 142.17it/s][A

In [29]:
# Model hyper-parameters.  Among the cells visible here, get_model() reads
# conv_filters and kernel_size; the remaining names are not referenced by any
# other visible cell.
conv_filters = 64
kernel_size = 16
pool_size = 32
time_dense_size = 32
rnn_size = 32
minibatch_size = 32

In [30]:
import tensorflow.keras.backend as K

def channelPool(x):
    """Return the maximum of ``x`` taken over its last axis."""
    pooled = K.max(x, axis=-1)
    return pooled


def get_model(optimizer):
    """Build the Conv1D + SimpleRNN CTC training model and its callback.

    Reads the module-level settings ``mel_len``, ``mel_count``,
    ``conv_filters``, ``kernel_size``, ``dict_len``, ``max_seq_len`` and the
    validation data ``X_val``.  A summary of the prediction sub-model is
    printed as a side effect.

    Args:
        optimizer: Keras optimizer used to compile the training model.

    Returns:
        ``(model, viz_cb)``: the compiled model, whose single output is the CTC
        loss computed by the ``'ctc'`` Lambda layer, and a ``VizCallback``
        wired to a function mapping the input to the softmax output.
    """
    activation = 'relu'
    input_data = Input(name='the_input', shape=(mel_len, mel_count), dtype='float32')

    # Convolutional front end (currently a single layer named 'conv_0').
    features = input_data
    for layer_idx in range(1):
        features = Conv1D(
            conv_filters,
            kernel_size,
            strides=1,
            padding='same',
            activation=activation,
            kernel_initializer='he_normal',
            name=f'conv_{layer_idx}',
        )(features)

    recurrent = SimpleRNN(64, return_sequences=True, kernel_initializer='he_normal')(features)
    logits = Dense(dict_len, kernel_initializer='he_normal', name='dense1')(recurrent)
    y_pred = Activation('softmax', name='softmax')(logits)

    # Prediction-only view of the network, built just to print its summary.
    Model(inputs=input_data, outputs=y_pred).summary()

    # Extra inputs required by the CTC loss.
    labels = Input(name='the_labels', shape=[max_seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    print(y_pred, labels, input_length, label_length)

    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(ctc_inputs)

    model = Model(
        inputs=[input_data, labels, input_length, label_length],
        outputs=loss_out,
    )

    # The loss is already computed inside the graph, so the Keras-level loss
    # simply passes the 'ctc' output through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer)

    test_func = K.function([input_data], [y_pred])
    viz_cb = VizCallback('test', test_func, X_val)

    return model, viz_cb


In [31]:
# `learning_rate` replaces the deprecated `lr` keyword of Keras optimizers.
model, viz_cb = get_model(RMSprop(learning_rate=0.005))

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       [(None, 161, 1)]          0         
_________________________________________________________________
conv_0 (Conv1D)              (None, 161, 64)           1088      
_________________________________________________________________
conv_1 (Conv1D)              (None, 161, 64)           65600     
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 161, 64)           8256      
_________________________________________________________________
dense1 (Dense)               (None, 161, 12)           780       
_________________________________________________________________
softmax (Activation)         (None, 161, 12)           0         
Total params: 75,724
Trainable params: 75,724
Non-trainable params: 0
_______________________________________________________

In [32]:
# Run a garbage-collection pass before training starts.
import gc
gc.collect()

193323

In [None]:
# Train the CTC model; the batch size comes from the hyper-parameter cell
# (minibatch_size) instead of a duplicated literal.
model.fit(
    X,
    l,
    validation_split=0.1,
    batch_size=minibatch_size,
    callbacks=[viz_cb],
    epochs=10,
)

Train on 90000 samples, validate on 10000 samples
Epoch 1/10
labels:  ['3', ' ', ' ', ' ', ' ']
labels:  ['1', ' ', ' ', ' ', ' ']
labels:  ['3', '4', ' ', ' ', ' ']
labels:  ['2', ' ', ' ', ' ', ' ']
labels:  ['8', ' ', ' ', ' ', ' ']
labels:  ['4', '4', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['5', ' ', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
result lens:  10
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']
['4', ' ']

0.8833333333333334
Epoch 2/10
labels:  ['3', ' ', ' ', ' ', ' ']
labels:  ['1', ' ', ' ', ' ', ' ']
labels:  ['3', '4', ' ', ' ', ' ']
labels:  ['2', ' ', ' ', ' ', ' ']
labels:  ['8', ' ', ' ', ' ', ' ']
labels:  ['4', '4', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['5', ' ', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
result lens:  10
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']
['7', ' ']

0.8641666666666667
Epoc

In [None]:
# Inference function mapping the model input to the softmax output only
# (despite its name).  Layers are looked up by the names assigned in
# get_model() rather than by position, so a missing layer raises immediately
# instead of silently producing an empty output list.
get_all_layer_outputs = K.function(
    [model.get_layer('the_input').input],
    [model.get_layer('softmax').output],
)


In [None]:
# Greedy-decode the validation inputs and display the first ten predictions.
decoded = decode_batch(get_all_layer_outputs, X_val[0])
decoded[:10]

In [None]:
# Print the reference labels of the first ten validation samples.
for i in l_val[:10]:
    print(labels_to_text(i))

In [None]:
# Mean character error rate of the decoded predictions over the validation set.
cers = [
    cer(labels_to_text(t), decoded[i])
    for i, t in enumerate(l_val)
]

print(np.mean(cers))