In [30]:
import librosa
import librosa.display
import scipy.io.wavfile
import sklearn
import tensorflow as tf
import numpy as np
from config import SEQ_LENGTH, FRAMERATE, CHUNK, FFT_SIZE
import matplotlib.pyplot as plt
import generate_wav_samples as gen
import os
import keras
import itertools
from config import MORSE_CHR
from tqdm import tqdm

from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from keras.layers import Input, Dense, Activation,TimeDistributed
from keras.layers import Reshape, Lambda, Dropout
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import GRU, SimpleRNN,LSTM
from keras.optimizers import SGD, Adam
from keras.utils.data_utils import get_file
from keras.preprocessing import image
import keras.callbacks


In [2]:
# Root directory for training artefacts; VizCallback joins it with a run name
# to build the folder its per-epoch weight files are written to.
OUTPUT_DIR = 'rnn_output'

In [3]:
class VizCallback(keras.callbacks.Callback):
    """Keras callback that checkpoints weights and prints sample decodes each epoch.

    Args:
        run_name: Sub-directory name under ``OUTPUT_DIR`` for this run's weights.
        test_func: Callable taking ``[batch]`` and returning a list whose first
            element holds per-frame class scores (consumed by ``decode_batch``).
        X: Indexable where ``X[0]`` holds model input features and ``X[1]``
            holds the matching rows of label indices.
    """

    def __init__(self, run_name, test_func, X):
        # Let the Keras base class initialise its own state before adding ours.
        super().__init__()
        self.test_func = test_func
        self.output_dir = os.path.join(
            OUTPUT_DIR, run_name)
        self.X = X

    def show_edit_distance(self, num):
        """Placeholder: prints ``num`` as-is; no edit distance is computed yet.

        TODO: the intended implementation decodes ``num`` samples in batches,
        accumulates the edit distance between each decoded result and its
        source string, and reports the mean and length-normalised mean. It
        needs a sample generator and an edit-distance helper that this
        notebook does not define.
        """
        print('edit distance: ', num)

    def on_epoch_end(self, epoch, logs=None):
        """Save the current weights and print up to five labels and decodes.

        Args:
            epoch: Zero-based epoch index supplied by Keras; used in the file name.
            logs: Metrics dict supplied by Keras (unused here).
        """
        # save_weights does not create missing parent folders.
        os.makedirs(self.output_dir, exist_ok=True)
        self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))

        self.show_edit_distance(256)

        # Slicing (rather than indexing 0..4) tolerates sets with fewer than 5 rows.
        for row in self.X[1][:5]:
            print('labels: ', labels_to_text([int(e) for e in row]))

        word_batch = self.X[0][:5]
        res = decode_batch(self.test_func, word_batch)
        print('result lens: ', len(res))
        for e in res[:5]:
            print(e)
            

def labels_to_text(i):
    """Translate an iterable of label indices into their ``MORSE_CHR`` entries.

    Args:
        i: Iterable of integer indices into ``MORSE_CHR``.

    Returns:
        A list with one ``MORSE_CHR`` element per input index, in order.
    """
    symbols = []
    for index in i:
        symbols.append(MORSE_CHR[index])
    return symbols

def decode_batch2(test_func, word_batch):
    """Return (and print) the raw per-frame argmax class indices for a batch.

    No repeat collapsing or blank removal is applied here.

    Args:
        test_func: Callable taking ``[word_batch]`` and returning a list whose
            first element holds class scores with the classes on the last axis.
        word_batch: Batch of model inputs forwarded to ``test_func`` unchanged.

    Returns:
        numpy array of argmax indices taken over the last axis of the scores.
    """
    out = test_func([word_batch])[0]
    # Compute the argmax once and reuse it for both the printout and the result.
    best_path = np.argmax(out, axis=-1)
    print(best_path)
    return best_path


def decode_batch(test_func, word_batch):
    """Greedy (best-path) CTC decoding of a batch into ``MORSE_CHR`` symbols.

    For every sequence the per-frame argmax is taken, consecutive repeats are
    collapsed to their first frame, and the blank class
    (``len(MORSE_CHR) - 1``) is dropped.

    Args:
        test_func: Callable taking ``[word_batch]`` and returning a list whose
            first element holds class scores with the classes on the last axis.
        word_batch: Batch of model inputs forwarded to ``test_func`` unchanged.

    Returns:
        A list with one entry per sequence; each entry is the list of decoded
        ``MORSE_CHR`` symbols produced by ``labels_to_text``.
    """
    blank = len(MORSE_CHR) - 1
    best_paths = np.argmax(test_func([word_batch])[0], axis=-1)

    decoded = []
    for path in best_paths:
        # Keep a frame only when it opens a new run (it is the first frame or
        # differs from its predecessor) and it is not the blank class.
        symbols = [
            sym for pos, sym in enumerate(path)
            if (pos == 0 or sym != path[pos - 1]) and sym != blank
        ]
        decoded.append(labels_to_text(symbols))
    return decoded



In [4]:
def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended as the body of a Keras ``Lambda`` layer.

    Args:
        args: Four-element sequence ``(y_pred, labels, input_length,
            label_length)`` in that order.

    Returns:
        The result of ``K.ctc_batch_cost`` for the batch.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # The leading-frame trim (y_pred[:, 2:, :]) is disabled in this notebook,
    # so every output frame of the network is scored.
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)


In [5]:
# Dataset and feature-extraction settings.
# (A former `sample_len = SEQ_LENGTH` assignment was a dead store: it was
# overwritten by the literal below before ever being read.)

samples_count = 10000       # number of training samples requested from read_data
sample_len = 80000          # NOTE(review): not referenced in the visible cells
sr = 8000                   # sample rate passed to librosa's melspectrogram
dict_len = len(MORSE_CHR)   # width of the softmax output layer
max_seq_len = 8             # columns of the zero-padded label matrix
mel_count = 1               # n_mels passed to melspectrogram (feature channels)
mel_len = 160               # time frames per sample; must equal the spectrogram length

In [6]:
# Single module-level sample generator; every read_data call draws from this
# same stream, so successive calls continue where the previous one stopped.
# NOTE(review): the meaning of the third argument (1) is defined in
# generate_wav_samples - confirm.
g = gen.seq_generator(SEQ_LENGTH, FRAMERATE, 1)

In [7]:
def read_data(set_len):
    """Draw up to ``set_len`` samples from the generator ``g`` as CTC inputs.

    Each waveform is normalised, converted to a mel spectrogram and scaled by
    its own maximum. Labels are written into a zero-padded integer matrix.

    Args:
        set_len: Number of samples to read. Arrays are sized by this value,
            not by the global ``samples_count``.

    Returns:
        A tuple ``([features, labels, input_length, label_length], labels)``:
        ``features`` has shape ``(n, mel_len, mel_count)``, ``labels`` has
        shape ``(n, max_seq_len)`` and both length arrays have shape
        ``(n, 1)``. ``n`` equals ``set_len`` unless the generator ran out
        early, in which case only the rows actually filled are returned.
    """
    label_matrix = np.zeros([set_len, max_seq_len], dtype=np.int32)
    features = np.zeros([set_len, mel_len, mel_count])
    input_length = np.zeros([set_len, 1], dtype=np.int32)
    label_length = np.zeros([set_len, 1], dtype=np.int32)

    filled = 0
    # islice stops after set_len items without pulling an extra one from g,
    # and makes set_len == 0 a clean no-op.
    for wave, label_indexes, labels, c in tqdm(itertools.islice(g, set_len), total=set_len):
        wave = wave.reshape(SEQ_LENGTH)
        wave = librosa.util.normalize(wave)
        # `y` is passed by keyword: newer librosa releases reject positional audio.
        mel = librosa.feature.melspectrogram(y=wave, sr=sr, n_fft=500,
                                             n_mels=mel_count, hop_length=250)

        # Time-major layout: (frames, mel bands).
        mel = mel.T
        peak = np.max(mel)
        if peak > 0:
            # Skip scaling for an all-zero spectrogram to avoid 0/0 -> NaN.
            mel = mel / peak

        features[filled, :, :] = mel

        # NOTE(review): `labels` (not `label_indexes`) is stored in the int32
        # matrix - confirm the generator yields integer indices in that slot.
        label_matrix[filled, :len(labels)] = labels
        input_length[filled, :] = mel.shape[0]
        label_length[filled, :1] = c

        filled += 1

    if filled < set_len:
        # Generator ended early: drop the untouched all-zero rows so they are
        # not used as samples with zero input/label length.
        features = features[:filled]
        label_matrix = label_matrix[:filled]
        input_length = input_length[:filled]
        label_length = label_length[:filled]

    return [features, label_matrix, input_length, label_length], label_matrix

In [8]:
# Build the training set. `X` is the 4-element list
# [features, labels, input_length, label_length]; `l` is the label matrix again.
X, l = read_data(samples_count)

9990it [01:21, 108.56it/s]Process Process-2:
Process Process-1:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/user/Dropbox/projs/MorseNet/generate_wav_samples.py", line 214, in dowork
    q.put(generate_seq(seq_length, framerate))
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/Dropbox/projs/MorseNet/generate_wav_samples.py", line 214, in dowork
    q.put(generate_seq(seq_length, framerate))
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 82, in put
    if not self._sem.acquire(block, timeout):
  File "/usr/lib/python3.6/multiprocessing/queues.py"

In [9]:
# Inspect the zero-padded label matrix (one row per sample, max_seq_len columns).
l

array([[ 2,  7,  1, ...,  0,  0,  0],
       [ 2,  3,  0, ...,  0,  0,  0],
       [ 2,  0,  3, ...,  0,  0,  0],
       ...,
       [ 1,  0,  0, ...,  0,  0,  0],
       [ 7, 10,  0, ...,  0,  0,  0],
       [ 8,  0,  9, ...,  0,  0,  0]], dtype=int32)

In [10]:
# Inspect the first sample's features: X[0] is the feature array, so X[0][0]
# has shape (mel_len, mel_count).
X[0][0]

array([[0.00360813],
       [0.00498175],
       [0.00257211],
       [0.00394486],
       [0.00384482],
       [0.00357976],
       [0.0055256 ],
       [0.00548711],
       [0.00329935],
       [0.00591534],
       [0.00585535],
       [0.00524639],
       [0.00259898],
       [0.00372216],
       [0.00522193],
       [0.00373789],
       [0.00315027],
       [0.00349151],
       [0.00555732],
       [0.00271001],
       [0.00578838],
       [0.0604255 ],
       [0.67302122],
       [0.48984835],
       [0.00343293],
       [0.15237681],
       [0.73389329],
       [0.72138648],
       [0.76744827],
       [0.79423036],
       [0.60618904],
       [0.02105147],
       [0.75942428],
       [0.84560206],
       [0.90451319],
       [0.9109881 ],
       [0.91538416],
       [0.9139641 ],
       [0.70529538],
       [0.00427362],
       [0.25513827],
       [0.97129888],
       [0.95336171],
       [0.99629367],
       [0.954364  ],
       [0.19903213],
       [0.1901866 ],
       [1.   

In [11]:
# Small sample set handed to VizCallback for the per-epoch decode printouts.
# It is drawn from the same generator `g` as the training data; `l_val` is not
# referenced in the visible cells.
X_val, l_val = read_data(10)


0it [00:00, ?it/s][A

In [12]:
# Show the number of output classes (len(MORSE_CHR)).
dict_len

12

In [26]:
# Hyperparameters for the network built in the model-construction cell.
conv_filters = 32  # number of Conv1D output channels
kernel_size = 32  # Conv1D kernel width
pool_size = 32  # MaxPooling1D window size
time_dense_size = 32  # NOTE(review): not referenced in the visible cells
rnn_size = 32  # units in the recurrent layer
minibatch_size = 32  # NOTE(review): not referenced; model.fit uses batch_size=1000

In [27]:
# Displays the function object; presumably a check that ctc_lambda_func is
# defined in the current kernel session.
ctc_lambda_func

<function __main__.ctc_lambda_func(args)>

In [31]:
# --- Acoustic model: Conv1D -> MaxPooling1D -> GRU -> Dropout -> Dense -> softmax ---
input_shape = (mel_len, mel_count)

act = 'relu'
input_data = Input(name='the_input', shape=input_shape, dtype='float32')

inner = Conv1D(conv_filters, kernel_size, padding='same',
               activation=act, kernel_initializer='he_normal',
               name='conv1')(input_data)
# strides=1 with 'same' padding keeps one output step per input frame.
mp = MaxPooling1D(pool_size=pool_size, name='max1', strides=1, padding='same')(inner)

# Only the GRU branch feeds the output. The SimpleRNN and LSTM layers that used
# to be built here were never connected to any model and have been removed.
gru = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(mp)

dpo = Dropout(0.05, name='do1')(gru)
# NOTE(review): decode_batch treats class len(MORSE_CHR) - 1 as the CTC blank,
# and K.ctc_batch_cost presumably reserves the last class too. With dict_len
# outputs the final MORSE_CHR entry therefore cannot be emitted - confirm that
# entry is a placeholder, or widen this layer to dict_len + 1.
dense1 = Dense(dict_len, kernel_initializer='he_normal', name='dense1')(dpo)

y_pred = Activation('softmax', name='softmax')(dense1)

# Summary of the inference-only network (features -> per-frame class scores).
Model(inputs=input_data, outputs=y_pred).summary()

# --- Training graph: extra inputs required by the CTC loss ---
labels = Input(name='the_labels', shape=[max_seq_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

# The CTC cost is computed inside the graph by a Lambda layer, so the training
# model's single output is already the loss value.
loss_out = Lambda(
    ctc_lambda_func, output_shape=(1,),
    name='ctc')([y_pred, labels, input_length, label_length])

model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

# clipnorm seems to speed up convergence
sgd = SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
# The loss calc occurs in the 'ctc' layer, so the compiled loss just passes it through.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

# Inference function (features -> softmax scores) used for decoding in the callback.
test_func = K.function([input_data], [y_pred])
viz_cb = VizCallback('test', test_func, X_val)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       (None, 160, 1)            0         
_________________________________________________________________
conv1 (Conv1D)               (None, 160, 32)           1056      
_________________________________________________________________
max1 (MaxPooling1D)          (None, 160, 32)           0         
_________________________________________________________________
gru1 (GRU)                   (None, 160, 32)           6240      
_________________________________________________________________
dropout_1 (Dropout)          (None, 160, 32)           0         
_________________________________________________________________
dense1 (Dense)               (None, 160, 12)           396       
___________________________

In [33]:
# Train with the in-graph CTC loss. `X` supplies the four model inputs; `l` is
# passed as y only to satisfy fit(), because the compiled loss returns y_pred
# and ignores y_true.
# NOTE(review): batch_size is hard-coded to 1000 while minibatch_size (32) is
# defined but unused - confirm which is intended.
model.fit(X, l, validation_split=0.1, batch_size=1000, callbacks=[viz_cb], epochs=100)

Train on 9000 samples, validate on 1000 samples
Epoch 1/100
edit distance:  256
labels:  ['8', '9', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['2', ' ', '1', '1', ' ', ' ', ' ', ' ']
labels:  ['3', '2', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '3', '9', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '9', ' ', ' ', ' ', ' ', ' ', ' ']
result lens:  5
['8', '9', ' ']
['2', ' ', '1', '1', ' ']
['3', '2', ' ']
['7', '3', '9', ' ']
['7', '9', ' ']
Epoch 2/100
edit distance:  256
labels:  ['8', '9', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['2', ' ', '1', '1', ' ', ' ', ' ', ' ']
labels:  ['3', '2', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '3', '9', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '9', ' ', ' ', ' ', ' ', ' ', ' ']
result lens:  5
['8', '9', ' ']
['2', ' ', '1', '1', ' ']
['3', '2', ' ']
['7', '3', '9', ' ']
['7', '9', ' ']
Epoch 3/100
edit distance:  256
labels:  ['8', '9', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['2', ' ', '1', '1', ' ', ' ', ' ', ' ']
labels:  ['3', '2', ' ', ' ', ' ',

edit distance:  256
labels:  ['8', '9', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['2', ' ', '1', '1', ' ', ' ', ' ', ' ']
labels:  ['3', '2', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '3', '9', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '9', ' ', ' ', ' ', ' ', ' ', ' ']
result lens:  5
['8', '9', ' ']
['2', ' ', '1', '1', ' ']
['3', '2', ' ']
['7', '3', '9', ' ']
['7', '9', ' ']
Epoch 19/100
edit distance:  256
labels:  ['8', '9', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['2', ' ', '1', '1', ' ', ' ', ' ', ' ']
labels:  ['3', '2', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '3', '9', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '9', ' ', ' ', ' ', ' ', ' ', ' ']
result lens:  5
['8', '9', ' ']
['2', ' ', '1', '1', ' ']
['3', '2', ' ']
['7', '3', '9', ' ']
['9', ' ']
Epoch 20/100
edit distance:  256
labels:  ['8', '9', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['2', ' ', '1', '1', ' ', ' ', ' ', ' ']
labels:  ['3', '2', ' ', ' ', ' ', ' ', ' ', ' ']
labels:  ['7', '3', '9', ' ', ' ', ' ', ' ', ' 

KeyboardInterrupt: 

In [None]:
# NOTE(review): `model` outputs the 'ctc' Lambda layer, so this returns the CTC
# loss values rather than character scores; use test_func / decode_batch to
# obtain decoded sequences.
model.predict(X)