In [2]:
from config import SEQ_LENGTH, FRAMERATE, CHUNK, FFT_SIZE
import generate_wav_samples as gen
from config import MORSE_CHR
import generator_test as gt

from tensorflow import keras
import librosa
import librosa.display
import scipy.io.wavfile
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import os
import itertools
from tqdm import tqdm
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
from tensorflow.keras.layers import Input, Dense, Activation,TimeDistributed, GlobalMaxPooling1D
from tensorflow.keras.layers import Reshape, Lambda, Dropout, Bidirectional, Permute
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GRU, SimpleRNN,LSTM
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.preprocessing import image
import tensorflow.keras.callbacks
import pickle
import Levenshtein
import string
import pandas as pd
import bz2

In [3]:
# Root directory; VizCallback joins it with the run name to get the folder for weight checkpoints.
OUTPUT_DIR = 'rnn_output'

### Здесь объявляем коллбэки

In [4]:
class VizCallback(tensorflow.keras.callbacks.Callback):
    """Keras callback that checkpoints weights and reports decoding quality.

    After every epoch it saves the model weights under
    ``OUTPUT_DIR/<run_name>/weightsNN.h5``, prints the first ten reference
    label rows next to their decoded predictions, and prints the mean
    character error rate over (at most) the first 5000 evaluation samples.

    Args:
        run_name: sub-directory of ``OUTPUT_DIR`` used for this run.
        test_func: callable taking ``[batch]`` and returning a list whose
            first element holds the per-step class scores (see decode_batch).
        X: indexable where ``X[0]`` is the feature batch and ``X[1]`` the
            matching label rows.
    """

    def __init__(self, run_name, test_func, X):
        # Let the Keras base class set up its own attributes.
        super().__init__()
        self.test_func = test_func
        self.output_dir = os.path.join(OUTPUT_DIR, run_name)
        self.X = X

    def show_edit_distance(self, num):
        """Print an edit-distance value."""
        print('edit distance: ', num)

    def on_epoch_end(self, epoch, logs=None):
        """Save a checkpoint and print sample decodings plus the mean CER.

        Args:
            epoch: epoch index, used in the checkpoint file name.
            logs: metrics dict passed by Keras; not used here.
        """
        # save_weights does not create missing parent directories.
        os.makedirs(self.output_dir, exist_ok=True)
        self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))

        dec_len = 10
        len_for_cer_count = 5000

        print()
        # Slicing (instead of indexing 0..9) tolerates sets smaller than dec_len.
        for label_row in self.X[1][:dec_len]:
            print('labels: ', labels_to_text([int(e) for e in label_row]))

        # Decode once; the preview is simply the head of the CER batch.
        decoded = decode_batch(self.test_func, self.X[0][:len_for_cer_count])
        preview = decoded[:dec_len]
        print()
        print('result lens: ', len(preview))
        for e in preview:
            print(e)
        print()

        cers = [
            cer(labels_to_text(t), pred)
            for t, pred in zip(self.X[1][:len_for_cer_count], decoded)
        ]
        # Avoid numpy's empty-mean warning when there is nothing to score.
        print(np.mean(cers) if cers else float('nan'))
            
def cer(true, pred):
    """Character error rate between two sequences of characters.

    Both sequences are joined into strings and stripped of surrounding
    whitespace. The Levenshtein distance is divided by the reference length;
    for an empty reference the length of the prediction is returned instead.
    """
    reference = ''.join(true).strip()
    hypothesis = ''.join(pred).strip()
    edits = Levenshtein.distance(reference, hypothesis)
    if not reference:
        return len(hypothesis)
    return edits / len(reference)

def ctc_lambda_func(args):
    """Compute the batch CTC cost inside a Lambda layer.

    ``args`` is the 4-element sequence (y_pred, labels, input_length,
    label_length); note that K.ctc_batch_cost takes the labels first.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)

def labels_to_text(i):
    """Map an iterable of class indices to the matching MORSE_CHR entries.

    Returns a list with one entry per index, in the same order.
    """
    symbols = []
    for index in i:
        symbols.append(MORSE_CHR[index])
    return symbols

def decode_batch2(test_func, word_batch):
    """Debug helper: print and return the raw best-path class indices.

    Unlike decode_batch, no repeat collapsing or blank removal is applied.

    Args:
        test_func: callable taking ``[word_batch]`` and returning a list whose
            first element holds per-step class scores.
        word_batch: batch of inputs forwarded to ``test_func``.

    Returns:
        Array of argmax indices taken over the last axis of the scores.
    """
    out = test_func([word_batch])[0]
    # Compute the argmax once and reuse it for both the print and the return.
    best_path = np.argmax(out, axis=-1)
    print(best_path)
    return best_path


def decode_batch(test_func, word_batch):
    """Greedy (best-path) decoding of a batch of per-step class scores.

    For every sample the argmax class per step is taken, runs of identical
    consecutive classes are collapsed to one symbol, and the blank class
    (the last index, ``len(MORSE_CHR) - 1``) is dropped. The remaining
    indices are converted with labels_to_text.

    Returns:
        A list with one decoded symbol list per sample.
    """
    scores = test_func([word_batch])[0]
    best_paths = np.argmax(scores, axis=-1)
    blank = len(MORSE_CHR) - 1

    decoded = []
    for path in best_paths:
        # groupby yields one key per run of equal consecutive values.
        collapsed = [symbol for symbol, _run in itertools.groupby(path)]
        kept = [symbol for symbol in collapsed if symbol != blank]
        decoded.append(labels_to_text(kept))
    return decoded



### Здесь настраиваем параметры для генерируемых данных и генерируем сами данные.
Для простоты загрузим уже готовое.

In [5]:
# Parameters of the generated data and of the arrays that hold it.
sample_len = SEQ_LENGTH  # alias of the configured sequence length

samples_count = 100000  # number of samples requested when regenerating the dataset
sr = 8000  # passed to dg.seq_generator; presumably the audio sample rate in Hz (TODO confirm)
dict_len = len(MORSE_CHR)  # number of classes produced by the final Dense layer
max_seq_len = 5  # longest label kept by read_data; also the width of the label arrays
mel_count = 1  # size of the last (feature) axis of the model input
mel_len = 161  # number of steps along the first axis of the model input

In [6]:
# Create the project's data generator; `g` is the iterable that read_data consumes.
# NOTE(review): the meaning of the literal `1` argument is not visible here; check seq_generator's signature.
dg = gen.DataGenerator()
g = dg.seq_generator(SEQ_LENGTH, FRAMERATE, 1, sr, mel_count)

In [7]:
def read_data(set_len, g):
    """Collect up to ``set_len`` samples from generator ``g`` into arrays.

    Each item of ``g`` must be a 5-tuple ``(wave, label_indexes, labels, c,
    mel)``. Items whose ``labels`` are longer than ``max_seq_len`` are
    skipped. ``mel`` fills one row of the feature array, ``labels`` is
    written left-aligned into a zero-initialised label row, ``mel.shape[0]``
    is stored as the input length and ``c`` as the label length.

    Args:
        set_len: number of samples to collect.
        g: iterable yielding the 5-tuples described above.

    Returns:
        ``([X, l, input_length, label_length], l)``. If ``g`` runs out before
        ``set_len`` samples were accepted, the arrays are trimmed to the
        number actually collected instead of keeping all-zero rows.
    """
    l = np.zeros([set_len, max_seq_len], dtype=np.int32)
    X = np.zeros([set_len, mel_len, mel_count])
    input_length = np.zeros([set_len, 1], dtype=np.int32)
    label_length = np.zeros([set_len, 1], dtype=np.int32)

    i = 0
    # With set_len == 0 there is no row to write to, so do not touch the generator.
    if set_len > 0:
        for wave, label_indexes, labels, c, mel in tqdm(g):
            if len(labels) > max_seq_len:
                continue

            X[i, :, :] = mel
            l[i, :len(labels)] = labels
            input_length[i, :] = mel.shape[0]
            label_length[i, :1] = c

            i += 1
            if i == set_len:
                break

    if i < set_len:
        # Generator ended early: drop the unfilled rows, which would otherwise
        # look like samples with zero input and label length.
        X = X[:i]
        l = l[:i]
        input_length = input_length[:i]
        label_length = label_length[:i]

    return [X, l, input_length, label_length], l

In [8]:
# Base file name of the dataset; the cells below use it as-is or with a '.pbz2' suffix.
dataset = 'dataset_100k_digits_15-16wpm'#'dataset_100k_digits.pickle'

In [9]:
# Disabled cell: builds a fresh dataset of `samples_count` samples from the generator.
# NOTE(review): this calls gt.read_data (module generator_test), not the read_data
# defined in this notebook; confirm which one is intended.
if False:
    X, l = gt.read_data(samples_count, g)

In [10]:
# Disabled cell: writes [X, l] as an uncompressed pickle to the file named by `dataset`.
if False:
    with open(dataset, 'wb') as f:
        pickle.dump([X, l], f, protocol=4)

In [11]:
# Disabled cell: reads X and l back from the uncompressed pickle.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
if False:
    with open(dataset, 'rb') as f:
        X, l = pickle.load(f)

In [12]:
# Disabled cell: writes [X, l] as a bz2-compressed pickle to '<dataset>.pbz2'.
if False:
    with bz2.BZ2File(f'{dataset}.pbz2', 'w') as f:
        pickle.dump([X, l], f)

In [13]:
# Active cell: loads the training data X and l from the bz2-compressed pickle '<dataset>.pbz2'.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
if True:
    with bz2.BZ2File(f'{dataset}.pbz2', 'r') as f:
        X, l = pickle.load(f)

In [14]:
# Draw a 200-sample validation set from the live generator using this notebook's read_data.
X_val, l_val = read_data(200, g)

191it [00:01, 133.15it/s]
Process Process-2:
Process Process-4:
Process Process-1:
Process Process-3:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/user/Dropbox/projs/MorseNet/generate_wav_samples.py", line 218, in do_work
    time.sleep(0.0100)
  File "/home/user/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/user/anaconda3/lib/python3.7/mu

### Настроим модель

In [41]:
# Model and training hyperparameters.
# NOTE(review): get_model hard-codes the sizes of its active layers rather than
# reading these values; confirm which of them are still needed.
conv_filters = 64
kernel_size = 16
pool_size = 32
time_dense_size = 32
rnn_size = 32
minibatch_size = 32

In [85]:
import tensorflow.keras.backend as K

def channelPool(x):
    """Reduce tensor ``x`` over its last axis by taking the maximum."""
    pooled = K.max(x, axis=-1)
    return pooled


def get_model(optimizer):
    """Build and compile the CTC training model plus its monitoring callback.

    The network is four stacked 'same'-padded Conv1D layers, a SimpleRNN
    returning the full sequence, and a per-step Dense + softmax over
    ``dict_len`` classes. The CTC cost is computed inside a Lambda layer
    named 'ctc', so the compiled loss simply passes the model output through.

    Args:
        optimizer: Keras optimizer instance used to compile the model.

    Returns:
        ``(model, viz_cb)``: the compiled training model, which takes
        features, labels, input lengths and label lengths, and a VizCallback
        bound to the global ``X_val`` that decodes with the softmax output.
    """
    activation = 'relu'
    features = Input(name='the_input', shape=(mel_len, mel_count), dtype='float32')

    # (layer name, number of filters, kernel width), in stacking order.
    conv_specs = [
        ('conv_1', 4, 16),
        ('conv_10', 16, 64),
        ('conv_2', 32, 64),
        ('conv_3', 16, 64),
    ]
    hidden = features
    for layer_name, n_filters, width in conv_specs:
        hidden = Conv1D(n_filters, width, strides=1, padding='same',
                        activation=activation, kernel_initializer='he_normal',
                        name=layer_name)(hidden)

    hidden = SimpleRNN(32, return_sequences=True, kernel_initializer='he_normal')(hidden)
    logits = Dense(dict_len, kernel_initializer='he_normal', name='dense1')(hidden)
    y_pred = Activation('softmax', name='softmax')(logits)

    # Print the layer table of the inference-only part of the network.
    Model(inputs=features, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[max_seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    print(y_pred, labels, input_length, label_length)

    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(ctc_inputs)

    model = Model(inputs=[features, labels, input_length, label_length], outputs=loss_out)

    # The 'ctc' layer already outputs the cost, so the loss returns y_pred unchanged.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer)

    test_func = K.function([features], [y_pred])
    return model, VizCallback('test', test_func, X_val)


In [88]:
# Build a fresh model for training. `learning_rate` replaces the deprecated `lr` keyword.
model, viz_cb = get_model(RMSprop(learning_rate=0.005))

Model: "model_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       [(None, 161, 1)]          0         
_________________________________________________________________
conv_1 (Conv1D)              (None, 161, 4)            68        
_________________________________________________________________
conv_10 (Conv1D)             (None, 161, 16)           4112      
_________________________________________________________________
conv_2 (Conv1D)              (None, 161, 32)           32800     
_________________________________________________________________
conv_3 (Conv1D)              (None, 161, 16)           32784     
_________________________________________________________________
simple_rnn_20 (SimpleRNN)    (None, 161, 32)           1568      
_________________________________________________________________
dense1 (Dense)               (None, 161, 12)           396

In [89]:
# Run a garbage-collection pass; the number shown below the cell is gc.collect()'s return value.
import gc
gc.collect()

6620

In [92]:
# Train with the configured minibatch size instead of a duplicated literal.
# `l` is passed as y only because fit expects targets: the compiled loss
# returns the model output (the CTC cost) and ignores y_true.
model.fit(X, l, validation_split=0.1, batch_size=minibatch_size, callbacks=[viz_cb], epochs=10)

Train on 90000 samples, validate on 10000 samples
Epoch 1/10
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['9', ' ', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['1', ' ', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['6', '4', ' ', ' ', ' ']
labels:  ['8', '0', ' ', ' ', ' ']
labels:  ['4', '6', ' ', ' ', ' ']
labels:  ['1', '8', ' ', ' ', ' ']
labels:  ['5', ' ', '8', ' ', ' ']

result lens:  10
['4', ' ']
['9', ' ']
['4', ' ']
['1', ' ']
['4', ' ']
['6', '4', ' ']
['8', '0', ' ']
['4', '6', ' ']
['1', '8', ' ']
['5', ' ', '8', ' ']

0.03583333333333333
Epoch 2/10
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['9', ' ', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['1', ' ', ' ', ' ', ' ']
labels:  ['4', ' ', ' ', ' ', ' ']
labels:  ['6', '4', ' ', ' ', ' ']
labels:  ['8', '0', ' ', ' ', ' ']
labels:  ['4', '6', ' ', ' ', ' ']
labels:  ['1', '8', ' ', ' ', ' ']
labels:  ['5', ' ', '8', ' ', ' ']

result lens:  10
['4', ' ']
['9', ' ']
['4', ' '

KeyboardInterrupt: 

In [91]:
# Re-compile the same model with a 10x smaller learning rate (0.0005 instead of 0.005).
# `learning_rate` replaces the deprecated `lr` keyword.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=RMSprop(learning_rate=0.0005))

### Загрузим предобученную модель

In [93]:
# Rebuild the architecture, then restore previously trained weights.
# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): VizCallback writes 'weightsNN.h5' files; 'weights_best.h5' must
# have been put in place separately. Confirm it exists before running.
model, viz_cb = get_model(RMSprop(learning_rate=0.005))
model.load_weights('rnn_output/test/weights_best.h5')

Model: "model_32"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
the_input (InputLayer)       [(None, 161, 1)]          0         
_________________________________________________________________
conv_1 (Conv1D)              (None, 161, 4)            68        
_________________________________________________________________
conv_10 (Conv1D)             (None, 161, 16)           4112      
_________________________________________________________________
conv_2 (Conv1D)              (None, 161, 32)           32800     
_________________________________________________________________
conv_3 (Conv1D)              (None, 161, 16)           32784     
_________________________________________________________________
simple_rnn_21 (SimpleRNN)    (None, 161, 32)           1568      
_________________________________________________________________
dense1 (Dense)               (None, 161, 12)           396

In [94]:
# Backend function mapping the first layer's input to the output of the 'softmax' layer.
get_all_layer_outputs = K.function(
    [model.layers[0].input],
    [layer.output for layer in model.layers[1:] if layer.name == 'softmax'],
)


In [95]:
# Decode the validation features with the reloaded model and show the first ten results.
decoded = decode_batch(get_all_layer_outputs, X_val[0])
decoded[:10]

[['4', ' '],
 ['9', ' '],
 ['4', ' '],
 ['1', ' '],
 ['4', ' '],
 ['6', '4', ' '],
 ['8', '0', ' '],
 ['4', '6', ' '],
 ['1', '8', ' '],
 ['5', ' ', '8', ' ']]

In [96]:
# Print the first ten reference label rows as text, for comparison with the decodings.
for label_row in l_val[:10]:
    print(labels_to_text(label_row))

['4', ' ', ' ', ' ', ' ']
['9', ' ', ' ', ' ', ' ']
['4', ' ', ' ', ' ', ' ']
['1', ' ', ' ', ' ', ' ']
['4', ' ', ' ', ' ', ' ']
['6', '4', ' ', ' ', ' ']
['8', '0', ' ', ' ', ' ']
['4', '6', ' ', ' ', ' ']
['1', '8', ' ', ' ', ' ']
['5', ' ', '8', ' ', ' ']


In [97]:
# Mean character error rate of the decoded predictions over the validation labels.
cers = [
    cer(labels_to_text(label_row), prediction)
    for label_row, prediction in zip(l_val, decoded)
]

print(np.mean(cers))

0.03083333333333333
