## LSTMを使って、音声認識します！

In [4]:
import tensorflow as tf
import tflearn
import matplotlib.pyplot as plt
import librosa
import librosa.display
import numpy as np
from sklearn import mixture
from sklearn.externals import joblib

def shuffle_samples(X, Y):
    """Shuffle samples and their labels with one shared random permutation.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis (any number of trailing axes).
    Y : array-like
        Labels, indexed along the first axis; must hold exactly one entry
        per sample in ``X``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_shuffled, Y_shuffled)``: new float64 arrays with the same shapes
        as the inputs. Row ``i`` of both outputs comes from the same input row.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` do not contain the same number of samples.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    if X.shape[0] != Y.shape[0]:
        raise ValueError(
            "X and Y must contain the same number of samples, got %d and %d"
            % (X.shape[0], Y.shape[0])
        )

    # The permutation is drawn from NumPy's global random state.
    order = np.arange(X.shape[0])
    np.random.shuffle(order)

    # Integer-array indexing gathers all rows in one step; the cast keeps the
    # float64 result type the function has always returned.
    return X[order].astype(np.float64), Y[order].astype(np.float64)

In [5]:
NUM_CLASS = 2     # number of classes to discriminate
SAMPLE_LEN = 100  # rows per sample window (original note: "every 1 second")

# Load the feature matrices that were saved in NumPy binary format.
feature1 = np.load('breath_normal.npy')
feature2 = np.load('breath_wrong.npy')
NUM_INPUT_DIM = feature1.shape[1]

# Both classes end up in the same sample array, so their feature widths have
# to agree. Fail here with a clear message instead of deep inside the
# windowing loop.
if feature2.shape[1] != NUM_INPUT_DIM:
    raise ValueError(
        "feature dimension mismatch: %d (breath_normal.npy) vs %d (breath_wrong.npy)"
        % (NUM_INPUT_DIM, feature2.shape[1])
    )

# featureALL / labelALL start empty and collect every window and its one-hot
# label; featureTmp is a scratch buffer holding a single window.
featureALL = np.empty((0, SAMPLE_LEN, NUM_INPUT_DIM))
featureTmp = np.empty((1, SAMPLE_LEN, NUM_INPUT_DIM))
labelALL = np.empty((0, NUM_CLASS))

## データの作成（特徴量ALL、ラベルALL）
特徴１も特徴２も混ぜ込んで１つのデータにします！

In [6]:
def _split_into_windows(feature, sample_len):
    """Cut a 2-D feature matrix into consecutive, non-overlapping windows.

    Trailing rows that do not fill a complete window are dropped.

    Returns a float64 array of shape
    ``(feature.shape[0] // sample_len, sample_len, feature.shape[1])``.
    """
    n_windows = feature.shape[0] // sample_len
    usable = feature[:n_windows * sample_len]
    # A C-order reshape places row ``w * sample_len + j`` at ``[w, j, :]``,
    # the same layout as copying the rows one at a time.
    return usable.reshape(n_windows, sample_len, feature.shape[1]).astype(np.float64)


windows1 = _split_into_windows(feature1, SAMPLE_LEN)
windows2 = _split_into_windows(feature2, SAMPLE_LEN)

# Build both arrays from scratch in one step. This avoids the repeated
# full-array copies of np.append and keeps the cell idempotent: re-running it
# no longer appends duplicate samples.
featureALL = np.concatenate([windows1, windows2], axis=0)
labelALL = np.concatenate(
    [
        np.tile([1.0, 0.0], (windows1.shape[0], 1)),  # one-hot label of feature1
        np.tile([0.0, 1.0], (windows2.shape[0], 1)),  # one-hot label of feature2
    ],
    axis=0,
)

# データをランダムにして、LSTMの実行！

In [9]:
def _build_lstm_classifier(sample_len, input_dim, num_class):
    """Assemble the input -> LSTM(128) -> softmax network with its SGD regression layer."""
    layers = tflearn.input_data(shape=[None, sample_len, input_dim])
    layers = tflearn.lstm(layers, 128)
    layers = tflearn.fully_connected(layers, num_class, activation='softmax')
    return tflearn.regression(
        layers,
        optimizer='sgd',
        learning_rate=0.5,
        loss='categorical_crossentropy',
    )


# Put the samples (and their labels) into a random order.
trainX, trainY = shuffle_samples(featureALL, labelALL)

# Reset TensorFlow's default graph before the network is defined.
tf.reset_default_graph()

# Describe the network structure and wrap it in a trainable model.
net = _build_lstm_classifier(SAMPLE_LEN, NUM_INPUT_DIM, NUM_CLASS)
model = tflearn.DNN(net)

# Run the training; validation_set=0.1 is passed through to tflearn.
model.fit(
    trainX,
    trainY,
    n_epoch=20,
    batch_size=10,
    validation_set=0.1,
    show_metric=True,
)

Training Step: 59  | total loss: [1m[32m0.30876[0m[0m | time: 0.105s
| SGD | epoch: 020 | loss: 0.30876 - acc: 0.9231 -- iter: 20/21
Training Step: 60  | total loss: [1m[32m0.26792[0m[0m | time: 1.165s
| SGD | epoch: 020 | loss: 0.26792 - acc: 0.9333 | val_loss: 1.57324 - val_acc: 0.0000 -- iter: 21/21
--


## モデルの保存＆下準備

In [None]:
# Save the trained model.
model.save('lstm_model.breath')

# Compute the class priors from the training labels (they change whenever the
# training data changes). The counts are kept as float64 so an empty label
# set still yields NaN rather than raising ZeroDivisionError.
hits_class0 = np.float64(np.count_nonzero(trainY[:, 0] == 1))
hits_class1 = np.float64(np.count_nonzero(trainY[:, 1] == 1))

prior = np.zeros(NUM_CLASS)
prior[0] = hits_class0 / (hits_class0 + hits_class1)
prior[1] = 1 - prior[0]
np.save('lstm_prior.breath.npy', prior)

## モデルを使って診断しよう！

In [16]:
import tensorflow as tf 
import tflearn 
import matplotlib.pyplot as plt 
import librosa 
import librosa.display 
import numpy as np 
from sklearn import mixture 
from sklearn.externals import joblib

NUM_CLASS = 2
SAMPLE_LEN = 100  # rows per sample window (original note: "every 1 second")

# Load the feature matrix that was saved in NumPy binary format.
feature = np.load('breath_my.npy')
NUM_INPUT_DIM = feature.shape[1]

# featureALL starts empty and collects every window; featureTmp is a scratch
# buffer for a single window.
window_shape = (SAMPLE_LEN, NUM_INPUT_DIM)
featureALL = np.empty((0,) + window_shape)
featureTmp = np.empty((1,) + window_shape)

In [17]:
# Cut the feature matrix into consecutive, non-overlapping windows of
# SAMPLE_LEN rows; trailing rows that do not fill a whole window are dropped.
# A single C-order reshape gives the same layout as copying row by row, and
# rebuilding featureALL from scratch keeps the cell idempotent (re-running it
# no longer appends duplicate windows).
num_windows = feature.shape[0] // SAMPLE_LEN
featureALL = (
    feature[:num_windows * SAMPLE_LEN]
    .reshape(num_windows, SAMPLE_LEN, NUM_INPUT_DIM)
    .astype(np.float64)
)

testX = featureALL

## 診断します！

In [None]:
# Reset TensorFlow's default graph before the network is defined.
tf.reset_default_graph()

# Describe the network structure: input -> LSTM(128) -> softmax -> SGD regression.
input_layer = tflearn.input_data(shape=[None, SAMPLE_LEN, NUM_INPUT_DIM])
lstm_layer = tflearn.lstm(input_layer, 128)
softmax_layer = tflearn.fully_connected(lstm_layer, NUM_CLASS, activation='softmax')
net = tflearn.regression(
    softmax_layer,
    optimizer='sgd',
    learning_rate=0.5,
    loss='categorical_crossentropy',
)
model = tflearn.DNN(net)

# Read back the trained model.
model.load('lstm_model.breath')

# Read back the stored prior probabilities.
prior = np.load('lstm_prior.breath.npy')

In [20]:
# Posterior probabilities predicted by the model, one row per window.
pred_prob = np.array(model.predict(testX))

# Likelihoods: posteriors divided by the class priors.
pred_LL = pred_prob / prior

# Values are clamped from below at 0.0001 before taking the logarithm.
epsilon = np.full(pred_prob.shape, 0.0001)
pred_prob_log, pred_prob_log_LL = (
    np.log(np.maximum(values, epsilon)) for values in (pred_prob, pred_LL)
)

# exp(mean(log(.))) over axis 0 is the geometric mean across windows; the
# averaged probabilities are printed first, the averaged likelihoods second.
for log_values in (pred_prob_log, pred_prob_log_LL):
    print(np.exp(np.mean(log_values, axis=0)))

[0.54384183 0.18428244]
[0.93230028 0.44227786]


# スタッキングします。

In [None]:
NUM_CLASS = 2
SAMPLE_LEN = 100
NUM_FRAME_STACK = 4  # consecutive rows merged into one stacked step

feature1 = np.load('****.npy')
feature2 = np.load('****.npy')
NUM_INPUT_DIM = feature1.shape[1]

# After stacking, a window has SAMPLE_LEN / NUM_FRAME_STACK steps, each
# NUM_INPUT_DIM * NUM_FRAME_STACK values wide.
stacked_steps = int(SAMPLE_LEN/NUM_FRAME_STACK)
stacked_width = NUM_INPUT_DIM*NUM_FRAME_STACK

# featureALL / labelALL start empty; featureTmp is a scratch buffer for a
# single stacked window.
featureALL = np.empty((0, stacked_steps, stacked_width))
featureTmp = np.empty((1, stacked_steps, stacked_width))
labelALL = np.empty((0, NUM_CLASS))

def _split_into_stacked_windows(feature, sample_len, frame_stack):
    """Cut a 2-D feature matrix into windows and stack neighbouring rows.

    The matrix is split into consecutive, non-overlapping windows of
    ``sample_len`` rows (trailing rows that do not fill a window are dropped).
    Inside each window, every ``frame_stack`` consecutive rows are laid side
    by side to form one wider row.

    Returns a float64 array of shape
    ``(n_windows, sample_len // frame_stack, frame_stack * feature.shape[1])``.

    Raises ValueError if ``sample_len`` is not a multiple of ``frame_stack``.
    """
    steps = sample_len // frame_stack
    if steps * frame_stack != sample_len:
        raise ValueError(
            "sample_len (%d) must be a multiple of frame_stack (%d)"
            % (sample_len, frame_stack)
        )
    n_windows = feature.shape[0] // sample_len
    usable = feature[:n_windows * sample_len]
    # In C order, row j of a window lands at step j // frame_stack, columns
    # (j % frame_stack) * dim .. + dim, i.e. the same cell the explicit
    # index arithmetic m = j // stack, n = j % stack would address.
    return usable.reshape(
        n_windows, steps, frame_stack * feature.shape[1]
    ).astype(np.float64)


stacked1 = _split_into_stacked_windows(feature1, SAMPLE_LEN, NUM_FRAME_STACK)
stacked2 = _split_into_stacked_windows(feature2, SAMPLE_LEN, NUM_FRAME_STACK)

# Build both arrays from scratch in one step. This avoids the repeated
# full-array copies of np.append and keeps the code idempotent on re-run.
featureALL = np.concatenate([stacked1, stacked2], axis=0)
labelALL = np.concatenate(
    [
        np.tile([1.0, 0.0], (stacked1.shape[0], 1)),  # one-hot label of feature1
        np.tile([0.0, 1.0], (stacked2.shape[0], 1)),  # one-hot label of feature2
    ],
    axis=0,
)

# Put the stacked samples (and their labels) into a random order.
trainX, trainY = shuffle_samples(featureALL, labelALL)

# Reset TensorFlow's default graph before the network is defined.
tf.reset_default_graph()

# Same layer stack as the unstacked model; only the input shape differs
# (fewer steps, wider rows).
stacked_input_shape = [None, int(SAMPLE_LEN/NUM_FRAME_STACK), NUM_INPUT_DIM*NUM_FRAME_STACK]
net = tflearn.input_data(shape=stacked_input_shape)
net = tflearn.lstm(net, 128)
net = tflearn.fully_connected(net, NUM_CLASS, activation='softmax')
net = tflearn.regression(
    net,
    optimizer='sgd',
    learning_rate=0.5,
    loss='categorical_crossentropy',
)

model = tflearn.DNN(net)

# Run the training with the same settings as the unstacked model.
fit_options = dict(n_epoch=20, batch_size=10, validation_set=0.1, show_metric=True)
model.fit(trainX, trainY, **fit_options)

# Save the trained frame-stacked model.
model.save('lstm_model_frame-stack.****')

# Count, per class column, the rows whose label equals 1. The counts are kept
# as float64 so an empty label set yields NaN rather than raising
# ZeroDivisionError.
class_counts = (trainY[:, :2] == 1).sum(axis=0).astype(np.float64)

# Class priors: share of class 0, and its complement for class 1.
prior = np.zeros(NUM_CLASS)
prior[0] = class_counts[0] / (class_counts[0] + class_counts[1])
prior[1] = 1 - prior[0]
np.save('Stack_lstm_prior.****.npy', prior)
