# Attempt 1

This is the first attempt at generating a Neural Network which can recognize the cry of a child, using Keras and CNTK

In [69]:
# Enable intellisense
%config IPCompleter.greedy=True

In [53]:
from __future__ import print_function
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import BatchNormalization
from keras.utils import to_categorical
import numpy as np

Having imported the necessary libraries, we now move on to load the files.
The files contain the data for the different sets, and their labels are generated.

In [4]:
cries = np.load("../dataset/slow/cries.npy")
cry_labels = np.ones((cries.shape[0], 1), dtype=int)

noise = np.load("../dataset/slow/noise.npy")
noise_labels = np.zeros((noise.shape[0], 1), dtype=int)

print("Cries #:", cries.shape)
print("Noise #:", noise.shape)

Cries #: (9000L, 128L)
Noise #: (25000L, 128L)


The shape describes the two dimensions of the dataset, i.e. the amount of rows and their length.

Say with a matrix of (100, 128), there are 100 rows with a vector of length 128.

Now that we have our datasets loaded, we can design our neural network.

In [3]:
# Save the datasets as npy for faster loading
cries = np.loadtxt("../dataset/slow/cries_slow.csv", dtype=np.int16, delimiter=',')
np.save("../dataset/slow/cries", cries)
noise = np.loadtxt("../dataset/slow/noise_slow.csv", dtype=np.int16, delimiter=',')
np.save("../dataset/slow/noise", noise)

In [43]:
c_len = cries.shape[0]
c_cut = c_len/10*10
n_len = noise.shape[0]
n_cut = n_len/10*10

training_data = np.vstack((cries[:c_cut],noise[:n_cut]))
training_labels = np.vstack((cry_labels[:c_cut], noise_labels[:n_cut]))

testing_data = np.vstack((cries[c_cut:],noise[n_cut:]))
testing_labels = np.vstack((cry_labels[c_cut:], noise_labels[n_cut:]))

In [62]:
model = Sequential()
model.add(BatchNormalization(input_shape=(128,)))
model.add(Dense(units=4, activation="relu"))
model.add(Dense(units=1, activation="sigmoid"))

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(training_data, training_labels, epochs=10, batch_size=128)
model.evaluate(testing_data, testing_labels, batch_size=128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[]

In [63]:
# true positive rate
tp = np.sum(model.predict_classes(cries))
tp_rate = float(tp)/cries.shape[0]

# false positive rate
fp = np.sum(model.predict_classes(noise))
fp_rate = float(fp)/noise.shape[0]

print("tp rate: ", tp_rate, "\nfp rate: ", fp_rate)

tp rate:  0.754555555556 
fp rate:  0.035


In [64]:
arr = np.arange(1024).reshape(8, 128)
arr.shape

(8L, 128L)

In [65]:
# See how the predictions spread across the noise levels
model.predict(arr)

array([[1.        ],
       [1.        ],
       [0.9999999 ],
       [0.9999981 ],
       [0.99995863],
       [0.99911195],
       [0.9812626 ],
       [0.7090931 ]], dtype=float32)

In [67]:
# Save the successful model
model.save("recognizer.h5")

Now the model can be used to load and respond to the NXT.

In [72]:
from receiver import NXTReceiver, unpack_u16

rc = NXTReceiver()

while True:
    lines = []
    for i in range(0, 5):
        line = rc.recv(256)
        lines.append([unpack_u16(line[i:i+2]) for i in range(0, line.__len__(), 2)])
    val = np.sum(model.predict_classes(np.vstack(lines))) / 5
    if val > 0.6:
        rc.sock.send(b'\x01')
        print(val)
    else:
        rc.sock.send(b'\x00')
    

Connecting via Bluetooth...
Connected.
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


KeyboardInterrupt: 