In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, MaxPooling1D, Flatten
from keras.layers import LSTM,Convolution1D
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.utils.data_utils import get_file
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import random
import sys
import os
import glob

In [None]:
def load_samples(filename):
    """Read one raw data file and return its samples as a NumPy array.

    The file is interpreted as big-endian signed 16-bit integers. The first
    two values are skipped as header values and every remaining sample is
    shifted right by 3 bits.
    """
    raw = np.fromfile(filename, dtype='>i2')
    return raw[2:] >> 3


filenames = ['./Sec_III_A/LPF2.dat']

# Concatenate first so that the statistics and the histogram below describe
# every listed file, not only the last one that was read.
alldata = np.concatenate([load_samples(filename) for filename in filenames])

mu, sigma = np.mean(alldata), np.std(alldata)
print("%s %s" % (mu, sigma))
# the histogram of the data
n, bins, patches = plt.hist(alldata, 1024, normed=1, facecolor='green', alpha=0.75)

# add a 'best fit' line: normal density with the sample mean and deviation
y = mlab.normpdf(bins, mu, sigma)
l = plt.plot(bins, y, 'r--', linewidth=1)

plt.xlabel('Bins')
plt.ylabel('Probability')
plt.title(r'$\mathrm{Histogram\ of\ QNRG\ Electrical\ raw\ data:}\ \mu=%.2f,\ \sigma=%.2f$' % (mu,sigma))
# Optional fixed axis limits, left disabled:
#plt.axis([-10000, 10000, 0, 0.002])
plt.grid(True)

plt.show()

print(alldata.shape)

In [None]:
def _as_words(samples):
    """Return the given samples as a list of their string representations."""
    return [str(sample) for sample in samples]


# Training portion: the first five million samples.
text = _as_words(alldata[:5000000])
# Held-out portions: four consecutive blocks of one million samples each,
# followed by whatever remains after the nine-millionth sample.
test1 = _as_words(alldata[5000000:6000000])
test2 = _as_words(alldata[6000000:7000000])
test3 = _as_words(alldata[7000000:8000000])
test4 = _as_words(alldata[8000000:9000000])
test5 = _as_words(alldata[9000000:])

# Quick sanity check of the split sizes and of the first few training words.
print("%d %d %d" % (len(text), len(test1), len(test5)))
print(text[:10])


In [None]:
# Every distinct sample value, written as a string, is one "word" of the
# vocabulary. The vocabulary is sorted as strings (lexicographically).
alldata = alldata.astype(str)
chars = sorted(set(alldata))

print(chars)
del alldata
print('Total words:', len(chars))

# Two-way lookup between a word and its position in the sorted vocabulary.
char_indices = {word: position for position, word in enumerate(chars)}
indices_char = dict(enumerate(chars))

In [None]:
# Number of consecutive "words" that form one input "sentence".
maxlen = 100
# Stride between the starts of two consecutive sentences; a larger stride
# produces fewer sentences and therefore faster training.
step = 3

# Slide a window over the training words: each window is one input sentence
# and the word right after the window is its prediction target.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start:start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))


print('Start vectorization...')
vocab_size = len(chars)
# One-hot encodings: X is (sentence, position, word), y is (sentence, word).
X = np.zeros((len(sentences), maxlen, vocab_size), dtype=bool)
y = np.zeros((len(sentences), vocab_size), dtype=bool)
positions = np.arange(maxlen)
for row, words in enumerate(sentences):
    # Mark the word found at every position of this sentence in one assignment.
    X[row, positions, [char_indices[word] for word in words]] = True
    y[row, char_indices[next_chars[row]]] = True
print('Done vectorization!')

In [None]:
# Build the RCNN model: two convolution + max-pooling stages followed by an
# LSTM and a softmax classifier over the vocabulary.
print('Build model...')
model = Sequential()
# Input is one sentence: maxlen positions, each a one-hot vector over the vocabulary.
model.add(Convolution1D(nb_filter=64, filter_length=9, border_mode='same', activation='relu', input_shape=(maxlen, len(chars))))
model.add(MaxPooling1D(pool_length=2))
model.add(Convolution1D(nb_filter=128, filter_length=3, border_mode='same', activation='relu'))
model.add(MaxPooling1D(pool_length=2))
# return_sequences=False: only the LSTM output for the last step is passed on.
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.2))
# One output unit per vocabulary word, normalised to probabilities.
model.add(Dense(len(chars)))
model.add(Activation('softmax'))

optimizer = RMSprop(lr=0.0005)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# summary() prints the table itself and returns nothing, so it is called
# directly; wrapping it in print would emit an extra "None" line.
model.summary()
# Stop once the validation loss has not improved for 4 epochs, and keep only
# the weights of the epoch with the best validation loss on disk.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1)
monitoring = ModelCheckpoint('weights_E3a_ch0_ver6.hdf5', monitor='val_loss', verbose=1, save_best_only=True)


In [None]:
# Train for at most 50 epochs, holding out 20% of the samples for validation;
# the callbacks handle early stopping and checkpointing.
fit_options = dict(
    nb_epoch=50,
    batch_size=128,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stopping, monitoring],
)
model.fit(X, y, **fit_options)
# Reload the weights stored in the checkpoint file so that later cells use
# the saved model rather than the state after the final epoch.
model.load_weights('weights_E3a_ch0_ver6.hdf5')
#os.remove('weights.hdf5')

In [None]:
# Running test: measure next-word prediction accuracy on each held-out segment.
tests = [test1, test2, test3, test4, test5]
# Only the names are dropped here; the lists stay alive through `tests`.
del test1, test2, test3, test4, test5

maxlen = 100         # window length; has to match the input length of the model
step = 1             # evaluate a window starting at every position
# Windows handled per model.predict call. Also the progress-report interval,
# so one "Processed" line is printed per chunk.
chunk_size = 100000
offsets = np.arange(maxlen)

for test in tests:
    # Encode every word once as its vocabulary index; windows and targets are
    # then plain integer slices instead of lists of strings.
    encoded = np.array([char_indices[word] for word in test], dtype=np.intp)
    # Start index of every window that still has a following target word.
    starts = np.arange(0, len(encoded) - maxlen, step)
    n_windows = len(starts)
    print('nb sequences:', n_windows)

    print('Vectorization...')
    n_true = 0
    for first in range(0, n_windows, chunk_size):
        print ("Processed %d %d" % (first, n_true))
        chunk = starts[first:first + chunk_size]
        # One-hot encode only this chunk to keep peak memory bounded.
        rows = np.arange(len(chunk))[:, None]
        Xt = np.zeros((len(chunk), maxlen, len(chars)), dtype=bool)
        Xt[rows, offsets, encoded[chunk[:, None] + offsets]] = True
        # A single batched call replaces one predict call per window.
        preds = model.predict(Xt, batch_size=128, verbose=0)
        # A window counts as correct when the most probable word is the word
        # that actually follows it.
        n_true += int(np.sum(np.argmax(preds, axis=1) == encoded[chunk + maxlen]))
    # A segment no longer than maxlen has no windows; report nan instead of
    # dividing by zero.
    accuracy = float(n_true) / n_windows if n_windows else float('nan')
    print ("%d_%d_%.5f" % (n_true, n_windows, accuracy))