In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv1D, LSTM, CuDNNLSTM, Flatten, Dropout
from keras import optimizers
from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# Load the raw recording; the CSV has no header row, so columns get integer names.
data = (
    pd.read_csv("S1-drill.csv", header=None)
    .drop([35, 36, 37], axis=1)  # columns 35, 36 and 37 are all zeros
)
# Renumber the remaining columns consecutively starting from 0.
data.columns = list(range(data.shape[1]))
# After renumbering, column 110 is the one used as the label column.
data = data.rename(columns={110: 'labels'})

In [3]:
from sklearn.preprocessing import OneHotEncoder

# The encoder expects a 2-D input, so the label column is reshaped to (n_rows, 1).
label_column = data['labels'].values.reshape(-1, 1)
encoder = OneHotEncoder()
# fit_transform returns a sparse matrix; toarray() makes it a dense ndarray.
one_hot_labels = encoder.fit_transform(label_column).toarray()
one_hot_labels.shape

(54915, 18)

In [4]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
# Remove the label column so that only feature columns are scaled.
# `axis` is passed by keyword: the positional form was deprecated in pandas 1.3
# and removed in pandas 2.0.
data.drop(['labels'], axis=1, inplace=True)
# From here on `data` is a NumPy array of scaled features, no longer a DataFrame.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split done later in the notebook, so test-set statistics leak into the scaling.
data = scaler.fit_transform(data)

In [5]:
class trainerHelper():
    """Cut a 2-D series into fixed-length, half-overlapping windows.

    Parameters
    ----------
    data : 2-D array
        One row per sample; must support ``data.shape`` and ``data[a:b, :]``.
    labels : 2-D array
        One row per sample (e.g. one-hot encoded); sliced as ``labels[a:b, :]``.
    win_size : int
        Number of consecutive rows in each window.
    num_sensor_data : int
        Number of feature columns in ``data``.
    num_classes : int
        Number of columns in ``labels``.

    The constructor prints the number of rows in ``data``.
    """

    def __init__(self, data, labels, win_size, num_sensor_data, num_classes):
        self.data = data
        self.labels = labels
        self.win_size = win_size
        self.sensor_data = num_sensor_data
        self.num_classes = num_classes
        self.data_length = self.data.shape[0]
        print(self.data_length)
        # Cursor used by windows(); segment_data() resets it before iterating.
        self.start = 0

    def windows(self):
        """Yield ``(start, end)`` row indices of successive windows.

        The cursor ``self.start`` advances by ``win_size / 2`` (a float) and is
        truncated with ``int`` when yielded, so for an odd ``win_size`` the
        effective stride alternates between the two neighbouring integers.
        Iteration continues from the current ``self.start`` and stops once
        ``start + win_size`` is no longer strictly below the number of rows;
        a window ending exactly on the last row is therefore not produced.
        """
        while self.start + self.win_size < self.data_length:
            yield int(self.start), int(self.start + self.win_size)
            self.start += (self.win_size / 2)

    def segment_data(self):
        """Return ``(segments, labels)`` for all windows of the series.

        Returns
        -------
        segments : float ndarray, shape (n_windows, win_size, num_sensor_data)
        labels : float ndarray, shape (n_windows, num_classes)
            For each window, the label row of its last sample.
        """
        self.start = 0
        bounds = list(self.windows())
        # Allocate the outputs once instead of re-stacking inside the loop,
        # and size them from the constructor arguments rather than a fixed 110.
        segments = np.empty((len(bounds), self.win_size, self.sensor_data))
        labels = np.empty((len(bounds), self.num_classes))
        for row, (start, end) in enumerate(bounds):
            segments[row] = self.data[start:end, :]
            labels[row] = self.labels[start:end, :][-1]
        return segments, labels

In [6]:
# Windows of 15 rows over 110 feature columns, with 18 one-hot label columns.
trainer_helper = trainerHelper(
    data=data,
    labels=one_hot_labels,
    win_size=15,
    num_sensor_data=110,
    num_classes=18,
)

54915


In [7]:
# Cut the scaled series into windows; each window gets the label of its last row.
segments, labels = trainer_helper.segment_data()

In [8]:
# Sanity check: (number of windows, window length, feature columns).
segments.shape

(7320, 15, 110)

In [9]:
# Sanity check: (number of windows, number of classes).
labels.shape

(7320, 18)

In [10]:
# Extra names bound to the same arrays (no copy is made).
tot_segments, tot_labels = segments, labels

In [11]:
# NOTE: the features are min-max scaled earlier in the notebook, before windowing.
num_rows = segments.shape[1]
num_cols = segments.shape[2]
trainSplitRatio = 0.8
# Swap the last two axes so each sample is (feature columns, window length),
# which is the input layout the network is built for.
reshapedSegments = segments.transpose(0, 2, 1)
# Random boolean mask: each window goes to the training set with probability
# trainSplitRatio. A dedicated seeded generator makes the split reproducible
# across kernel restarts without altering NumPy's global random state.
split_rng = np.random.RandomState(42)
trainSplit = split_rng.rand(len(reshapedSegments)) < trainSplitRatio
# NOTE(review): consecutive windows overlap by about half their length, so a
# per-window random split puts near-duplicate samples in both sets — confirm
# this is acceptable for the reported test accuracy.
trainX = reshapedSegments[trainSplit]
testX = reshapedSegments[~trainSplit]
trainY = labels[trainSplit]
testY = labels[~trainSplit]

In [12]:
# Sanity check: (training windows, feature columns, window length).
trainX.shape

(5881, 110, 15)

In [13]:
# Show the first feature column of the first window (one value per time step).
print(reshapedSegments[0][0])

[ 0.56200975  0.55776977  0.57197371  0.56794573  0.57070172  0.56476574
  0.55416578  0.55904176  0.55882976  0.55522578  0.55819377  0.54526182
  0.56010176  0.55458978  0.54610982]


In [14]:
model = Sequential() # lets layers be added as a linear stack

In [15]:
# Model hyper-parameters; these mirror the values used when the windows were built.
win_size = 15        # rows (time steps) per window
classes = 18         # number of output classes
num_sensors = 110    # feature columns per row
kernel_height = 5    # Conv1D kernel size
inputshape = (num_sensors, win_size)  # per-sample input shape of the first layer

In [16]:
# First convolutional layer. The input shape comes from `inputshape` instead of
# repeating the window length as a literal.
# NOTE(review): with input (num_sensors, win_size) the convolution slides along
# the sensor axis and treats the time steps as channels — confirm this is intended.
model.add(Conv1D(64, kernel_size=kernel_height, strides=1,
                 activation='relu',
                 input_shape=inputshape))

In [17]:
# Second convolutional layer: 64 filters, ReLU activation.
model.add(
    Conv1D(filters=64, kernel_size=kernel_height, strides=1, activation='relu')
)

In [18]:
# Third convolutional layer: 64 filters, ReLU activation.
model.add(
    Conv1D(filters=64, kernel_size=kernel_height, strides=1, activation='relu')
)

In [19]:
# Fourth convolutional layer: 64 filters, ReLU activation.
model.add(
    Conv1D(filters=64, kernel_size=kernel_height, strides=1, activation='relu')
)

In [20]:
# Dropout (0.5) between the convolutional stack and the first recurrent layer.
model.add(Dropout(0.5))

In [21]:
# First recurrent layer (128 units); return_sequences=True keeps the full
# sequence so the following recurrent layer can consume it.
model.add(CuDNNLSTM(128, return_sequences=True))

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [22]:
# Dropout (0.5) between the two recurrent layers.
model.add(Dropout(0.5))

In [23]:
# Second recurrent layer (128 units); return_sequences=False, so only the
# final output is passed on to the classifier.
model.add(CuDNNLSTM(128, return_sequences=False))

In [24]:
# Dropout (0.5) before the output layer.
model.add(Dropout(0.5))

In [25]:
# Output layer: one softmax unit per class.
model.add(Dense(classes, activation='softmax'))

In [26]:
# The optimizer is RMSprop, so the variable is named accordingly (not `adam`).
rmsprop = optimizers.RMSprop(lr=0.001, decay=1e-6)
# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(loss='categorical_crossentropy', optimizer=rmsprop, metrics=['accuracy'])

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [27]:
# List the layers in the order they were added, as a check of the architecture.
for layer in model.layers:
    print(layer.name)

conv1d_1
conv1d_2
conv1d_3
conv1d_4
dropout_1
cu_dnnlstm_1
dropout_2
cu_dnnlstm_2
dropout_3
dense_1


In [28]:
import keras as keras

In [None]:
batchSize = 100
# Keep a reference to the TensorBoard callback and pass it to fit(); a callback
# that is only constructed is never invoked, so no logs would be written.
tensorboard = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None)
# A fraction (1 - trainSplitRatio) of the training data is held out for validation.
model.fit(trainX, trainY, validation_split=1-trainSplitRatio, epochs=100, batch_size=batchSize, verbose=1, callbacks=[tensorboard])
# With the compiled metrics, `score` is [loss, accuracy] on the held-out test set.
score = model.evaluate(testX, testY, verbose=1)
model.save('tommyCUDACNNmodel.h5')

Train on 4704 samples, validate on 1177 samples
Epoch 1/100


In [38]:
# Test-set [loss, accuracy] as returned by model.evaluate.
print(score)

[0.38723762703712, 0.89189189193118301]


In [42]:
# Per-class softmax outputs for every test window: (test windows, classes).
predictions = model.predict(testX)
predictions.shape

(1496, 18)

In [32]:
# Accuracy computed by hand: a window counts as correct when the arg-max of the
# predicted probabilities equals the arg-max of its one-hot label.
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(testY, axis=1)
count = float(np.sum(predicted_classes == true_classes))
den = float(len(true_classes))

print(count / den)

0.9211229946524064


In [38]:
# Write the predicted class probabilities to CSV (no header row, no index column).
pred_df = pd.DataFrame(predictions)
pred_df.to_csv('preds_test.csv', header=False, index=False)

In [39]:
# Write the one-hot test labels to CSV in the same layout as the predictions.
true_df = pd.DataFrame(testY)
true_df.to_csv('true_test.csv', header=False, index=False)