In [1]:
import pickle

import keras
from keras.utils.np_utils import to_categorical

import pandas as pd
import numpy as np

In [2]:
# Read the raw dataset from CSV and preview its first rows.
csv_path = './DATA/sensorless_data.csv'
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F40,F41,F42,F43,F44,F45,F46,F47,F48,TARGET
0,-3.0146e-07,8.2603e-06,-1.2e-05,-2e-06,-1.4386e-06,-2.1e-05,0.031718,0.03171,0.031721,-0.032963,...,-0.63308,2.9646,8.1198,-1.4961,-1.4961,-1.4961,-1.4996,-1.4996,-1.4996,1
1,2.9132e-06,-5.2477e-06,3e-06,-6e-06,2.7789e-06,-4e-06,0.030804,0.03081,0.030806,-0.03352,...,-0.59314,7.6252,6.169,-1.4967,-1.4967,-1.4967,-1.5005,-1.5005,-1.5005,1
2,-2.9517e-06,-3.184e-06,-1.6e-05,-1e-06,-1.5753e-06,1.7e-05,0.032877,0.03288,0.032896,-0.029834,...,-0.63252,2.7784,5.3017,-1.4983,-1.4983,-1.4982,-1.4985,-1.4985,-1.4985,1
3,-1.3226e-06,8.8201e-06,-1.6e-05,-5e-06,-7.2829e-07,4e-06,0.02941,0.029401,0.029417,-0.030156,...,-0.62289,6.5534,6.2606,-1.4963,-1.4963,-1.4963,-1.4975,-1.4975,-1.4976,1
4,-6.8366e-08,5.6663e-07,-2.6e-05,-6e-06,-7.9406e-07,1.3e-05,0.030119,0.030119,0.030145,-0.031393,...,-0.6301,4.5155,9.5231,-1.4958,-1.4958,-1.4958,-1.4959,-1.4959,-1.4959,1


### Format input data

In [3]:
# Every column except the last one (TARGET) is used as an input feature.
x_cols = data.columns[:-1].tolist()
# Append a constant-zero F49 column as padding so that each row can later be
# reshaped into a 7x7 grid.
X_data = data[x_cols].assign(F49=0)
X_data.head()

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49
0,-3.0146e-07,8.2603e-06,-1.2e-05,-2e-06,-1.4386e-06,-2.1e-05,0.031718,0.03171,0.031721,-0.032963,...,-0.63308,2.9646,8.1198,-1.4961,-1.4961,-1.4961,-1.4996,-1.4996,-1.4996,0
1,2.9132e-06,-5.2477e-06,3e-06,-6e-06,2.7789e-06,-4e-06,0.030804,0.03081,0.030806,-0.03352,...,-0.59314,7.6252,6.169,-1.4967,-1.4967,-1.4967,-1.5005,-1.5005,-1.5005,0
2,-2.9517e-06,-3.184e-06,-1.6e-05,-1e-06,-1.5753e-06,1.7e-05,0.032877,0.03288,0.032896,-0.029834,...,-0.63252,2.7784,5.3017,-1.4983,-1.4983,-1.4982,-1.4985,-1.4985,-1.4985,0
3,-1.3226e-06,8.8201e-06,-1.6e-05,-5e-06,-7.2829e-07,4e-06,0.02941,0.029401,0.029417,-0.030156,...,-0.62289,6.5534,6.2606,-1.4963,-1.4963,-1.4963,-1.4975,-1.4975,-1.4976,0
4,-6.8366e-08,5.6663e-07,-2.6e-05,-6e-06,-7.9406e-07,1.3e-05,0.030119,0.030119,0.030145,-0.031393,...,-0.6301,4.5155,9.5231,-1.4958,-1.4958,-1.4958,-1.4959,-1.4959,-1.4959,0


In [4]:
# Number of rows (samples) in the feature frame.
len(X_data)

58509

In [5]:
# Lay each row out as a 7x7 grid with a trailing channel axis of size 1,
# which is the input shape the Conv2D layers below are declared with.
n_rows = len(X_data)
feature_matrix = X_data.to_numpy()
X = feature_matrix.reshape(n_rows, 7, 7, 1)
X.shape

(58509, 7, 7, 1)

In [6]:
# Shape of a single sample after the reshape.
X[0].shape

(7, 7, 1)

In [7]:
# Shift the class labels down by one so they start at 0, which is the
# integer range `to_categorical` works with.
# The shift is guarded: the original unconditional `TARGET - 1` moved the
# labels again every time this cell was re-run. This assumes the raw labels
# are 1-based (the preview above shows TARGET == 1) — TODO confirm.
if data['TARGET'].min() == 1:
    data['TARGET'] = data['TARGET'] - 1
y = data['TARGET'].to_numpy()

In [8]:
# One-hot encode the (already shifted) labels, one column per distinct class.
# Encoding from data['TARGET'] instead of from `y` itself keeps this cell
# idempotent: re-running it no longer feeds an already one-hot `y` back into
# `to_categorical`.
y = to_categorical(data['TARGET'].to_numpy(), num_classes=data['TARGET'].nunique())

### Randomizing dataset

In [9]:
# Draw a random subset of the dataset for a quick trial run of the model.
SUBSET_SIZE = 10000
SUBSET_SEED = 42  # fixed seed so Restart & Run All picks the same subset

idxs = np.arange(len(X))
subset_rng = np.random.RandomState(SUBSET_SEED)
# replace=False: the default (replace=True) draws with replacement, which put
# duplicate rows into the subset. The size is clamped so that sampling
# without replacement cannot ask for more rows than exist.
samples = subset_rng.choice(idxs, size=min(SUBSET_SIZE, len(idxs)), replace=False)

X_rand = X[samples]
y_rand = y[samples]

### CNN Model

In [10]:
# CNN over 7x7x1 inputs: two (2x2 convolution -> 2x2 max-pool) stages,
# then a dense hidden layer and an 11-way softmax output.
model = keras.models.Sequential()
model.add(keras.layers.Conv2D(256, (2, 2), strides=1, activation='relu', input_shape=(7, 7, 1)))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(keras.layers.Conv2D(256, (2, 2), strides=1, activation='relu'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(512, activation='relu'))
model.add(keras.layers.Dense(11, activation='softmax'))

In [11]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 6, 6, 256)         1280      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 3, 3, 256)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 2, 2, 256)         262400    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 1, 1, 256)         0         
_________________________________________________________________
flatten (Flatten)            (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 512)               131584    
_________________________________________________________________
dense_1 (Dense)              (None, 11)                5

In [12]:
# Shape of one one-hot label vector in the sampled subset.
y_rand[0].shape

(11,)

In [13]:
# Trial run: compile the model and fit it on the random subset.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# NOTE(review): no batch_size is passed, so with steps_per_epoch=100 each
# epoch runs 100 batches; at Keras' default batch size that looks like fewer
# samples than X_rand holds — confirm this cap is intentional.
history = model.fit(
    X_rand,
    y_rand,
    epochs=10,
    steps_per_epoch=100,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# Training accuracy recorded for the last epoch of the trial fit.
history.history['accuracy'][-1]

0.7714999914169312

### Running Experiment

In [15]:
# Shuffle every row index once and cut the result into `nfolds` disjoint
# folds for cross-validation.
FOLD_SEED = 42  # fixed seed so the fold assignment is reproducible
nfolds = 3

fold_rng = np.random.RandomState(FOLD_SEED)
all_fold_idxs = fold_rng.permutation(idxs)

# Nominal fold size, now derived from `nfolds` instead of a hard-coded 3.
fold_size = len(all_fold_idxs) // nfolds
# array_split always yields exactly `nfolds` pieces (sizes differ by at most
# one). The previous stride-based slicing produced an extra remainder fold
# whenever the length was not divisible, and the training loop, which
# iterates range(nfolds), would never have used those rows.
folds = np.array_split(all_fold_idxs, nfolds)

In [16]:
# Preview the feature rows that were assigned to the first fold.
X_data.iloc[folds[0]]

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F40,F41,F42,F43,F44,F45,F46,F47,F48,F49
13282,-2.498500e-06,0.000020,0.000142,3.876000e-06,0.000050,-0.000129,0.007317,0.007297,0.007155,0.025732,...,-0.66409,0.539460,9.2646,-1.5112,-1.5113,-1.5113,-1.4961,-1.4963,-1.4964,0
5174,1.544100e-05,0.000156,0.000303,-5.227900e-07,-0.000017,0.000292,0.016687,0.016531,0.016228,-0.024502,...,-0.51306,3.364600,4.6597,-1.5057,-1.5057,-1.5059,-1.4954,-1.4955,-1.4954,0
56792,-1.063200e-05,-0.000086,-0.000044,1.347900e-05,-0.000036,-0.000056,-0.116980,-0.116900,-0.116850,-0.123940,...,-0.70077,-0.227160,6.7801,-1.5010,-1.5010,-1.5012,-1.4975,-1.4976,-1.4978,0
50032,-4.186700e-06,0.000003,-0.000074,-4.039300e-07,-0.000005,-0.000037,-0.033640,-0.033643,-0.033569,-0.022103,...,-0.82356,16.849000,11.8230,-1.5014,-1.5015,-1.5013,-1.4977,-1.4977,-1.4976,0
56449,-9.420000e-06,0.000087,0.000153,-6.452900e-06,-0.000062,-0.000222,-0.103150,-0.103240,-0.103390,-0.129020,...,-0.60830,1.596300,2.3231,-1.5057,-1.5059,-1.5054,-1.4990,-1.4991,-1.4989,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,-4.421400e-07,0.000005,-0.000098,-4.069400e-06,-0.000018,-0.000052,0.028602,0.028597,0.028694,0.105540,...,-0.60829,1.117800,5.5143,-1.4993,-1.4994,-1.4987,-1.5013,-1.5014,-1.5011,0
14939,4.539500e-07,-0.000053,-0.000221,-1.003200e-05,0.000025,-0.000121,0.008172,0.008225,0.008446,0.002708,...,-0.67029,2.392700,2.3973,-1.5026,-1.5026,-1.5030,-1.4928,-1.4929,-1.4929,0
38209,-1.472900e-06,0.000001,0.000010,2.217300e-06,0.000004,0.000012,0.042767,0.042766,0.042756,0.021472,...,-0.43613,127.140000,8.0230,-1.5001,-1.5001,-1.5001,-1.4971,-1.4971,-1.4971,0
51274,1.211600e-05,0.000003,0.000111,2.695000e-06,0.000011,0.000060,-0.026697,-0.026700,-0.026811,-0.018667,...,-0.67809,1.038600,5.7108,-1.5090,-1.5091,-1.5090,-1.4978,-1.4978,-1.4974,0


In [17]:
# Exact (float) fold size; a whole number means the data splits evenly.
# Uses `nfolds` rather than a literal 3 so the check follows the fold count.
len(all_fold_idxs) / nfolds

19503.0

In [18]:
# Sanity check: count the distinct shuffled indices (should equal the number
# of rows if no index was drawn twice).
np.unique(all_fold_idxs).size

58509

In [19]:
# Persist the prepared arrays and the fold assignment so that other runs can
# reuse exactly the same inputs and split.
# (`pickle` is imported in the notebook's import cell at the top.)
DATA_DCT_PATH = 'data_dct.pkl'

data_dct = {
    'X': X,          # reshaped feature array
    'y': y,          # one-hot encoded labels
    'folds': folds,  # list of index arrays, one per fold
}

with open(DATA_DCT_PATH, 'wb') as f:
    pickle.dump(data_dct, f)

In [20]:
def _build_cnn(input_shape, n_classes):
    """Return a freshly initialised, compiled CNN classifier.

    The stack is two (Conv2D 256 filters, 2x2 kernel -> MaxPooling2D 2x2)
    stages, followed by Flatten, Dense(512, relu) and a softmax output
    layer — the same layers as the standalone model cell above.

    Args:
        input_shape: shape of a single sample, e.g. ``(7, 7, 1)``.
        n_classes: number of units in the softmax output layer.

    Returns:
        A compiled ``keras.models.Sequential`` (adam optimizer, categorical
        cross-entropy loss, ``categorical_accuracy`` metric).
    """
    cnn = keras.models.Sequential([
        keras.layers.Conv2D(256, (2, 2), strides=1, activation='relu', input_shape=input_shape),
        keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
        keras.layers.Conv2D(256, (2, 2), strides=1, activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2), strides=2),
        keras.layers.Flatten(),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dense(n_classes, activation='softmax'),
    ])
    cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    return cnn


def _metric_value(metric, y_true, y_pred):
    """Update a newly constructed keras metric once and return its value as numpy."""
    # A metric starts out empty, so no reset is needed before the first update.
    metric.update_state(y_true, y_pred)
    return metric.result().numpy()


# Per-fold results, appended in fold order.
train_losses = []
train_accuracies = []
test_losses = []
test_accuracies = []

for fold_idx in range(nfolds):
    print(f"Running on fold {fold_idx + 1}")

    # The current fold is held out for testing ...
    test_idxs = folds[fold_idx]
    X_test, y_test = X[test_idxs], y[test_idxs]

    # ... and all remaining folds are merged into the training set.
    train_idxs = np.concatenate([folds[i] for i in range(nfolds) if i != fold_idx])
    X_train, y_train = X[train_idxs], y[train_idxs]

    # Fresh weights for every fold; shapes are taken from the data itself
    # instead of repeating the literals (7, 7, 1) and 11.
    model = _build_cnn(X.shape[1:], y.shape[1])

    # NOTE(review): no batch_size is passed, so steps_per_epoch=100 caps each
    # epoch at 100 batches; at Keras' default batch size that looks like far
    # fewer samples than X_train holds — confirm this is intentional.
    history = model.fit(X_train, y_train, steps_per_epoch=100, epochs=10)

    # Training metrics are the values the fit history recorded for the last epoch.
    train_loss = history.history['loss'][-1]
    train_accuracy = history.history['categorical_accuracy'][-1]

    # Test metrics are computed from the final model's predictions on the held-out fold.
    test_preds = model.predict(X_test)
    test_loss = _metric_value(keras.metrics.CategoricalCrossentropy(), y_test, test_preds)
    test_accuracy = _metric_value(keras.metrics.CategoricalAccuracy(), y_test, test_preds)

    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

    # save model
    model.save(f'CNN_models/model_fold_{fold_idx + 1}')


Running on fold 1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: CNN_models/model_fold_1\assets
Running on fold 2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: CNN_models/model_fold_2\assets
Running on fold 3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: CNN_models/model_fold_3\assets


In [21]:
# Held-out accuracy of each fold, in fold order.
test_accuracies

[0.814285, 0.841973, 0.8328462]

In [22]:
# Mean held-out accuracy across all folds.
np.mean(test_accuracies)

0.8297014