In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
def compileFiles(directory):
    """Read every entry of `directory` as a CSV and collect its four channel columns.

    Each file is parsed with ``pd.read_csv`` after skipping its first 20 lines,
    and the resulting table must contain the columns ``CH1``, ``CH2``, ``CH3``
    and ``CH4``.

    Parameters
    ----------
    directory : str
        Folder whose entries are all read as CSV files.

    Returns
    -------
    tuple
        ``(files, ch1, ch2, ch3, ch4)``: ``files`` is the ``os.listdir`` result
        and each ``chN`` is a list with one NumPy array per file, ordered like
        ``files``.
    """
    files = os.listdir(directory)

    # Parse all files up front, keeping the listing order.
    frames = [pd.read_csv(os.path.join(directory, name), skiprows=20) for name in files]

    def column_arrays(column):
        # One array per parsed file for the requested column.
        return [np.array(frame[column]) for frame in frames]

    ch1 = column_arrays("CH1")
    ch2 = column_arrays("CH2")
    ch3 = column_arrays("CH3")
    ch4 = column_arrays("CH4")

    return files, ch1, ch2, ch3, ch4

In [3]:
# Load every recording found under RAW_DATA; each chN is a list holding one array per file.
files, ch1, ch2, ch3, ch4 = compileFiles("RAW_DATA")

In [4]:
# Turn each per-file list of arrays into a single NumPy array, one channel at a
# time so that only one extra copy is alive at any moment.
ch1 = np.asarray(ch1)
ch2 = np.asarray(ch2)
ch3 = np.asarray(ch3)
ch4 = np.asarray(ch4)

In [5]:
# Build the label column from the file names returned by compileFiles, so each
# label lines up with the channel row that was loaded from that same file
# (re-listing the directory here could yield a different order or set of files).
# The label is the first character of the file name, parsed as an integer.
labels = np.array([int(name[0]) for name in files]).reshape(-1, 1)

In [6]:
# Sanity check: one label row per listed file, in a single column.
labels.shape

(2617, 1)

#### Storing the arrays locally for easy access

In [23]:
from scipy.io import savemat

# Persist the arrays to a .mat file so they can be reloaded without re-parsing
# the CSVs. The recorded run of this cell stopped with
# "OSError: [Errno 28] No space left on device", so compression is enabled to
# shrink the file that gets written.
savemat(
    "RAW_DATA.mat",
    {"ch1": ch1, "ch2": ch2, "ch3": ch3, "ch4": ch4, "labels": labels},
    do_compression=True,
)

OSError: [Errno 28] No space left on device

#### Getting indices for records with null values

In [7]:
# Rows (records) that contain at least one NaN sample in ANY of the four
# channels. Every channel is inspected on its own; checking ch1 alone would let
# records with missing values in ch2, ch3 or ch4 slip through.
# The set removes rows flagged by more than one channel.
indices = list({
    int(row)
    for channel in (ch1, ch2, ch3, ch4)
    for row in np.flatnonzero(np.isnan(channel).any(axis=1))
})

#### Getting indices for records with inf values

In [8]:
# Add the rows that contain an infinite sample in any channel. np.isinf flags
# both +inf and -inf, whereas comparing against np.inf only catches +inf.
indices += [
    int(row)
    for channel in (ch1, ch2, ch3, ch4)
    for row in np.flatnonzero(np.isinf(channel).any(axis=1))
]

#### Getting the indices to be included

In [9]:
# Row positions that survive the filter: every position from 0 up to the number
# of records, minus the ones collected in `indices`.
includedIndices = np.delete(np.arange(ch1.shape[0]), indices)
includedIndices.shape

(2591,)

In [10]:
# Keep only the retained rows of each channel (selection along the first axis).
ch1 = ch1.take(includedIndices, axis=0)
ch2 = ch2.take(includedIndices, axis=0)
ch3 = ch3.take(includedIndices, axis=0)
ch4 = ch4.take(includedIndices, axis=0)

In [11]:
# Apply the same row selection to the labels so they stay aligned with the channels.
labels = labels.take(includedIndices, axis=0)

In [12]:
from sklearn.model_selection import train_test_split

### CH1

#### Splitting

In [13]:
# Hold out 10% of the CH1 records. The split is stratified on the labels and
# uses a fixed seed so it is repeatable.
ch1_train, ch1_test, ch1_y_train, ch1_y_test = train_test_split(
    ch1,
    labels,
    test_size=0.1,
    random_state=0,
    stratify=labels,
)

In [14]:
# Shape of the CH1 training split: (records, samples per record).
ch1_train.shape

(2331, 100000)

In [15]:
# Append a trailing axis of length 1 so each record becomes (samples, 1),
# the (steps, channels) layout consumed by the Conv1D layers.
ch1_train = np.expand_dims(ch1_train, axis=-1)
ch1_test = np.expand_dims(ch1_test, axis=-1)

#### Model

In [16]:
import tensorflow as tf

from keras import Sequential
from keras.layers import Conv1D, Dense, MaxPooling1D, Flatten
from keras.losses import SparseCategoricalCrossentropy 

In [17]:
# 1-D CNN classifier for a single channel.
# Feature extractor: six stages of Conv1D (1 filter, kernel size 4) followed by
# MaxPooling1D(2). For the 100000-sample records used here the flattened output
# has 1559 values, which is what the first Dense layer receives. No input shape
# is declared on that Dense layer: it is not the first layer of the model, so
# its input size is inferred from the Flatten output.
# Classifier head: a stack of ReLU Dense layers ending in a 7-way softmax.
model = Sequential([Conv1D(filters = 1, kernel_size = 4),
                    MaxPooling1D(2),
                    Conv1D(filters = 1, kernel_size = 4),
                    MaxPooling1D(2),
                    Conv1D(filters = 1, kernel_size = 4),
                    MaxPooling1D(2),
                    Conv1D(filters = 1, kernel_size = 4),
                    MaxPooling1D(2),
                    Conv1D(filters = 1, kernel_size = 4),
                    MaxPooling1D(2),
                    Conv1D(filters = 1, kernel_size = 4),
                    MaxPooling1D(2),
                    Flatten(),
                    Dense(1500, activation = "relu"),
                    Dense(750, activation = "relu"),
                    Dense(300, activation = "relu"),
                    Dense(150, activation = "relu"),
                    Dense(75, activation = "relu"),
                    Dense(20, activation = "relu"),
                    Dense(7, activation = "softmax")])

# Labels are integer class ids, hence the sparse categorical loss.
# `metrics` is given as a list, the form documented for Model.compile.
model.compile(loss = SparseCategoricalCrossentropy(), optimizer = "adam", metrics = ["accuracy"])

In [18]:
# Run the still-untrained model over the training inputs (this call precedes model.fit).
pred = model.predict(ch1_train)



In [19]:
# Display the pre-training predictions: one row of 7 softmax outputs per record.
pred

array([[0.1387787 , 0.14800897, 0.14160296, ..., 0.14262018, 0.14876102,
        0.14352004],
       [0.1395219 , 0.14911202, 0.14104955, ..., 0.14248724, 0.14837538,
        0.14326006],
       [0.13847148, 0.14799358, 0.1418604 , ..., 0.1430429 , 0.1486974 ,
        0.14354686],
       ...,
       [0.13903633, 0.14802751, 0.1414442 , ..., 0.14278702, 0.1485832 ,
        0.1434825 ],
       [0.13836911, 0.14829703, 0.14149764, ..., 0.14240208, 0.14879489,
        0.14393282],
       [0.13878866, 0.14813296, 0.14163451, ..., 0.14286482, 0.14898783,
        0.14326277]], dtype=float32)

In [20]:
# Train on the CH1 split. validation_data is passed as an (inputs, targets)
# tuple, the form documented for Model.fit with NumPy arrays.
# NOTE(review): the held-out test split doubles as the validation set here.
model.fit(ch1_train, ch1_y_train, epochs = 100, validation_data = (ch1_test, ch1_y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
 1/73 [..............................] - ETA: 5s - loss: 0.9307 - accuracy: 0.5312

KeyboardInterrupt: 