In [72]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keras import Sequential
from keras.layers import Flatten, Dense, Dropout, Normalization, BatchNormalization, LayerNormalization
from keras.layers import Conv1D, MaxPooling1D
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler, normalize
from sklearn.decomposition import PCA
from keras.utils import to_categorical

import tensorflow as tf

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC  
from sklearn.metrics import accuracy_score

In [73]:
def readData3x(file):
    value = []
    temp = []
    x = []
    y = []
    z = []
    with open(file, 'r') as f:
        for line in f.readlines():
            if (line[0] != '('):
                if temp: 
                    value.append(np.array(temp))
                temp = []
            else:
                data = line[1:-2].split(',')
                temp.append([float(data[0]), float(data[1]), float(data[2])])
    return value


In [74]:
wL = readData3x("hundred_right_lines/ControllerAngularVelocity.txt")
vL = readData3x("hundred_right_lines/ControllerVelocity.txt")

wI = readData3x("hundredInfinity/ControllerAngularVelocity.txt")
vI = readData3x("hundredInfinity/ControllerVelocity.txt")

wC = readData3x("hundredrightcircles/ControllerAngularVelocity.txt")
vC = readData3x("hundredrightcircles/ControllerVelocity.txt")

wS1 = readData3x("spell1CWC/ControllerAngularVelocity.txt")
vS1 = readData3x("spell1CWC/ControllerVelocity.txt")

wS2 = readData3x("spell2line/ControllerAngularVelocity.txt")
vS2 = readData3x("spell2line/ControllerVelocity.txt")

wS3 = readData3x("spell3/ControllerAngularVelocity.txt")
vS3 = readData3x("spell3/ControllerVelocity.txt")
            # This order of w first then v is important
# DataSet = {'line': [wL, vL], 'infinity': [wI, vI], 'circle': [wC, vC]}
# lenghtestlist = [wL, wI, wC]

DataSet = {'S1': [wS1, vS1], 'S2': [wS2, vS2], 'S3': [wS3, vS3]}
lenghtestlist = [v[0] for v in DataSet.values()]
for label, data in DataSet.items():
    print(label, "->", len(data[0]))

S1 -> 264
S2 -> 183
S3 -> 202


In [75]:
longestLengths = []
for type in lenghtestlist:
    longestLengths.append(max([x.shape[0] for x in type]))

lengthFinal = max(longestLengths)
print("length of each 2D sample", lengthFinal)

minSampleSize = min([len(i[0]) for i in DataSet.values()])
print("minimum samples per category are", minSampleSize)

length of each 2D sample 96
minimum samples per category are 183


In [76]:
newDataSet = []
y = []
for label, dataType in DataSet.items():
    dataTypeMatriciesLists = [] # list of data per datatypes for angualrData, velocity data
    for dataMatrixList in dataType: # loops through angularV, linearV, accelerations etcs...
        newDataMatrixList = [] # list of all sampels for that dataType
        for dataMatrix in dataMatrixList:
            # zero padding on both sides
            zerosLeft = (lengthFinal - dataMatrix.shape[0]) // 2
            zerosRight = (lengthFinal - dataMatrix.shape[0] - zerosLeft)
            # zero pad right side
            # zerosLeft = 0
            # zerosRight = lengthFinal - dataMatrix.shape[0]
            # zero padding only on right size
            newDataMatrix = np.pad(dataMatrix, ((zerosLeft, zerosRight), (0, 0)), 'constant')
            # print(dataMatrix.shape, '->', newDataMatrix.shape)
            
            newDataMatrixList.append(newDataMatrix)
        dataTypeMatriciesLists.append(np.array(newDataMatrixList))
        
    [print(label,i.shape) for i in dataTypeMatriciesLists]
    
    combinedData = np.dstack(dataTypeMatriciesLists)
    print("dstacked:", combinedData.shape)
    
    combinedData = combinedData[:minSampleSize, ] # chop of so all labels are balanced
    
    print("chopped to min sample size:", combinedData.shape, end="\n\n")
    newDataSet.append(combinedData)
    y.append(np.full((combinedData.shape[0], 1), label))
    # print("label dim:", y[-1].shape, end="\n\n")
    
newDataSet = np.vstack(newDataSet)
DataSety = np.vstack(y)

S1 (264, 96, 3)
S1 (264, 96, 3)
dstacked: (264, 96, 6)
chopped to min sample size: (183, 96, 6)

S2 (183, 96, 3)
S2 (183, 96, 3)
dstacked: (183, 96, 6)
chopped to min sample size: (183, 96, 6)

S3 (202, 96, 3)
S3 (202, 96, 3)
dstacked: (202, 96, 6)
chopped to min sample size: (183, 96, 6)



In [77]:
newDataSet.shape, DataSety.shape

((549, 96, 6), (549, 1))

In [78]:
stdScaler = StandardScaler()
mmScaler = MinMaxScaler()
stdX = np.array([stdScaler.fit_transform(newDataSet[x,]) for x in range(newDataSet.shape[0])])
# mmX =  np.array([mmScaler.fit_transform(newDataSet[x,]) for x in range(newDataSet.shape[0])])
normX = np.array([normalize(newDataSet[x,], norm='l1') for x in range(newDataSet.shape[0])])

label = LabelEncoder()
DataSety = label.fit_transform(DataSety)

[(label.classes_[i], '->', i) for i in range(len(label.classes_))]

  y = column_or_1d(y, warn=True)


[('S1', '->', 0), ('S2', '->', 1), ('S3', '->', 2)]

In [79]:
trainX, testX, trainy, testy = train_test_split(newDataSet, DataSety, test_size = 0.1, random_state = 30, stratify = DataSety)
trainX2D = trainX.reshape((trainX.shape[0], trainX.shape[1]*trainX.shape[2]))
testX2D  = testX.reshape((testX.shape[0], testX.shape[1]*testX.shape[2]))

print("train shape:", trainX.shape, "test shape", testX.shape)
print("Y train shape:", trainy.shape, "Y test shape", testy.shape)

# one hot encode y
trainy1h = to_categorical(trainy)
testy1h = to_categorical(testy)
print("one hot train shape:->", trainy1h.shape, "one hot test shape:->", testy1h.shape)

train shape: (494, 96, 6) test shape (55, 96, 6)
Y train shape: (494,) Y test shape (55,)
one hot train shape:-> (494, 3) one hot test shape:-> (55, 3)


In [80]:
trainy1h.shape

(494, 3)

In [81]:
n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy1h.shape[1]
modelCNN = Sequential()
modelCNN.add(Normalization(input_shape=(n_timesteps,n_features), name='input'))
modelCNN.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
modelCNN.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
modelCNN.add(Dropout(0.5))
modelCNN.add(MaxPooling1D(pool_size=2))
modelCNN.add(Flatten())
modelCNN.add(Dense(100, activation='relu'))
modelCNN.add(Dense(n_outputs, activation='softmax', name='output'))
modelCNN.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [82]:
modelCNN.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (Normalization)       (None, 96, 6)             13        
                                                                 
 conv1d_8 (Conv1D)           (None, 94, 64)            1216      
                                                                 
 conv1d_9 (Conv1D)           (None, 92, 64)            12352     
                                                                 
 dropout_4 (Dropout)         (None, 92, 64)            0         
                                                                 
 max_pooling1d_4 (MaxPoolin  (None, 46, 64)            0         
 g1D)                                                            
                                                                 
 flatten_4 (Flatten)         (None, 2944)              0         
                                                      

In [83]:
modelCNN.fit(trainX, trainy1h, epochs=10, batch_size=32, verbose=1)
_, accuracy = modelCNN.evaluate(testX, testy1h, batch_size=32, verbose=0)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [84]:
accuracy

0.9818181991577148

In [85]:
rf = RandomForestClassifier(100)
rf.fit(trainX2D, trainy)
y_pred = rf.predict(testX2D)
accuracy_score(testy, y_pred)

1.0

In [86]:
clf = SVC(kernel='linear') 
clf.fit(trainX2D, trainy) 
clf_pred = clf.predict(testX2D)
accuracy_score(testy, clf_pred)

0.9454545454545454

In [87]:
def manyof1cat(DataList):
    dataTypeMatriciesLists = [] # list of data per datatypes for angualrData, velocity data
    for dataMatrixList in DataList: # loops through angularV, linearV, accelerations etcs...
        newDataMatrixList = [] # list of all sampels for that dataType
        for dataMatrix in dataMatrixList:
            if dataMatrix.shape[0] >= lengthFinal:
                print("Larger than allowed,", dataMatrix.shape)
                dataMatrix = dataMatrix[:lengthFinal-dataMatrix.shape[0],:]
            zerosLeft = (lengthFinal - dataMatrix.shape[0]) // 2
            zerosRight = (lengthFinal - dataMatrix.shape[0] - zerosLeft)
            # zerosLeft = 0
            # zerosRight = (lengthFinal - dataMatrix.shape[0])
            # print(dataMatrix.shape, "->", np.pad(dataMatrix, ((zerosLeft, zerosRight), (0, 0)), 'constant').shape)
            # (53, 3) -> (188, 3)
            
            newDataMatrix = np.pad(dataMatrix, ((zerosLeft, zerosRight), (0, 0)), 'constant')
            # print(dataMatrix.shape, '->', newDataMatrix.shape)
            
            newDataMatrixList.append(newDataMatrix)
        dataTypeMatriciesLists.append(np.array(newDataMatrixList))
         
    combinedData = np.dstack(dataTypeMatriciesLists)
    print("dstacked:", combinedData.shape)

    # combinedData = combinedData[:minSampleSize, ] # chop of so all labels are balanced
    
    return combinedData


In [88]:
tst = 'S3'
swishData = manyof1cat(DataSet[tst])
swishData = swishData[minSampleSize:,]
# np.random.shuffle(swishData)
print("Testing data shape not seen", swishData.shape)

swishDatastd = np.array([stdScaler.fit_transform(swishData[x,]) for x in range(swishData.shape[0])])
# swishDatamm =  np.array([mmScaler.fit_transform(swishData[x,]) for x in range(swishData.shape[0])])
swishDatanorm = np.array([normalize(swishData[x,], norm='l1') for x in range(swishData.shape[0])])

swishy = np.full((swishData.shape[0],), label.transform([tst]))
swishy1h = to_categorical(swishy, n_outputs)

_, accCNN = modelCNN.evaluate(swishDatastd, swishy1h, batch_size=32, verbose=0)
print("Priniting CNN: ", tst, "accuracy:", accCNN)

swishDatastd2D = swishDatastd.reshape((swishDatastd.shape[0], swishDatastd.shape[1]*swishDatastd.shape[2]))
y_pred = rf.predict(swishDatastd2D)
clf_pred = clf.predict(swishDatastd2D)
rf_acc = accuracy_score(swishy, y_pred)
clf_acc = accuracy_score(swishy, clf_pred)


print("Priniting RF: ", tst, "accuracy:", rf_acc)
print("Priniting CLF: ", tst, "accuracy:", clf_acc)


Larger than allowed, (96, 3)
Larger than allowed, (96, 3)
dstacked: (202, 96, 6)
Testing data shape not seen (19, 96, 6)
Priniting CNN:  S3 accuracy: 1.0
Priniting RF:  S3 accuracy: 0.9473684210526315
Priniting CLF:  S3 accuracy: 0.5789473684210527


In [89]:

modelCNN.save('bestFuckingMLModel')


INFO:tensorflow:Assets written to: bestFuckingMLModel\assets


INFO:tensorflow:Assets written to: bestFuckingMLModel\assets


In [90]:

import os
os.chdir('./')
os.system('python -m tf2onnx.convert --saved-model bestFuckingMLModel --output bestFuckingModel.onnx')

0