### Building MLP Model
This Notebook contains the minimum code needed to build, train and save the MLP model.

It starts with setting up required logic and pre-processing the data. Then builds and trains the model. Lastly the necessary models are saved.

In [28]:
import csv
import os

import numpy as np
import scipy as sp
import scipy.interpolate
import matplotlib.pyplot as plt

import seaborn as sns
from sklearn import *
from sklearn.model_selection import train_test_split

### Logic Setup

In [29]:
NUM_DATA_POINTS = 7000

In [30]:
ACC_PATH = './Accelerometer/'
GRAV_PATH = './Gravity/'
GYRO_PATH = './Gyroscope/'
ROT_PATH = './Rotation/'

In [31]:
file_names = os.listdir(ACC_PATH)
print(len(file_names))

1237


In [32]:
def fileName(file):
    return file.split('_')[0]

In [33]:
def parse(row):
    return list(map(lambda x: float(x), row))

def getData(file_path):
    res = []

    with open(file_path) as csvfile:
        r = csv.reader(csvfile, delimiter=',')

        for row in r:
            res.append(row)

    res = list(map(parse, res[1:]))
    
    for row in res:
        row[-1] = int(row[-1])
 
    return np.array(res)

In [34]:
def interpolate(data, numAfter):
    x = np.array(list(map(lambda x: x[-1], data)))

    new_x = np.linspace(x.min(), x.max(), numAfter)

    res = []
	
    for i in range(len(data[0]) - 1) :
        y = list(map(lambda x: x[i], data))

        new_y = sp.interpolate.interp1d(x, y, kind='cubic')(new_x)
        res.append(new_y)
        # np.append(res, [new_y])

    return np.array(res).transpose(1, 0)


In [35]:
def combineFile(file_name):
	name = fileName(file_name)

	accData = getData(ACC_PATH + file_name)
	gravData = getData(GRAV_PATH+ file_name)
	gyroData = getData(GYRO_PATH + file_name)
	rotData = getData(ROT_PATH + file_name)

	accData = interpolate(accData, NUM_DATA_POINTS)
	gravData = interpolate(gravData, NUM_DATA_POINTS)
	gyroData = interpolate(gyroData, NUM_DATA_POINTS)
	rotData = interpolate(rotData, NUM_DATA_POINTS)

	accumData = []

	for i in range(len(accData)):
		accumData.append(np.concatenate([accData[i], gyroData[i], gravData[i], rotData[i]]))

	return np.array(accumData)
	

In [36]:
combineFile('a_1_0.csv').shape

(7000, 12)

In [37]:
def fetchData(names):
    retDict = {}

    for file_name in names:
        name = fileName(file_name)
        data = combineFile(file_name)
        
        try:
            retDict[name].append(data)
        except:
            retDict[name] = [data]
    
    return retDict
            

In [38]:
dict = fetchData(os.listdir(ACC_PATH))

In [39]:
def setupData(dic, user, attacker):
    user_data = np.array(dic[user])
    attacker_data = np.concatenate(list(map(lambda x: dic[x], attacker)))

    user_labels = [1] * len(user_data)
    attacker_labels = [0] * len(attacker_data)

    all_labels = np.concatenate((user_labels, attacker_labels))
    all_data = np.concatenate([user_data, attacker_data])

    return (all_data, all_labels)

In [40]:
def setupDataAttackers(dic, attacker):
    attacker_data = np.concatenate(list(map(lambda x: dic[x], attacker)))
    attacker_labels = [0] * len(attacker_data)

    return (attacker_data,  attacker_labels)

In [41]:
def setupData_ident(dic, allpeople):
    all_data = np.concatenate(list(map(lambda x: dic[x], allpeople)))
    all_labels=[]
    
    i=0
    for peeps in dic.keys():
        all_labels = np.concatenate((all_labels, len(dict[peeps])*[i]))
        i=i+1

    return (all_data, all_labels)

In [42]:
%matplotlib inline
import IPython.core.display         
# setup output image format (Chrome works best)
IPython.core.display.set_matplotlib_formats("svg")
import matplotlib.pyplot as plt
import matplotlib
from numpy import *
import numpy as np

  IPython.core.display.set_matplotlib_formats("svg")


In [43]:
from scipy.fftpack import rfft, irfft, fftfreq, fft, ifft
import copy
from scipy import stats
from scipy import signal
from scipy import io

In [44]:
def lpbf(y):
    sos = signal.butter(10, 7, 'low', fs=1000, output='sos')
    filtered = signal.sosfilt(sos, y)
    #filtered=filtered[:, 200:6800]
    return filtered

In [45]:
d2 = {k: list(map(lambda a: lpbf(a.transpose(1,0)).transpose(1,0), v)) for k, v in dict.items()}

In [46]:
from sklearn import *
from scipy import stats
random.seed(100)
import csv
from scipy import io
import pickle
import keras
import pandas as pd
from keras import models
from keras import layers
from IPython.display import Audio, display
from sklearn.neural_network import MLPClassifier

import numpy as np
from sklearn.preprocessing import StandardScaler

In [47]:
def bow_transform(model, mfccs):
    numwords = model.cluster_centers_.shape[0]
    bows = zeros((len(mfccs), numwords))
    for i in range(len(mfccs)):
        w = model.predict(mfccs[i])
        bw = bincount(w, minlength=numwords)
        bows[i,:] = bw
    return bows

In [48]:
target = 'dd'
attackers = [x for x in dict.keys() if x != target]

(allData, allLabels) = setupData(dict, target, attackers)

print(allData.shape)
print(allLabels.shape)

(1237, 7000, 12)
(1237,)


In [49]:
combine=0
# for target in dict.keys():
for target in ['a']:
    attackers = [x for x in dict.keys() if x != target]
    (allData, allLabels) = setupData(d2, target, attackers)
    #allData=allData[:,:,-3:6]
    #np.delete(allData, np.s_[6:9], axis=1) 

    x_train, x_test, y_train, y_test = train_test_split(
        allData,
        allLabels,
        random_state=101,
        test_size = 0.3,
        shuffle=True
    )
    
    train_data=array(x_train)

    scaler = StandardScaler()
    num_instances, num_time_steps, num_features = train_data.shape
    train_data = np.reshape(train_data, newshape=(-1, num_features))
    train_data = scaler.fit_transform(train_data)

    x_train = np.reshape(train_data, newshape=(num_instances, num_time_steps, num_features))

    val_data=array(x_test)

    num_instances, num_time_steps, num_features = val_data.shape
    val_data = np.reshape(val_data, newshape=(-1, num_features))
    val_data = scaler.transform(val_data)

    x_test = np.reshape(val_data, newshape=(num_instances, num_time_steps, num_features)) 
    
    all_dmfccs = vstack(x_train)
    km = cluster.MiniBatchKMeans(n_clusters=80, random_state=5489, n_init=10, batch_size = 2048, verbose=0)
    km.fit(all_dmfccs[0::10])  # subsample by 10 to make it faster
    km.cluster_centers_
    train_bow = bow_transform(km, x_train)
    test_bow  = bow_transform(km, x_test)

    #feature extraction using BOAW
    tf_trans = feature_extraction.text.TfidfTransformer(use_idf=True, norm='l1')
    train_Xtf = tf_trans.fit_transform(train_bow)
    test_Xtf  = tf_trans.transform(test_bow)

    print(test_Xtf.shape)
    
    #print(train_Xtf.shape)
    
    paramgrid = {'C': logspace(-2,3,20), 
             'gamma': logspace(-4,3,20) }

    mlp = MLPClassifier(solver='adam', max_iter=5000, random_state=0, hidden_layer_sizes=[200, 200])
    mlp.fit(train_Xtf, y_train)

    predY = mlp.predict(test_Xtf)
    acc = metrics.accuracy_score(y_test, predY)
    print(target, ": ", acc)
    combine=combine+acc
    
combine/9

(372, 80)
a :  0.9435483870967742


0.10483870967741936

In [23]:
type(km)

sklearn.cluster._kmeans.MiniBatchKMeans

### Saving Model

In [50]:
import pickle

def save(model, name):
    with open(name, 'wb') as f:
        pickle.dump(model, f)

In [51]:
save(mlp, 'mlp_model.pkl')

In [52]:
save(km, 'km_model.pkl')

In [53]:
save(scaler, 'scaler.pkl')

In [54]:
save(tf_trans, 'tf_transformer.pkl')