Import Statements

In [69]:
import pandas as pd
import numpy as np
import os
from collections import defaultdict
from collections import OrderedDict
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

reading one file at a time

In [2]:
#trim the start and end of the dataframe specified by cutoff seconds
def trim(dataframe, cutoff, windowSize):
    cutoffTime = cutoff * 1000
    
    #trim from beginning
    previousTimestamp = dataframe[0].iloc[0]
    timeElapsed = 0
    for index, row in dataframe.iterrows():
        nextTimestamp = row[0]
        timeElapsed += (nextTimestamp - previousTimestamp)
        previousTimestamp = nextTimestamp
        if timeElapsed >= cutoffTime:
            cutoffIndex = index
            break
    
    cutoffRange = list(range(cutoffIndex))
    dataframe.drop(dataframe.index[cutoffRange], inplace=True)
    
    #trim from end
    #discard dataframe if it is smaller than windowsize
    if not checkFrame(dataframe, windowSize):
        return None
    
    numRows = dataframe.shape[0]
    nextTimestamp = dataframe[0].iloc[-1]
    timeElapsed = 0
    for index in range(numRows - 1, -1, -1):
        dataRow = dataframe.iloc[index]
        previousTimestamp = dataRow[0]
        timeElapsed += (nextTimestamp - previousTimestamp)
        nextTimestamp = previousTimestamp
        if timeElapsed >= cutoffTime:
            cutoffIndex = index
            break
            
    cutoffRange = list(range(cutoffIndex, numRows))
    dataframe.drop(dataframe.index[cutoffRange], inplace=True) 
    
    #discard dataframe if smaller than window size after trimming
    if not checkFrame(dataframe, windowSize):
        return None
    
    return dataframe

In [3]:
#check size of the dataframe and return False if dataframe smaller than windowsize
def checkFrame(df, window):
    if df.shape == (0, 0):
        return False
    
    startRow = df.iloc[0]
    startTime = startRow[0]
    
    endRow = df.iloc[-1]
    endTime = endRow[0]
    
    diff = endTime - startTime
    if diff < (window * 1000):
        return False
    
    return True

In [4]:
#perform statistical feature extraction and perform windowing
def preprocess(dataFrame, windowSize, isOverlapping=False, overlappingFactor=0):
    
    #preprocess data to contain non overlapping windows of size denoted by windowSize
    #extract statistical features such as mean and variance from the data
    #return the transformed dataframe
    
    #use the preprocessOverlapping function if we need overlapping windows
    if isOverlapping:
        return preprocessOverlapping(dataFrame, windowSize, overlappingFactor)
    
    columns = ['x_mean', 'y_mean', 'z_mean', 'x_variance', 'y_variance', 'z_variance', 'class_label']
    transformedDataframe = pd.DataFrame(columns=columns)
    windowStart = 0
    
    while True:
        windowEl = defaultdict(list)
        window = OrderedDict()
        windowStartTime = dataFrame.iloc[windowStart][0]
        
        for index in range(windowStart, dataFrame.shape[0]):
            row = dataFrame.iloc[index]
            time = row[0]
            
            if (time - windowStartTime) > (windowSize * 1000):
                #elements in window collected, process and add them to the transformed dataframe
                window['x_mean'] = np.mean(windowEl['x'])
                window['y_mean'] = np.mean(windowEl['y'])
                window['z_mean'] = np.mean(windowEl['z'])
                window['x_variance'] = np.var(windowEl['x'])
                window['y_variance'] = np.var(windowEl['y'])
                window['z_variance'] = np.var(windowEl['z'])
                window['class_label'] = windowEl['class']
                transformedDataframe = transformedDataframe.append(window, ignore_index=True)
                
                #set the starting index of the next window
                windowStart = index
                break
            else:
                windowEl['x'].append(row[1])
                windowEl['y'].append(row[2])
                windowEl['z'].append(row[3])
                windowEl['class'] = row[5]
        
        if windowStart == dataFrame.shape[0] or index == (dataFrame.shape[0] - 1):
            break
    return transformedDataframe

In [5]:
def preprocessOverlapping(dataFrame, windowSize, overlappingFactor):
    
    #preprocess data to contain overlapping windows of size denoted by windowSize
    #extract statistical features such as mean and variance from the data
    #return the transformed dataframe
    
    columns = ['x_mean', 'y_mean', 'z_mean', 'x_variance', 'y_variance', 'z_variance', 'class_label']
    transformedDataframe = pd.DataFrame(columns=columns)
    windowStart = 0
    overlapTime = (1 - overlappingFactor) * windowSize * 1000
    
    while True:
        windowEl = defaultdict(list)
        window = OrderedDict()
        windowStartTime = dataFrame.iloc[windowStart][0]
        overlapFound = False
        
        for index in range(windowStart, dataFrame.shape[0]):
            row = dataFrame.iloc[index]
            time = row[0]
            
            if (time - windowStartTime) >= overlapTime and overlapFound == False:
                overlapFound = True
                windowStart = index
                
            if (time - windowStartTime) > (windowSize * 1000):
                #elements in window collected, process and add them to the transformed dataframe
                window['x_mean'] = np.mean(windowEl['x'])
                window['y_mean'] = np.mean(windowEl['y'])
                window['z_mean'] = np.mean(windowEl['z'])
                window['x_variance'] = np.var(windowEl['x'])
                window['y_variance'] = np.var(windowEl['y'])
                window['z_variance'] = np.var(windowEl['z'])
                window['class_label'] = windowEl['class']
                transformedDataframe = transformedDataframe.append(window, ignore_index=True)
                break
            else:
                windowEl['x'].append(row[1])
                windowEl['y'].append(row[2])
                windowEl['z'].append(row[3])
                windowEl['class'] = row[5]

        if windowStart == dataFrame.shape[0] or index == (dataFrame.shape[0] - 1):
            break
    return transformedDataframe

In [6]:
def createDataframe(windowSize, cutoffSize, overlapping, overlappingFactor):
    rootDir = "E:\\UCLA\\CourseWork\\Winter2018\\CS205\\DecentLogger\\CleanData"
    accList = list()
    length = 0
    windowSize = 4
    for root, dirs, files in os.walk(rootDir):
        path = root.split(os.sep)
        #print(root)
        if root[-4:] == 'attr':
            continue
        else:
            for f in files:
                if f == '1_android.sensor.accelerometer.data.csv':
                    full_path = "/".join(path) + "/" + f
                    df = pd.read_csv(full_path,header=None)

                    #discard dataframe if smaller than window size
                    if not checkFrame(df, windowSize):
                        continue
                    else:
                        #trim the dataframe from start and end
                        df = trim(df, cutoff=trimCutoff, windowSize=windowSize)
                        if df is None:
                            continue
                        #extract features from dataframe and perform windowing
                        #print("Before: ", df.shape)
                        transformedDF = preprocess(df, windowSize=windowSize, isOverlapping=overlapping, overlappingFactor=overlappingFactor)
                        #print('After: ', transformedDF.shape)
                        #print('After: ', transformedDF.columns.values)
                        #append to the list of dataframes
                        accList.append(transformedDF)

    #create final dataframe
    accFrame = pd.concat(accList)
    return accFrame           
                


In [75]:
def getModel(modelName, args):
    
    if modelName == 'LogisticRegression':
        model = LogisticRegression(random_state=42)
        
    if modelName == 'SVC':
        model = SVC(random_state=42, kernel=args[0], C=args[1], decision_function_shape=args[2])
        
    if modelName == 'DecisionTreeClassifier':
        model = DecisionTreeClassifier(random_state=42, max_features=args[0], criterion=args[1])
        
    if modelName == 'RandomForestClassifier':
        model = RandomForestClassifier(n_estimators=args[0], criterion=args[1], max_features=args[2], max_depth=args[3], oob_score=True, random_state=42)
        
    if modelName == 'MLPClassifier':
        model = MLPClassifier(hidden_layer_sizes=args[0], activation=args[1], solver=args[2], random_state=42, max_iter=500)
        
    return model
        

In [20]:
from sklearn.model_selection import KFold
from sklearn import metrics
import sys

def classify(modelName, X, y, args=None, verbose=False):
    
    kf = KFold(n_splits=10, shuffle=True)

    foldAccuracy = list()
    foldPrecision = list()
    foldRecall = list()
    bestModel = None
    bestAccuracy = float(sys.maxsize) * (-1)
    
    fold = 1
    
    for trainIndex, testIndex in kf.split(X):
        XTrain, XTest = X[trainIndex], X[testIndex]
        yTrain, yTest = y[trainIndex], y[testIndex]

        model = getModel(modelName, args)
        model.fit(XTrain, yTrain)

        prediction = model.predict(XTest)

        if verbose:
            print("\nFold: ", fold)
            print("Confusion Matrix:")
            cm = metrics.confusion_matrix(yTest, prediction)
            print(cm)
            plt.matshow(cm, cmap = plt.cm.Oranges)
            plt.title('Confusion matrix')
            plt.colorbar()
            plt.ylabel('True label')
            plt.xlabel('Predicted label')
            plt.show()

        accuracy = metrics.accuracy_score(yTest, prediction)
        precision = metrics.precision_score(yTest, prediction, average = None)
        recall = metrics.recall_score(yTest, prediction, average = None)

        foldAccuracy.append(accuracy)
        foldPrecision.append(precision)
        foldRecall.append(recall)
        
        if accuracy > bestAccuracy:
            bestAccuracy = accuracy
            bestModel = model
        
        fold += 1
        
    print("\nBest Accuracy: ", bestAccuracy)
    
    return bestModel, foldAccuracy, foldPrecision, foldRecall

In [9]:
#create data
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

def createDataMatrix(dataframe):

    dataMatrix = dataframe.as_matrix()
    X = dataMatrix[:, :dataMatrix.shape[1] - 1]

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    y = dataMatrix[:, -1]

    le = LabelEncoder()
    y = le.fit_transform(y)
    y = y.astype('int')
    
    return X, y

1 <br>
    trimCutoff = 5s <br>
    windowSize = 2s <br>
    overlapping = False <br>
    overlappingFactor = 0 <br>

In [None]:
#windowing parameters
trimCutoff = 5
windowSize = 2
overlapping = False
overlappingFactor = 0

In [None]:
#create data
df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
X, y = createDataMatrix(df)

print("Shape of X: ", X.shape)
print("Shape of Y: ", y.shape)

In [None]:
#logistic Regression
lrModel, lrAccuracy, lrPrecision, lrRecall = classify('LogisticRegression', X, y)

In [None]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

hmSVC1Model, hmSVC1Accuracy, hmSVC1Precision, hmSVC1Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

hmSVC2Model, hmSVC2Accuracy, hmSVC2Precision, hmSVC2Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

hmSVC3Model, hmSVC3Accuracy, hmSVC3Precision, hmSVC3Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

hmSVC4Model, hmSVC4Accuracy, hmSVC4Precision, hmSVC4Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

smSVC1Model, smSVC1Accuracy, smSVC1Precision, smSVC1Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

smSVC2Model, smSVC2Accuracy, smSVC2Precision, smSVC2Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

smSVC3Model, smSVC3Accuracy, smSVC3Precision, smSVC3Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

smSVC4Model, smSVC4Accuracy, smSVC4Precision, smSVC4Recall = classify('SVC', X, y, args)

In [None]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

dt1Model, dt1Accuracy, dt1Precision, dt1Recall = classify('DecisionTreeClassifier', X, y, args)

In [None]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

dt2Model, dt2Accuracy, dt2Precision, dt2Recall = classify('DecisionTreeClassifier', X, y, args)

2 <br>
    trimCutoff = 5s <br>
    windowSize = 4s <br>
    overlapping = False <br>
    overlappingFactor = 0 <br>

In [13]:
#windowing parameters
trimCutoff = 5
windowSize = 4
overlapping = False
overlappingFactor = 0

In [14]:
#create data
df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
X, y = createDataMatrix(df)

print("Shape of X: ", X.shape)
print("Shape of Y: ", y.shape)

Shape of X:  (1160, 6)
Shape of Y:  (1160,)




In [23]:
#logistic Regression
lrModel, lrAccuracy, lrPrecision, lrRecall = classify('LogisticRegression', X, y)


Best Accuracy:  0.8448275862068966


In [24]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

hmSVC1Model, hmSVC1Accuracy, hmSVC1Precision, hmSVC1Recall = classify('SVC', X, y, args)


Best Accuracy:  0.9396551724137931


In [25]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

hmSVC2Model, hmSVC2Accuracy, hmSVC2Precision, hmSVC2Recall = classify('SVC', X, y, args)


Best Accuracy:  0.9310344827586207


In [26]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

hmSVC3Model, hmSVC3Accuracy, hmSVC3Precision, hmSVC3Recall = classify('SVC', X, y, args)


Best Accuracy:  0.7931034482758621


In [27]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

hmSVC4Model, hmSVC4Accuracy, hmSVC4Precision, hmSVC4Recall = classify('SVC', X, y, args)


Best Accuracy:  0.8275862068965517


In [28]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

smSVC1Model, smSVC1Accuracy, smSVC1Precision, smSVC1Recall = classify('SVC', X, y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.5


In [29]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

smSVC2Model, smSVC2Accuracy, smSVC2Precision, smSVC2Recall = classify('SVC', X, y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.35344827586206895


In [30]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

smSVC3Model, smSVC3Accuracy, smSVC3Precision, smSVC3Recall = classify('SVC', X, y, args)


Best Accuracy:  0.5862068965517241


  'precision', 'predicted', average, warn_for)


In [31]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

smSVC4Model, smSVC4Accuracy, smSVC4Precision, smSVC4Recall = classify('SVC', X, y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.6206896551724138


In [32]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

dt1Model, dt1Accuracy, dt1Precision, dt1Recall = classify('DecisionTreeClassifier', X, y, args)


Best Accuracy:  0.9482758620689655


In [33]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

dt2Model, dt2Accuracy, dt2Precision, dt2Recall = classify('DecisionTreeClassifier', X, y, args)


Best Accuracy:  0.9396551724137931


In [72]:
#Random Forest Classification 1

#n_estimators = 20
#criterion = gini
#max_features = 6
#max_depth = 5

args = [20, 'gini', 6, 5]

rf1Model, rf1Accuracy, rf1Precision, rf1Recall = classify('RandomForestClassifier', X, y, args)

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])



Best Accuracy:  0.9482758620689655


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


In [73]:
#Random Forest Classification 2

#n_estimators = 20
#criterion = entropy
#max_features = 6
#max_depth = 5

args = [20, 'entropy', 6, 5]

rf2Model, rf2Accuracy, rf2Precision, rf2Recall = classify('RandomForestClassifier', X, y, args)

  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])



Best Accuracy:  0.9310344827586207


  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])
  warn("Some inputs do not have OOB scores. "
  predictions[k].sum(axis=1)[:, np.newaxis])


In [76]:
#Multi Layer Perceptron Classification 1

#hidden_layer_sizes = 30
#activation = relu
#solver = adam

args = [30, 'relu', 'adam']

mlp1Model, mlp1Accuracy, mlp1Precision, mlp1Recall = classify('MLPClassifier', X, y, args)




Best Accuracy:  0.9224137931034483


In [77]:
#Multi Layer Perceptron Classification 2

#hidden_layer_sizes = 50
#activation = tanh
#solver = lbfgs

args = [50, 'tanh', 'lbfgs']

mlp2Model, mlp2Accuracy, mlp2Precision, mlp2Recall = classify('MLPClassifier', X, y, args)


Best Accuracy:  0.9655172413793104


3 <br>
    trimCutoff = 5s <br>
    windowSize = 8s <br>
    overlapping = False <br>
    overlappingFactor = 0 <br>

In [None]:
#windowing parameters
trimCutoff = 5
windowSize = 8
overlapping = False
overlappingFactor = 0

In [None]:
#create data
df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
X, y = createDataMatrix(df)

print("Shape of X: ", X.shape)
print("Shape of Y: ", y.shape)

In [None]:
#logistic Regression
lrModel, lrAccuracy, lrPrecision, lrRecall = classify('LogisticRegression', X, y)

In [None]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

hmSVC1Model, hmSVC1Accuracy, hmSVC1Precision, hmSVC1Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

hmSVC2Model, hmSVC2Accuracy, hmSVC2Precision, hmSVC2Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

hmSVC3Model, hmSVC3Accuracy, hmSVC3Precision, hmSVC3Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

hmSVC4Model, hmSVC4Accuracy, hmSVC4Precision, hmSVC4Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

smSVC1Model, smSVC1Accuracy, smSVC1Precision, smSVC1Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

smSVC2Model, smSVC2Accuracy, smSVC2Precision, smSVC2Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

smSVC3Model, smSVC3Accuracy, smSVC3Precision, smSVC3Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

smSVC4Model, smSVC4Accuracy, smSVC4Precision, smSVC4Recall = classify('SVC', X, y, args)

In [None]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

dt1Model, dt1Accuracy, dt1Precision, dt1Recall = classify('DecisionTreeClassifier', X, y, args)

In [None]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

dt2Model, dt2Accuracy, dt2Precision, dt2Recall = classify('DecisionTreeClassifier', X, y, args)

4<br>
    trimCutoff = 5s <br>
    windowSize = 4s <br>
    overlapping = True <br>
    overlappingFactor = 0.5 <br>

In [34]:
#windowing parameters
trimCutoff = 5
windowSize = 4
overlapping = True
overlappingFactor = 0.5

In [36]:
#create data
o_df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
o_X, o_y = createDataMatrix(o_df)

print("Shape of X: ", o_X.shape)
print("Shape of Y: ", o_y.shape)

Shape of X:  (2310, 6)
Shape of Y:  (2310,)




In [57]:
#logistic Regression
o_lrModel, o_lrAccuracy, o_lrPrecision, o_lrRecall = classify('LogisticRegression', o_X, o_y)


Best Accuracy:  0.8354978354978355


In [58]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

o_hmSVC1Model, o_hmSVC1Accuracy, o_hmSVC1Precision, o_hmSVC1Recall = classify('SVC', o_X, o_y, args)


Best Accuracy:  0.961038961038961


In [60]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

o_hmSVC2Model, o_hmSVC2Accuracy, o_hmSVC2Precision, o_hmSVC2Recall = classify('SVC', o_X, o_y, args)


Best Accuracy:  0.9523809523809523


In [61]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

o_hmSVC3Model, o_hmSVC3Accuracy, o_hmSVC3Precision, o_hmSVC3Recall = classify('SVC', o_X, o_y, args)


Best Accuracy:  0.7922077922077922


In [62]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

o_hmSVC4Model, o_hmSVC4Accuracy, o_hmSVC4Precision, o_hmSVC4Recall = classify('SVC', o_X, o_y, args)


Best Accuracy:  0.7922077922077922


In [63]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

o_smSVC1Model, o_smSVC1Accuracy, o_smSVC1Precision, o_smSVC1Recall = classify('SVC', o_X, o_y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.33766233766233766


In [64]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

o_smSVC2Model, o_smSVC2Accuracy, o_smSVC2Precision, o_smSVC2Recall = classify('SVC', o_X, o_y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.36796536796536794


In [65]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

o_smSVC3Model, o_smSVC3Accuracy, o_smSVC3Precision, o_smSVC3Recall = classify('SVC', o_X, o_y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.5757575757575758


In [66]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

o_smSVC4Model, o_smSVC4Accuracy, o_smSVC4Precision, o_smSVC4Recall = classify('SVC', o_X, o_y, args)

  'precision', 'predicted', average, warn_for)



Best Accuracy:  0.6103896103896104


In [67]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

o_dt1Model, o_dt1Accuracy, o_dt1Precision, o_dt1Recall = classify('DecisionTreeClassifier', o_X, o_y, args)


Best Accuracy:  0.9523809523809523


In [68]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

o_dt2Model, o_dt2Accuracy, o_dt2Precision, o_dt2Recall = classify('DecisionTreeClassifier', o_X, o_y, args)


Best Accuracy:  0.961038961038961


5 <br>
    trimCutoff = 5s <br>
    windowSize = 8s <br>
    overlapping = True <br>
    overlappingFactor = 0.5 <br>

In [None]:
#windowing parameters
trimCutoff = 5
windowSize = 8
overlapping = True
overlappingFactor = 0.5

In [None]:
#create data
df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
X, y = createDataMatrix(df)

print("Shape of X: ", X.shape)
print("Shape of Y: ", y.shape)

In [None]:
#logistic Regression
lrModel, lrAccuracy, lrPrecision, lrRecall = classify('LogisticRegression', X, y)

In [None]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

hmSVC1Model, hmSVC1Accuracy, hmSVC1Precision, hmSVC1Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

hmSVC2Model, hmSVC2Accuracy, hmSVC2Precision, hmSVC2Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

hmSVC3Model, hmSVC3Accuracy, hmSVC3Precision, hmSVC3Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

hmSVC4Model, hmSVC4Accuracy, hmSVC4Precision, hmSVC4Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

smSVC1Model, smSVC1Accuracy, smSVC1Precision, smSVC1Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

smSVC2Model, smSVC2Accuracy, smSVC2Precision, smSVC2Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

smSVC3Model, smSVC3Accuracy, smSVC3Precision, smSVC3Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

smSVC4Model, smSVC4Accuracy, smSVC4Precision, smSVC4Recall = classify('SVC', X, y, args)

In [None]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

dt1Model, dt1Accuracy, dt1Precision, dt1Recall = classify('DecisionTreeClassifier', X, y, args)

In [None]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

dt2Model, dt2Accuracy, dt2Precision, dt2Recall = classify('DecisionTreeClassifier', X, y, args)

6 <br>
    trimCutoff = 5s <br>
    windowSize = 4s <br>
    overlapping = True <br>
    overlappingFactor = 0.3 <br>

In [None]:
#windowing parameters
trimCutoff = 5
windowSize = 4
overlapping = True
overlappingFactor = 0.3

In [None]:
#create data
df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
X, y = createDataMatrix(df)

print("Shape of X: ", X.shape)
print("Shape of Y: ", y.shape)

In [None]:
#logistic Regression
lrModel, lrAccuracy, lrPrecision, lrRecall = classify('LogisticRegression', X, y)

In [None]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

hmSVC1Model, hmSVC1Accuracy, hmSVC1Precision, hmSVC1Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

hmSVC2Model, hmSVC2Accuracy, hmSVC2Precision, hmSVC2Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

hmSVC3Model, hmSVC3Accuracy, hmSVC3Precision, hmSVC3Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

hmSVC4Model, hmSVC4Accuracy, hmSVC4Precision, hmSVC4Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

smSVC1Model, smSVC1Accuracy, smSVC1Precision, smSVC1Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

smSVC2Model, smSVC2Accuracy, smSVC2Precision, smSVC2Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

smSVC3Model, smSVC3Accuracy, smSVC3Precision, smSVC3Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

smSVC4Model, smSVC4Accuracy, smSVC4Precision, smSVC4Recall = classify('SVC', X, y, args)

In [None]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

dt1Model, dt1Accuracy, dt1Precision, dt1Recall = classify('DecisionTreeClassifier', X, y, args)

In [None]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

dt2Model, dt2Accuracy, dt2Precision, dt2Recall = classify('DecisionTreeClassifier', X, y, args)

7<br>
    trimCutoff = 5s <br>
    windowSize = 8s <br>
    overlapping = True <br>
    overlappingFactor = 0.3 <br>

In [None]:
#windowing parameters
trimCutoff = 5
windowSize = 8
overlapping = True
overlappingFactor = 0.3

In [None]:
#create data
df = createDataframe(windowSize, trimCutoff, overlapping, overlappingFactor)
X, y = createDataMatrix(df)

print("Shape of X: ", X.shape)
print("Shape of Y: ", y.shape)

In [None]:
#logistic Regression
lrModel, lrAccuracy, lrPrecision, lrRecall = classify('LogisticRegression', X, y)

In [None]:
#hard margin SVM 1

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovo'

args = ['rbf', 1000, 'ovo']

hmSVC1Model, hmSVC1Accuracy, hmSVC1Precision, hmSVC1Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 2

#kernel = 'rbf'
#C = 1000
#decision_function_shape = 'ovr'

args = ['rbf', 1000, 'ovr']

hmSVC2Model, hmSVC2Accuracy, hmSVC2Precision, hmSVC2Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 3

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovo'

args = ['linear', 1000, 'ovo']

hmSVC3Model, hmSVC3Accuracy, hmSVC3Precision, hmSVC3Recall = classify('SVC', X, y, args)

In [None]:
#hard margin SVM 4

#kernel = 'linear'
#C = 1000
#decision_function_shape = 'ovr'

args = ['linear', 1000, 'ovr']

hmSVC4Model, hmSVC4Accuracy, hmSVC4Precision, hmSVC4Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 1

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['rbf', 0.001, 'ovo']

smSVC1Model, smSVC1Accuracy, smSVC1Precision, smSVC1Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 2

#kernel = 'rbf'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['rbf', 0.001, 'ovr']

smSVC2Model, smSVC2Accuracy, smSVC2Precision, smSVC2Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 3

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovo'

args = ['linear', 0.001, 'ovo']

smSVC3Model, smSVC3Accuracy, smSVC3Precision, smSVC3Recall = classify('SVC', X, y, args)

In [None]:
#soft margin SVM 4

#kernel = 'linear'
#C = 0.001
#decision_function_shape = 'ovr'

args = ['linear', 0.001, 'ovr']

smSVC4Model, smSVC4Accuracy, smSVC4Precision, smSVC4Recall = classify('SVC', X, y, args)

In [None]:
#decision Tree 1

#max_features = 6
#criterion = 'gini'

args = [6, 'gini']

dt1Model, dt1Accuracy, dt1Precision, dt1Recall = classify('DecisionTreeClassifier', X, y, args)

In [None]:
#decision Tree 2

#max_features = 6
#criterion = 'entropy'

args = [6, 'entropy']

dt2Model, dt2Accuracy, dt2Precision, dt2Recall = classify('DecisionTreeClassifier', X, y, args)