In [9]:
import csv
import numpy as np
import random
from sklearn import svm, linear_model, neighbors
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

## Get Data from CSV
def process_csv(file):
    """Load a numeric CSV file as a list of rows of floats.

    The first line of the file is discarded as a header. The number of
    cells in the first remaining row fixes how many leading columns are
    read from every row; extra trailing cells on later rows are ignored.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list with one inner list of floats per data row. The list is
        empty when nothing follows the header (or the file is empty).

    Raises:
        IndexError: If a row has fewer cells than the first data row.
        ValueError: If a cell cannot be converted with float().
    """
    data = []
    # The context manager guarantees the handle is closed, also on errors.
    with open(file) as handle:
        reader = csv.reader(handle)
        # Drop the header line; the default keeps an empty file from
        # leaking StopIteration to the caller.
        next(reader, None)

        width = None
        for row in reader:
            if width is None:
                # The first data row decides how many columns are kept.
                width = len(row)
            if len(row) < width:
                raise IndexError(
                    "line %d of %s has %d cells, expected at least %d"
                    % (reader.line_num, file, len(row), width)
                )
            data.append([float(cell) for cell in row[:width]])
    return data
    
## Take mean of each column
def mean(A):
    """Return the arithmetic mean of each column of a 2-D table.

    A is converted with np.asarray. The result is a plain list with one
    entry per column, each being the column total divided by the number
    of rows.
    """
    table = np.asarray(A)
    column_count = len(table[0])
    row_count = len(table)
    return [sum(table[:, col]) / row_count for col in range(column_count)]
    

#Take standard deviation of each column
def std(A):
    """Return the standard deviation of each column of a 2-D table.

    A is converted with np.asarray and np.std (default arguments) is
    applied to every column in turn. The result is a plain list with one
    entry per column.
    """
    table = np.asarray(A)
    column_count = len(table[0])
    return [np.std(table[:, col]) for col in range(column_count)]


# Shared implementation of getFeat20 / getFeat100; the two public functions
# differ only in how many rows make up one window.
def _window_features(data, window, n_windows=30, n_train=24):
    """Summarise consecutive row windows and split them into train/test.

    The first ``window * n_windows`` rows of ``data`` are cut into
    ``n_windows`` back-to-back windows. Each window becomes one feature
    vector: the column means followed by the column standard deviations,
    with the standard deviation of column 0 left out. The vectors are
    shuffled in place with the module-level ``random`` state and then
    split.

    Args:
        data: Sequence of equally long numeric rows.
        window: Number of rows per window.
        n_windows: Number of windows to extract.
        n_train: Number of shuffled vectors returned as training data.

    Returns:
        (train_data, test_data): two lists of feature vectors; the first
        holds ``n_train`` vectors, the second the remaining ones.

    Raises:
        IndexError: If ``data`` has fewer than ``window * n_windows`` rows.
    """
    needed = window * n_windows
    if len(data) < needed:
        # Same exception type an out-of-range row access would raise,
        # but reported before any work is done and with the actual sizes.
        raise IndexError(
            "need at least %d rows (%d windows of %d), got %d"
            % (needed, n_windows, window, len(data))
        )

    features = []
    for start in range(0, needed, window):
        rows = data[start:start + window]
        column_means = mean(rows)
        column_stds = std(rows)
        # Column 0 contributes only its mean; MLrun reads that value back
        # as the target column (presumably the activity label).
        features.append(column_means + column_stds[1:])

    random.shuffle(features)

    return features[:n_train], features[n_train:]


#Extract features and randomly split data into test and train for each activity.
#This method handles data collected in .5 second intervals
def getFeat20(data):
    """Build 24 training and 6 test feature vectors from 20-row windows.

    Uses the first 600 rows of ``data`` (30 windows of 20 rows).

    Raises:
        IndexError: If ``data`` has fewer than 600 rows.
    """
    return _window_features(data, 20)


#Extract features and randomly split data into test and train for each activity.
#This method handles data collected in .1 second intervals
def getFeat100(data):
    """Build 24 training and 6 test feature vectors from 100-row windows.

    Uses the first 3000 rows of ``data`` (30 windows of 100 rows).

    Raises:
        IndexError: If ``data`` has fewer than 3000 rows.
    """
    return _window_features(data, 100)

## Runs training and test sets through learning algorithms
def MLrun(drive, rest, stair, run, walk):
    """Train four classifiers on one random split and return their accuracy.

    Each argument holds the rows of one activity recording. ``stair`` is
    windowed with getFeat20, the other four with getFeat100. The
    per-activity training sets are concatenated and shuffled, and likewise
    the test sets. Column 0 of every feature vector is used as the target
    and columns 1..11 as the inputs.

    Returns:
        A 4-tuple of test-set accuracies in percent, in the order
        (SVM with linear kernel, decision tree, logistic regression,
        k-nearest neighbours).
    """
    # The call order is kept fixed: every split and shuffle draws from the
    # shared `random` module state.
    trd, tsd = getFeat100(drive)
    trr, tsr = getFeat100(rest)
    trs, tss = getFeat20(stair)
    trrun, tsrun = getFeat100(run)
    trw, tsw = getFeat100(walk)

    train = trd + trr + trs + trrun + trw
    random.shuffle(train)

    test = tsd + tsr + tss + tsrun + tsw
    random.shuffle(test)

    train = np.asarray(train)
    test = np.asarray(test)

    # NOTE(review): the slice keeps at most 11 input columns; confirm the
    # feature vectors are not wider than 12 entries, otherwise trailing
    # columns are silently ignored.
    trainX, trainY = train[:, 1:12], train[:, 0]
    testX, testY = test[:, 1:12], test[:, 0]

    # Same estimators, same default settings and same fitting order as
    # before; only the discarded predict() passes are gone, since score()
    # already predicts on the test inputs itself.
    classifiers = (
        svm.SVC(kernel='linear'),
        DecisionTreeClassifier(),
        linear_model.LogisticRegression(),
        neighbors.KNeighborsClassifier(),
    )

    scores = []
    for classifier in classifiers:
        classifier.fit(trainX, trainY)
        scores.append(classifier.score(testX, testY) * 100)

    return tuple(scores)


############### Run Experiment ###############

# Input recordings, one CSV file per activity.
stair1 = "Stairs.csv"
rest1 = "Rest10.csv"
run1 = "Run.csv"
drive1 = "Drive10.csv"
walk1 = "Walk.csv"


stair = process_csv(stair1)
rest = process_csv(rest1)
run = process_csv(run1)
drive = process_csv(drive1)
walk = process_csv(walk1)


# Running totals of the per-iteration accuracies (in percent).
totSVM = 0
totDT = 0
totLOG = 0
totKNN = 0

# iterations
N = 100

# Every iteration draws a fresh random train/test split inside MLrun.
for i in range(N):

    SVM, DT, LOG, KNN = MLrun(drive, rest, stair, run, walk)
    totSVM += SVM
    totDT += DT
    totLOG += LOG
    totKNN += KNN

avgSVM = totSVM/N
avgDT = totDT/N
avgLOG = totLOG/N
avgKNN = totKNN/N

# Single-argument print(...) with %s writes the same text as the former
# Python 2 print statements and also parses on Python 3.
print("SVM: %s" % avgSVM)

print("Decision Tree: %s" % avgDT)

print("Logistic Regression: %s" % avgLOG)

print("K-Nearest Neighbors: %s" % avgKNN)

SVM: 97.6333333333
Decision Tree: 95.1666666667
Logistic Regression: 96.1666666667
K-Nearest Neighbors: 89.6
