# Data Acquisition

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit

In [2]:
#Loading in the training data (one semicolon-separated CSV per sitting position)
# Forward slashes are used as the path separator: the backslash form contained
# "\N", which Python 3 rejects as a malformed named-unicode escape, and forward
# slashes are accepted on Windows as well as on Linux/macOS.
dfLean = pd.read_csv("Data with 3 sitting positions/lean forward.csv", sep=';')
dfNeutral = pd.read_csv("Data with 3 sitting positions/Neutral.csv", sep=';')
dfCrossed = pd.read_csv("Data with 3 sitting positions/Right leg crossed.csv", sep=';')

# Feature extraction

In [3]:
def slidingWindowTrain(data, windowSize):
    """Turn a labelled raw recording into one feature row per sliding window.

    Each window spans ``windowSize`` consecutive rows and starts half a window
    after the previous one. A window is reduced to the mean of every sensor
    column plus the most frequent value of its ``Label`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw samples; must contain the columns ``Sensor1`` .. ``Sensor5`` and
        ``Label``.
    windowSize : int
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        Columns ``Sensor1`` .. ``Sensor5`` and ``Label``, one row per window,
        indexed 0..n-1. Empty (columns only) when ``data`` has fewer than
        ``windowSize`` rows.
    """
    sensorColumns = ['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4', 'Sensor5']

    # Floor division keeps the count an integer; on Python 3 a plain "/" yields
    # a float, which range() refuses.
    # NOTE(review): the count is len(data) // windowSize while the window start
    # only advances by half a window, so the last window ends near row
    # (len(data) + windowSize) / 2 and the later rows are never windowed.
    # Left unchanged to keep existing results - confirm whether this is intended.
    windowCount = len(data) // windowSize

    # Collect plain records and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every iteration.
    records = []
    for i in range(windowCount):
        lowLimit = int(i * 0.5 * windowSize)
        highLimit = int(windowSize + windowSize * i * 0.5)
        dfTemp = data[lowLimit:highLimit]

        record = {name: dfTemp[name].mean() for name in sensorColumns}
        # mode() returns the most frequent label(s); take the first one
        record['Label'] = dfTemp['Label'].mode()[0]
        records.append(record)

    return pd.DataFrame(records, columns=sensorColumns + ['Label'])
        
        
        
    

In [4]:
# Rename the ' Sensor5' column (note the leading space) to 'Sensor5', the name
# slidingWindowTrain looks up, then extract features with a 5-row window.
dfNeutral = dfNeutral.rename(columns={' Sensor5': 'Sensor5'})
dfWindowNeutral = slidingWindowTrain(data=dfNeutral, windowSize=5)

In [5]:
# Rename the ' Sensor5' column (note the leading space) to 'Sensor5', the name
# slidingWindowTrain looks up, then extract features with a 5-row window.
dfCrossed = dfCrossed.rename(columns={' Sensor5': 'Sensor5'})
dfWindowCrossed = slidingWindowTrain(data=dfCrossed, windowSize=5)

In [6]:
# Rename the ' Sensor5' column (note the leading space) to 'Sensor5', the name
# slidingWindowTrain looks up, then extract features with a 5-row window.
dfLean = dfLean.rename(columns={' Sensor5': 'Sensor5'})
dfWindowLean = slidingWindowTrain(data=dfLean, windowSize=5)

In [7]:
# Stack the per-position window tables (lean, crossed, neutral - same order as
# before) into one table. pd.concat replaces DataFrame.append, which was removed
# in pandas 2.0; ignore_index gives every row a unique label instead of
# repeating each input frame's own index.
dfWindow = pd.concat([dfWindowLean, dfWindowCrossed, dfWindowNeutral], ignore_index=True)

In [8]:
# Number of extracted windows per sitting-position label
labelCounts = dfWindow['Label'].value_counts()
labelCounts

Right leg crossed    602
Lean forward         602
Neutral              601
Name: Label, dtype: int64

In [9]:
# Feature matrix: every column of the window table except the label
X = dfWindow.drop(labels=['Label'], axis='columns')

# Target vector for the machine learning model: the label of each window
y = dfWindow['Label']

In [10]:
# Hold out half of the windows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): consecutive windows share about half of their rows, so a random
# split can put near-duplicate windows on both sides and make the test score
# optimistic - consider splitting by recording or by time instead.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.50,
    random_state=37
)

# Building an ML model


In [11]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier


# Random forest with 100 trees. random_state pins the bootstrap sampling and
# feature selection so a re-run reproduces the same model; 37 matches the seed
# already used for the train/test split.
classifier = RandomForestClassifier(n_estimators=100, random_state=37)
classifier = classifier.fit(X_train, y_train)

In [12]:
# Predicted label for every held-out window
predict = classifier.predict(X=X_test)

In [13]:
# Rows are the true labels, columns the predicted labels (scikit-learn convention)
confusion_matrix(y_true=y_test, y_pred=predict)

array([[298,   0,   0],
       [  0, 303,   0],
       [  0,   0, 302]])

# Predicting new data

In [14]:
#Create a new feature extraction function
def slidingWindowTest(data, windowSize):
    """Turn an unlabelled raw recording into one feature row per sliding window.

    Each window spans ``windowSize`` consecutive rows and starts half a window
    after the previous one. A window is reduced to the mean of every sensor
    column; no ``Label`` column is read or produced.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw samples; must contain the columns ``Sensor1`` .. ``Sensor5``.
    windowSize : int
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        Columns ``Sensor1`` .. ``Sensor5``, one row per window, indexed 0..n-1.
        Empty (columns only) when ``data`` has fewer than ``windowSize`` rows.
    """
    sensorColumns = ['Sensor1', 'Sensor2', 'Sensor3', 'Sensor4', 'Sensor5']

    # Floor division keeps the count an integer; on Python 3 a plain "/" yields
    # a float, which range() refuses.
    # NOTE(review): the count is len(data) // windowSize while the window start
    # only advances by half a window, so the last window ends near row
    # (len(data) + windowSize) / 2 and the later rows are never windowed.
    # Left unchanged to keep existing results - confirm whether this is intended.
    windowCount = len(data) // windowSize

    # Collect plain records and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every iteration.
    records = []
    for i in range(windowCount):
        lowLimit = int(i * 0.5 * windowSize)
        highLimit = int(windowSize + windowSize * i * 0.5)
        dfTemp = data[lowLimit:highLimit]

        records.append({name: dfTemp[name].mean() for name in sensorColumns})

    return pd.DataFrame(records, columns=sensorColumns)

In [15]:
#loading new data
# TODO: point this at the CSV recording to classify. It is still the empty
# placeholder, so read_csv fails until a real path is filled in.
newDataPath = ""
dfNewRaw = pd.read_csv(newDataPath, sep=';')

# Apply the same ' Sensor5' -> 'Sensor5' header rename used for the training
# files so slidingWindowTest finds the column; rename leaves the frame
# untouched when that column is absent.
dfNewRaw = dfNewRaw.rename(columns={' Sensor5': 'Sensor5'})

#Extracting features
dfNew = slidingWindowTest(dfNewRaw, 5)

IOError: File  does not exist