# Using the Fast Fourier Transform and Support Vector Machines to classify activity data

In [None]:
%matplotlib inline
import matplotlib
matplotlib.rcParams.update({'figure.figsize': (12.0, 8.0)})

In [None]:
from pylab import *
from scipy.fft import fft
from sklearn import svm
import numpy as np

## We have logged some sensor data for three different activities - let's see whether we can use it to classify activities

In [None]:
# remove the trailing empty line from all csv files before importing them!
# you can also get the list of files via `glob.glob("./stand_*.csv")`
# Four recordings per activity, numbered 1 to 4. Note that the hop files are
# named without an underscore ("hop1.csv"), unlike the stand/walk files.
stand_csv = ["./stand_{}.csv".format(n) for n in range(1, 5)]
walk_csv = ["./walk_{}.csv".format(n) for n in range(1, 5)]
hop_csv = ["./hop{}.csv".format(n) for n in range(1, 5)]

In [None]:
# in practice, you would do this with csv.reader or pandas
def read_data(filename):
    """Read a three-axis sensor log and return the per-sample mean of the axes.

    Every non-blank line of the file must contain exactly three
    comma-separated integers (x, y, z). Blank lines - for example a trailing
    empty line at the end of the file - are skipped.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    list of float
        One value ``(x + y + z) / 3`` per data line, in file order.

    Raises
    ------
    ValueError
        If a non-blank line does not consist of exactly three integers.
    """
    avg = []
    # the context manager closes the file even if parsing a line fails
    with open(filename, "r") as csv_file:
        for line in csv_file:
            line = line.strip()
            if not line:
                continue
            _x, _y, _z = map(int, line.split(","))
            avg.append((_x + _y + _z) / 3)
    return avg

In [None]:
# Load every recording; each entry is the list of axis-averaged samples
# returned by read_data for one file.
stand_raw = list(map(read_data, stand_csv))
walk_raw = list(map(read_data, walk_csv))
hop_raw = list(map(read_data, hop_csv))

In [None]:
# Truncate every recording to the length of the shortest one, so that all
# examples have the same number of samples (the classifiers below are fit on
# these lists as fixed-length feature vectors).
# The combined list is called `all_raw` rather than `all` to avoid shadowing
# the builtin of that name.
all_raw = stand_raw + walk_raw + hop_raw
minlen = min([len(recording) for recording in all_raw])
print("Cutting off after", minlen, "samples")
stand_cut = [recording[:minlen] for recording in stand_raw]
walk_cut = [recording[:minlen] for recording in walk_raw]
hop_cut = [recording[:minlen] for recording in hop_raw]

In [None]:
# sanity check: print the truncated, axis-averaged samples of the first "stand" recording
print(stand_cut[0])

## Naive implementation: just throw data at the classifier

In [None]:
# support vector classifier, constructed without arguments (i.e. with scikit-learn's defaults)
c = svm.SVC()

In [None]:
# we will use the first example of each class as test data and the rest as training data
STAND = 0
WALK = 1
HOP = 2
training_data = stand_cut[1:] + walk_cut[1:] + hop_cut[1:]
# one label per training example, in the same order as training_data; the
# counts are derived from the data so labels and examples cannot get out of sync
categories = (
    [STAND] * len(stand_cut[1:])
    + [WALK] * len(walk_cut[1:])
    + [HOP] * len(hop_cut[1:])
)
c.fit(training_data, categories)

In [None]:
# classify the held-out first example of each class (these were not part of
# the training data); a correct result lists the labels STAND, WALK, HOP
c.predict([stand_cut[0], walk_cut[0], hop_cut[0]]) # should be [0, 1, 2]

In [None]:
# NOTE(review): this cell repeats the previous training cell (same labels,
# same training data, same classifier `c`). Re-running it only refits `c` on
# identical data; it also (re)defines `training_data` and `categories`, which
# the neural-network cell below reads. Consider removing the duplication.
# we will use the first example of each class as test data and the rest as training data
STAND = 0
WALK = 1
HOP = 2
categories = [STAND] * 3 + [WALK] * 3 + [HOP] * 3
training_data = stand_cut[1:] + walk_cut[1:] + hop_cut[1:]
c.fit(training_data,categories)

## Let's try a neural network!

In [None]:
# A small multi-layer perceptron, fit on the same raw (time-domain) training
# data and labels that were used for the SVM.
from sklearn.neural_network import MLPClassifier

mlc = MLPClassifier(
    hidden_layer_sizes=(5, 2),
    solver='lbfgs',
    alpha=1e-5,
    random_state=1,  # fixed seed
)
mlc.fit(training_data, categories)

# classify the held-out first example of each class
mlc.predict([stand_cut[0], walk_cut[0], hop_cut[0]])

General principle: "garbage in - garbage out".

While sufficiently large neural networks may find patterns in raw data on their own, they need large training data sets to do so - far more than the three training examples per class used here.


## Try No. 2: Let's help the classifier...

### First step: take a look at the data

In [None]:
# all "stand" recordings overlaid in red
for example in stand_cut:
    plot(example, c='r')
# label the figure so it can be understood on its own
title("stand: axis-averaged sensor value")
xlabel("sample index")
ylabel("mean of x, y, z");

In [None]:
# all "walk" recordings overlaid in green
for example in walk_cut:
    plot(example, c='g')
# label the figure so it can be understood on its own
title("walk: axis-averaged sensor value")
xlabel("sample index")
ylabel("mean of x, y, z");

In [None]:
# all "hop" recordings overlaid in blue
for example in hop_cut:
    plot(example, c='b')
# label the figure so it can be understood on its own
title("hop: axis-averaged sensor value")
xlabel("sample index")
ylabel("mean of x, y, z");

In [None]:
# one example per activity in a single figure; the colours match the
# per-activity plots (stand = red, walk = green, hop = blue)
plot(stand_cut[0], c='r', label='stand')
plot(walk_cut[0], c='g', label='walk')
plot(hop_cut[0], c='b', label='hop')
title("first recording of each activity")
xlabel("sample index")
ylabel("mean of x, y, z")
legend();

### Preprocessing Raw Data using the FFT

In [None]:
def _magnitude_spectrum(samples):
    """Return the length-normalised FFT magnitudes of `samples`.

    Only bins 1 .. len(samples)//2 - 1 are kept: bin 0 (the DC component,
    i.e. the mean of the signal) and the upper half of the FFT output are
    dropped.
    """
    n = len(samples)
    return np.abs(fft(samples) / n)[1:n // 2]


stand_freq = [_magnitude_spectrum(recording) for recording in stand_cut]
walk_freq = [_magnitude_spectrum(recording) for recording in walk_cut]
hop_freq = [_magnitude_spectrum(recording) for recording in hop_cut]

In [None]:
# magnitude spectrum of every "hop" recording (iterating over the list itself
# instead of a hard-coded count, so it works for any number of recordings)
for spectrum in hop_freq:
    plot(range(len(spectrum)), spectrum)
title("hop: FFT magnitude spectrum")
xlabel("frequency bin (DC component removed)")
ylabel("magnitude");

In [None]:
# magnitude spectrum of every "walk" recording (iterating over the list itself
# instead of a hard-coded count, so it works for any number of recordings)
for spectrum in walk_freq:
    plot(range(len(spectrum)), spectrum)
title("walk: FFT magnitude spectrum")
xlabel("frequency bin (DC component removed)")
ylabel("magnitude");

In [None]:
# magnitude spectrum of every "stand" recording (iterating over the list itself
# instead of a hard-coded count, so it works for any number of recordings)
for spectrum in stand_freq:
    plot(range(len(spectrum)), spectrum)
title("stand: FFT magnitude spectrum")
xlabel("frequency bin (DC component removed)")
ylabel("magnitude");

### Train an SVM classifier

In [None]:
# fresh, untrained support vector classifier for the FFT features; this
# rebinds `c`, replacing the classifier that was fit on the raw samples
c = svm.SVC()

In [None]:
# as before: the first example of each class is held out as test data,
# the remaining examples are used for training - now on the FFT magnitudes
STAND = 0
WALK = 1
HOP = 2
training_data = stand_freq[1:] + walk_freq[1:] + hop_freq[1:]
# one label per training example, in the same order as training_data; the
# counts are derived from the data so labels and examples cannot get out of sync
categories = (
    [STAND] * len(stand_freq[1:])
    + [WALK] * len(walk_freq[1:])
    + [HOP] * len(hop_freq[1:])
)
c.fit(training_data, categories)

In [None]:
# classify the held-out first spectrum of each class; a correct result is
# [0, 1, 2], i.e. the labels STAND, WALK, HOP in that order
c.predict([stand_freq[0], walk_freq[0], hop_freq[0]])

## Yay!

(Bonus question: why not just use the mean value of each example for classification?)