In [12]:
import os

import pandas as pd
import numpy as np

In [37]:
# Length of one feature vector. NOTE(review): presumably the 1395 features of
# the full table divided evenly over the 5 body units -- confirm.
feature_vector_size = 1395 // 5
# Column labels for one instance: the feature positions, then the two labels.
columns = list(range(feature_vector_size)) + ['subject', 'activity']

In [38]:
def feature_extraction(data: pd.DataFrame) -> pd.Series:
    """Build one feature vector from a window of signal samples.

    ``data`` holds one signal per column and one sample per row. The result
    concatenates, in this order:

    1. per-column min, max, mean, skewness and kurtosis (5 values per column);
    2. magnitudes of the 5 largest DFT coefficients of each column;
    3. phase angles of those same coefficients;
    4. the frequency-bin indices of those coefficients;
    5. per-column autocorrelation at lags 1, 5, 10, ..., 50 (11 per column).

    That is 31 values per input column, e.g. 279 for a 9-column input.

    The index labels of the returned Series are not unique (parts 2 and 3 are
    labelled by frequency bin, part 5 by the input column labels), so callers
    should read the result by position rather than by label.
    """
    n_peaks = 5

    # Statistical features: each statistic contributes one value per column.
    statistical = pd.concat(
        [data.min(), data.max(), data.mean(), data.skew(), data.kurtosis()],
        ignore_index=True)

    # FFT features. The transform has to run along the sample axis (axis=0);
    # np.fft.fft defaults to the last axis, which would transform across the
    # columns of each row instead of along each signal.
    spectrum = np.fft.fft(np.asarray(data), axis=0)
    magnitude = pd.DataFrame(np.abs(spectrum))
    phase = pd.DataFrame(np.angle(spectrum))

    peak_values = []
    peak_angles = []
    peak_indices = []
    for position in range(magnitude.shape[1]):
        # nlargest returns the peaks in descending order, labelled by bin.
        top = magnitude.iloc[:, position].nlargest(n_peaks)
        peak_values.append(top)
        peak_angles.append(phase.iloc[:, position].loc[top.index])
        peak_indices.append(pd.Series(top.index))
    largest_values = pd.concat(peak_values)
    largest_angles = pd.concat(peak_angles)
    largest_indices = pd.concat(peak_indices, ignore_index=True)

    # Autocorrelation at lag 1 and at every fifth lag up to 50.
    lags = [1] + list(range(5, 51, 5))
    autocorrelation = pd.concat(
        [data.apply(lambda column, lag=lag: column.autocorr(lag), axis=0)
         for lag in lags])

    # Make result
    return pd.concat([statistical, largest_values, largest_angles,
                      largest_indices, autocorrelation])

In [64]:
def load_instance(path=os.sep.join(['.', 'data', 'a01', 'p1', 's01.txt']),
                  subject='p?', activity='a??'):
    """Read one recording file and return its labelled feature vector.

    Parameters
    ----------
    path : str
        Headerless CSV file with one sample per row.
    subject, activity : str
        Labels stored in the last two columns. They default to the
        placeholders 'p?' and 'a??'.

    Returns
    -------
    pd.DataFrame
        A single row (index 0) whose columns are the module-level ``columns``
        list. The row mixes numbers and strings, so every column has object
        dtype.
    """
    sensors_data = pd.read_csv(path, header=None)
    # Column slices of the raw file, per the original notes:
    #   0:9 T, 9:18 RA, 18:27 LA, 27:36 RL, 36:45 LL (18:36 = LA + RL).
    sensors_data = sensors_data.iloc[:, 0:9]  # T
    features = feature_extraction(sensors_data)
    # Append the labels by position. feature_extraction returns non-unique
    # index labels, so the index is discarded here rather than relied upon.
    labelled = pd.concat([features, pd.Series([subject, activity])],
                         ignore_index=True)
    instance = labelled.to_frame().T
    instance.columns = columns
    return instance

In [65]:
# Build the one-row feature frame for the default recording file.
instance = load_instance()

In [66]:
# Display the extracted row.
instance.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,271,272,273,274,275,276,277,278,subject,activity
0,7.6823,0.99744,5.3169,-0.039399,-0.080639,-0.030754,-0.79554,-0.071582,0.10956,8.1605,...,0.362237,-0.493389,-0.108582,-0.190732,0.135185,-0.67951,-0.746612,-0.829863,p?,a??


In [44]:
# Load the pickled sample table (presumably precomputed features -- confirm).
# NOTE: read_pickle can execute arbitrary code while loading; only open pickle
# files that come from a trusted source.
data = pd.read_pickle(os.sep.join(['.', 'statistical_feature_extraction', 'sample.pkl']))
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1387,1388,1389,1390,1391,1392,1393,1394,subject,activity
0,7.6823,0.99744,5.3169,-0.039399,-0.080639,-0.030754,-0.79554,-0.071582,0.10956,0.58542,...,0.0361526,0.196974,-0.00147586,0.1165,-0.267331,-0.0298675,-0.0931819,-0.0229622,p1,a01
0,7.8472,1.0581,5.1551,-0.033077,-0.041961,-0.02365,-0.79471,-0.072472,0.12376,0.53064,...,-0.116458,-0.169632,-0.0521531,-0.00309607,-0.0368345,-0.126062,0.0267694,0.01302,p1,a01
0,7.847,1.0796,5.4232,-0.029334,-0.035494,-0.019995,-0.79534,-0.072702,0.12447,0.53546,...,0.142753,0.0919386,0.182269,-0.301815,-0.000606122,0.0262859,-0.0188468,-0.0476569,p1,a01
0,7.6901,1.0787,5.3787,-0.038487,-0.035029,-0.03662,-0.79577,-0.076761,0.11873,0.51524,...,-0.0518174,-0.0965734,-0.0530901,0.0133725,0.0469105,0.0664641,-0.0400787,0.165778,p1,a01
0,7.8322,1.0945,5.4544,-0.044333,-0.067467,-0.02282,-0.79592,-0.07504,0.12086,0.50535,...,-0.0741882,-0.0220788,0.072601,-0.0208807,-0.14066,0.15003,-0.0200462,-0.143992,p1,a01


In [70]:
# Keep the rows of subject p1 / activity a01, then only the row(s) whose
# first feature equals 7.6823 exactly.
data2 = data.loc[(data['subject'] == 'p1') & (data['activity'] == 'a01')]
data2 = data2.loc[data2[0] == 7.6823]
data2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1387,1388,1389,1390,1391,1392,1393,1394,subject,activity
0,7.6823,0.99744,5.3169,-0.039399,-0.080639,-0.030754,-0.79554,-0.071582,0.10956,0.58542,...,0.0361526,0.196974,-0.00147586,0.1165,-0.267331,-0.0298675,-0.0931819,-0.0229622,p1,a01


In [78]:
# Body-unit identifiers.
# NOTE(review): load_instance slices the raw columns in the order
# T, RA, LA, RL, LL, which does not match these values -- confirm the intended
# numbering before using them as column offsets.
RIGHT_ARM = 0
LEFT_ARM = 1
RIGHT_LEG = 2
TORSO = 3
LEFT_LEG = 4

# Sensor-type identifiers.
ACCELEROMETER = 0
GYROSCOPE = 1
GYROSCOOPE = GYROSCOPE  # original misspelling, kept so existing references still resolve
MAGNETOMETER = 2

# dimensions: x, y, z

def select_data(
    units = [TORSO, RIGHT_ARM, LEFT_ARM, RIGHT_LEG, LEFT_LEG],
    sensors = [ACCELEROMETER, GYROSCOOPE, MAGNETOMETER]
):
    """Return the first column label of each feature group.

    The feature table is treated as 31 consecutive groups of 45 columns
    (5 units * 3 sensor types * 3 axes); the result is the label of the first
    column in every group: ``[0, 45, 90, ..., 1350]``.

    ``units`` and ``sensors`` are accepted but not used yet, so every call
    returns the same list. The default lists are never mutated.
    """
    sensor_count = 5 * 3 * 3
    feature_count = 31
    return [feature * sensor_count for feature in range(feature_count)]

# Look up the columns chosen by select_data() in the filtered frame.
cols = select_data()
data2[cols]

Unnamed: 0,0,45,90,135,180,225,270,315,360,405,...,945,990,1035,1080,1125,1170,1215,1260,1305,1350
0,7.6823,8.1605,7.97571,-0.0236032,-1.01148,22.8035,8.32262,38.8842,38.8842,8.32262,...,0.867044,0.80327,0.68772,0.550995,0.37507,0.14234,-0.123724,-0.353796,-0.549117,-0.640203


0    8.1605
Name: 9, dtype: object