## Import libraries

In [1]:
import numpy as np
import pandas as pd
import scipy as sc
import sys
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

sys.path.append('./source/')

from data_processing import *
from feature_extraction import *
from scoring import *

## Read data

In [2]:
# Both cleaned raw files are comma-separated, carry no header row and use the
# same six-column layout, so the column names are attached after loading.
RAW_COLUMNS = ['id_user', 'activity', 'timestamp', 'x', 'y', 'z']

data_uschad = pd.read_table(
    "../data/USC-HAD/USC-HAD_cleared.txt",
    sep=',',
    header=None,
)
data_uschad.columns = list(RAW_COLUMNS)

data_wisdm = pd.read_table(
    "../data/WISDM/WISDM_ar_v1.1_raw_cleared.txt",
    sep=',',
    header=None,
)
data_wisdm.columns = list(RAW_COLUMNS)

## Feature extraction

### Expert functions

The idea is the following: we split each recording into 10-second time series segments (200 measurement points each) and calculate 40 features per segment:
* ```[3]``` - mean acceleration of each axis;
* ```[3]``` - std of acceleration of each axis;
* ```[3]``` - mean absolute deviation of acceleration of each axis;
* ```[1]``` - mean acceleration;
* ```[30]``` - distribution of the time series values of each axis. First, we calculate the min and max of each component ($X, Y, Z$) over the whole interval. Then we divide the range of values of each component into 10 equal intervals and calculate, for each interval, the percentage of values that fall into it.  

We then apply LogisticRegression, SVM and Random Forest to these features.

Example:

In [None]:
# Expert features for the WISDM recordings. get_distribution is kept as the
# last expression of the cell; it reports the per-activity object counts.
df_expert_wisdm = get_feature_matrix(
    data_wisdm,
    'WISDM',
    get_expert_names,
    get_expert_features,
)
get_distribution(data_wisdm, df_expert_wisdm)

Standing            229      5.30  %
Walking             1917     44.36 %
Upstairs            466      10.78 %
Sitting             277      6.41  %
Jogging             1075     24.88 %
Downstairs          357      8.26  %

Number of objects: 4321


In [None]:
# Same expert feature extraction, applied to the USC-HAD recordings.
df_expert_uschad = get_feature_matrix(
    data_uschad,
    'USCHAD',
    get_expert_names,
    get_expert_features,
)
get_distribution(data_uschad, df_expert_uschad)

### Autoregression model

Example:

In [None]:
# `params` is passed to get_feature_matrix along with the autoregressive
# helpers; its first entry `n` is reused later to label the score rows
# ('lr_ar_<n>').
# It has to be a list literal: list(10) raises
# "TypeError: 'int' object is not iterable".
params = [10]
n = params[0]

# Autoregressive features for the WISDM recordings.
df_ar_wisdm = get_feature_matrix(data_wisdm, 'WISDM', get_autoregressive_names,
                                 get_autoregressive_features, params)
get_distribution(data_wisdm, df_ar_wisdm)

In [None]:
# Autoregressive features for the USC-HAD recordings; `params` is whatever
# the most recently executed `params = ...` cell assigned.
df_ar_uschad = get_feature_matrix(
    data_uschad,
    'USCHAD',
    get_autoregressive_names,
    get_autoregressive_features,
    params,
)
get_distribution(data_uschad, df_ar_uschad)

### Spectrum analysis

Example:

In [None]:
# `params` is passed to get_feature_matrix along with the spectrum helpers;
# its first entry `n` is reused later to label the score rows.
# It has to be a list literal: list(10) raises
# "TypeError: 'int' object is not iterable".
# NOTE(review): this rebinds the same `params` / `n` names the autoregression
# section uses, so the values set here are the ones later cells will see.
params = [10]
n = params[0]

# Spectrum-analysis features for the WISDM recordings.
df_ssa_wisdm = get_feature_matrix(data_wisdm, 'WISDM', get_spectrum_names,
                                  get_spectrum_features, params)
get_distribution(data_wisdm, df_ssa_wisdm)

In [None]:
# Spectrum-analysis features for the USC-HAD recordings, using the current
# value of `params`.
df_ssa_uschad = get_feature_matrix(
    data_uschad,
    'USCHAD',
    get_spectrum_names,
    get_spectrum_features,
    params,
)
get_distribution(data_uschad, df_ssa_uschad)

## Testing part 

In [None]:
# Hyper-parameter grid that is passed to get_score together with a
# LogisticRegression estimator.
# NOTE(review): recent scikit-learn releases default to the 'lbfgs' solver,
# which does not accept penalty='l1' - confirm get_score copes with that.
parameters = dict(
    penalty=['l1', 'l2'],
    class_weight=['balanced', None],
    C=np.power(10.0, np.arange(4)),  # 1, 10, 100, 1000
)

# One score table per dataset, filled in by the cells below.
scores_wisdm, scores_uschad = {}, {}

**Expert** features:

In [None]:
# Score logistic regression on the expert feature matrices of both datasets;
# the `parameters` grid is passed straight to get_score.
scores_wisdm.update(
    lr_expert=get_score(df_expert_wisdm, LogisticRegression(), parameters)
)
scores_uschad.update(
    lr_expert=get_score(df_expert_uschad, LogisticRegression(), parameters)
)

From **autoregression model** features:

In [None]:
# Score logistic regression on the autoregressive feature matrices; the row
# label carries the current value of `n`.
scores_wisdm.update({
    'lr_ar_{}'.format(n): get_score(df_ar_wisdm, LogisticRegression(), parameters),
})
scores_uschad.update({
    'lr_ar_{}'.format(n): get_score(df_ar_uschad, LogisticRegression(), parameters),
})

From **spectrum analysis** features:

In [None]:
# Score logistic regression on the spectrum-analysis feature matrices; the
# row label carries the current value of `n`.
scores_wisdm.update({
    'lr_ssa_{}'.format(n): get_score(df_ssa_wisdm, LogisticRegression(), parameters),
})
scores_uschad.update({
    'lr_ssa_{}'.format(n): get_score(df_ssa_uschad, LogisticRegression(), parameters),
})

From **splines** features:

In [None]:
# FIXME(review): this cell is labelled "splines" but scores the
# spectrum-analysis matrices (df_ssa_wisdm / df_ssa_uschad), so these rows
# duplicate the 'lr_ssa_<n>' inputs. No spline feature matrix is built in
# this notebook - extract one and pass it here, or drop the cell.
# NOTE(review): the key also lacks the 'lr_' prefix and the '_' separator
# used by the other score labels.
scores_wisdm['splines' + str(n)] = get_score(df_ssa_wisdm, LogisticRegression(), parameters)
scores_uschad['splines' + str(n)] = get_score(df_ssa_uschad, LogisticRegression(), parameters)

## Results 

In [None]:
# Turn each score dict into a table with one row per score label.
# The first column is named 'all'; the remaining columns are named after the
# distinct activity labels of the raw data.
# NOTE(review): set() iteration order is arbitrary, so these activity column
# names only line up with the per-activity scores if get_score emits them in
# the same order - confirm against get_score, or derive the labels there.
results_wisdm = pd.DataFrame.from_dict(scores_wisdm, orient='index')
results_wisdm.columns = ['all'] + list(set(data_wisdm['activity']))

results_uschad = pd.DataFrame.from_dict(scores_uschad, orient='index')
results_uschad.columns = ['all'] + list(set(data_uschad['activity']))

In [None]:
# Show the WISDM score table (bare last expression -> rich display).
results_wisdm

In [None]:
# Show the USC-HAD score table (bare last expression -> rich display).
results_uschad