** Some imports **

In [1]:
from scipy import signal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
%matplotlib inline

** Load Data ** 

In [2]:
names = ['ind', 'ax', 'ay', 'az', 'label']
acc_names = ['ax', 'ay', 'az']
label2str = {1:'Working at Computer', 2:'Standing Up, Walking and Going up-down stairs', 
             3:'Standing', 4:'Walking',5:'Going Up\Down Stairs', 6:'Walking and Talking with Someone', 
             7:'Talking while Standing'}
df = pd.read_csv("Dataset/1.csv", sep=',', names=names)
del df['ind']
df = df[df.label != 0]
df['label_str'] = df.label.apply(lambda x:label2str[x])

** Add signals ** 

In [3]:
# Magnitude
df['mag'] = np.sqrt(np.square(df[acc_names]).sum(axis=1)) 
# Median filter - 3rd ordre
def med_fil(df, names):
    df_r = pd.DataFrame()
    df = df[names]
    for column in df.columns:
        name = column+'_mf'
        df_r[name] = signal.medfilt(df[column].values)
    return df_r
df_med = med_fil(df, acc_names)
# Diffrential
def diffrential(df, names):
    df = df[names]
    df_r = df.diff(periods=1, axis=0).fillna(method='backfill')
    df_r.columns = [names[0]+'_d', names[1]+'_d', names[2]+'_d']
    return df_r
df_diff = diffrential(df, acc_names)
# Low pass filter
def lowpass(df, names):
    df = df[names]
    df_r = pd.DataFrame()
    fs = 52 # frequence sampling is 52
    f_cut = 1 # cutoff frequency
    fs_n = f_cut*2.0/fs # normalized frequency
    b,a = signal.butter(N=3, Wn=fs_n, btype='low')
    for column in df.columns : 
        name = column+'_lp'
        df_r[name] = signal.lfilter(b,a,df[column].values)
    return df_r
df_lp = lowpass(df, acc_names)  
# High pass filter 
def highpass(df, names):
    df = df[names]
    df_r = pd.DataFrame()
    fs = 52 # frequence sampling is 52
    f_cut = 1 # cutoff frequency
    fs_n = f_cut*2.0/fs # normalized frequency
    b,a = signal.butter(N=3, Wn=fs_n, btype='high')
    for column in df.columns : 
        name = column+'_hp'
        df_r[name] = signal.lfilter(b,a,df[column].values)
    return df_r
df_hp = highpass(df, acc_names)

# Total
df = pd.concat([df, df_med, df_diff, df_lp, df_hp], axis=1)
df.head()

Unnamed: 0,ax,ay,az,label,label_str,mag,ax_mf,ay_mf,az_mf,ax_d,ay_d,az_d,ax_lp,ay_lp,az_lp,ax_hp,ay_hp,az_hp
0,1502,2215,2153,1,Working at Computer,3434.768988,1502.0,2072.0,2047.0,165.0,-143.0,-106.0,0.29457,0.434402,0.422243,1330.950456,1962.753168,1907.813802
1,1667,2072,2047,1,Working at Computer,3355.932359,1611.0,2072.0,2047.0,165.0,-143.0,-106.0,2.023204,2.907854,2.832932,1155.714581,1362.002403,1353.118154
2,1611,1957,1906,1,Working at Computer,3171.435952,1611.0,1957.0,1906.0,-56.0,-115.0,-141.0,7.011663,9.821684,9.584772,789.314495,875.625099,847.401747
3,1601,1939,1831,1,Working at Computer,3110.543843,1611.0,1957.0,1879.0,-10.0,-18.0,-75.0,16.960462,23.21797,22.667546,517.382527,551.056967,481.033069
4,1643,1965,1879,1,Working at Computer,3176.683018,1604.0,1959.0,1879.0,42.0,26.0,48.0,33.066246,44.414894,43.325413,330.972495,313.877533,283.050573


In [4]:
# names = ['ax','ay','az','mag','ax_mf','ay_mf','az_mf','ax_lp','ay_lp','az_lp', 'ax_hp','ay_hp','az_hp']


def extract_variables(df):
#     def rms(df, names):
#         # TODO
#         pass
#     def minmax(df, names=names):
#         res = []
#         for column in df.columns:   
#             if column in names:
#                 i_min = df[column].argmin()
#                 i_max = df[column].argmax()
#                 i1 = min(i_min, i_max)
#                 i2 = max(i_min, i_max)
#                 m = df[column].values[i1:i2].mean()
#                 res.append(m)
#         return res
        
    m = df.mean(axis=0).values
    ma = df.mad(axis=0).values
    std = df.std(axis=0).values
    minimum = df.min(axis=0).values
    maximum = df.max(axis=0).values
    skew = df.skew(axis=0).values
    kurt = df.kurtosis(axis=0).values
#     minmax = minmax(df, names) 
#     rms = rms(df, names) # TODO
    inteQ = (df.quantile(q=0.75) - df.quantile(q=0.25)).values
    r = np.hstack([m, ma, std, minimum, maximum, skew, kurt, inteQ])    
    return r

### Windowing

In [5]:
def windowing(signal,size,step):
    d = len(signal) #length of the signal
    nk = int(np.floor((d-size+1)/step))+1 #le nombre de fenetres
    wk = np.zeros((nk,size)) #windows
    for j in range(nk):
        wk[j,:] = signal[j*step:j*step+size]
    return wk

In [6]:
def window_labels(labels,size,step):
    d = len(labels) #length of the signal
    nk = int(np.floor((d-size+1)/step))+1 #le nombre de fenetres
    labelwk = np.zeros((nk)) #window labels
    for j in range(nk):
        labelwk[j] = np.max(np.argmax(np.bincount(labels[j*step:j*step+size])))
    return labelwk

In [7]:
def extract_windows(df,size,step):
    
    """
    extract windows with the specified size and step from the dataframe df
    
    Returns:
    L : List of dataframes. Each dataframe contains a window extracted from each signal in df.
    labels: labels of windows
    """
    
    L = []
    n = df.shape[0]
    L_windows = dict()
    n_windows = int(np.floor((n-size+1)/step))+1
    for column in df.columns:
        if column not in ['label','label_str']:
            L_windows[column] = windowing(df[column],size,step)
    for i in range(n_windows):
        ddf = pd.DataFrame()
        for column in df.columns:
            if column not in ['label','label_str']:
                ddf[column] = L_windows[column][i,:]
        L.append(ddf)
    labels = window_labels(df['label'],size,step)
    
    return L,labels

In [8]:
Nbr_samples = 52
percentage = 0.5 # Between 0 & 1
df_X, df_Y = extract_windows(df,Nbr_samples,int(percentage*Nbr_samples))

** Extract Matrix of Features ** 

In [9]:
X = extract_variables(df_X[0])
for i in range(1,len(df_X)):
    vec = extract_variables(df_X[i])
    X = np.vstack([X,vec])
    
y = np.array(df_Y) 



** HMM **

In [47]:
from hmmlearn.hmm import GMMHMM

In [55]:
hmm = GMMHMM(n_components=7,n_iter=10,covariance_type='diag')



In [56]:
hmm.fit(X)



GMMHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
    init_params='stmcw', n_components=7, n_iter=10, n_mix=1,
    params='stmcw', random_state=None, startprob_prior=1.0, tol=0.01,
    transmat_prior=1.0, verbose=False)

In [57]:
from sklearn.metrics import accuracy_score
accuracy_score(y,hmm.predict(X))



0.0