** Some imports **

In [16]:
from scipy import signal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
%matplotlib inline

** Load Data ** 

In [17]:
names = ['ind', 'ax', 'ay', 'az', 'label']
acc_names = ['ax', 'ay', 'az']
label2str = {1:'Working at Computer', 2:'Standing Up, Walking and Going up-down stairs', 
             3:'Standing', 4:'Walking',5:'Going Up\Down Stairs', 6:'Walking and Talking with Someone', 
             7:'Talking while Standing'}
df = pd.read_csv("Dataset/1.csv", sep=',', names=names)
del df['ind']
df = df[df.label != 0]
df['label_str'] = df.label.apply(lambda x:label2str[x])

** Add signals ** 

In [18]:
# Magnitude
df['mag'] = np.sqrt(np.square(df[acc_names]).sum(axis=1)) 
# Median filter - 3rd ordre
def med_fil(df, names):
    df_r = pd.DataFrame()
    df = df[names]
    for column in df.columns:
        name = column+'_mf'
        df_r[name] = signal.medfilt(df[column].values)
    return df_r
df_med = med_fil(df, acc_names)
# Diffrential
def diffrential(df, names):
    df = df[names]
    df_r = df.diff(periods=1, axis=0).fillna(method='backfill')
    df_r.columns = [names[0]+'_d', names[1]+'_d', names[2]+'_d']
    return df_r
df_diff = diffrential(df, acc_names)
# Low pass filter
def lowpass(df, names):
    df = df[names]
    df_r = pd.DataFrame()
    fs = 52 # frequence sampling is 52
    f_cut = 1 # cutoff frequency
    fs_n = f_cut*2.0/fs # normalized frequency
    b,a = signal.butter(N=3, Wn=fs_n, btype='low')
    for column in df.columns : 
        name = column+'_lp'
        df_r[name] = signal.lfilter(b,a,df[column].values)
    return df_r
df_lp = lowpass(df, acc_names)  
# High pass filter 
def highpass(df, names):
    df = df[names]
    df_r = pd.DataFrame()
    fs = 52 # frequence sampling is 52
    f_cut = 1 # cutoff frequency
    fs_n = f_cut*2.0/fs # normalized frequency
    b,a = signal.butter(N=3, Wn=fs_n, btype='high')
    for column in df.columns : 
        name = column+'_hp'
        df_r[name] = signal.lfilter(b,a,df[column].values)
    return df_r
df_hp = highpass(df, acc_names)

# Total
df = pd.concat([df, df_med, df_diff, df_lp, df_hp], axis=1)
df.head()

Unnamed: 0,ax,ay,az,label,label_str,mag,ax_mf,ay_mf,az_mf,ax_d,ay_d,az_d,ax_lp,ay_lp,az_lp,ax_hp,ay_hp,az_hp
0,1502,2215,2153,1,Working at Computer,3434.768988,1502.0,2072.0,2047.0,165.0,-143.0,-106.0,0.29457,0.434402,0.422243,1330.950456,1962.753168,1907.813802
1,1667,2072,2047,1,Working at Computer,3355.932359,1611.0,2072.0,2047.0,165.0,-143.0,-106.0,2.023204,2.907854,2.832932,1155.714581,1362.002403,1353.118154
2,1611,1957,1906,1,Working at Computer,3171.435952,1611.0,1957.0,1906.0,-56.0,-115.0,-141.0,7.011663,9.821684,9.584772,789.314495,875.625099,847.401747
3,1601,1939,1831,1,Working at Computer,3110.543843,1611.0,1957.0,1879.0,-10.0,-18.0,-75.0,16.960462,23.21797,22.667546,517.382527,551.056967,481.033069
4,1643,1965,1879,1,Working at Computer,3176.683018,1604.0,1959.0,1879.0,42.0,26.0,48.0,33.066246,44.414894,43.325413,330.972495,313.877533,283.050573


In [None]:
names = ['ax','ay','az','mag','ax_mf','ay_mf','az_mf','ax_lp','ay_lp','az_lp', 'ax_hp','ay_hp','az_hp']


def extract_variables(df):
    def rms(df, names):
        # TODO
        pass
    def minmax(df, names=names):
        res = []
        for column in df.columns:   
            if column not in  :
                i_min = df[column].argmin()
                i_max = df[column].argmax()
                i1 = min(i_min, i_max)
                i2 = max(i_min, i_max)
                m = df[column].values[i1:i2].mean()
                res.append(m)
        return res
        
    m = df.mean(axis=0).values
    ma = df.mad(axis=0).values
    std = df.std(axis=0).values
    minimum = df.min(axis=0).values
    maximum = df.max(axis=0).values
    skew = df.skew(axis=0).values
    kurt = df.kurtosis(axis=0).values
#     minmax = minmax(df, names) 
#     rms = rms(df, names) # TODO
    inteQ = (df.quantile(q=0.75) - df.quantile(q=0.25)).values
    r = np.hstack([m, ma, std, minimum, maximum, skew, kurt, inteQ])    
    return r

### Windowing

In [None]:
def windowing(signal,size,step):
    d = len(signal) #length of the signal
    nk = int(np.floor((d-size+1)/step))+1 #le nombre de fenetres
    wk = np.zeros((nk,size)) #windows
    for j in range(nk):
        wk[j,:] = signal[j*step:j*step+size]
    return wk

In [None]:
def window_labels(labels,size,step):
    d = len(labels) #length of the signal
    nk = int(np.floor((d-size+1)/step))+1 #le nombre de fenetres
    labelwk = np.zeros((nk)) #window labels
    for j in range(nk):
        labelwk[j] = np.max(np.argmax(np.bincount(labels[j*step:j*step+size])))
    return labelwk

In [None]:
def extract_windows(df,size,step):
    
    """
    extract windows with the specified size and step from the dataframe df
    
    Returns:
    L : List of dataframes. Each dataframe contains a window extracted from each signal in df.
    labels: labels of windows
    """
    
    L = []
    n = df.shape[0]
    L_windows = dict()
    n_windows = int(np.floor((n-size+1)/step))+1
    for column in df.columns:
        if column not in ['label','label_str']:
            L_windows[column] = windowing(df[column],size,step)
    for i in range(n_windows):
        ddf = pd.DataFrame()
        for column in df.columns:
            if column not in ['label','label_str']:
                ddf[column] = L_windows[column][i,:]
        L.append(ddf)
    labels = window_labels(df['label'],size,step)
    
    return L,labels

In [None]:
Nbr_samples = 52
percentage = 0.5 # Between 0 & 1
df_X, df_Y = extract_windows(df,Nbr_samples,percentage*Nbr_samples)

** Extract Matrix of Features ** 

In [None]:
X = extract_variables(df_X[0])
for i in range(1,len(df_X)):
    vec = extract_variables(df_X[i])
    X = np.vstack([X,vec])
    
y = np.array(df_Y) 



** Machine Learning **

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import scale
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegressionCV

In [None]:
knn = KNeighborsClassifier(n_neighbors=5)
X = scale(X)
knn.fit(X,y)
print('training score: '+str(knn.score(X,y)))

In [None]:
confusion_matrix(y, knn.predict(X))

In [None]:
lrcv = LogisticRegressionCV(Cs=10, cv=None, penalty='l1', solver='liblinear', n_jobs=8, multi_class='ovr')
lrcv.fit(X,y)
print('training score: '+str(lrcv.score(X,y)))

In [None]:
param_grid = {'n_neighbors':[3,5,10,15,20,25]}
knncv = GridSearchCV(estimator=knn, param_grid=parm_grid)
knncv.fit(X,y)

In [None]:
print(knncv.cv_results_)
print(knncv.get_params())

In [None]:
from sklearn.model_selection import cross_val_score
clf = KNeighborsClassifier(n_neighbors=5)
print(cross_val_score(clf, X, y, cv=5, scoring='accuracy'))

In [50]:
a = df.groupby('label')
S = a.get_group(1).cov()
del S['label']
S = S.drop(['label'])
for i in range(2,len(a)):
    S1 = a.get_group(i).cov()
    S1 = S1.drop(['label'])
    del S1['label']
    S = S + S1
S

Unnamed: 0,ax,ay,az,mag,ax_mf,ay_mf,az_mf,ax_d,ay_d,az_d,ax_lp,ay_lp,az_lp,ax_hp,ay_hp,az_hp
ax,9004.164152,-3680.213124,2595.348912,3678.971246,8534.725442,-3554.544187,2539.065519,1128.829227,-296.110993,-281.235836,1567.75041,1041.158271,1982.950361,3339.144856,-4025.097503,-126.387458
ay,-3680.213124,25370.306739,2242.697273,15918.913913,-3511.187298,24625.612554,2182.113383,-389.386865,2328.314423,635.7296,828.961015,-1441.250058,-495.02288,-1235.016848,13644.532406,5410.232425
az,2595.348912,2242.697273,15503.125633,11550.684851,2527.168268,2159.524934,14733.336952,638.79462,-222.278282,1876.469929,1382.411055,-918.935886,5793.732645,2388.08677,-3772.848145,4518.307442
mag,3678.971246,15918.913913,11550.684851,18793.051816,3511.580196,15449.172444,11049.704052,688.055569,1244.955357,1329.478709,2108.934171,-951.228995,3960.043436,2261.074279,4694.410116,6013.025214
ax_mf,8534.725442,-3511.187298,2527.168268,3511.580196,8311.988245,-3397.638802,2503.239599,790.94845,-341.657266,-329.169989,1569.922791,1041.745047,1980.797971,2913.603838,-3932.284882,-166.134206
ay_mf,-3554.544187,24625.612554,2159.524934,15449.172444,-3397.638802,24130.195125,2118.260421,-343.768815,1983.66442,642.648303,829.554421,-1402.491279,-500.229708,-1163.16827,13056.51623,5314.881329
az_mf,2539.065519,2182.113383,14733.336952,11049.704052,2503.239599,2118.260421,14431.526766,573.537196,-413.952427,1220.430514,1399.931719,-906.563708,5809.990518,2320.016956,-3842.670817,3791.627596
ax_d,1128.829227,-389.386865,638.79462,688.055569,790.94845,-343.768815,573.537196,2265.050761,-684.055116,360.493298,-118.916392,-115.186972,-40.916913,2174.855947,-1368.864836,661.374681
ay_d,-296.110993,2328.314423,-222.278282,1244.955357,-341.657266,1983.66442,-413.952427,-684.055116,4648.506691,412.03996,43.203109,165.292686,223.896258,-870.285979,6842.625872,961.620627
az_d,-281.235836,635.7296,1876.469929,1329.478709,-329.169989,642.648303,1220.430514,360.493298,412.03996,3752.250355,103.494987,-136.909733,-43.146507,255.807771,330.861516,3227.159863


# KDA