** Some imports **

In [3]:
from scipy import signal
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
%matplotlib inline

** Load Data ** 

In [4]:
names = ['ind', 'ax', 'ay', 'az', 'label']
acc_names = ['ax', 'ay', 'az']
label2str = {1:'Working at Computer', 2:'Standing Up, Walking and Going up-down stairs', 
             3:'Standing', 4:'Walking',5:'Going Up\Down Stairs', 6:'Walking and Talking with Someone', 
             7:'Talking while Standing'}
df = pd.read_csv("Dataset/1.csv", sep=',', names=names)
del df['ind']
df = df[df.label != 0]
df['label_str'] = df.label.apply(lambda x:label2str[x])

** Add signals ** 

In [5]:
# Magnitude
df['mag'] = np.sqrt(np.square(df[acc_names]).sum(axis=1)) 
# Median filter - 3rd ordre
def med_fil(df, names):
    df_r = pd.DataFrame()
    df = df[names]
    for column in df.columns:
        name = column+'_mf'
        df_r[name] = signal.medfilt(df[column].values)
    return df_r
df_med = med_fil(df, acc_names)
# Diffrential
def diffrential(df, names):
    df = df[names]
    df_r = df.diff(periods=1, axis=0).fillna(method='backfill')
    df_r.columns = [names[0]+'_d', names[1]+'_d', names[2]+'_d']
    return df_r
df_diff = diffrential(df, acc_names)
# Low pass filter
def lowpass(df, names):
    df = df[names]
    df_r = pd.DataFrame()
    fs = 52 # frequence sampling is 52
    f_cut = 1 # cutoff frequency
    fs_n = f_cut*2.0/fs # normalized frequency
    b,a = signal.butter(N=3, Wn=fs_n, btype='low')
    for column in df.columns : 
        name = column+'_lp'
        df_r[name] = signal.lfilter(b,a,df[column].values)
    return df_r
df_lp = lowpass(df, acc_names)  
# High pass filter 
def highpass(df, names):
    df = df[names]
    df_r = pd.DataFrame()
    fs = 52 # frequence sampling is 52
    f_cut = 1 # cutoff frequency
    fs_n = f_cut*2.0/fs # normalized frequency
    b,a = signal.butter(N=3, Wn=fs_n, btype='high')
    for column in df.columns : 
        name = column+'_hp'
        df_r[name] = signal.lfilter(b,a,df[column].values)
    return df_r
df_hp = highpass(df, acc_names)

# Total
df = pd.concat([df, df_med, df_diff, df_lp, df_hp], axis=1)
df.head()

Unnamed: 0,ax,ay,az,label,label_str,mag,ax_mf,ay_mf,az_mf,ax_d,ay_d,az_d,ax_lp,ay_lp,az_lp,ax_hp,ay_hp,az_hp
0,1502,2215,2153,1,Working at Computer,3434.768988,1502,2072,2047,165,-143,-106,0.29457,0.434402,0.422243,1330.950456,1962.753168,1907.813802
1,1667,2072,2047,1,Working at Computer,3355.932359,1611,2072,2047,165,-143,-106,2.023204,2.907854,2.832932,1155.714581,1362.002403,1353.118154
2,1611,1957,1906,1,Working at Computer,3171.435952,1611,1957,1906,-56,-115,-141,7.011663,9.821684,9.584772,789.314495,875.625099,847.401747
3,1601,1939,1831,1,Working at Computer,3110.543843,1611,1957,1879,-10,-18,-75,16.960462,23.21797,22.667546,517.382527,551.056967,481.033069
4,1643,1965,1879,1,Working at Computer,3176.683018,1604,1959,1879,42,26,48,33.066246,44.414894,43.325413,330.972495,313.877533,283.050573


In [6]:
# names = ['ax','ay','az','mag','ax_mf','ay_mf','az_mf','ax_lp','ay_lp','az_lp', 'ax_hp','ay_hp','az_hp']


def extract_variables(df):
#     def rms(df, names):
#         # TODO
#         pass
#     def minmax(df, names=names):
#         res = []
#         for column in df.columns:   
#             if column in names:
#                 i_min = df[column].argmin()
#                 i_max = df[column].argmax()
#                 i1 = min(i_min, i_max)
#                 i2 = max(i_min, i_max)
#                 m = df[column].values[i1:i2].mean()
#                 res.append(m)
#         return res
        
    m = df.mean(axis=0).values
    ma = df.mad(axis=0).values
    std = df.std(axis=0).values
    minimum = df.min(axis=0).values
    maximum = df.max(axis=0).values
    skew = df.skew(axis=0).values
    kurt = df.kurtosis(axis=0).values
#     minmax = minmax(df, names) 
#     rms = rms(df, names) # TODO
    inteQ = (df.quantile(q=0.75) - df.quantile(q=0.25)).values
    r = np.hstack([m, ma, std, minimum, maximum, skew, kurt, inteQ])    
    return r

### Windowing

In [7]:
def windowing(signal,size,step):
    d = len(signal) #length of the signal
    nk = int(np.floor((d-size+1)/step))+1 #le nombre de fenetres
    wk = np.zeros((nk,size)) #windows
    for j in range(nk):
        wk[j,:] = signal[j*step:j*step+size]
    return wk

In [8]:
def window_labels(labels,size,step):
    d = len(labels) #length of the signal
    nk = int(np.floor((d-size+1)/step))+1 #le nombre de fenetres
    labelwk = np.zeros((nk)) #window labels
    for j in range(nk):
        labelwk[j] = np.max(np.argmax(np.bincount(labels[j*step:j*step+size])))
    return labelwk

In [9]:
def extract_windows(df,size,step):
    
    """
    extract windows with the specified size and step from the dataframe df
    
    Returns:
    L : List of dataframes. Each dataframe contains a window extracted from each signal in df.
    labels: labels of windows
    """
    
    L = []
    n = df.shape[0]
    L_windows = dict()
    n_windows = int(np.floor((n-size+1)/step))+1
    for column in df.columns:
        if column not in ['label','label_str']:
            L_windows[column] = windowing(df[column],size,step)
    for i in range(n_windows):
        ddf = pd.DataFrame()
        for column in df.columns:
            if column not in ['label','label_str']:
                ddf[column] = L_windows[column][i,:]
        L.append(ddf)
    labels = window_labels(df['label'],size,step)
    
    return L,labels

In [10]:
Nbr_samples = 52
percentage = 0.5 # Between 0 & 1
df_X, df_Y = extract_windows(df,Nbr_samples,int(percentage*Nbr_samples))

** Extract Matrix of Features ** 

In [11]:
X = extract_variables(df_X[0])
for i in range(1,len(df_X)):
    vec = extract_variables(df_X[i])
    X = np.vstack([X,vec])
    
y = np.array(df_Y) 

In [12]:
cla = [2,5]
ix = np.in1d(y, cla)
y = y[ix]
X = X[ix]

In [39]:
from sklearn import hmm
model = hmm.GaussianHMM(2, "full")
model.fit(X)

ImportError: cannot import name 'hmm'

** Machine Learning **

In [36]:
from sklearn.preprocessing import PolynomialFeatures
from scipy.stats import multivariate_normal
from sklearn.preprocessing import normalize
from scipy.misc import logsumexp
from scipy.linalg import inv
from scipy.linalg import pinv
from numpy.matlib import repmat

def log_normalize(x):
    (n,d) = x.shape
    a = np.max(x,axis=1)
    x = x - repmat(a+np.log( np.sum( np.exp(x-repmat(a,1,d).reshape((n,d)))  , axis=1 )  ) ,1,d ).reshape((n,d))
    return x
    #x - repmat(a + log( sum( exp( x - repmat(a,1,d) ) , 2 ) ) ,1,d) ;

def compute_tau(n,K,pi,mean,sigma,Y):
    log_unnormalized = np.zeros((n,K))
    (m,d) = Y.shape
    for b in range(K):
        invsigma = inv(sigma[b])
        xc = (Y-repmat(mean[b],n,1))
        A1 = -0.5*np.sum(np.multiply(np.dot(xc,invsigma),xc),axis=1)
        A2 = -0.5*np.sum(np.log(np.linalg.eig(sigma[b])[0])) 
        A3 = -0.5*d*np.log(2*np.pi) + np.log(pi[b])
        log_unnormalized[:,b] =  A1+A2+A3
    logtau = log_normalize(log_unnormalized)
    tau = np.exp(logtau)
    return tau

def sumLogExp(x):
    a = np.max(x)
    return a + np.log(np.sum(np.exp(x - a)))
    
    

def fit(Y,K,p,labels,max_iter):
    #p is the degree of the polynomial regression
    #K is the number of classes
    #Y is a n*d obeservation matrix
    #labels is the labels matrix of Y
    
    """ Initialisation """
    n,d = Y.shape
    pf = PolynomialFeatures(degree=p) #transformer to get polynomial features of the time
    t = pf.fit_transform(np.arange(n).reshape(-1,1)) #matrix containing the polynomial features of the time
    pi = (1.0/K)*np.ones((K))
    A = (1.0/(2.0*(K-1)))*np.ones((K,K))
    for i in range(K):
        A[i,i] = 0.5
    B = [] #list of coefficient matrices
    sigma = [] #list of covariance matrices 
    for i in range(K):
        B.append(np.random.uniform(-1,1,(p+1,d)))
        s = np.transpose(Y[labels==i,:])
        #sigma.append(np.cov(s))
        sigma.append(np.eye(d))
    
   
    #F = np.zeros((K,n))
    F_log = np.zeros((K,n))
    for k in range(K):
        for i in range(n):
            mean = np.mean(Y,axis=0) #np.dot(B[k].T,t[k,:])
            cov = 100*np.eye(d) #sigma[k]
            F_log[k,i]=multivariate_normal.pdf(Y[i,:],mean,cov)
    print(F_log)


    
    
    
    for i in range(max_iter):
        """ E step """
        #appeler tranpose(F) :c
        #calculate P1 and P2
        #P1 is a n*K matrix
        #P2 is a matrix of size n*K*K
        P11,P2 = fwd_bkw(pi,np.transpose(F),A)
        P1 = np.transpose(P11)
        """ M step """
        pi = P1[0,:]
        A = np.transpose(np.sum(P2[1:,:,:],axis=0))
        for j in range(K):
            A[j,:] = A[j,:]/np.sum(P1[1:,:],axis=0)
            W = np.diag(P1[:,j])
            #print(np.sum(P1[:,j]))
            M = np.dot(np.transpose(t),np.dot(W,t))
            m = M.shape[0]
            B[j] = np.dot(inv(M),np.transpose(t))
            B[j] = np.dot(B[j],np.dot(W,Y))
            c = 1.0/np.sum(P1[:,j])
            M = Y-np.dot(t,B[j])
            sigma[j] = c*np.dot(np.transpose(M),np.dot(W,M))
        #calculate the Bs and the sigmas
        """ Update F """
        F = np.zeros((n,K))
        for i in range(n):
            for k in range(K):
                mean = np.dot(B[k].T,t[k,:])
                cov = np.eye(d) # sigma[k]
                F[i,k]=multivariate_normal.pdf(Y[i,:],mean, cov)


        
    return pi,A,B,sigma

def fwd_bkw(init_dist,F,A):
    [K,T] = F.shape
    alpha = np.zeros((K,T))
    beta = np.zeros((K,T))
    
    # Forward recursion
    t=1
    alpha[:,t] = np.multiply(init_dist,F[:,t])
    alpha[:,t] = normalize(alpha[:,t].reshape(1,-1),norm='l1')
    
    for t in np.arange(2,T):
        m = np.dot(A.T,alpha[:,t-1])
        alpha[:,t]=np.multiply(m,F[:,t])
        alpha[:,t] = normalize(alpha[:,t].reshape(1,-1),norm='l1')
        
    
    # Backward recursion
    beta[:,T-1] = normalize(np.ones(K).reshape(1,-1),norm='l1')
    for t in np.arange(1,T-1)[::-1]:
        b = np.multiply(beta[:,t+1], F[:,t+1])
        beta[:,t] = np.dot(A,b)
        beta[:,t] = normalize(beta[:,t].reshape(1,-1),norm='l1')
    
    # Compute P1=p(kt|y1,...,yT)=alpha*beta/sum(alpha*beta)
    P1 = np.zeros((K,T))
    for t in np.arange(T):
        P1[:,t] = np.multiply(alpha[:,t],beta[:,t])/logsumexp(np.multiply(alpha[:,t],beta[:,t]))
        P1[:,t] = normalize(P1[:,t].reshape(1,-1),norm='l1')
    
    # Compute P2=p(kt,kt+1|y1,...,yT)
    P2 = np.zeros((T-1,K,K))
    for t in np.arange(1,T-2):
        for i in np.arange(1,K-1):
            for j in np.arange(1,K-1):
                P2[t,i,j]=alpha[i,t]*beta[j,t+1]*F[j,t+1]
        P2[t,:,:] = normalize(P2[t,:,:],norm='l1')
    return P1,P2


def forward(params, observations):
    pi, A, O = params
    N = len(observations)
    S = pi.shape[0]
    
    alpha = np.zeros((N, S))
    
    # base case
    alpha[0, :] = pi * O[:,observations[0]]
    
    # recursive case
    for i in range(1, N):
        for s2 in range(S):
            for s1 in range(S):
                alpha[i, s2] += alpha[i-1, s1] * A[s1, s2] * O[s2, observations[i]]
    
    return (alpha, np.sum(alpha[N-1,:]))


def backward(params, observations):
    pi, A, O = params
    N = len(observations)
    S = pi.shape[0]
    
    beta = np.zeros((N, S))
    
    # base case
    beta[N-1, :] = 1
    
    # recursive case
    for i in range(N-2, -1, -1):
        for s1 in range(S):
            for s2 in range(S):
                beta[i, s1] += beta[i+1, s2] * A[s1, s2] * O[s2, observations[i+1]]
    
    return (beta, np.sum(pi * O[:, observations[0]] * beta[0,:]))




In [37]:
fit(X,2,10,y,2)

[[  0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    2.05697730e-234   6.90476213e-299   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    8.47512342e-300   3.61399027e-264   3.76383856e-264   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   0.00000000e+000
    0.00000000e+000   0.00000000e+000   0.00000000e+000   2.73646947e-296
    6.31904330e-311   0.00000000e+000 

UnboundLocalError: local variable 'F' referenced before assignment

In [183]:
X.shape

(6249, 128)

In [146]:
log_normalize(A)

array([[-1.71544965, -2.15266041, -2.02453012, -2.54952197, -2.02417456,
        -1.70436512, -2.57615563],
       [-2.50832937, -1.89793982, -2.16478836, -1.13529234, -1.78184726,
        -2.25810163, -2.11220545],
       [-2.54640136, -1.42185538, -2.13004245, -2.27354118, -1.74131026,
        -1.73367537, -1.84182151],
       [-1.62480363, -1.86999193, -2.15792254, -1.82266231, -1.91896535,
        -1.98840168, -1.88231953],
       [-2.21747678, -2.07752282, -1.65354362, -2.22374016, -2.48947861,
        -1.66741601, -1.66105115]])

In [109]:
repmat(A,1,3)

array([[ 0.96753754,  0.13259923,  0.3372037 ,  0.97948979,  0.43865387,
         0.96753754,  0.13259923,  0.3372037 ,  0.97948979,  0.43865387,
         0.96753754,  0.13259923,  0.3372037 ,  0.97948979,  0.43865387],
       [ 0.54535783,  0.81468884,  0.28034059,  0.83642568,  0.17197204,
         0.54535783,  0.81468884,  0.28034059,  0.83642568,  0.17197204,
         0.54535783,  0.81468884,  0.28034059,  0.83642568,  0.17197204],
       [ 0.56144315,  0.32934019,  0.07719071,  0.03714683,  0.05935149,
         0.56144315,  0.32934019,  0.07719071,  0.03714683,  0.05935149,
         0.56144315,  0.32934019,  0.07719071,  0.03714683,  0.05935149],
       [ 0.57415875,  0.38052674,  0.82188269,  0.8828057 ,  0.40605557,
         0.57415875,  0.38052674,  0.82188269,  0.8828057 ,  0.40605557,
         0.57415875,  0.38052674,  0.82188269,  0.8828057 ,  0.40605557],
       [ 0.22841158,  0.92882622,  0.86445451,  0.13419212,  0.50790019,
         0.22841158,  0.92882622,  0.86445451, 

In [106]:
A

array([[ 0.96753754,  0.13259923,  0.3372037 ,  0.97948979,  0.43865387],
       [ 0.54535783,  0.81468884,  0.28034059,  0.83642568,  0.17197204],
       [ 0.56144315,  0.32934019,  0.07719071,  0.03714683,  0.05935149],
       [ 0.57415875,  0.38052674,  0.82188269,  0.8828057 ,  0.40605557],
       [ 0.22841158,  0.92882622,  0.86445451,  0.13419212,  0.50790019]])