# Some imports

In [None]:
from scipy import signal # Signal Processing Library
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd # DataBase management
import seaborn # Improve Images
%matplotlib inline

# Feature Engineering

In [None]:
## Dataset layout ##
# Names of the three accelerometer signal columns.
acc_nm = ['ax', 'ay', 'az']
# Names given to every column of the CSV: 'ind', the three axes, then 'label'.
nm = ['ind'] + acc_nm + ['label']
## Dataset location ##
link = "Dataset/1.csv"
def loadDB(link, names=None):
    """Load one recording from a CSV file and return it as a DataFrame.

    Parameters
    ----------
    link : str or path-like
        Location of the comma-separated file. It is read without a header row.
    names : list of str, optional
        Names assigned to the file's columns. When omitted, the module-level
        ``nm`` list is used.

    Returns
    -------
    pandas.DataFrame
        The rows of the file with the ``ind`` column dropped, the rows whose
        ``label`` is 0 removed, and an extra ``label_str`` column holding the
        activity name of each label (NaN for a label without a known name).
        The row index of the file is kept, so it has gaps where rows were
        removed.
    """
    if names is None:
        names = nm

    # The backslash is escaped explicitly: '\D' is not a valid escape sequence
    # and newer Python versions warn about it. The resulting text is unchanged.
    label2str = {
        1: 'Working at Computer',
        2: 'Standing Up, Walking and Going up-down stairs',
        3: 'Standing',
        4: 'Walking',
        5: 'Going Up\\Down Stairs',
        6: 'Walking and Talking with Someone',
        7: 'Talking while Standing',
    }

    df = pd.read_csv(link, sep=',', names=names)
    df = df.drop(columns=['ind'], errors='ignore')
    # Label 0 marks unusable rows. The explicit copy makes the result an
    # independent frame, so adding a column below does not raise
    # SettingWithCopyWarning.
    df = df[df.label != 0].copy()
    df['label_str'] = df.label.map(label2str)  # Needed by some plots
    return df

# Load the recording found at `link`; `df_raw` is what widen_signal is applied to below.
df_raw = loadDB(link)

def widen_signal(df, names=None, fs=52, f_cut=1):
    """Return a copy of `df` extended with signals derived from the acceleration.

    For every column listed in `names` the following columns are appended:

    * ``<name>_median`` -- median-filtered signal (scipy's default 3-sample kernel),
    * ``<name>_diff``   -- first difference (jerk); the first row is back-filled,
    * ``<name>_low-p``  -- 3rd-order Butterworth low-pass output,
    * ``<name>_high-p`` -- 3rd-order Butterworth high-pass output,

    plus one ``mag`` column holding the Euclidean norm of the `names` columns.
    All other columns of `df` are passed through untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns in `names`.
    names : list of str, optional
        Signal columns to process. Defaults to the module-level ``acc_nm``.
    fs : float, optional
        Sampling frequency used to normalise the cut-off frequency.
    f_cut : float, optional
        Cut-off frequency of both Butterworth filters, in the unit of `fs`.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as `df`; `df` itself is not modified.
    """
    if names is None:
        names = acc_nm
    names = list(names)

    # Work on a copy so the caller's frame does not silently gain a 'mag' column.
    out = df.copy()
    acc = out[names]
    out['mag'] = np.sqrt(np.square(acc).sum(axis=1))

    def _per_column(suffix, transform):
        """Apply `transform` to each signal column and name the result column+suffix."""
        # The derived frame reuses the input index. With a default RangeIndex
        # the final concat would align on labels and produce NaN rows whenever
        # the input index has gaps (e.g. after rows were filtered out).
        return pd.DataFrame(
            {column + suffix: transform(acc[column].values) for column in names},
            index=out.index,
        )

    def _butterworth(btype, suffix):
        """Filter each signal column with a 3rd-order Butterworth filter."""
        wn = f_cut * 2.0 / fs  # cut-off normalised by the Nyquist frequency
        b, a = signal.butter(N=3, Wn=wn, btype=btype)
        return _per_column(suffix, lambda values: signal.lfilter(b, a, values))

    df_med = _per_column('_median', signal.medfilt)
    # Differential of the acceleration (jerk); the leading NaN is back-filled.
    df_diff = acc.diff(periods=1, axis=0).bfill().add_suffix('_diff')
    df_lp = _butterworth('low', '_low-p')
    df_hp = _butterworth('high', '_high-p')

    return pd.concat([out, df_med, df_diff, df_lp, df_hp], axis=1)
# Extend the raw recording with the derived signal columns.
df_widen = widen_signal(df_raw)
# Bare expression: as the last line of a notebook cell it displays (rows, columns).
df_widen.shape

In [None]:
def _n_windows(length, size, step):
    """Return how many complete windows of `size` samples fit into `length` samples."""
    if size < 1 or step < 1:
        raise ValueError("size and step must be positive integers")
    if length < size:
        return 0
    # The last window must end inside the signal: start + size <= length.
    return (length - size) // step + 1


def windowing(signal, size, step):
    """Cut a 1-D signal into windows of `size` samples taken every `step` samples.

    Only complete windows are produced; trailing samples that do not fill a
    window are dropped.

    Parameters
    ----------
    signal : sequence, numpy array or pandas Series of numbers
    size : int
        Number of samples per window.
    step : int
        Offset between the starts of two consecutive windows.

    Returns
    -------
    numpy.ndarray of float, shape (n_windows, size)
        Row ``j`` holds ``signal[j*step : j*step + size]``. The array has zero
        rows when the signal is shorter than one window.

    Raises
    ------
    ValueError
        If `size` or `step` is smaller than 1.
    """
    samples = np.asarray(signal, dtype=float)
    nk = _n_windows(len(samples), size, step)  # number of windows
    starts = step * np.arange(nk)
    # Broadcasting builds the (nk, size) table of sample positions in one go.
    return samples[starts[:, None] + np.arange(size)]


def window_labels(labels, size, step):
    """Return the most frequent label of each window.

    The windows are the same ones `windowing` produces for a signal of the
    same length. When several labels are equally frequent inside a window,
    the smallest one is returned.

    Parameters
    ----------
    labels : sequence, numpy array or pandas Series of non-negative integers
    size : int
        Number of samples per window.
    step : int
        Offset between the starts of two consecutive windows.

    Returns
    -------
    numpy.ndarray of float, shape (n_windows,)

    Raises
    ------
    ValueError
        If `size` or `step` is smaller than 1.
    """
    values = np.asarray(labels)
    nk = _n_windows(len(values), size, step)  # number of windows
    majority = [
        np.bincount(values[j * step:j * step + size]).argmax() for j in range(nk)
    ]
    return np.array(majority, dtype=float)


def extract_windows(df, size, step):
    """Extract windows with the specified size and step from the dataframe df.

    Every column except 'label' and 'label_str' is treated as a signal.

    Returns:
    L : List of dataframes. Each dataframe contains a window extracted from
        each signal in df (one column per signal, `size` rows).
    labels : labels of windows, as returned by `window_labels` for df['label'].
    """
    signal_columns = [c for c in df.columns if c not in ['label', 'label_str']]
    per_signal = {c: windowing(df[c], size, step) for c in signal_columns}
    n_windows = _n_windows(df.shape[0], size, step)
    L = [
        pd.DataFrame({c: per_signal[c][i, :] for c in signal_columns})
        for i in range(n_windows)
    ]
    labels = window_labels(df['label'], size, step)
    return L, labels



def compute_features(df):
    """Compute a flat vector of per-column statistics for one window.

    Nine statistics are computed for every column of `df`, in this order:
    mean, mean absolute deviation (around the mean), standard deviation,
    variance, minimum, maximum, skewness, kurtosis and interquartile range.
    Standard deviation, variance, skewness and kurtosis use the pandas defaults.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame; each column is one signal.

    Returns
    -------
    numpy.ndarray, shape (9 * n_columns,)
        The statistics are laid out statistic by statistic: first the mean of
        every column, then the mean absolute deviation of every column, etc.
    """
    mean = df.mean(axis=0)
    quartiles = df.quantile(q=[0.25, 0.75], axis=0)
    stats = [
        mean,
        # Mean absolute deviation, written out because DataFrame.mad() no
        # longer exists in pandas >= 2.0.
        (df - mean).abs().mean(axis=0),
        df.std(axis=0),
        df.var(axis=0),
        df.min(axis=0),
        df.max(axis=0),
        df.skew(axis=0),
        df.kurtosis(axis=0),
        quartiles.loc[0.75] - quartiles.loc[0.25],  # Interquartile range
    ]
    r = np.hstack([s.values for s in stats])  # Vector of features
    ## Auto-regressive coefficients

    ## Minmax

    ## Signal Integration

    return r


def compute_matrix_data(df, N_samples=52, percentage=0.5):
    """Turn a signal dataframe into a feature matrix and a label vector.

    The frame is cut into windows of `N_samples` rows; consecutive windows
    start ``int(percentage * N_samples)`` rows apart (at least one row). Each
    window is reduced to one feature vector by `compute_features`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame accepted by `extract_windows` (signal columns plus 'label').
    N_samples : int, optional
        Number of rows per window.
    percentage : float, optional
        Step between windows, expressed as a fraction of the window length.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, n_features)
        One row of features per window; always 2-D, even for a single window.
    y : numpy.ndarray, shape (n_windows,)
        Label of each window.

    Raises
    ------
    ValueError
        If `df` is too short to yield a single window.
    """
    # A step of 0 would never advance, so it is clamped to one sample.
    step = max(1, int(percentage * N_samples))
    windows, labels = extract_windows(df, N_samples, step)
    if not windows:
        raise ValueError("df is too short to extract a window of %d samples" % N_samples)

    # Stack all feature vectors in one call instead of growing X row by row,
    # which copies the whole matrix at every iteration.
    X = np.vstack([compute_features(window) for window in windows])
    y = np.array(labels)  # Vector of labels
    return X, y

# Feature matrix and window labels computed from the widened recording.
X, y = compute_matrix_data(df_widen)
# Same data as a DataFrame (more flexible), with the labels in a 'label' column.
df = pd.DataFrame(X).assign(label=y)

# Classical algorithms

In [None]:
## Methods
from sklearn.neighbors import KNeighborsClassifier # KNN
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier # Ensemble
from sklearn.svm import SVC # SVM
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression # Logistic Regression
## Utils
from sklearn.model_selection import GridSearchCV # Choose parameters
from sklearn.preprocessing import scale # Normalise matrix
from sklearn.metrics import confusion_matrix 
from sklearn.utils import shuffle
from sklearn.model_selection import cross_val_score

In [None]:
# Shuffle samples and labels together (fixed seed), then scale the feature matrix.
X_train, y_train = shuffle(X, y, random_state=0)
X_train = scale(X_train)

In [None]:
# lr = LogisticRegressionCV()
# cv_lr = min(cross_val_score(lr, X_train, y_train, cv=5))
# print("Logistic Regression :"+str(cv_lr))
# svm = SVC(C=8, kernel='rbf') # Parameters chosen by GridSearh
# cv_svm = min(cross_val_score(svm, X_train, y_train, cv=5))
# print("SVM :"+str(cv_svm))
# gb = GradientBoostingClassifier()
# cv_gb = min(cross_val_score(gb, X_train, y_train, cv=5))
# print("GradientBoosting :"+str(cv_gb))
# knn = KNeighborsClassifier() # Parameters chosen by GridSearh
# cv_knn = min(cross_val_score(knn, X_train, y_train, cv=5))
# print("Knn :"+str(cv_knn))
# lda = LinearDiscriminantAnalysis() # Parameters chosen by GridSearh
# cv_lda = min(cross_val_score(lda, X_train, y_train, cv=5))
# print("LDA :"+str(cv_lda))

# Discriminant Analysis
**LDA**

In [None]:
def lda(X, y, cla=None):
    """Project the samples with a linear discriminant analysis.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
    y : numpy.ndarray, shape (n_samples,)
        Class label of each sample.
    cla : sequence or numpy array of labels, optional
        Classes to keep. Samples of any other class are discarded before the
        analysis. By default every class present in `y` is used.

    Returns
    -------
    projected : numpy.ndarray, shape (n_kept_samples, n_classes - 1)
        Kept samples projected on the eigenvectors of ``Sw^-1 Sb`` that have
        the largest eigenvalues.
    ix : numpy.ndarray of bool, shape (n_samples,)
        Mask telling which of the input samples were kept.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the within-class matrix is singular.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # `is None` rather than `== None`: comparing an array-valued `cla` with
    # `==` is element-wise and cannot be used as a condition.
    if cla is None:
        cla = np.unique(y)
    ix = np.isin(y, cla)
    X, y = X[ix], y[ix]

    N_features = X.shape[1]
    Sw = np.zeros((N_features, N_features))  # Within-class matrix
    Sb = np.zeros((N_features, N_features))  # Between-class matrix
    u = np.mean(X, axis=0)
    classes = np.unique(y)
    for cl in classes:
        members = X[y == cl]
        # Within-class term: sum of the (unweighted) class covariance matrices.
        Sw += np.cov(members.T)
        offset = members.mean(axis=0) - u
        Sb += len(members) * np.outer(offset, offset)

    # Projection matrix Theta
    Proj_dim = classes.shape[0] - 1
    # solve(Sw, Sb) equals inv(Sw) @ Sb without forming the inverse explicitly.
    w, v = np.linalg.eig(np.linalg.solve(Sw, Sb))
    # eig returns the eigenvalues in no particular order, so the leading
    # directions have to be selected explicitly.
    order = np.argsort(w.real)[::-1]
    Theta = np.real(v[:, order[:Proj_dim]])
    projected = np.dot(Theta.T, X.T).T  # Projected data

    return projected, ix

# cla_t = [2,3,4,5]
# Y, ix = lda(X,y, cla=None)



**KDA**

In [None]:
from sklearn.metrics.pairwise import rbf_kernel

def w_matrix(x1, x2):
    """Build the class-membership weight matrix of two label vectors.

    Entry (i, j) is ``1/mk`` when ``x1[i] == x2[j]`` and 0 otherwise, where
    ``mk`` is the number of entries of `x1` that carry the label ``x1[i]``.

    Parameters
    ----------
    x1, x2 : sequence or numpy array of labels

    Returns
    -------
    numpy.ndarray of float, shape (len(x1), len(x2))
    """
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    # Class sizes are counted in x1 itself rather than in a module-level label
    # vector, so the result stays correct when the caller passes a filtered
    # subset of labels.
    class_sizes = (x1[:, None] == x1[None, :]).sum(axis=1)
    same_class = x1[:, None] == x2[None, :]
    return same_class / class_sizes[:, None]

def kda(X, y, cla=None):
    """Project the samples with a kernel discriminant analysis (RBF kernel).

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
    y : numpy.ndarray, shape (n_samples,)
        Class label of each sample.
    cla : sequence or numpy array of labels, optional
        Classes to keep. Samples of any other class are discarded before the
        analysis. By default every class present in `y` is used.

    Returns
    -------
    projected : numpy.ndarray, shape (n_kept_samples, n_classes - 1)
        Kernel matrix of the kept samples projected on the eigenvectors of
        ``(K K)^-1 (K W K)`` that have the largest eigenvalues.
    ix : numpy.ndarray of bool, shape (n_samples,)
        Mask telling which of the input samples were kept.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``K K`` is singular.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # `is None` rather than `== None`: comparing an array-valued `cla` with
    # `==` is element-wise and cannot be used as a condition.
    if cla is None:
        cla = np.unique(y)
    ix = np.isin(y, cla)
    X, y = X[ix], y[ix]

    K = rbf_kernel(X, X)
    W = w_matrix(y, y)
    # Scatter matrix used as the denominator of the criterion
    Sw = np.dot(K, K)
    # Between-class scatter matrix
    Sb = np.dot(K, np.dot(W, K))
    Proj_dim = np.unique(y).shape[0] - 1
    # solve(Sw, Sb) equals inv(Sw) @ Sb without forming the inverse explicitly.
    w, v = np.linalg.eig(np.linalg.solve(Sw, Sb))
    # eig returns the eigenvalues in no particular order, so the leading
    # directions have to be selected explicitly.
    order = np.argsort(w.real)[::-1]
    Alpha = np.real(v[:, order[:Proj_dim]])
    projected = np.dot(Alpha.T, K).T

    return projected, ix

# Y, ix = kda(X,y, cla=None)

In [None]:
# def predict(projected, y):
#     lr = LogisticRegressionCV()
#     lr.fit(projected, y)
#     y_pred = lr.predict(projected)
#     return y_pred

# C = confusion_matrix(y[ix], predict(Y,y[ix]))

In [None]:



# from mpl_toolkits.mplot3d import Axes3D
# from matplotlib.colors import Colormap
# fig = plt.figure()
# ax = Axes3D(fig)
# ax.scatter(Y[:,0],Y[:,1], Y[:,2], c=y[ix], cmap='viridis_r')

In [None]:
# import itertools
# def plot_confusion_matrix(cm, classes,
#                           normalize=False,
#                           title='Confusion matrix',
#                           cmap=plt.cm.Blues):
#     """
#     This function prints and plots the confusion matrix.
#     Normalization can be applied by setting `normalize=True`.
#     """
#     plt.imshow(cm, interpolation='nearest', cmap=cmap)
#     plt.title(title)
#     plt.colorbar()
#     tick_marks = np.arange(len(classes))
#     plt.xticks(tick_marks, classes, rotation=45)
#     plt.yticks(tick_marks, classes)

#     if normalize:
#         cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
#         print("Normalized confusion matrix")
#     else:
#         print('Confusion matrix, without normalization')

#     print(cm)

#     thresh = cm.max() / 2.
#     for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
#         plt.text(j, i, cm[i, j],
#                  horizontalalignment="center",
#                  color="black" if cm[i, j] > thresh else "black")

#     plt.tight_layout()
#     plt.ylabel('True label')
#     plt.xlabel('Predicted label')
    
# class_names = np.unique(y[ix])    
# cnf_matrix = confusion_matrix(y[ix], predict(Y,y[ix]))
# np.set_printoptions(precision=2)

# # Plot non-normalized confusion matrix
# plt.figure()
# plot_confusion_matrix(cnf_matrix, classes=class_names,
#                       title='Confusion matrix, without normalization')

# # Plot normalized confusion matrix
# plt.figure()
# plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
#                       title='Normalized confusion matrix')

# plt.show()


In [None]:
# def predict(projected, y):
#     lr = LogisticRegressionCV()
#     lr.fit(projected, y)
#     y_pred = lr.predict(projected)
#     return y_pred


# from mpl_toolkits.mplot3d import Axes3D
# from matplotlib.colors import Colormap
# fig = plt.figure()
# ax = Axes3D(fig)
# ax.scatter(Y[:,0],Y[:,1], Y[:,2], c=y[ix], cmap='viridis_r')

# confusion_matrix(y[ix], predict(Y,y[ix]))