# 合成加速度

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from statistics import median,mean
%matplotlib inline
from pylab import rcParams
import sympy as sym
import numpy as np
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import StandardScaler
from scipy import stats as s
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
#import sklearn.cross_validation as crv
import random
from sklearn.metrics import mean_squared_error

In [2]:
def read_acc(file):
    data_acc = pd.read_csv(file,sep=' ',header=None)
   
    data_acc = data_acc.rename(columns={data_acc.columns[0]:'X_acc', data_acc.columns[1]:'Y_acc', data_acc.columns[2]:'Z_acc',data_acc.columns[3]:'TimeStamp'} )
    
    return data_acc

In [3]:
#fix NaN
def fix_NaN(data):
    for  c  in  data.columns:
        col = data[c]  #'X'列から1列ずつ見る
        for r in range(len(data)):  #1行ずつ見る
            if col[r] == 0.0:       #0値が見つかれば
                data.iloc[r][c] = 'NaN' #'NaN'を代入
    mc_clean = data.interpolate()
    mc_clean = mc_clean.fillna(0)
    return mc_clean

In [4]:
#線形加速度を得る
def linear_acc(data):
    alpha = 0.9
    for  c  in  ['X_acc','Y_acc','Z_acc']:
        g = 0  
        col = data[c]           #'X'列から1列ずつ見る
        for r in range(len(data)):         #1行ずつ見る
            #Isolate the force of gravity with the low-pass filter.
            g = alpha*g + (1-alpha) * col[r]
            #Remove the gravity contribution with the high-pass filter.
            linear = col[r] - g
            data.iloc[r][c] = linear
        linear_acc = data
    return linear_acc

In [5]:
def obtain_statistical_feature(window):
    #features = np.array(np.array(np.var(window, axis=0)))
    #features=np.array(np.array(np.mean(window-np.mean(window), axis=0)))
    features=np.array(np.array(np.mean(window-np.median(window), axis=0))) 
    features=np.append(features, np.array(np.std(window, axis=0))) 
    features=np.append(features, np.array(np.var(window, axis=0)))
    features=np.append(features, np.array(s.skew(window, axis=0)))
    features=np.append(features, np.array(s.kurtosis(window, axis=0)))
    #features=np.append(features, np.array(get_tw_variance(window)))
    features=np.append(features, np.array(get_tw_col_var(window)))
    return features.reshape(1,len(features))

In [6]:
#def get_tw_variance(window):
    #total_cols = window.shape[1]
    #tw_var = []
    #for i in range (len(window)):
        #column = window.iloc[:,i]
        #tw_var.append(get_tw_col_var(column))
    #return tw_var


def get_tw_col_var(column):
    mean = np.mean(column)
    total = len(column)
    
    v_sum = 0
    for i in range(total):
        w = np.exp(-0.5*(total-i))
        #print("el peso", w, "indice", i, "total", total)
        v = w*np.square(column.iloc[i]-mean)
        v_sum += v
    return v_sum

def my_index_multi(l, x):
    return [i for i, _x in enumerate(l) if _x == x]

In [7]:
def one_acc(data):
    xyz = pd.DataFrame(columns=["acc"])
    accs = []
    for i in range(len(data)):
        #ベクトルの大きさを求める
        acc = np.linalg.norm(data.loc[i][data.columns != "TimeStamp"])
        accs.append(acc)
    xyz["acc"]= accs
    xyz["TimeStamp"] = data["TimeStamp"].tolist()
    return xyz

In [8]:
def real_accel(*sensor_n):
    x = np.empty([0,6])
    y = np.empty([0,])
    random.seed(0)
    print(sensor_n)
    #Read all mocap data
    #b = random.sample( ["01","02","03","04","05","06","07","08","09","10","11","12"],6)
    #a =  ["01","02","03","04","05","06","07","08","09","10","11","12"]
    #subjects = list(set(a) - set(b))
    subjects =  ["01","02","03","04","05","06","07","08","09","10","11","12"]
    #subjects =  ["07","08","09","10","11","12"]
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    for i in sensor_n: 
        for subject in subjects:
            for action in actions:
                for record in range(1,6):
                    try:
                        data = read_acc('/Users/takeshin/odrive/sozolab_gdrive/students/Shingo/BerkeleyMHAD/Accelerometer/Shimmer0'+str(i)+'/acc_h0'+str(i)+'_s'+str(subject)+'_a'+str(action)+'_r0'+str(record)+'.txt')
                        print('acc_h0'+str(i)+'_s'+str(subject)+'_a'+str(action)+'_r0'+str(record)+'.txt')
                        data = fix_NaN(data)
                        data = linear_acc(data)  
                        data = one_acc(data)
                        #print(data.head())
                    
                        sf = obtain_statistical_feature(data["acc"])
                        x =np.append(x,sf, axis=0)
                        y = np.append(y,[action], axis=0)
                    except FileNotFoundError:
                        pass

    return x, y   

In [9]:
x_real,y_real = real_accel(3)

(3,)
acc_h03_s01_a01_r01.txt
acc_h03_s01_a01_r02.txt
acc_h03_s01_a01_r03.txt
acc_h03_s01_a01_r04.txt
acc_h03_s01_a01_r05.txt
acc_h03_s01_a02_r01.txt
acc_h03_s01_a02_r02.txt
acc_h03_s01_a02_r03.txt
acc_h03_s01_a02_r04.txt
acc_h03_s01_a02_r05.txt
acc_h03_s01_a03_r01.txt
acc_h03_s01_a03_r02.txt
acc_h03_s01_a03_r03.txt
acc_h03_s01_a03_r04.txt
acc_h03_s01_a03_r05.txt
acc_h03_s01_a04_r01.txt
acc_h03_s01_a04_r02.txt
acc_h03_s01_a04_r03.txt
acc_h03_s01_a04_r04.txt
acc_h03_s01_a04_r05.txt
acc_h03_s01_a05_r01.txt
acc_h03_s01_a05_r02.txt
acc_h03_s01_a05_r03.txt
acc_h03_s01_a05_r04.txt
acc_h03_s01_a05_r05.txt
acc_h03_s01_a06_r01.txt
acc_h03_s01_a06_r02.txt
acc_h03_s01_a06_r03.txt
acc_h03_s01_a06_r04.txt
acc_h03_s01_a06_r05.txt
acc_h03_s01_a07_r01.txt
acc_h03_s01_a07_r02.txt
acc_h03_s01_a07_r03.txt
acc_h03_s01_a07_r04.txt
acc_h03_s01_a07_r05.txt
acc_h03_s01_a08_r01.txt
acc_h03_s01_a08_r02.txt
acc_h03_s01_a08_r03.txt
acc_h03_s01_a08_r04.txt
acc_h03_s01_a08_r05.txt
acc_h03_s01_a09_r01.txt
acc_h03_s01

acc_h03_s07_a03_r05.txt
acc_h03_s07_a04_r01.txt
acc_h03_s07_a04_r02.txt
acc_h03_s07_a04_r03.txt
acc_h03_s07_a04_r04.txt
acc_h03_s07_a04_r05.txt
acc_h03_s07_a05_r01.txt
acc_h03_s07_a05_r02.txt
acc_h03_s07_a05_r03.txt
acc_h03_s07_a05_r04.txt
acc_h03_s07_a05_r05.txt
acc_h03_s07_a06_r01.txt
acc_h03_s07_a06_r02.txt
acc_h03_s07_a06_r03.txt
acc_h03_s07_a06_r04.txt
acc_h03_s07_a06_r05.txt
acc_h03_s07_a07_r01.txt
acc_h03_s07_a07_r02.txt
acc_h03_s07_a07_r03.txt
acc_h03_s07_a07_r04.txt
acc_h03_s07_a07_r05.txt
acc_h03_s07_a08_r01.txt
acc_h03_s07_a08_r02.txt
acc_h03_s07_a08_r03.txt
acc_h03_s07_a08_r04.txt
acc_h03_s07_a08_r05.txt
acc_h03_s07_a09_r01.txt
acc_h03_s07_a09_r02.txt
acc_h03_s07_a09_r03.txt
acc_h03_s07_a09_r04.txt
acc_h03_s07_a09_r05.txt
acc_h03_s07_a10_r01.txt
acc_h03_s07_a10_r02.txt
acc_h03_s07_a10_r03.txt
acc_h03_s07_a10_r04.txt
acc_h03_s07_a10_r05.txt
acc_h03_s07_a11_r01.txt
acc_h03_s07_a11_r02.txt
acc_h03_s07_a11_r03.txt
acc_h03_s07_a11_r04.txt
acc_h03_s07_a11_r05.txt
acc_h03_s08_a01_

In [10]:
print(x_real.shape)
print(y_real.shape)

(658, 6)
(658,)


In [11]:
#len(x_real[my_index_multi(y_real, '01')])

In [12]:
def ml_ar(X,y):
    x_train, x_test, y_train, y_test = crv.train_test_split(X, y, test_size=0.30, random_state=42)
    clf = RandomForestClassifier(n_estimators=20, random_state=42)
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    conf = confusion_matrix(y_test, y_predict)
    print (accuracy_score(y_test, y_predict) ) 
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    print(classification_report(y_test, y_predict, target_names=actions))
    print(conf)

In [13]:
def ml_ar2(X,y):                              
    #Try classifier
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    #estimators.append(('clf', SVC(kernel='linear')))
    estimators.append(('clf', RandomForestClassifier(n_estimators=20, random_state=42)))
    model = Pipeline(estimators)
    
    seed = 11
    kf = KFold(n_splits=3, random_state=seed, shuffle=True)
    accuracy = [] 
    
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(X_train, y_train)
        acc_train = model.score(X_train, y_train)
        acc_test = model.score(X_test, y_test)
        accuracy.append(acc_test)
        #print('Accuracy on training set: {:.2f}'
        #  .format(model.score(X_train, y_train)))
        #print('Accuracy on test set: {:.2f}'
        #  .format(model.score(X_test, y_test)))
        y_pred = model.predict(X_test)
        conf = confusion_matrix(y_test, y_pred)
        print("*Accuracy* train:",acc_train, "test:",acc_test)
        actions = ["01","02","03","04","05","06","07","08","09","10","11"]
        print(classification_report(y_test, y_pred, target_names=actions))
        #print(conf)
        print("-----------")
    print("*test_ave*",sum(accuracy)/len(accuracy))

In [14]:
ml_ar2(x_real,y_real) 

*Accuracy* train: 0.9931506849315068 test: 0.6272727272727273
             precision    recall  f1-score   support

         01       0.70      0.89      0.78        18
         02       0.90      0.73      0.81        26
         03       0.42      0.71      0.53        14
         04       0.79      0.62      0.70        24
         05       0.48      0.50      0.49        20
         06       0.48      0.57      0.52        21
         07       0.78      0.78      0.78        18
         08       0.62      0.42      0.50        19
         09       0.74      0.70      0.72        20
         10       0.43      0.63      0.51        19
         11       0.89      0.38      0.53        21

avg / total       0.67      0.63      0.63       220

-----------
*Accuracy* train: 1.0 test: 0.6438356164383562
             precision    recall  f1-score   support

         01       0.67      0.78      0.72        18
         02       0.60      0.46      0.52        13
         03       0.73     

   H3(合成加速度)
   
   0.6060606060606061
   
      precision    recall  f1-score   support

         01       0.67      0.80      0.73        20
         02       0.75      0.60      0.67        20
         03       0.64      0.78      0.70        18
         04       0.65      0.83      0.73        18
         05       0.35      0.35      0.35        17
         06       0.59      0.54      0.57        24
         07       0.68      0.93      0.79        14
         08       0.59      0.67      0.62        15
         09       0.50      0.36      0.42        14
         10       0.50      0.45      0.47        20
         11       0.70      0.39      0.50        18

avg / total       0.60      0.61      0.60       198

[[16  4  0  0  0  0  0  0  0  0  0]
 [ 8 12  0  0  0  0  0  0  0  0  0]
 [ 0  0 14  1  0  0  0  0  2  1  0]
 [ 0  0  0 15  2  0  0  0  0  0  1]
 [ 0  0  0  1  6  9  1  0  0  0  0]
 [ 0  0  0  0  8 13  3  0  0  0  0]
 [ 0  0  0  0  1  0 13  0  0  0  0]
 [ 0  0  1  0  0  0  2 10  0  1  1]
 [ 0  0  2  3  0  0  0  3  5  1  0]
 [ 0  0  3  3  0  0  0  1  3  9  1]
 [ 0  0  2  0  0  0  0  3  0  6  7]]


In [15]:
#x_real,y_real = real_accel(1)

In [16]:
#ml_ar(x_real,y_real) 