# 合成加速度

In [1]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import StandardScaler
from scipy import stats as s
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
import sklearn.cross_validation as crv
from sklearn.metrics import accuracy_score
import random
from sklearn.metrics import mean_squared_error

  from numpy.core.umath_tests import inner1d


In [2]:
pwd

'/Users/takeshin/odrive/sozolab_gdrive/students/Shingo/BerkeleyMHAD/virtual_acc'

In [3]:
def col_name():
    p0 = ['x','y','z']
    marker =[]
    p2 =[]
    for a in range(1,44):       #Mocapマーカー数:43個
        p1='m'+ str(a)
        for b in p0:
            p2 = p1 + b
            marker.append(p2)
    return marker

def read_mocap(file):
    data_mocap = pd.read_csv(file,sep=' ',header=None)
    #座標値列の列名変更
    data_mocap=data_mocap.rename(columns={data_mocap.columns[a]:col_name()[a] for a in range(129)} )
    #座標値列以外の列名変更
    data_mocap=data_mocap.rename(columns={129:'FrameNumber',130:'TimeStamp'})
    return data_mocap

#fix NaN
def fix_NaN(data):
    for  c  in  data.columns:
        col = data[c]  #'X'列から1列ずつ見る
        for r in range(len(data)):  #1行ずつ見る
            if col[r] == 0.0:       #0値が見つかれば
                data.iloc[r][c] = 'NaN' #'NaN'を代入
    mc_clean = data.interpolate()
    mc_clean = mc_clean.fillna(0)
    return mc_clean

def obtain_statistical_feature(window):
    #features = np.array(np.array(np.var(window, axis=0)))
    #features=np.array(np.array(np.mean(window-np.mean(window), axis=0)))
    features=np.array( np.array(np.mean(window-np.median(window), axis=0))) 
    features=np.append(features, np.array(np.std(window, axis=0))) 
    features=np.append(features, np.array(np.var(window, axis=0)))
    features=np.append(features, np.array(s.skew(window, axis=0)))
    features=np.append(features, np.array(s.kurtosis(window, axis=0)))
    features=np.append(features, np.array(get_tw_col_var(window)))
    return features.reshape(1,len(features))


def get_tw_variance(window):
    total_cols = window.shape[1]
    tw_var = []
    for i in range (total_cols):
        column = window.iloc[:,i]
        tw_var.append(get_tw_col_var(column))
    return tw_var


def get_tw_col_var(column):
    mean = np.mean(column)
    total = len(column)
    
    v_sum = 0
    for i in range(total):
        w = np.exp(-0.5*(total-i))
        #print("el peso", w, "indice", i, "total", total)
        v = w*np.square(column.iloc[i]-mean)
        v_sum += v
    return v_sum

#速度加速度を求める関数を定義
def cal_va(data):
    diffs = pd.DataFrame()
    for i in range(len(data)-1):
        diff = data.iloc[i+1]-data.iloc[i]
        diffs = diffs.append(diff,ignore_index=True)
        
    vs = pd.DataFrame()
    for i in range(len(diffs)):
        v = diffs.iloc[i][1:]/diffs.iloc[i]["TimeStamp"]
        vs = vs.append(v,ignore_index=True)
    return vs



def my_index_multi(l, x):
    return [i for i, _x in enumerate(l) if _x == x]

In [4]:
def one_acc(data):
    xyz = pd.DataFrame(columns=["acc"])
    accs = []
    for i in range(len(data)):
        #ベクトルの大きさを求める
        acc = np.linalg.norm(data.loc[i])
        accs.append(acc)
    xyz["acc"]= accs
    #xyz["TimeStamp"] = data["TimeStamp"].tolist()
    return xyz

In [5]:
def main_functuion(sampling_rate,*markers):
    x = np.empty([0,6*len(markers)])
    y = np.empty([0,])
    random.seed(0)
    #Read all mocap data
    #b = random.sample( ["01","02","03","04","05","06","07","08","09","10","11","12"],6)
    #a =  ["01","02","03","04","05","06","07","08","09","10","11","12"]
    #subjects = list(set(a) - set(b))
    subjects =  ["01","02","03","04","05","06","07","08","09","10","11","12"]
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    for subject in subjects:
        for action in actions:
            for record in range(1,6):
                try:
                    data = read_mocap('/Users/takeshin/odrive/sozolab_gdrive/students/Shingo/BerkeleyMHAD/Mocap/OpticalData/moc_s'+str(subject)+'_a'+str(action)+'_r0'+str(record)+'.txt')
                    print('moc_s'+str(subject)+'_a'+str(action)+'_r0'+str(record)+'.txt')
                    columns = [['m'+str(i)+'x','m'+str(i)+'y','m'+str(i)+'z'] for i in markers]
                    cols = np.array(columns)
                    cols = list(np.ravel(cols))
                    cols.append("TimeStamp")
                    data = data[cols] 
                    data = fix_NaN(data)
                    
                    i = 0
                    mc_ds = pd.DataFrame()
                    #処理
                    while  i < (len(data)//(480//sampling_rate))*(480//sampling_rate):
                        mc_ds = mc_ds.append(data.iloc[i : i+(480//sampling_rate),:].mean(),ignore_index=True)
                        i+=(480//sampling_rate)
                    if i < len(data):
                        mc_ds = mc_ds.append(data.iloc[i :,:].mean(),ignore_index=True)
                    
                    v = cal_va(mc_ds)
                    times = mc_ds['TimeStamp'].shift(periods=-1)
                    mc_v = pd.DataFrame(v)
                    mc_v["TimeStamp"]=times
                    mc_v = mc_v.dropna()

                    
                    a = cal_va(mc_v)
                    times = mc_v['TimeStamp'].shift(periods=-1)
                    mc_a = pd.DataFrame(a)
                    mc_a["TimeStamp"]=times
                    mc_a = mc_a.dropna()
                    #print(mc_a)                                           
                
                    
                    #単位を[G]に変換  [mm/s^2] →  [G]
                    accel = mc_a.loc[:, mc_a.columns != 'TimeStamp']*0.001/9.80665
                    accel = accel.rolling(window= 10, center=False).mean()
                    accel = accel.dropna()
                    accel = accel.reset_index()
                    #print(accel)
                    accel = one_acc(accel)
                    #print(accel)
                   
                
                    sf = obtain_statistical_feature(accel["acc"])
                    x =np.append(x,sf, axis=0)
                    y = np.append(y,[action], axis=0)
                
                
                except FileNotFoundError:
                    pass
                    
    return x, y

In [6]:
x,y=main_functuion(30,30)   

moc_s01_a01_r01.txt
moc_s01_a01_r02.txt
moc_s01_a01_r03.txt
moc_s01_a01_r04.txt
moc_s01_a01_r05.txt
moc_s01_a02_r01.txt
moc_s01_a02_r02.txt
moc_s01_a02_r03.txt
moc_s01_a02_r04.txt
moc_s01_a02_r05.txt
moc_s01_a03_r01.txt
moc_s01_a03_r02.txt
moc_s01_a03_r03.txt
moc_s01_a03_r04.txt
moc_s01_a03_r05.txt
moc_s01_a04_r01.txt
moc_s01_a04_r02.txt
moc_s01_a04_r03.txt
moc_s01_a04_r04.txt
moc_s01_a04_r05.txt
moc_s01_a05_r01.txt
moc_s01_a05_r02.txt
moc_s01_a05_r03.txt
moc_s01_a05_r04.txt
moc_s01_a05_r05.txt
moc_s01_a06_r01.txt
moc_s01_a06_r02.txt
moc_s01_a06_r03.txt
moc_s01_a06_r04.txt
moc_s01_a06_r05.txt
moc_s01_a07_r01.txt
moc_s01_a07_r02.txt
moc_s01_a07_r03.txt
moc_s01_a07_r04.txt
moc_s01_a07_r05.txt
moc_s01_a08_r01.txt
moc_s01_a08_r02.txt
moc_s01_a08_r03.txt
moc_s01_a08_r04.txt
moc_s01_a08_r05.txt
moc_s01_a09_r01.txt
moc_s01_a09_r02.txt
moc_s01_a09_r03.txt
moc_s01_a09_r04.txt
moc_s01_a09_r05.txt
moc_s01_a10_r01.txt
moc_s01_a10_r02.txt
moc_s01_a10_r03.txt
moc_s01_a10_r04.txt
moc_s01_a10_r05.txt


moc_s08_a06_r02.txt
moc_s08_a06_r03.txt
moc_s08_a06_r04.txt
moc_s08_a06_r05.txt
moc_s08_a07_r01.txt
moc_s08_a07_r02.txt
moc_s08_a07_r03.txt
moc_s08_a07_r04.txt
moc_s08_a07_r05.txt
moc_s08_a08_r01.txt
moc_s08_a08_r02.txt
moc_s08_a08_r03.txt
moc_s08_a08_r04.txt
moc_s08_a08_r05.txt
moc_s08_a09_r01.txt
moc_s08_a09_r02.txt
moc_s08_a09_r03.txt
moc_s08_a09_r04.txt
moc_s08_a09_r05.txt
moc_s08_a10_r01.txt
moc_s08_a10_r02.txt
moc_s08_a10_r03.txt
moc_s08_a10_r04.txt
moc_s08_a10_r05.txt
moc_s08_a11_r01.txt
moc_s08_a11_r02.txt
moc_s08_a11_r03.txt
moc_s08_a11_r04.txt
moc_s08_a11_r05.txt
moc_s09_a01_r01.txt
moc_s09_a01_r02.txt
moc_s09_a01_r03.txt
moc_s09_a01_r04.txt
moc_s09_a01_r05.txt
moc_s09_a02_r01.txt
moc_s09_a02_r02.txt
moc_s09_a02_r03.txt
moc_s09_a02_r04.txt
moc_s09_a02_r05.txt
moc_s09_a03_r01.txt
moc_s09_a03_r02.txt
moc_s09_a03_r03.txt
moc_s09_a03_r04.txt
moc_s09_a03_r05.txt
moc_s09_a04_r01.txt
moc_s09_a04_r02.txt
moc_s09_a04_r03.txt
moc_s09_a04_r04.txt
moc_s09_a04_r05.txt
moc_s09_a05_r01.txt


In [7]:
def ml_ar(X,y):
    x_train, x_test, y_train, y_test = crv.train_test_split(X, y, test_size=0.30, random_state=42)
    clf = RandomForestClassifier(n_estimators=20, random_state=42)
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    conf = confusion_matrix(y_test, y_predict)
    print (accuracy_score(y_test, y_predict) ) 
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    print(classification_report(y_test, y_predict, target_names=actions))
    print(conf)

In [8]:
ml_ar(x,y)

0.6616161616161617
             precision    recall  f1-score   support

         01       1.00      0.68      0.81        22
         02       0.65      0.83      0.73        18
         03       0.86      0.60      0.71        20
         04       0.52      0.61      0.56        18
         05       0.65      0.55      0.59        20
         06       0.53      0.43      0.47        21
         07       0.61      0.93      0.74        15
         08       0.42      0.53      0.47        15
         09       0.67      0.92      0.77        13
         10       0.62      0.53      0.57        19
         11       0.93      0.82      0.87        17

avg / total       0.69      0.66      0.66       198

[[15  1  0  0  1  0  3  2  0  0  0]
 [ 0 15  0  2  0  1  0  0  0  0  0]
 [ 0  0 12  0  1  1  0  0  6  0  0]
 [ 0  2  1 11  2  1  1  0  0  0  0]
 [ 0  1  1  1 11  5  1  0  0  0  0]
 [ 0  4  0  6  2  9  0  0  0  0  0]
 [ 0  0  0  0  0  0 14  0  0  0  1]
 [ 0  0  0  0  0  0  3  8  0  4  0]
 

In [9]:
def ml_ar2(X,y):                              
    #Try classifier
    estimators = []
    estimators.append(('standardize', StandardScaler()))
    #estimators.append(('clf', SVC(kernel='linear')))
    estimators.append(('clf', RandomForestClassifier(n_estimators=20, random_state=42)))
    model = Pipeline(estimators)
    
    seed = 11
    kf = KFold(n_splits=3, random_state=seed, shuffle=True)
    accuracy = [] 
    
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(X_train, y_train)
        acc_train = model.score(X_train, y_train)
        acc_test = model.score(X_test, y_test)
        accuracy.append(acc_test)
        #print('Accuracy on training set: {:.2f}'
        #  .format(model.score(X_train, y_train)))
        #print('Accuracy on test set: {:.2f}'
        #  .format(model.score(X_test, y_test)))
        y_pred = model.predict(X_test)
        conf = confusion_matrix(y_test, y_pred)
        print("*Accuracy* train:",acc_train, "test:",acc_test)
        actions = ["01","02","03","04","05","06","07","08","09","10","11"]
        print(classification_report(y_test, y_pred, target_names=actions))
        #print(conf)
        print("-----------")
    print("*test_ave*",sum(accuracy)/len(accuracy))

In [10]:
ml_ar2(x,y)

*Accuracy* train: 0.9977220956719818 test: 0.6681818181818182
             precision    recall  f1-score   support

         01       0.88      0.83      0.86        18
         02       0.90      0.78      0.84        23
         03       0.63      0.86      0.73        14
         04       0.57      0.52      0.55        23
         05       0.46      0.65      0.54        20
         06       0.56      0.43      0.49        23
         07       0.81      0.85      0.83        20
         08       0.55      0.55      0.55        20
         09       0.86      0.67      0.75        18
         10       0.54      0.59      0.57        22
         11       0.78      0.74      0.76        19

avg / total       0.68      0.67      0.67       220

-----------
*Accuracy* train: 0.9977220956719818 test: 0.6363636363636364
             precision    recall  f1-score   support

         01       0.75      0.79      0.77        19
         02       0.80      0.75      0.77        16
         03 