In [2]:
import random

import numpy as np
import pandas as pd
from scipy import stats as s
from sklearn import ensemble ,tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
#import sklearn.cross_validation as crv

In [2]:
def col_name():
    """Build the column names for the mocap coordinate columns.

    Returns:
        list[str]: 129 names ordered marker by marker, i.e.
        ``['m1x', 'm1y', 'm1z', 'm2x', ..., 'm43z']``.
    """
    # 43 mocap markers, each with an x, y and z coordinate.
    return ['m' + str(idx) + axis
            for idx in range(1, 44)
            for axis in ('x', 'y', 'z')]

def read_mocap(file):
    """Load a space-separated mocap recording and label its columns.

    Args:
        file: Path (or file-like object) handed to ``pd.read_csv``; the file
            has no header row.

    Returns:
        pandas.DataFrame: the recording with its first 129 columns renamed to
        the marker coordinate names from ``col_name()`` and columns 129 / 130
        renamed to ``'FrameNumber'`` / ``'TimeStamp'``.
    """
    data_mocap = pd.read_csv(file,sep=' ',header=None)
    # Build the name list once instead of once per renamed column.
    marker_names = col_name()
    # Rename the coordinate columns.
    data_mocap = data_mocap.rename(
        columns={data_mocap.columns[a]: marker_names[a] for a in range(129)})
    # Rename the remaining (non-coordinate) columns.
    data_mocap = data_mocap.rename(columns={129:'FrameNumber',130:'TimeStamp'})
    return data_mocap

#fix NaN
def fix_NaN(data):
    """Treat exact zeros as missing samples and fill them by interpolation.

    Every cell equal to 0 (in every column) is replaced by NaN, the gaps are
    filled with ``DataFrame.interpolate()`` and whatever is still missing
    afterwards (e.g. a gap at the very start of a column) is set to 0.

    The previous implementation wrote the *string* ``'NaN'`` through chained
    indexing (``data.iloc[r][c] = ...``), which targets a temporary object and
    therefore did not reliably mark anything as missing.

    Args:
        data (pandas.DataFrame): numeric samples; not modified.

    Returns:
        pandas.DataFrame: a new frame of the same shape with the gaps filled.
    """
    # mask() returns a new frame with NaN wherever the condition holds.
    with_gaps = data.mask(data == 0)
    return with_gaps.interpolate().fillna(0)

def obtain_statistical_feature(window):
    """Summarise a window of samples as a single feature row.

    The features are, in order: mean of the window after subtracting its
    overall median, standard deviation, variance, skewness, kurtosis (all
    along axis 0) and the time-weighted variance from ``get_tw_col_var``.

    Args:
        window: array-like of samples.

    Returns:
        numpy.ndarray: shape ``(1, n_features)``.
    """
    pieces = [
        np.mean(window - np.median(window), axis=0),
        np.std(window, axis=0),
        np.var(window, axis=0),
        s.skew(window, axis=0),
        s.kurtosis(window, axis=0),
        get_tw_col_var(window),
    ]
    # Flatten every statistic and join them into one 1-D vector.
    flat = np.concatenate([np.ravel(np.array(piece)) for piece in pieces])
    return flat.reshape(1, len(flat))


def get_tw_variance(window):
    """Compute the time-weighted variance of every column of ``window``.

    Args:
        window (pandas.DataFrame): one column per signal.

    Returns:
        list: ``get_tw_col_var`` applied to each column, in column order.
    """
    n_cols = window.shape[1]
    return [get_tw_col_var(window.iloc[:, col_idx]) for col_idx in range(n_cols)]


def get_tw_col_var(column):
    """Time-weighted variance of a sequence of samples.

    Each squared deviation from the mean is weighted by
    ``exp(-0.5 * (total - i))``, so later samples contribute more than
    earlier ones.

    Samples are read by position. The previous ``column[i]`` lookup was
    label-based for a pandas Series and raised ``KeyError`` whenever the
    Series index did not start at 0 (e.g. a slice of a larger frame).

    Args:
        column: list, numpy array or pandas Series of numbers.

    Returns:
        The weighted sum of squared deviations (``0`` for empty input).
    """
    values = np.asarray(column)
    mean = np.mean(values)
    total = len(values)

    v_sum = 0
    for i, value in enumerate(values):
        weight = np.exp(-0.5 * (total - i))
        v_sum += weight * np.square(value - mean)
    return v_sum

# Finite-difference rate of change (velocity / acceleration) between rows.
def cal_va(data):
    """Divide row-to-row differences by the corresponding time step.

    For every pair of consecutive rows the difference is computed, the first
    column of that difference is dropped, and the remaining columns are
    divided by the difference of the ``"TimeStamp"`` column.

    Implemented with vectorised ``diff`` / ``div``; the earlier row-by-row
    version relied on ``DataFrame.append``, which no longer exists in
    pandas 2.x and was quadratic in the number of rows.

    Args:
        data (pandas.DataFrame): numeric samples with a ``"TimeStamp"`` column.

    Returns:
        pandas.DataFrame: ``len(data) - 1`` rows indexed ``0..n-2``; an empty
        frame when ``data`` has fewer than two rows.
    """
    if len(data) < 2:
        return pd.DataFrame()
    # Row 0 of diff() is all-NaN (no predecessor), so drop it and renumber.
    diffs = data.diff().iloc[1:].reset_index(drop=True)
    return diffs.iloc[:, 1:].div(diffs["TimeStamp"], axis=0)




In [3]:
def read_acc(file):
    """Load a space-separated accelerometer recording and label its columns.

    Args:
        file: Path (or file-like object) handed to ``pd.read_csv``; the file
            has no header row.

    Returns:
        pandas.DataFrame: the recording with its first four columns renamed to
        ``'X_acc'``, ``'Y_acc'``, ``'Z_acc'`` and ``'TimeStamp'``.
    """
    labels = ('X_acc', 'Y_acc', 'Z_acc', 'TimeStamp')
    raw = pd.read_csv(file, sep=' ', header=None)
    renames = {raw.columns[pos]: label for pos, label in enumerate(labels)}
    return raw.rename(columns=renames)



# Extract linear acceleration (gravity removed).
def linear_acc(data, alpha=0.9):
    """Remove the gravity component from the three accelerometer axes.

    For each of ``X_acc``, ``Y_acc`` and ``Z_acc`` gravity is estimated with
    the low-pass filter ``g = alpha * g + (1 - alpha) * sample`` (starting
    from ``g = 0``) and subtracted from the sample (high-pass).

    The result is written into a copy of ``data``. The earlier chained
    assignment ``data.iloc[r][c] = ...`` wrote into a temporary row object, so
    the filtered values were silently discarded for mixed-dtype frames.

    Args:
        data (pandas.DataFrame): must contain ``X_acc``, ``Y_acc``, ``Z_acc``;
            not modified.
        alpha (float): smoothing factor of the low-pass filter.

    Returns:
        pandas.DataFrame: copy of ``data`` whose three axis columns hold the
        gravity-free values; other columns are unchanged.
    """
    result = data.copy()
    for c in ['X_acc', 'Y_acc', 'Z_acc']:
        samples = np.asarray(data[c], dtype=float)
        linear = np.empty(len(samples))
        g = 0.0
        for r, sample in enumerate(samples):
            # Isolate the force of gravity with the low-pass filter.
            g = alpha * g + (1 - alpha) * sample
            # Remove the gravity contribution with the high-pass filter.
            linear[r] = sample - g
        result[c] = linear
    return result

In [4]:
def one_acc1(data):
    """Collapse every row of ``data`` into its Euclidean norm.

    All columns take part in the norm. Rows are looked up with
    ``data.loc[i]`` for ``i`` in ``0..len(data)-1``.

    Args:
        data (pandas.DataFrame): numeric samples.

    Returns:
        pandas.DataFrame: a single column ``"acc"`` with one magnitude per row.
    """
    # Vector magnitude of each row.
    magnitudes = [np.linalg.norm(data.loc[row_label])
                  for row_label in range(len(data))]
    result = pd.DataFrame(columns=["acc"])
    result["acc"] = magnitudes
    return result

In [5]:
def one_acc2(data):
    """Collapse every row of ``data`` into its Euclidean norm, ignoring time.

    The ``"TimeStamp"`` column is excluded from the norm. Rows are looked up
    with ``data.loc[i]`` for ``i`` in ``0..len(data)-1``.

    Args:
        data (pandas.DataFrame): numeric samples.

    Returns:
        pandas.DataFrame: a single column ``"acc"`` with one magnitude per row.
    """
    not_time = data.columns != "TimeStamp"
    # Vector magnitude of each row over the non-time columns.
    magnitudes = [np.linalg.norm(data.loc[row_label][not_time])
                  for row_label in range(len(data))]
    result = pd.DataFrame(columns=["acc"])
    result["acc"] = magnitudes
    return result

In [6]:
def features(f):
    """Pair each row with the next row's ``"acc"`` value.

    For every row except the last, the non-``TimeStamp`` values of that row
    are followed by ``f["acc"]`` of the following row. Each resulting row must
    have exactly three entries to fit the ``(0, 3)`` accumulator.

    Args:
        f (pandas.DataFrame): frame with an ``"acc"`` column.

    Returns:
        numpy.ndarray: shape ``(len(f) - 1, 3)`` (``(0, 3)`` when ``f`` has
        fewer than two rows).
    """
    stacked = np.empty([0, 3])
    for idx in range(len(f) - 1):
        current = np.array(f.loc[idx, f.columns != 'TimeStamp'])
        current = np.append(current, f["acc"][idx + 1])
        stacked = np.concatenate((stacked, current.reshape(1, len(current))), axis=0)
    return stacked

def my_index_multi(l, x):
    """Return every position in ``l`` whose element equals ``x``."""
    positions = []
    for pos, item in enumerate(l):
        if item == x:
            positions.append(pos)
    return positions

In [7]:
def main_functuion(sampling_rate,marker,sensor,
                   data_root='/Users/takeshin/odrive/sozolab_gdrive/students/Shingo/BerkeleyMHAD'):
    """Build the regression data set: mocap windows -> accelerometer magnitude.

    Six subjects are drawn with ``random.seed(0)`` / ``random.sample``. For
    each of their recordings (11 actions x records 1..5) the chosen marker's
    coordinates are cleaned with ``fix_NaN`` and reduced to one magnitude per
    frame with ``one_acc2``, then cut into consecutive non-overlapping windows
    of ``480 // sampling_rate`` frames. The matching accelerometer file is
    cleaned, gravity-filtered with ``linear_acc`` and reduced with ``one_acc2``.
    Window ``k`` is paired with accelerometer sample ``k``; the longer of the
    two sequences is truncated.

    The window width used to be hard-coded as 16, which only matched
    ``sampling_rate=30``; it is now derived from ``sampling_rate``.

    Args:
        sampling_rate (int): target rate; determines the window width.
        marker (int): mocap marker number (1..43) whose columns are used.
        sensor (int): accelerometer number used in the ``Shimmer0<n>`` path.
        data_root (str): directory containing ``Mocap/OpticalData`` and
            ``Accelerometer``.

    Returns:
        tuple: ``(x, y)`` where ``x`` has shape ``(n, 480 // sampling_rate)``
        and ``y`` has shape ``(n,)``.
    """
    # Mocap frames per output sample; 480 is presumably the mocap frame
    # rate in Hz -- TODO confirm against the data set description.
    window = 480 // sampling_rate
    random.seed(0)
    print("marker:",marker)
    print("sensor:",sensor)
    subjects = random.sample(["01","02","03","04","05","06","07","08","09","10","11","12"],6)
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    cols = ['m'+str(marker)+axis for axis in ('x','y','z')] + ["TimeStamp"]

    # Collect per-recording pieces and join once at the end (linear, not quadratic).
    x_parts = [np.empty([0, window])]
    y_parts = [np.empty([0,])]

    for subject in subjects:
        for action in actions:
            for record in range(1,6):
                suffix = '_s'+str(subject)+'_a'+str(action)+'_r0'+str(record)+'.txt'
                moc_name = 'moc'+suffix
                acc_name = 'acc_h0'+str(sensor)+suffix
                try:
                    data = read_mocap(data_root+'/Mocap/OpticalData/'+moc_name)
                    print(moc_name)
                    data = one_acc2(fix_NaN(data[cols]))

                    # Load the measured accelerometer data.
                    r_a = read_acc(data_root+'/Accelerometer/Shimmer0'+str(sensor)+'/'+acc_name)
                    print(acc_name)
                    r_a = one_acc2(linear_acc(fix_NaN(r_a)))
                except FileNotFoundError:
                    # Recordings that are missing on disk are skipped.
                    continue

                # Cut the per-frame magnitudes into full windows, one row each.
                n_windows = len(data) // window
                mc_ds = np.asarray(data, dtype=float)[:n_windows*window].reshape(n_windows, window)

                # Align both streams on the shorter one.
                n = min(len(mc_ds), len(r_a))
                x_parts.append(mc_ds[:n])
                y_parts.append(np.asarray(r_a["acc"], dtype=float)[:n])

    x = np.concatenate(x_parts, axis=0)
    y = np.concatenate(y_parts, axis=0)
    return x, y

In [8]:
# Build the regression training set with sampling_rate=30, marker=30, sensor=3.
x,y = main_functuion(30,30,3)   

marker: 30
sensor: 3
moc_s07_a01_r01.txt
acc_h03_s07_a01_r01.txt
moc_s07_a01_r02.txt
acc_h03_s07_a01_r02.txt
moc_s07_a01_r03.txt
acc_h03_s07_a01_r03.txt
moc_s07_a01_r04.txt
acc_h03_s07_a01_r04.txt
moc_s07_a01_r05.txt
acc_h03_s07_a01_r05.txt
moc_s07_a02_r01.txt
acc_h03_s07_a02_r01.txt
moc_s07_a02_r02.txt
acc_h03_s07_a02_r02.txt
moc_s07_a02_r03.txt
acc_h03_s07_a02_r03.txt
moc_s07_a02_r04.txt
acc_h03_s07_a02_r04.txt
moc_s07_a02_r05.txt
acc_h03_s07_a02_r05.txt
moc_s07_a03_r01.txt
acc_h03_s07_a03_r01.txt
moc_s07_a03_r02.txt
acc_h03_s07_a03_r02.txt
moc_s07_a03_r03.txt
acc_h03_s07_a03_r03.txt
moc_s07_a03_r04.txt
acc_h03_s07_a03_r04.txt
moc_s07_a03_r05.txt
acc_h03_s07_a03_r05.txt
moc_s07_a04_r01.txt
acc_h03_s07_a04_r01.txt
moc_s07_a04_r02.txt
acc_h03_s07_a04_r02.txt
moc_s07_a04_r03.txt
acc_h03_s07_a04_r03.txt
moc_s07_a04_r04.txt
acc_h03_s07_a04_r04.txt
moc_s07_a04_r05.txt
acc_h03_s07_a04_r05.txt
moc_s07_a05_r01.txt
acc_h03_s07_a05_r01.txt
moc_s07_a05_r02.txt
acc_h03_s07_a05_r02.txt
moc_s07_a05

moc_s05_a05_r02.txt
acc_h03_s05_a05_r02.txt
moc_s05_a05_r03.txt
acc_h03_s05_a05_r03.txt
moc_s05_a05_r04.txt
acc_h03_s05_a05_r04.txt
moc_s05_a05_r05.txt
acc_h03_s05_a05_r05.txt
moc_s05_a06_r01.txt
acc_h03_s05_a06_r01.txt
moc_s05_a06_r02.txt
acc_h03_s05_a06_r02.txt
moc_s05_a06_r03.txt
acc_h03_s05_a06_r03.txt
moc_s05_a06_r04.txt
acc_h03_s05_a06_r04.txt
moc_s05_a06_r05.txt
acc_h03_s05_a06_r05.txt
moc_s05_a07_r01.txt
acc_h03_s05_a07_r01.txt
moc_s05_a07_r02.txt
acc_h03_s05_a07_r02.txt
moc_s05_a07_r03.txt
acc_h03_s05_a07_r03.txt
moc_s05_a07_r04.txt
acc_h03_s05_a07_r04.txt
moc_s05_a07_r05.txt
acc_h03_s05_a07_r05.txt
moc_s05_a08_r01.txt
acc_h03_s05_a08_r01.txt
moc_s05_a08_r02.txt
acc_h03_s05_a08_r02.txt
moc_s05_a08_r03.txt
acc_h03_s05_a08_r03.txt
moc_s05_a08_r04.txt
acc_h03_s05_a08_r04.txt
moc_s05_a08_r05.txt
acc_h03_s05_a08_r05.txt
moc_s05_a09_r01.txt
acc_h03_s05_a09_r01.txt
moc_s05_a09_r02.txt
acc_h03_s05_a09_r02.txt
moc_s05_a09_r03.txt
acc_h03_s05_a09_r03.txt
moc_s05_a09_r04.txt
acc_h03_s05_

In [9]:
# Check that the feature matrix and the target vector have the same number of rows.
print(x.shape)
print(y.shape)

(69637, 16)
(69637,)


In [10]:
# Conversion model (regression): mocap window -> accelerometer magnitude.
# random_state is fixed so that refitting on the same data gives the same model.
#clf2 = RandomForestRegressor()
clf2 = ensemble.BaggingRegressor(tree.DecisionTreeRegressor(), n_estimators=100, max_samples=0.3, random_state=0)
#clf2 = LinearRegression()
clf2.fit(x, y)

BaggingRegressor(base_estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=0.3, n_estimators=100, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [11]:
def main_functuion2(sampling_rate,*markers, model=None,
                    data_root='/Users/takeshin/odrive/sozolab_gdrive/students/Shingo/BerkeleyMHAD'):
    """Build the activity-recognition data set from predicted acceleration.

    Uses the six subjects that ``random.seed(0)`` / ``random.sample`` does NOT
    select. For each of their mocap recordings the chosen markers' coordinates
    are cleaned with ``fix_NaN``, reduced to one magnitude per frame with
    ``one_acc2`` and cut into windows of ``480 // sampling_rate`` frames. The
    regressor predicts one value per window, and the predicted sequence is
    summarised by ``obtain_statistical_feature``. The action id is the label.

    Changes from the earlier version: the window width is derived from
    ``sampling_rate`` instead of being fixed at 16, and held-out subjects are
    visited in a fixed order instead of set-iteration order, so the row order
    is reproducible between runs.

    Args:
        sampling_rate (int): target rate; determines the window width.
        *markers (int): mocap marker numbers whose columns are used.
        model: fitted regressor with ``predict``; defaults to the
            notebook-level ``clf2``.
        data_root (str): directory containing ``Mocap/OpticalData``.

    Returns:
        tuple: ``(X, y)`` with one feature row and one action label per
        recording.
    """
    # Mocap frames per output sample; 480 is presumably the mocap frame
    # rate in Hz -- TODO confirm against the data set description.
    window = 480 // sampling_rate
    regressor = clf2 if model is None else model

    random.seed(0)
    all_subjects = ["01","02","03","04","05","06","07","08","09","10","11","12"]
    training_subjects = random.sample(all_subjects,6)
    # Held-out subjects, in a deterministic order.
    subjects = [subject for subject in all_subjects if subject not in training_subjects]
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    cols = ['m'+str(i)+axis for i in markers for axis in ('x','y','z')] + ["TimeStamp"]

    # NOTE(review): obtain_statistical_feature yields 6 values for a 1-D
    # prediction sequence, so this width only matches for a single marker;
    # confirm the intended behaviour for several markers.
    feature_rows = [np.empty([0,6*len(markers)])]
    labels = []

    for subject in subjects:
        for action in actions:
            for record in range(1,6):
                moc_name = 'moc_s'+str(subject)+'_a'+str(action)+'_r0'+str(record)+'.txt'
                try:
                    data = read_mocap(data_root+'/Mocap/OpticalData/'+moc_name)
                except FileNotFoundError:
                    # Recordings that are missing on disk are skipped.
                    continue
                print(moc_name)
                data = one_acc2(fix_NaN(data[cols]))

                # Cut the per-frame magnitudes into full windows, one row each.
                n_windows = len(data) // window
                mc_ds = np.asarray(data, dtype=float)[:n_windows*window].reshape(n_windows, window)

                a_predict = regressor.predict(mc_ds).tolist()
                feature_rows.append(obtain_statistical_feature(a_predict))
                labels.append(action)

    X = np.concatenate(feature_rows, axis=0)
    y = np.array(labels)
    return X, y

In [None]:
# Build the classification set with sampling_rate=30 and marker 30;
# main_functuion2 predicts with the regressor clf2 fitted above.
x2,y2= main_functuion2(30,30)

moc_s03_a01_r01.txt
moc_s03_a01_r02.txt
moc_s03_a01_r03.txt
moc_s03_a01_r04.txt
moc_s03_a01_r05.txt
moc_s03_a02_r01.txt
moc_s03_a02_r02.txt
moc_s03_a02_r03.txt
moc_s03_a02_r04.txt
moc_s03_a02_r05.txt
moc_s03_a03_r01.txt
moc_s03_a03_r02.txt
moc_s03_a03_r03.txt
moc_s03_a03_r04.txt
moc_s03_a03_r05.txt
moc_s03_a04_r01.txt
moc_s03_a04_r02.txt
moc_s03_a04_r03.txt
moc_s03_a04_r04.txt
moc_s03_a04_r05.txt
moc_s03_a05_r01.txt
moc_s03_a05_r02.txt
moc_s03_a05_r03.txt
moc_s03_a05_r04.txt
moc_s03_a05_r05.txt
moc_s03_a06_r01.txt
moc_s03_a06_r02.txt
moc_s03_a06_r03.txt
moc_s03_a06_r04.txt
moc_s03_a06_r05.txt
moc_s03_a07_r01.txt
moc_s03_a07_r02.txt
moc_s03_a07_r03.txt
moc_s03_a07_r04.txt
moc_s03_a07_r05.txt
moc_s03_a08_r01.txt
moc_s03_a08_r02.txt
moc_s03_a08_r03.txt
moc_s03_a08_r04.txt
moc_s03_a08_r05.txt
moc_s03_a09_r01.txt
moc_s03_a09_r02.txt
moc_s03_a09_r03.txt
moc_s03_a09_r04.txt
moc_s03_a09_r05.txt
moc_s03_a10_r01.txt
moc_s03_a10_r02.txt
moc_s03_a10_r03.txt
moc_s03_a10_r04.txt
moc_s03_a10_r05.txt


In [None]:
def ml_ar(X,y):
    """Train and evaluate a random forest on a single 70/30 split.

    Prints the test accuracy, the per-action classification report and the
    confusion matrix.

    ``train_test_split`` is taken from ``sklearn.model_selection``; the old
    ``crv`` alias was never imported, so the function raised ``NameError``.
    ``labels`` is passed explicitly so the report and the matrix always cover
    all 11 actions, even if the test split misses one.

    Args:
        X: feature matrix, one row per recording.
        y: action labels (``"01"`` .. ``"11"``), one per row of ``X``.
    """
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
    clf = RandomForestClassifier(n_estimators=20, random_state=42)
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    conf = confusion_matrix(y_test, y_predict, labels=actions)
    print (accuracy_score(y_test, y_predict) ) 
    print(classification_report(y_test, y_predict, labels=actions, target_names=actions))
    print(conf)

In [None]:
#ml_ar(x2,y2)

In [None]:
def ml_ar2(X,y):
    """Evaluate a scaler + random-forest pipeline with 3-fold cross-validation.

    For every fold the train/test accuracy and the per-action classification
    report are printed; the mean test accuracy is printed at the end.

    ``labels`` is passed explicitly so the report always covers all 11
    actions, even if a fold's test part misses one (otherwise
    ``target_names`` would not line up with the classes that are present).

    Args:
        X (numpy.ndarray): feature matrix, one row per recording.
        y (numpy.ndarray): action labels (``"01"`` .. ``"11"``).
    """
    actions = ["01","02","03","04","05","06","07","08","09","10","11"]

    # Standardise the features, then classify.
    # Alternative classifier that was tried: SVC(kernel='linear').
    model = Pipeline([
        ('standardize', StandardScaler()),
        ('clf', RandomForestClassifier(n_estimators=20, random_state=42)),
    ])

    seed = 11
    kf = KFold(n_splits=3, random_state=seed, shuffle=True)
    accuracy = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(X_train, y_train)
        acc_train = model.score(X_train, y_train)
        acc_test = model.score(X_test, y_test)
        accuracy.append(acc_test)
        y_pred = model.predict(X_test)
        print("*Accuracy* train:",acc_train, "test:",acc_test)
        print(classification_report(y_test, y_pred, labels=actions, target_names=actions))
        print("-----------")
    print("*test_ave*",sum(accuracy)/len(accuracy))

In [None]:
# Run the 3-fold cross-validated evaluation on the predicted-acceleration features.
ml_ar2(x2,y2)