In [161]:
import os
import numpy as np
from sklearn.decomposition import PCA
import torch

def read_amc_file(file_path):
    print('Reading file %s'%file_path)
    num_sensors = 29
    walk_timeseries_data = []
    with open(file_path, 'r') as file:
        lines = file.read().splitlines() # Each line is a list of elements
        lines = lines[3:]
        p = 0 # index in lines
        while True: # Each line in [lines] is 'sensor_name sensor_data
            timestamp = int(lines[p])
            timestamp_data = []

            for i in range(num_sensors):
                sensor_line = lines[p+i+1].split(' ') # Read from p + i + 1 line (measurement of first sensor of timestamp p)
                next = False
                sensor_data = []
                k = 1
                while next == False:  # Reading sensor data
                    try:
                        s = float(sensor_line[k])
                        sensor_data.append(s)
                        k += 1
                    except:
                        next = True
                timestamp_data.append(sensor_data) 
                
            walk_timeseries_data.append(timestamp_data)
            p += num_sensors + 1
            if p >= len(lines):
                break
        return walk_timeseries_data

def column_pca(walk_timeseries_data, rowshape=False):
    pca = PCA(1)
    num_sensors = 29 # D
    walk_timeseries_data_pca = np.empty((len(walk_timeseries_data), num_sensors)) # T x D
    j = 0
    for sensor in range(num_sensors):
        sensor_data_over_time = np.array([walk_timeseries_data[i][sensor] for i in range(len(walk_timeseries_data))]) # T x M, M:[1..6]
        if sensor_data_over_time.shape[1] > 1: # M > 1
            sensor_data_over_time_pca = pca.fit_transform(sensor_data_over_time) # T x 1
        else:
            sensor_data_over_time_pca = sensor_data_over_time # M == 1
        walk_timeseries_data_pca[:, j:j+1] = sensor_data_over_time_pca
        j += 1
    if rowshape:
        walk_timeseries_data_pca = walk_timeseries_data_pca.reshape(walk_timeseries_data_pca.shape[0]*walk_timeseries_data_pca.shape[1]) # (1, T*D) shape
    return walk_timeseries_data_pca

def build_dataset_long(X):
    limit_time = 100 # T
    num_sensors = 29 # D
    nrows = limit_time * len(X)
    X_pca = np.empty((nrows, num_sensors)) # NT x D
    i = 0
    for walk_timeseries_data in X:
        # apply column-PCA separately to each file/time-series
        walk_timeseries_data_pca = column_pca(walk_timeseries_data, rowshape=False)[:limit_time,:] # T x D
        X_pca[i:i+limit_time, :] = walk_timeseries_data_pca
        i += limit_time 
    return X_pca

def build_dataset_wide_full(X, T):
    limit_time = T # T
    nrows = len(X)
    num_sensors = 29
    D = 62
    X_full = np.empty((nrows, limit_time*D)) # N x TD
    j = 0
    for walk_timeseries_data in X: # for each file/time-series
        file_measures = []
        for t in range(limit_time):
            file_measures_time_t = []
            for i in range(num_sensors):
                file_measures_time_t.append(walk_timeseries_data[t][i])
            file_measures_time_t = np.array([m for sublist in file_measures_time_t for m in sublist]) # D,
            file_measures.append(file_measures_time_t)
        X_full[j:j+1,:] = np.concatenate(file_measures, axis=0).reshape(1,-1)
        j += 1
    return X_full

def build_dataset_wide(X):
    limit_time = 100
    num_sensors = 29
    ncols = num_sensors * limit_time
    X_pca = np.empty((len(X), num_sensors * limit_time)) # N x TD
    i = 0
    for walk_timeseries_data in X:
        walk_timeseries_data_pca = column_pca(walk_timeseries_data, rowshape=True)[:ncols] # 1 x TD
        X_pca[i:i+1, :] = walk_timeseries_data_pca.reshape(1,-1)
        i += 1
    return X_pca

def build_dataset_tensor(X):
    limit_time = 100
    num_sensors = 29
    N = len(X)
    X_pca = np.empty((N, limit_time, num_sensors))  # N x T x D
    i = 0
    for walk_timeseries_data in X:
        walk_timeseries_data_pca = column_pca(walk_timeseries_data, rowshape=False)[:limit_time,:] # T x D
        X_pca[i:i+1, :, :] = walk_timeseries_data_pca
        i += 1
    return X_pca

In [168]:
# Read walk files
X = []
for filename in os.listdir('walk/'):
    if filename.endswith(".amc") and filename.startswith("07"):
        file_path = os.path.join('walk/', filename)
        walk_timeseries_data = read_amc_file(file_path)
        X.append(walk_timeseries_data)
        #walk_timeseries_data_pca = column_pca(walk_timeseries_data)
# NOTE: SUBJECT 07_12 THERE WAS NOT ltoes MEASUREMENT FOR THE LAST TIMESTAMP, I PUT IT EQUAL TO THE PREVIOUS TIMESTAMP


T = 100 # -> I fixed a limit on the number of timestaps i consider for each file 
N = len(X) # of files (samples)
D = 62
X_NxTD_walk = build_dataset_wide_full(X, T) # N x TD [D=62]
X_NTxD_walk = X_NxTD_walk.reshape(N*T,D) # TD x N [D=62]
#X_long_walk = build_dataset_long(X) # NT x D [D=29]
#X_wide_walk = build_dataset_wide(X) # N x TD [D=29]
#X_tensor_walk = build_dataset_tensor(X) # N x T x D [D=29]

Reading file walk/15_03.amc
Reading file walk/07_03.amc
Reading file walk/07_02.amc
Reading file walk/16_47.amc
Reading file walk/15_14.amc
Reading file walk/07_01.amc
Reading file walk/15_01.amc
Reading file walk/07_05.amc
Reading file walk/07_11.amc
Reading file walk/07_10.amc
Reading file walk/07_04.amc
Reading file walk/05_01.amc
Reading file walk/07_12.amc
Reading file walk/07_06.amc
Reading file walk/07_07.amc
Reading file walk/16_31.amc
Reading file walk/16_25.amc
Reading file walk/16_19.amc
Reading file walk/16_18.amc
Reading file walk/16_24.amc
Reading file walk/16_30.amc
Reading file walk/16_26.amc
Reading file walk/16_32.amc
Reading file walk/16_33.amc
Reading file walk/16_27.amc
Reading file walk/16_23.amc
Reading file walk/16_22.amc
Reading file walk/16_34.amc
Reading file walk/16_20.amc
Reading file walk/08_08.amc
Reading file walk/08_09.amc
Reading file walk/16_21.amc
Reading file walk/06_01.amc
Reading file walk/08_10.amc
Reading file walk/08_05.amc
Reading file walk/08

In [169]:
# Read run files
X = []
for filename in os.listdir('run/'):
    if filename.endswith(".amc"):
        file_path = os.path.join('run/', filename)
        walk_timeseries_data = read_amc_file(file_path)
        X.append(walk_timeseries_data)
N = len(X)
T = 100
D = 62
X_NxTD_run = build_dataset_wide_full(X, T) # N x TD [D=62]
X_NTxD_run = X_NxTD_run.reshape(N*T,D) # TD x N [D=62]
#X_long_run = build_dataset_long(X) # NT x D
#X_wide_run = build_dataset_wide(X) # N x TD
#X_wide_run_full = build_dataset_wide_full(X) # N x TD [D=62]
#X_tensor_run = build_dataset_tensor(X) # N x T x D

Reading file run/16_52.amc
Reading file run/16_46.amc
Reading file run/09_06.amc
Reading file run/09_07.amc
Reading file run/16_53.amc
Reading file run/16_45.amc
Reading file run/16_51.amc
Reading file run/09_11.amc
Reading file run/09_05.amc
Reading file run/09_04.amc
Reading file run/09_10.amc
Reading file run/16_50.amc
Reading file run/16_44.amc
Reading file run/16_40.amc
Reading file run/16_54.amc
Reading file run/09_01.amc
Reading file run/16_55.amc
Reading file run/16_41.amc
Reading file run/16_57.amc
Reading file run/16_43.amc
Reading file run/09_03.amc
Reading file run/09_02.amc
Reading file run/16_42.amc
Reading file run/16_56.amc
Reading file run/16_37.amc
Reading file run/35_17.amc
Reading file run/16_36.amc
Reading file run/16_08.amc
Reading file run/16_35.amc
Reading file run/16_38.amc
Reading file run/35_18.amc
Reading file run/35_24.amc
Reading file run/35_25.amc
Reading file run/35_19.amc
Reading file run/16_39.amc
Reading file run/35_26.amc
Reading file run/35_22.amc
R

In [170]:
# Merge data
Y_walk = np.zeros(X_NxTD_walk.shape[0])
Y_run = np.ones(X_NxTD_run.shape[0])
Y = np.concatenate((Y_walk, Y_run)).reshape(-1,1)
X_NxTD = np.vstack((X_NxTD_walk, X_NxTD_run))
X_NTxD = np.vstack((X_NTxD_walk, X_NTxD_run))
print('N.walk = ', X_NxTD_walk.shape[0])
print('N.run = ', X_NxTD_run.shape[0])

N.walk =  59
N.run =  46


In [172]:
# Create sensor labels
sensors_names = ['root','lowerback','upperback','thorax','lowerneck','upperneck','head','rclavicle','rhumerus','rradius','rwrist','rhand','rfingers','rthumb','lclavicle','lhumerus','lradius','lwrist','lhand','lfingers','lthumb','rfemur','rtibia','rfoot','rtoes','lfemur','ltibia','lfoot','ltoes']
s_mask = []
for sensor_j_measure in X[0][0]:
    s_mask.append(len(sensor_j_measure))
s_names_mask = []
for i, sensor_name in enumerate(sensors_names):
    s_names_mask.append([sensor_name] * s_mask[i])
s_names_mask_flat = []
for row in s_names_mask:
    s_names_mask_flat.extend(row)
sensors_names = np.array(s_names_mask_flat) 
sensors_mask = np.cumsum(np.array(s_mask))

In [175]:
np.savez('mocap_data.npz', X_NxTD=X_NxTD, X_NTxD=X_NTxD, Y=Y, sensors_names=sensors_names, sensors_mask=sensors_mask)