In [203]:
import os
import numpy as np
from sklearn.decomposition import PCA
import torch

def read_amc_file(file_path):
    print('Reading file %s'%file_path)
    num_sensors = 29
    walk_timeseries_data = []
    with open(file_path, 'r') as file:
        lines = file.read().splitlines() # Each line is a list of elements
        lines = lines[3:]
        p = 0 # index in lines
        while True: # Each line in [lines] is 'sensor_name sensor_data
            timestamp = int(lines[p])
            timestamp_data = []

            for i in range(num_sensors):
                sensor_line = lines[p+i+1].split(' ') # Read from p + i + 1 line (measurement of first sensor of timestamp p)
                next = False
                sensor_data = []
                k = 1
                while next == False:  # Reading sensor data
                    try:
                        s = float(sensor_line[k])
                        sensor_data.append(s)
                        k += 1
                    except:
                        next = True
                timestamp_data.append(sensor_data) 
                
            walk_timeseries_data.append(timestamp_data)
            p += num_sensors + 1
            if p >= len(lines):
                break
        return walk_timeseries_data

def column_pca(walk_timeseries_data, rowshape=False):
    pca = PCA(1)
    num_sensors = 29
    walk_timeseries_data_pca = np.empty((len(walk_timeseries_data), num_sensors))
    i = 0
    for sensor in range(num_sensors):
        sensor_data_over_time = np.array([walk_timeseries_data[i][sensor] for i in range(len(walk_timeseries_data))])
        if sensor_data_over_time.shape[1] > 1:
            sensor_data_over_time_pca = pca.fit_transform(sensor_data_over_time)
        else:
            sensor_data_over_time_pca = sensor_data_over_time
        walk_timeseries_data_pca[:, i:i+1] = sensor_data_over_time_pca
        i += 1
    if rowshape:
        walk_timeseries_data_pca = walk_timeseries_data_pca.reshape(walk_timeseries_data_pca.shape[0]*walk_timeseries_data_pca.shape[1]) # (1, T*D) shape
    return walk_timeseries_data_pca

def build_dataset_long(X):
    limit_time = 100
    num_sensors = 29
    nrows = limit_time * len(X)
    X_pca = np.empty((nrows, num_sensors))
    i = 0
    for walk_timeseries_data in X:
        walk_timeseries_data_pca = column_pca(walk_timeseries_data)[:limit_time,:]
        X_pca[i:i+limit_time, :] = walk_timeseries_data_pca
        i += limit_time 
    return X_pca

def build_dataset_wide(X):
    limit_time = 100
    num_sensors = 29
    ncols = num_sensors * limit_time
    X_pca = np.empty((len(X), num_sensors * limit_time))
    i = 0
    for walk_timeseries_data in X:
        walk_timeseries_data_pca = column_pca(walk_timeseries_data, rowshape=True)[:ncols]
        X_pca[i:i+1, :] = walk_timeseries_data_pca.reshape(1,-1)
        i += 1
    return X_pca

In [210]:
# Read walk files
X = []
for filename in os.listdir('walk/'):
    if filename.endswith(".amc"):
        file_path = os.path.join('walk/', filename)
        walk_timeseries_data = read_amc_file(file_path)
        X.append(walk_timeseries_data)
        #walk_timeseries_data_pca = column_pca(walk_timeseries_data)
# NOTE: SUBJECT 07_12 THERE WAS NOT ltoes MEASUREMENT FOR THE LAST TIMESTAMP, I PUT IT EQUAL TO THE PREVIOUS TIMESTAMP

# N = 100 -> I fixed a limit on the number of timestaps i consider for each file 
X_long_walk = build_dataset_long(X) # NT x D
X_wide_walk = build_dataset_wide(X) # N x TD

Reading file walk/07_03.amc
Reading file walk/07_02.amc
Reading file walk/07_01.amc
Reading file walk/07_05.amc


Reading file walk/07_11.amc
Reading file walk/07_10.amc
Reading file walk/07_04.amc
Reading file walk/07_12.amc
Reading file walk/07_06.amc
Reading file walk/07_07.amc
Reading file walk/08_08.amc
Reading file walk/08_09.amc
Reading file walk/08_10.amc
Reading file walk/08_05.amc
Reading file walk/08_11.amc
Reading file walk/08_06.amc
Reading file walk/08_02.amc
Reading file walk/08_03.amc
Reading file walk/08_01.amc
Reading file walk/07_09.amc
Reading file walk/07_08.amc


In [211]:
# Read run files
X = []
for filename in os.listdir('run/'):
    if filename.endswith(".amc"):
        file_path = os.path.join('run/', filename)
        walk_timeseries_data = read_amc_file(file_path)
        X.append(walk_timeseries_data)

# N = 100 -> I fixed a limit on the number of timestaps i consider for each file 
X_long_run = build_dataset_long(X) # NT x D
X_wide_run = build_dataset_wide(X) # N x TD

Reading file run/16_46.amc
Reading file run/09_06.amc
Reading file run/09_07.amc
Reading file run/16_45.amc
Reading file run/09_11.amc
Reading file run/09_05.amc
Reading file run/09_04.amc
Reading file run/09_10.amc
Reading file run/09_01.amc
Reading file run/16_55.amc
Reading file run/16_57.amc
Reading file run/09_03.amc
Reading file run/09_02.amc
Reading file run/16_56.amc
Reading file run/16_36.amc
Reading file run/16_35.amc
Reading file run/09_09.amc
Reading file run/09_08.amc


In [212]:
# Merge data
Y_walk = np.zeros(X_wide_walk.shape[0])
Y_run = np.ones(X_wide_run.shape[0])
Y_walk_run = np.concatenate((Y_walk, Y_run))
X_wide_walk_run = np.vstack((X_wide_walk, X_wide_run))

In [213]:
# Normalize and save data
X_wide_walk_run = (X_wide_walk_run - np.mean(X_wide_walk_run, axis=0)) / np.std(X_wide_walk_run, axis=0)
Y = Y_walk_run.reshape(-1,1)
inputs = torch.tensor(X_wide_walk_run, dtype=torch.float64)
targets = torch.tensor(Y, dtype=torch.float64)
torch.save([inputs, targets], './' + 'mocap' + '.pth')