In [1]:
import numpy as np
from scipy import stats as s
from scipy import constants

In [2]:
import os


def list_dat_files(folder):
    """Return the paths of all files in ``folder`` and print each file's labels.

    For every file, the whitespace-separated rows are parsed and the unique
    values of column 1 (the label column used later in this notebook) are
    printed, so the classes present in each recording can be inspected.

    Parameters
    ----------
    folder : str
        Directory to scan, including the trailing slash (e.g. ``'Protocol/'``);
        file names are appended to it verbatim.

    Returns
    -------
    list of str
        ``folder + file_name`` for every directory entry, in sorted order.
    """
    paths = []
    # os.listdir gives no ordering guarantee; sort so re-runs are reproducible.
    for file_name in sorted(os.listdir(folder)):
        path = folder + file_name
        # The context manager closes the handle as soon as the rows are parsed.
        with open(path) as handle:
            rows = np.array([line.split() for line in handle])
        print(np.unique(rows[:, 1]))
        paths.append(path)
    return paths


print("Loading Optional")
files_optional = list_dat_files('Optional/')

print("Loading Protocol")
files_protocol = list_dat_files('Protocol/')

Loading Optional
['0' '11' '18' '19' '9']
['0' '10' '19']
['0' '10' '18' '19']
['0' '10' '18' '19' '20']
['0' '10' '18' '19' '20']
Loading Protocol
['0' '1' '12' '13' '16' '17' '2' '24' '3' '4' '5' '6' '7']
['0' '1' '12' '13' '16' '17' '2' '24' '3' '4' '5' '6' '7']
['0' '1' '12' '13' '16' '17' '2' '3' '4']
['0' '1' '12' '13' '16' '17' '2' '3' '4' '5' '6' '7']
['0' '1' '12' '13' '16' '17' '2' '24' '3' '4' '5' '6' '7']
['0' '1' '12' '13' '16' '17' '2' '24' '3' '4' '5' '6' '7']
['0' '1' '12' '13' '16' '17' '2' '3' '4' '5' '6' '7']
['0' '1' '12' '13' '16' '17' '2' '24' '3' '4' '5' '6' '7']
['0' '24']


In [3]:
# Protocol recordings come first, followed by the optional ones.
all_files = [*files_protocol, *files_optional]
all_files

['Protocol/subject101.dat',
 'Protocol/subject102.dat',
 'Protocol/subject103.dat',
 'Protocol/subject104.dat',
 'Protocol/subject105.dat',
 'Protocol/subject106.dat',
 'Protocol/subject107.dat',
 'Protocol/subject108.dat',
 'Protocol/subject109.dat',
 'Optional/subject101.dat',
 'Optional/subject105.dat',
 'Optional/subject106.dat',
 'Optional/subject108.dat',
 'Optional/subject109.dat']

In [4]:
# Protocol classes (union over all Protocol files): ['0' '1' '2' '3' '4' '5' '6' '7' '12' '13' '16' '17' '24']
# Optional classes (union over all Optional files): ['0' '9' '10' '11' '18' '19' '20']

In [5]:
# --- Windowing configuration -------------------------------------------------
WINDOW_SEC = 2              # window length, in seconds
WINDOW_OVERLAP_SEC = 0.5    # overlap between consecutive windows, in seconds
DEVICE_HZ = 100             # rows per second assumed when converting seconds to rows
WINDOW_LEN = WINDOW_SEC * DEVICE_HZ
WINDOW_OVERLAP_LEN = int(DEVICE_HZ * WINDOW_OVERLAP_SEC)
WINDOW_STEP_LEN = WINDOW_LEN - WINDOW_OVERLAP_LEN

# --- Columns of interest in the raw .dat rows --------------------------------
label_idx = 1
timestamp_idx = 0  # idx 2-> Heart rate idx 3 ->IMU Hand Temperature
# Three x/y/z triplets (presumably one per IMU -- confirm against the dataset docs).
x_idx_1, y_idx_1, z_idx_1 = 4, 5, 6
x_idx_2, y_idx_2, z_idx_2 = 21, 22, 23
x_idx_3, y_idx_3, z_idx_3 = 38, 39, 40

selected_columns = [
    timestamp_idx, label_idx,
    x_idx_1, y_idx_1, z_idx_1,
    x_idx_2, y_idx_2, z_idx_2,
    x_idx_3, y_idx_3, z_idx_3,
]
# Layout after the column selection: 0 = timestamp, 1 = label, 2..10 = sensor channels.
reduced_label_col = 1
reduced_sensor_cols = list(range(2, 11))

X = []    # one (WINDOW_LEN, 9) array of sensor channels per window
Y = []    # one (WINDOW_LEN,) array of per-sample labels per window
pid = []  # subject id (string) per window

for data_file in all_files:
    # Close the file as soon as its rows are parsed instead of leaking the handle.
    with open(data_file) as handle:
        datContent = np.array([line.split() for line in handle])

    datContent = datContent[:, selected_columns].astype(float)

    # Discard every row that has a NaN in any of the selected columns.
    datContent = datContent[~np.isnan(datContent).any(axis=1)]

    # 'Protocol/subject101.dat' -> '101'
    file_name = data_file.split('/')[1].strip()
    person_id = file_name.split('.dat')[0].strip().split('subject')[1]

    # Only start offsets that leave room for a full window are visited, so the
    # trailing partial window is skipped without a per-iteration shape check.
    for start in range(0, len(datContent) - WINDOW_LEN + 1, WINDOW_STEP_LEN):
        stop = start + WINDOW_LEN
        X.append(datContent[start:stop, reduced_sensor_cols])
        Y.append(datContent[start:stop, reduced_label_col])
        pid.append(person_id)

X = np.array(X)
Y = np.array(Y)
pid = np.array(pid)

In [6]:
def clean_up_label(X, labels, pid, transition_class=0):
    """Drop transition-dominated windows and reduce each window to one label.

    Two filters are applied in order:

    1. Windows in which more than 50% of the per-sample labels equal
       ``transition_class`` are removed.
    2. Each remaining window is assigned its most frequent label (ties are
       resolved in favour of the smallest label value); windows whose
       majority label is ``transition_class`` are removed as well.

    Parameters
    ----------
    X : array-like, first dimension = number of windows
        Window data; rows are filtered in lock-step with ``labels``.
    labels : array-like of shape (n_windows, samples_per_window)
        Per-sample labels of every window.
    pid : array-like of shape (n_windows,)
        Subject identifier of every window.
    transition_class : int, optional
        Label value treated as "no activity / transition". Defaults to 0.

    Returns
    -------
    X : numpy.ndarray
        The windows that survived both filters.
    final_labels : numpy.ndarray of int, shape (n_kept,)
        Majority label of every surviving window (always 1-D).
    pid : numpy.ndarray
        Subject identifiers of the surviving windows.
    """
    X = np.asarray(X)
    labels = np.asarray(labels)
    pid = np.asarray(pid)

    # Nothing to clean: avoid indexing labels.shape[1] on an empty 1-D array.
    if labels.size == 0:
        return X, np.empty(0, dtype=int), pid

    # 1. remove windows with >50% transition samples
    samples_per_window = labels.shape[1]
    transition_counts = np.sum(labels == transition_class, axis=1)
    rows2keep = transition_counts <= 0.5 * samples_per_window

    labels = labels[rows2keep]
    X = X[rows2keep]
    pid = pid[rows2keep]

    # 2. majority voting for the label of each window.
    # np.unique returns sorted values, so argmax picks the smallest label among
    # ties. Unlike indexing the result of scipy.stats.mode, this always yields a
    # scalar per row, so final_labels stays 1-D regardless of the SciPy version.
    voted = []
    for row in labels:
        values, counts = np.unique(row, return_counts=True)
        voted.append(values[np.argmax(counts)])
    final_labels = np.array(voted, dtype=int)

    final_label_filter = final_labels != transition_class

    X = X[final_label_filter]
    final_labels = final_labels[final_label_filter]
    pid = pid[final_label_filter]

    return X, final_labels, pid

# Remove transition-dominated windows and reduce every window to one majority label.
current_X, current_y, current_pid = clean_up_label(X, Y, pid)

# Keep the labels as a flat 1-D vector.
current_y = current_y.flatten()
# Rescale the sensor channels by standard gravity (presumably m/s^2 -> g; confirm units).
current_X = np.divide(current_X, constants.g)

# For standardizing.

# current_X = np.array(current_X, dtype=np.float32)
# m = np.mean(current_X, axis=0)
# current_X -= m
# std = np.std(current_X, axis=0)
# std += 0.000001
# current_X /= (std * 2)  # 2 is for having smaller values



# clip_value = 3
# current_X = np.clip(current_X, -clip_value, clip_value)

In [7]:
# Shapes of the windows, their labels and their subject ids, in that order.
print(*(arr.shape for arr in (current_X, current_y, current_pid)))

(18004, 200, 9) (18004,) (18004,)


In [8]:
# Spot-check one arbitrary window (index 88) to see what type a single entry of current_X has.
type(current_X[88])
# current_X[88].shape

numpy.ndarray

In [9]:
import collections

# Tally how many windows ended up with each label after cleaning.
counter = collections.Counter()
counter.update(current_y)
counter

Counter({10: 2063,
         17: 1586,
         4: 1531,
         1: 1281,
         3: 1260,
         19: 1243,
         7: 1232,
         2: 1229,
         16: 1167,
         6: 1089,
         12: 782,
         13: 697,
         18: 664,
         5: 636,
         9: 557,
         11: 363,
         24: 318,
         20: 306})

In [10]:
# Persist the cleaned windows, labels and subject ids as .npy files in the working directory.
for out_path, array in (
    ('new_X.npy', current_X),
    ('new_Y.npy', current_y),
    ('new_pid.npy', current_pid),
):
    np.save(out_path, array)

In [11]:
# Distinct labels that survive cleaning; label 0 is absent because clean_up_label filters it out.
np.unique(current_y)

array([ 1,  2,  3,  4,  5,  6,  7,  9, 10, 11, 12, 13, 16, 17, 18, 19, 20,
       24])