In [244]:
import torch
import os
import json
import pandas as pd
import numpy as np
import pathlib

In [245]:
# Directory holding the processed recordings, laid out as <participant>/<recording>/.
LABELS_DIR = '../processed/3_final'

# Directory the generated tensors are written to.
TENSOR_DIR = '../tensors'

# Presumably the accelerometer sampling rate in Hz -- confirm against the recording device.
HERTZ = 100
# Number of samples per window (3 * HERTZ = 300 samples).
WINDOW_LENGTH = 3 * HERTZ

# Class index assigned to each activity name found in the label files.
ACTIVITY_NAME_TO_CLASS_INDEX_MAPPING = {
    'water': 0,
    'listerine': 1,
}

In [246]:
def to_final(start_idx, end_idx, label, X, window_length=None):
    """Cut overlapping fixed-length windows out of ``X`` and tag each one with ``label``.

    One window is produced for every start position ``i`` in
    ``range(round(start_idx + 0.6), round(end_idx - window_length - 0.6))``.
    For integer indices this runs from ``start_idx + 1`` up to, but excluding,
    ``end_idx - window_length - 1``. Each window ``X[i:i + window_length]`` is
    transposed and flattened, so a row holds every value of the first column,
    then every value of the second column, and so on, with ``label`` appended
    as the final element.

    Parameters
    ----------
    start_idx, end_idx : int
        Positional bounds (rows of ``X``) of the labelled segment.
    label : int
        Class index appended to every row.
    X : numpy.ndarray
        2-D array of samples, one row per time step.
    window_length : int, optional
        Number of rows per window. Defaults to the module-level ``WINDOW_LENGTH``.

    Returns
    -------
    numpy.ndarray
        One row per window. When no window fits, an empty array of shape
        ``(0, window_length * 3 + 1)`` with ``X``'s dtype is returned (the empty
        case assumes three columns, as the original implementation did).

    Raises
    ------
    ValueError
        Raised by ``np.vstack`` if the windows do not all have the same length,
        e.g. because a slice runs past the end of ``X``.
    """
    if window_length is None:
        window_length = WINDOW_LENGTH

    first_start = round(start_idx + 0.6)
    stop_start = round(end_idx - window_length - 0.6)

    # Collect the rows first and stack once; stacking inside the loop copies the
    # whole result for every window, which is quadratic in the number of windows.
    rows = [
        np.append(X[i:i + window_length].T.flatten(), label)
        for i in range(first_start, stop_start)
    ]

    if not rows:
        return np.empty((0, (window_length * 3) + 1), dtype=X.dtype)
    return np.vstack(rows)

In [247]:
def json_to_final(labels, acc):
    """Build windowed samples and class labels for every labelled bout of one recording.

    Parameters
    ----------
    labels : dict
        Nested mapping ``labels[hand][action]`` -> iterable of bouts, where each
        bout is a mapping with ``'start'`` and ``'end'`` values that are compared
        against ``acc.timestamp``.
    acc : pandas.DataFrame
        Accelerometer samples with ``timestamp``, ``x``, ``y`` and ``z`` columns.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` holds every column but the last of the stacked
        ``to_final`` rows and ``y`` holds the last column (the class index).

    Raises
    ------
    KeyError
        If a bout that matches at least one sample belongs to an action that is
        missing from ``ACTIVITY_NAME_TO_CLASS_INDEX_MAPPING``.
    """
    # Loop-invariant: extract the signal matrix and timestamps once per recording.
    X = acc[['x', 'y', 'z']].values
    timestamps = acc.timestamp

    # Seed with an empty float block so the result keeps its shape and dtype even
    # when no bout yields a window; everything is stacked once at the end.
    chunks = [np.empty((0, (WINDOW_LENGTH * 3) + 1))]

    for hand in labels:
        for action in labels[hand]:
            for bout in labels[hand][action]:
                # Samples strictly inside the bout (both bounds are exclusive).
                inside = (timestamps > bout['start']) & (timestamps < bout['end'])
                # Positional row numbers: X is indexed by position, so these stay
                # correct even if acc does not carry a default 0..n-1 index.
                positions = np.flatnonzero(inside.to_numpy())
                if len(positions) == 0:
                    continue

                rows = to_final(
                    int(positions[0]),
                    int(positions[-1]),
                    label=ACTIVITY_NAME_TO_CLASS_INDEX_MAPPING[action],
                    X=X,
                )
                if rows.size != 0:
                    chunks.append(rows)

    arr = np.vstack(chunks)
    return arr[:, :-1], arr[:, -1]


In [248]:
def json_to_tensor(json_file_path, acc_file_path):
    """Load one recording's label JSON and acceleration CSV and return ``(X, y)`` tensors.

    The CSV is read with its first line skipped, its ``timestamp`` column is
    shifted to start at zero and multiplied by 1e-9 (presumably nanoseconds to
    seconds -- confirm against the recorder), and the result is windowed by
    ``json_to_final`` before being converted with ``torch.tensor``.
    """
    with open(json_file_path, 'r') as label_file:
        bout_labels = json.load(label_file)

    recording = pd.read_csv(acc_file_path, skiprows=1)
    elapsed = (recording.timestamp - recording.timestamp[0]) * 1e-9
    recording.timestamp = elapsed

    features, targets = json_to_final(bout_labels, recording)
    return torch.tensor(features), torch.tensor(targets)




In [249]:
def save(X, y, participant_id, session_id, base_dir=None):
    """Write ``X`` and ``y`` to ``<base_dir>/<participant_id>/<session_id>/{X,y}.pt``.

    Parameters
    ----------
    X, y : torch.Tensor
        Tensors to persist with ``torch.save``.
    participant_id, session_id
        Converted with ``str`` and used as nested directory names.
    base_dir : str or pathlib.Path, optional
        Root output directory. Defaults to the module-level ``TENSOR_DIR``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``X`` or ``y`` is not a ``torch.Tensor``.
    """
    # Validate before touching the filesystem so bad input leaves no empty
    # directories behind. (Previously the exceptions were built but never raised.)
    if not isinstance(X, torch.Tensor):
        raise ValueError('X is not a tensor')
    if not isinstance(y, torch.Tensor):
        raise ValueError('y is not a tensor')

    root = pathlib.Path(TENSOR_DIR if base_dir is None else base_dir)

    # Create the participant and session directories if they don't exist.
    session_dir = root / str(participant_id) / str(session_id)
    session_dir.mkdir(parents=True, exist_ok=True)

    # Save X and y as separate files.
    torch.save(X, session_dir / 'X.pt')
    torch.save(y, session_dir / 'y.pt')
    

In [250]:
# Convert every recording under LABELS_DIR into tensors saved under TENSOR_DIR.
# Paths are relative to the working directory (assumed to be utils/), e.g.
# ../processed/3_final/00/2024-01-06_14_29_18/labels.json
for participant in sorted(os.listdir(LABELS_DIR)):
    recordings = os.path.join(LABELS_DIR, participant)
    if not os.path.isdir(recordings):
        continue  # stray files such as macOS '.DS_Store' are not participants

    # Filter BEFORE enumerating so session ids are contiguous from 0 whether or
    # not stray files are present (previously a '.DS_Store' shifted every id).
    recording_names = [
        name for name in sorted(os.listdir(recordings))
        if os.path.isdir(os.path.join(recordings, name))
    ]

    for i, recording in enumerate(recording_names):
        label_path = os.path.join(recordings, recording, 'labels.json')
        acc_path = os.path.join(recordings, recording, 'acceleration.csv')

        # Turn the recording into tensors ...
        X, y = json_to_tensor(label_path, acc_path)

        # ... and store them under the tensor directory.
        save(X, y, participant_id=participant, session_id=i)


In [251]:

# Load one recording's accelerometer data for a manual check of the windowing code.
# The path is derived from LABELS_DIR so it follows the configured data location;
# skiprows=1 skips the first line of the file before the header is parsed.
acc = pd.read_csv(f'{LABELS_DIR}/00/2024-01-06_14_29_18/acceleration.csv', skiprows=1)

In [252]:
# Re-base the timestamps to start at zero and scale them by 1e-9
# (presumably nanoseconds -> seconds; confirm against the recorder).
# NOTE: the column is overwritten in place, so re-running this cell without
# reloading `acc` scales the values by 1e-9 again.
acc['timestamp'] = (acc['timestamp'] - acc['timestamp'][0]) * 1e-9
acc

Unnamed: 0,timestamp,x,y,z
0,0.000000,-0.040715,4.313430,8.866363
1,0.028790,0.098196,3.985313,8.938213
2,0.038399,0.002395,4.129014,8.945398
3,0.047911,-0.045505,4.224814,8.950189
4,0.057494,0.007185,4.210445,8.909473
...,...,...,...,...
75749,723.970684,-1.896856,4.224814,8.600515
75750,723.980232,-1.872905,4.239185,8.600515
75751,723.989789,-1.834585,4.251160,8.674761
75752,723.999346,-1.762734,4.239185,8.667576


In [253]:
# Signal matrix: one row per sample, columns in x, y, z order.
X = acc.loc[:, ['x', 'y', 'z']].to_numpy()


In [254]:
# First 15 samples, transposed so each output row is one column of X.
X[:15].transpose()

array([[-4.0715333e-02,  9.8195806e-02,  2.3950196e-03, -4.5505375e-02,
         7.1850587e-03, -2.3950196e-03,  0.0000000e+00,  3.8320314e-02,
         4.0715333e-02,  9.5800790e-03,  2.1555176e-02,  3.1135255e-02,
         1.6765138e-02,  3.5925295e-02,  3.5925295e-02],
       [ 4.3134303e+00,  3.9853127e+00,  4.1290140e+00,  4.2248144e+00,
         4.2104445e+00,  4.2104445e+00,  4.2822950e+00,  4.3182206e+00,
         4.2870850e+00,  4.2104445e+00,  4.1697290e+00,  4.2367897e+00,
         4.2822950e+00,  4.2727150e+00,  4.2775050e+00],
       [ 8.8663630e+00,  8.9382130e+00,  8.9453980e+00,  8.9501890e+00,
         8.9094730e+00,  8.8615720e+00,  8.8208570e+00,  8.8208570e+00,
         8.8783380e+00,  8.9166580e+00,  8.8951025e+00,  8.8927080e+00,
         8.8687570e+00,  8.8208570e+00,  8.8759430e+00]])

In [255]:
# Total number of values in X (the length of the transposed, flattened recording).
X.size

227262