In [None]:
"""
This script fetches all CSV files from the specified directory and preprocesses them.
The preprocessing stage includes:
    - Removing unnecessary columns (e.g. timestamp, foot and trunk IMU data)
    - Prefiltering the data to remove noise (e.g. moving-average or zero-phase filtering)
    - Normalizing the data
    - Splitting the data into windows (by window size and overlap)
    - Reformatting the labels as the sin and cos of the phase variable
    - Saving the preprocessed data into a new .npy data file based on the inputs provided for the preprocessing stage
"""

In [1]:
import os
import pandas as pd
import numpy as np
from scipy import signal
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# NOTE: machine-specific absolute path; point this at the local dataset root.
DATA_PATH = r'C:\Users\Elad\vscode Projects\Technion\LBIS_project\dataset'

# Walk every subject folder under the dataset root. Entries are sorted because
# os.listdir returns names in arbitrary order, which would make the printed
# previews (and the final value of `df`) differ between runs/machines.
for subject in sorted(os.listdir(DATA_PATH)):
    treadmill_dir = os.path.join(DATA_PATH, subject, 'treadmill')

    # Skip stray files in the dataset root and subjects that have no
    # 'treadmill' folder instead of crashing inside os.listdir.
    if not os.path.isdir(treadmill_dir):
        continue

    # Preview every CSV file in the subject's treadmill folder.
    for file in sorted(os.listdir(treadmill_dir)):
        # Case-insensitive match so '.CSV' files are not silently skipped.
        if file.lower().endswith('.csv'):
            df = pd.read_csv(os.path.join(treadmill_dir, file))
            print(df.head())


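# NOTE: everything below in this cell is commented-out reference code (it appears
# to be a dataset item-loading method plus its CSV loader) and is not executed.
# # Get the IMU CSV file path.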
#         imu_path = self.imu_files[idx]
#         # Derive the corresponding gcRight CSV file path by replacing 'imu' with 'gcRight'
#         gcRight_path = imu_path.replace(os.sep + 'imu' + os.sep, os.sep + 'gcRight' + os.sep)
        
#         # Load CSV files (skip the header row)
#         imu_data = self._load_csv_file(imu_path)
#         gcRight_data = self._load_csv_file(gcRight_path)
        
#         # Drop the timestamp column (first column)
#         imu_data = imu_data[:, 1:]
#         gcRight_data = gcRight_data[:, 1:]
        
#         # Select only shank and thigh channels from IMU data.
#         # CSV column order (after dropping timestamp) is:
#         # [foot_Accel_X, foot_Accel_Y, foot_Accel_Z,
#         #  foot_Gyro_X, foot_Gyro_Y, foot_Gyro_Z,
#         #  shank_Accel_X, shank_Accel_Y, shank_Accel_Z,
#         #  shank_Gyro_X, shank_Gyro_Y, shank_Gyro_Z,
#         #  thigh_Accel_X, thigh_Accel_Y, thigh_Accel_Z,
#         #  thigh_Gyro_X, thigh_Gyro_Y, thigh_Gyro_Z,
#         #  trunk_Accel_X, trunk_Accel_Y, trunk_Accel_Z,
#         #  trunk_Gyro_X, trunk_Gyro_Y, trunk_Gyro_Z]
#         # We keep shank (columns 6 to 11) and thigh (columns 12 to 17)
#         shank = imu_data[:, 6:12]
#         thigh = imu_data[:, 12:18]
#         imu_selected = np.concatenate([shank, thigh], axis=1)  # Shape: (N, 12)
        
#         # Synchronize lengths: truncate all signals to the minimum available length.
#         min_length = min(imu_selected.shape[0], gcRight_data.shape[0])
#         imu_selected = imu_selected[:min_length, :]
#         gcRight_data = gcRight_data[:min_length, :]
        
#         # Randomly extract a window of fixed length.
#         if min_length > self.sequence_length:
#             start_idx = random.randint(0, min_length - self.sequence_length)
#         else:
#             start_idx = 0  # Alternatively, pad shorter sequences.
#         end_idx = start_idx + self.sequence_length
#         imu_window = imu_selected[start_idx:end_idx, :]  # (sequence_length, 12)
        
#         # Use the HeelStrike value from gcRight at the center of the window.
#         center_idx = start_idx + self.sequence_length // 2
#         heel_strike = gcRight_data[center_idx, 0]  # HeelStrike value (0-100)
#         # Normalize to [0, 1]
#         heel_strike_norm = heel_strike / 100.0
#         target = np.array([heel_strike_norm], dtype=np.float32)
        
#         # Optionally apply a transform; otherwise, convert to torch tensors.
#         if self.transform:
#             imu_window = self.transform(imu_window)
#         else:
#             imu_window = torch.tensor(imu_window, dtype=torch.float32)
#         target = torch.tensor(target, dtype=torch.float32)
        
#         return imu_window, target

#     def _load_csv_file(self, file_path):
#         """Loads a CSV file using NumPy (skipping the header row)."""
#         data = np.loadtxt(file_path, delimiter=',', skiprows=1)
#         return data


   Header  HeelStrike  ToeOff
0  10.360         0.0     0.0
1  10.365         0.0     0.0
2  10.370         0.0     0.0
3  10.375         0.0     0.0
4  10.380         0.0     0.0
   Header  foot_Accel_X  foot_Accel_Y  foot_Accel_Z  foot_Gyro_X  foot_Gyro_Y  \
0  10.360     -0.530859      0.303578      0.788758    -0.007199     0.036176   
1  10.365     -0.528300      0.298713      0.788308    -0.006505     0.030790   
2  10.370     -0.528048      0.307608      0.785671    -0.007193     0.037746   
3  10.375     -0.527954      0.304533      0.787364    -0.010729     0.037391   
4  10.380     -0.532506      0.304603      0.785833    -0.010253     0.034913   

   foot_Gyro_Z  shank_Accel_X  shank_Accel_Y  shank_Accel_Z  ...  \
0    -0.018538      -0.985110       0.099353      -0.028464  ...   
1    -0.018505      -0.985064       0.095580      -0.029383  ...   
2    -0.023075      -0.990004       0.090721      -0.030897  ...   
3    -0.021650      -0.988742       0.084288      -0.030312  

In [None]:
# HARDCODED PARAMETERS
BASE_SAMPLING_RATE = 200    # Hz

# INPUT PARAMETERS
NORMALIZE_FLAG = True       # Normalize the data decision variable
WINDOW_SIZE = 2             # seconds
WINDOW_OVERLAP = 1          # seconds


def check_window_params(window_size, window_overlap, sampling_rate):
    """Fail fast on a windowing configuration that cannot produce windows.

    Args:
        window_size: Window length in seconds; must be positive.
        window_overlap: Overlap between consecutive windows in seconds; must
            satisfy 0 <= window_overlap < window_size so that the step
            between consecutive windows stays positive.
        sampling_rate: Sampling rate in Hz; must be positive.

    Raises:
        ValueError: If any of the constraints above is violated.
    """
    if sampling_rate <= 0:
        raise ValueError(f"sampling rate must be positive, got {sampling_rate}")
    if window_size <= 0:
        raise ValueError(f"window size must be positive, got {window_size}")
    if not 0 <= window_overlap < window_size:
        raise ValueError(
            f"window overlap must be in [0, {window_size}), got {window_overlap}"
        )


# Validate the tunable parameters right where they are edited, so a bad value
# is reported here rather than as a confusing failure further down.
check_window_params(WINDOW_SIZE, WINDOW_OVERLAP, BASE_SAMPLING_RATE)