In [1]:
import os
import glob
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
# Column labels assigned to the sensor CSVs when they are read: one name per
# (sensor prefix, axis) pair, in the order "accr" x/y/z followed by "gyr" x/y/z.
col_names = [
    sensor + "_" + axis
    for sensor in ("accr", "gyr")
    for axis in ("x", "y", "z")
]

In [3]:
# Directories holding the raw recordings. Both values end with a path separator.
# NOTE(review): these are absolute, machine-specific paths — adjust before
# running this notebook on another machine.
_DATA_DIR = "/Users/purushothamanyadav/Documents/NCSU/Spring23/NN/Project/ProjC/terrain-identification/data/"
data_root = _DATA_DIR + "TrainingData/"
test_root = _DATA_DIR + "TestData/"

# Filename templates. Each has two "{}" fields to be filled with str.format;
# the part after the double underscore identifies the file's role.
_TEMPLATE_STEM = "subject_{}_{}__"
X_TIME = _TEMPLATE_STEM + "x_time.csv"
X_DATA = _TEMPLATE_STEM + "x.csv"

Y_TIME = _TEMPLATE_STEM + "y_time.csv"
Y_DATA = _TEMPLATE_STEM + "y.csv"

In [4]:
# Sequence (window) parameters.
# NOTE(review): the two *_sampling_rate values are not read by the code visible
# in this notebook export; they look like the spacing between consecutive label
# rows and data rows (ratio of 4) — confirm units before relying on them.
label_sampling_rate, data_sampling_rate = 0.1, 0.025

# A window spans `sequence_length` consecutive data rows; consecutive windows
# share `overlap` rows, so each window starts `sequence_length - overlap` rows
# after the previous one.
sequence_length, overlap = 6, 2

In [5]:
# Collect the training files matching each filename template (both "{}" fields
# are wildcarded). The lists are sorted so their order is deterministic.
# os.path.join is used rather than string concatenation so the glob pattern
# stays correct even if data_root is given without a trailing separator.
files_x_data, files_x_time, files_y_data, files_y_time = (
    sorted(glob.glob(os.path.join(data_root, template.format("*", "*"))))
    for template in (X_DATA, X_TIME, Y_DATA, Y_TIME)
)

In [6]:
# Cut every training recording into fixed-length, overlapping windows and write
# one windowed CSV plus one label CSV per recording into out_dir.
# NOTE(review): out_dir lives under a different home directory than the input
# directories configured earlier — confirm it is correct for this machine.
out_dir = "/home/gmvincen/class_work/ece_542/terrain-identification/data/WindowedTraining"
# Create the destination up front so a bad path fails before any processing.
os.makedirs(out_dir, exist_ok=True)

# Number of rows between the starts of two consecutive windows.
stride = sequence_length - overlap
if stride <= 0:
    raise ValueError("sequence_length must be greater than overlap")

out_columns = col_names + ['timestamp', 'time']
x_suffix = "x.csv"

for x_file in tqdm(files_x_data, desc="Files"):
    # Every file of one recording shares the path up to the trailing "x.csv".
    # Slice the suffix off explicitly: str.rstrip() removes a *set of
    # characters*, not a suffix, and would eat into the recording id whenever
    # the id ends in one of those characters.
    prefix = x_file[:-len(x_suffix)]
    base = os.path.basename(prefix)

    x_time = prefix + "x_time.csv"
    y_file = prefix + "y.csv"
    y_time = prefix + "y_time.csv"

    time_df = pd.read_csv(x_time, names=['time'])
    data_df = pd.read_csv(x_file, names=col_names)
    label_time_df = pd.read_csv(y_time, names=['time'])
    label_df = pd.read_csv(y_file, names=['label'])

    # Merge data and time dataframes
    x_df = pd.concat([time_df, data_df], axis=1)
    y_df = pd.concat([label_time_df, label_df], axis=1)

    # Create sequences. Windows are collected in a list and concatenated once;
    # growing a DataFrame with pd.concat inside the loop is quadratic.
    windows = []
    for start in tqdm(range(0, len(x_df) - overlap, stride), desc="Creating Sequences"):
        # Column 0 is the sample time; only the sensor channels are windowed.
        window = x_df.iloc[start:start + sequence_length, 1:]

        if window.shape[0] != sequence_length:
            # Short final window: pad with NaN rows up to the full length and
            # let interpolate() fill them. With no later value to interpolate
            # toward, pandas carries the last observed row forward.
            window = (
                window.reset_index(drop=True)
                .reindex(range(sequence_length))
                .interpolate(method='linear')
            )

        # Window number; also used as the row index into the label table.
        # .loc raises KeyError if the label file has fewer rows than windows.
        timestamp = start // stride
        windows.append(
            window.assign(timestamp=timestamp, time=y_df.loc[timestamp, 'time'])
        )

    if windows:
        sequences = pd.concat(windows, axis=0, ignore_index=True)
    else:
        # Recording too short to yield any window: still emit the header row.
        sequences = pd.DataFrame(columns=out_columns)

    sequences['timestamp'] = sequences['timestamp'].astype(int)

    sequences.to_csv(os.path.join(out_dir, base + "x.csv"), index=False)
    y_df.to_csv(os.path.join(out_dir, base + "y.csv"), index=False)

Files:   0%|          | 0/29 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/9472 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/17543 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/10786 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/13739 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/14780 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/9658 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/7856 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/16690 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/14000 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/13850 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/12199 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/12865 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/10580 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/9078 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11802 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/4883 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/8577 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/8619 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/14020 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/8630 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11226 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/12031 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/14890 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11282 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11550 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11747 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11160 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/9860 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/12034 [00:00<?, ?it/s]

In [7]:
# Collect the test files matching each filename template (both "{}" fields are
# wildcarded); the test set has no label-value files, only label times.
# The lists are sorted so their order is deterministic. os.path.join is used
# rather than string concatenation so the glob pattern stays correct even if
# test_root is given without a trailing separator.
files_x_data, files_x_time, files_y_time = (
    sorted(glob.glob(os.path.join(test_root, template.format("*", "*"))))
    for template in (X_DATA, X_TIME, Y_TIME)
)

In [8]:
# Cut every test recording into fixed-length, overlapping windows and write one
# windowed CSV plus one label-time CSV per recording into out_dir.
# NOTE(review): out_dir lives under a different home directory than the input
# directories configured earlier — confirm it is correct for this machine.
out_dir = "/home/gmvincen/class_work/ece_542/terrain-identification/data/WindowedTest"
# Create the destination up front so a bad path fails before any processing.
os.makedirs(out_dir, exist_ok=True)

# Number of rows between the starts of two consecutive windows.
stride = sequence_length - overlap
if stride <= 0:
    raise ValueError("sequence_length must be greater than overlap")

out_columns = col_names + ['timestamp', 'time']
x_suffix = "x.csv"

for x_file in tqdm(files_x_data, desc="Files"):
    # Every file of one recording shares the path up to the trailing "x.csv".
    # Slice the suffix off explicitly: str.rstrip() removes a *set of
    # characters*, not a suffix, and would eat into the recording id whenever
    # the id ends in one of those characters.
    prefix = x_file[:-len(x_suffix)]
    base = os.path.basename(prefix)

    x_time = prefix + "x_time.csv"
    y_time = prefix + "y_time.csv"

    time_df = pd.read_csv(x_time, names=['time'])
    data_df = pd.read_csv(x_file, names=col_names)
    label_time_df = pd.read_csv(y_time, names=['time'])

    # Merge data and time dataframes
    x_df = pd.concat([time_df, data_df], axis=1)
    # No label values are read for the test set; only the label times are kept.
    y_df = label_time_df

    # Create sequences. Windows are collected in a list and concatenated once;
    # growing a DataFrame with pd.concat inside the loop is quadratic.
    windows = []
    for start in tqdm(range(0, len(x_df) - overlap, stride), desc="Creating Sequences"):
        # Column 0 is the sample time; only the sensor channels are windowed.
        window = x_df.iloc[start:start + sequence_length, 1:]

        if window.shape[0] != sequence_length:
            # Short final window: pad with NaN rows up to the full length and
            # let interpolate() fill them. With no later value to interpolate
            # toward, pandas carries the last observed row forward.
            window = (
                window.reset_index(drop=True)
                .reindex(range(sequence_length))
                .interpolate(method='linear')
            )

        # Window number; also used as the row index into the label-time table.
        # .loc raises KeyError if that table has fewer rows than windows.
        timestamp = start // stride
        windows.append(
            window.assign(timestamp=timestamp, time=y_df.loc[timestamp, 'time'])
        )

    if windows:
        sequences = pd.concat(windows, axis=0, ignore_index=True)
    else:
        # Recording too short to yield any window: still emit the header row.
        sequences = pd.DataFrame(columns=out_columns)

    sequences['timestamp'] = sequences['timestamp'].astype(int)

    sequences.to_csv(os.path.join(out_dir, base + "x.csv"), index=False)
    y_df.to_csv(os.path.join(out_dir, base + "y.csv"), index=False)

Files:   0%|          | 0/4 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/9498 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/12270 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/12940 [00:00<?, ?it/s]

Creating Sequences:   0%|          | 0/11330 [00:00<?, ?it/s]