In [103]:
import os
import warnings

import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset
from tqdm import tqdm

In [104]:
def _window_starts(n_rows, seq_len, stride):
    """Return the start offsets of the windows cut from ``n_rows`` rows.

    Offsets advance by ``stride`` for as long as a full ``seq_len`` window fits.
    If that leaves trailing rows uncovered, one extra offset is appended so the
    final window ends exactly on the last row. Requires ``n_rows >= seq_len``.
    """
    last_start = n_rows - seq_len
    starts = list(range(0, last_start + 1, stride))
    if starts[-1] != last_start:
        starts.append(last_start)
    return starts


def load_dataset(data_path, seq_len=100, stride=50):
    """Build windowed source/target datasets from the ``.txt`` files in a directory.

    Every file in ``data_path`` whose name ends in ``.txt`` is read as a
    space-separated table with a header row. The six gyroscope/accelerometer
    columns form the source signal and the ``px``/``py``/``pz`` columns form
    the target. Each file is cut into overlapping windows of ``seq_len`` rows
    taken every ``stride`` rows; when the regular windows do not reach the end
    of the file, a final window covering the last ``seq_len`` rows is added.
    Each target window has its own first row subtracted from all of its rows.

    Files are processed in sorted filename order so the result is reproducible.
    Files with fewer than ``seq_len`` rows cannot produce a full window and are
    skipped with a warning.

    Args:
        data_path: Directory containing the ``.txt`` files.
        seq_len: Number of rows per window (default 100).
        stride: Row offset between consecutive window starts (default 50).

    Returns:
        A ``(source_dataset, target_dataset)`` pair of ``TensorDataset`` objects
        holding one tensor each, shaped ``(num_windows, seq_len, 6)`` and
        ``(num_windows, seq_len, 3)`` respectively, with matching window order.

    Raises:
        ValueError: If ``seq_len`` or ``stride`` is smaller than 1.
        RuntimeError: If no window could be extracted from ``data_path``.
        KeyError: If a file lacks one of the required columns (from pandas).
    """
    if seq_len < 1 or stride < 1:
        raise ValueError("seq_len and stride must be positive integers")

    source_columns = ["gx(rad/s)", "gy(rad/s)", "gz(rad/s)", "ax(m/s^2)", "ay(m/s^2)", "az(m/s^2)"]
    target_columns = ["px", "py", "pz"]

    source_sequences = []
    target_sequences = []

    # os.listdir returns entries in arbitrary order; sort for a stable window order.
    for filename in sorted(os.listdir(data_path)):
        if not filename.endswith(".txt"):
            continue

        file_path = os.path.join(data_path, filename)
        df = pd.read_csv(file_path, sep=" ")

        # A file shorter than one window would otherwise yield a ragged sequence
        # (breaking the final stack) or clobber the previous file's last window.
        if len(df) < seq_len:
            warnings.warn(
                f"Skipping {file_path}: {len(df)} rows is fewer than seq_len={seq_len}",
                stacklevel=2,
            )
            continue

        source_data = df[source_columns].to_numpy()
        target_data = df[target_columns].to_numpy()

        # Source and target share the same offsets so window k of one always
        # corresponds to window k of the other.
        for start in _window_starts(len(df), seq_len, stride):
            stop = start + seq_len
            source_sequences.append(source_data[start:stop])

            # Express the target relative to the first row of its window.
            target_seq = target_data[start:stop]
            target_sequences.append(target_seq - target_seq[0])

    if not source_sequences:
        raise RuntimeError(
            f"No sequences could be built from the .txt files in {data_path!r}"
        )

    # np.stack copies the windows into one fresh contiguous array per dataset.
    source_tensors = torch.from_numpy(np.stack(source_sequences))
    target_tensors = torch.from_numpy(np.stack(target_sequences))

    source_dataset = TensorDataset(source_tensors)
    target_dataset = TensorDataset(target_tensors)

    return source_dataset, target_dataset

In [105]:
# Directory holding the merged recordings, relative to this notebook.
data_path = './../dat/merged/'

# load_dataset returns the source and target datasets as a pair.
source_dataset, target_dataset = load_dataset(data_path)

# Each dataset wraps a single tensor; report its dimensions as a sanity check.
print("Source Dataset Shape:", source_dataset.tensors[0].size())
print("Target Dataset Shape:", target_dataset.tensors[0].size())

Source Dataset Shape: torch.Size([2088, 100, 6])
Target Dataset Shape: torch.Size([2088, 100, 3])


In [102]:
# Export every window to its own text file: s{i}.txt holds the source window
# and t{i}.txt the matching target window.
#
# The sequences are taken from the datasets built in the previous cell. The
# lists of the same name inside load_dataset are local to that function, so
# reading them here would fail on a fresh kernel. The exported targets are
# therefore the ones already made relative to the first row of each window.
source_sequences = source_dataset.tensors[0].numpy()
target_sequences = target_dataset.tensors[0].numpy()

# np.savetxt does not create missing directories.
os.makedirs('./../dat/sequences', exist_ok=True)

for i in tqdm(range(len(source_sequences))):
    np.savetxt(f'./../dat/sequences/s{i}.txt', source_sequences[i], delimiter=' ', comments='')
    np.savetxt(f'./../dat/sequences/t{i}.txt', target_sequences[i], delimiter=' ', comments='')

100%|█████████████████████████████████████████████████████████████████████████████| 2088/2088 [00:08<00:00, 247.64it/s]
