In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from sklearn.model_selection import train_test_split
from torch import nn
import torch.nn.functional as F
from scipy.stats import linregress
from pathlib import Path

In [2]:
class IMUDataset(Dataset):
    """Sliding-window dataset over a single table of 6-axis IMU samples.

    The frame's ``acc_x, acc_y, acc_z, gyr_x, gyr_y, gyr_z`` columns are read
    in row order, optionally passed through ``transform`` while still laid out
    as ``(n_samples, 6)``, and then stored channel-first as a float32 tensor
    of shape ``(6, n_samples)``.

    Args:
        df: DataFrame containing the six IMU columns listed above.
        winsize: Window length in samples.
        stride: Step in samples between the starts of consecutive windows.
        transform: Optional callable applied to the ``(n_samples, 6)`` tensor
            before it is transposed and cast to float32.

    Raises:
        ValueError: If ``winsize`` or ``stride`` is not positive.
    """

    def __init__(self, df, winsize=250, stride=50, transform=None):
        if winsize <= 0 or stride <= 0:
            raise ValueError(
                f"winsize and stride must be positive, got winsize={winsize}, stride={stride}"
            )
        self.X = torch.from_numpy(df[['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']].values)
        if transform is not None:
            self.X = transform(self.X)
        # Channel-first layout: (6, n_samples).
        self.X = self.X.transpose(0, 1).to(torch.float32)
        self.winsize = winsize
        self.stride = stride
        # Clamp at zero: a recording shorter than one window yields no items.
        # Without the clamp the count goes negative and len() raises ValueError.
        self.len = max(0, (self.X.shape[1] - winsize) // stride + 1)

    def __len__(self):
        """Return the number of complete windows available."""
        return self.len

    def __getitem__(self, i):
        """Return window ``i`` as a ``(6, winsize)`` float32 tensor.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: If ``i`` is outside ``[-len(self), len(self))``.
        """
        if i < 0:
            i += self.len
        # The bounds check is what lets plain iteration (`for w in ds`) stop;
        # it also prevents silently returning a truncated or empty window.
        if not 0 <= i < self.len:
            raise IndexError(f"window index out of range for dataset of length {self.len}")
        start = i * self.stride
        end = start + self.winsize
        return self.X[:, start:end]


In [None]:
HZ = 100
# Read every file in the data directory and stack them into a single frame.
# Previously each iteration overwrote `df` and `dfs` stayed empty, so only the
# last file read was ever used downstream.
# Sorting makes the file order, and therefore the seeded split below,
# independent of the filesystem's directory-listing order.
dfs = []
for file in sorted(Path('data/elshafei_data_cleaned').iterdir()):
    if not file.is_file():
        continue
    dfs.append(pd.read_csv(file))
if not dfs:
    raise FileNotFoundError("no data files found in data/elshafei_data_cleaned")
# NOTE(review): assumes session_id values are unique across files; if two files
# reuse an id their rows will be treated as one session. TODO confirm.
df = pd.concat(dfs, ignore_index=True)

# Split by session rather than by row so windows from one recording never
# end up in both the training and validation sets.
session_ids = df['session_id'].unique()
train_ids, val_ids = train_test_split(session_ids, test_size=0.2, random_state=42)

# Window geometry is specified in seconds and converted to sample counts at HZ.
winsize_t = 5 # seconds
stride_t = 0.01 # seconds
# round() instead of int(): a float product such as 0.29 * 100 evaluates to
# 28.999999999999996, which int() would truncate to 28 samples.
winsize = round(winsize_t * HZ)
stride = round(stride_t * HZ)
print(winsize, stride)

# Per-channel statistics are computed on the training sessions only, so no
# validation data influences the normalisation.
train = df.loc[df['session_id'].isin(train_ids), ['acc_x', 'acc_y', 'acc_z', 'gyr_x', 'gyr_y', 'gyr_z']].values
train_mean = train.mean(axis=0)
train_std = train.std(axis=0)
# A constant channel has zero std; divide by 1 instead so it maps to 0
# rather than NaN/inf.
train_std[train_std == 0] = 1.0
norm = torch.from_numpy(train_mean), torch.from_numpy(train_std)

def transform(x):
    """Standardise ``x`` with the training-set mean and std held in ``norm``.

    ``x`` is expected to have the six IMU channels on its last axis so the
    per-channel statistics broadcast against it.
    """
    return (x - norm[0]) / norm[1]

def _windowed_split(ids):
    """Concatenate one IMUDataset per session id, in the order given by ``ids``."""
    per_session = []
    for sid in ids:
        session_rows = df[df['session_id'] == sid]
        per_session.append(IMUDataset(session_rows, winsize, stride, transform))
    return ConcatDataset(per_session)

# Windows are cut within each session separately, so none spans two sessions.
train_dataset = _windowed_split(train_ids)
val_dataset = _windowed_split(val_ids)

len(train_dataset), len(val_dataset)