In [1]:
import pandas as pd
import numpy as np
import pickle
import scipy

In [2]:
def create_windows_np(data, window_size, stride):
    """
    Slice a 2-D signal into fixed-length, possibly overlapping windows.

    Parameters
    ----------
    data : np.ndarray
        Array of shape (num_samples, num_channels).
    window_size : int
        Number of consecutive samples per window; must be positive.
    stride : int
        Number of samples between the starts of consecutive windows;
        must be positive.

    Returns
    -------
    np.ndarray
        Array of shape (num_windows, window_size, num_channels) with
        num_windows = (num_samples - window_size) // stride + 1.
        Trailing samples that do not fill a whole window are dropped.
        When the signal is shorter than one window an empty array of
        shape (0, window_size, num_channels) is returned.

    Raises
    ------
    ValueError
        If `window_size` or `stride` is not positive, or if `data` is
        not 2-D (raised by the shape unpacking).

    Notes
    -----
    A non-empty result is a strided *view* on `data`, not a copy. When
    stride < window_size the windows overlap in memory, so writing into
    the result also changes `data` and the neighbouring windows. Copy
    the result before modifying it in place.
    """
    num_samples, num_channels = data.shape

    if window_size <= 0 or stride <= 0:
        raise ValueError(
            f"window_size and stride must be positive, got "
            f"window_size={window_size}, stride={stride}"
        )

    # Too short for even one window: without this guard the window count
    # below can become negative and as_strided fails with an opaque error.
    if num_samples < window_size:
        return np.empty((0, window_size, num_channels), dtype=data.dtype)

    num_windows = (num_samples - window_size) // stride + 1

    shape = (num_windows, window_size, num_channels)
    # Axis 0 jumps `stride` rows per window; axes 1 and 2 walk the original
    # rows and columns of `data`.
    strides = (data.strides[0] * stride, data.strides[0], data.strides[1])

    windows = np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)

    return windows

In [5]:
def noise_transform_vectorized(X, sigma=0.05, rng=None):
    """
    Adding random Gaussian noise with mean 0

    Parameters
    ----------
    X : np.ndarray
        Input array; one independent noise value is drawn per element.
    sigma : float
        Standard deviation of the noise.
    rng : np.random.Generator or np.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global generator
        (`np.random.normal`) is used, so `np.random.seed` still controls
        the result.

    Returns
    -------
    np.ndarray
        New array of the same shape as `X`. The noise is drawn as float64,
        so the result follows NumPy's promotion rules (float32 input gives
        float64 output).
    """
    sampler = np.random if rng is None else rng
    noise = sampler.normal(loc=0, scale=sigma, size=X.shape)
    return X + noise

def scaling_transform_vectorized(X, sigma=0.1, rng=None):
    """
    Scaling by a random factor

    One factor is drawn per (first-axis entry, last-axis entry) pair from a
    normal distribution centred on 1.0 and broadcast along the middle axis,
    so every position along axis 1 shares the same factor.

    Parameters
    ----------
    X : np.ndarray
        3-D input array; `X.shape[0]` and `X.shape[2]` size the factors.
    sigma : float
        Standard deviation of the scaling factors around 1.0.
    rng : np.random.Generator or np.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global generator
        (`np.random.normal`) is used, so `np.random.seed` still controls
        the result.

    Returns
    -------
    np.ndarray
        New array of the same shape as `X`. The factors are float64, so
        the result follows NumPy's promotion rules.
    """
    sampler = np.random if rng is None else rng
    # Middle axis has length 1 so the factor broadcasts over axis 1 of X.
    scaling_factor = sampler.normal(loc=1.0, scale=sigma, size=(X.shape[0], 1, X.shape[2]))
    return X * scaling_factor

def negate_transform_vectorized(X):
    """
    Flip the sign of every value in X by multiplying it with -1.

    Returns the product `X * -1`; `X` itself is not modified.
    """
    sign = -1
    negated = X * sign
    return negated

def time_flip_transform_vectorized(X):
    """
    Reverse the order of entries along the middle axis (axis 1) of a 3-D array.

    The first and last axes are left untouched. The result comes from basic
    slicing with a step of -1.
    """
    keep_all = slice(None)
    backwards = slice(None, None, -1)
    return X[keep_all, backwards, keep_all]

In [6]:
# Register each augmentation once as a (name, function) pair. Both public
# lists below are derived from this single table, so the i-th name always
# describes the i-th function. The functions are referenced directly;
# wrapping them in `lambda x: f(x)` added nothing.
_named_transformations = [
    ("noise", noise_transform_vectorized),
    ("scale", scaling_transform_vectorized),
    ("negate", negate_transform_vectorized),
    ("time_flip", time_flip_transform_vectorized),
]

# Callables applied to the windowed data, in registration order.
transformations = [transform_fn for _name, transform_fn in _named_transformations]
# Short identifiers for the callables above, in the same order.
transformations_names = [name for name, _transform_fn in _named_transformations]

In [7]:
path = "../capture24"

NUM_USERS = 10        # files P001 .. P010 are processed
WINDOW_SIZE = 200     # samples per window
WINDOW_STRIDE = 100   # samples between window starts (half-window overlap)
RANDOM_SEED = 42      # fixes the noise / scaling draws so the saved files are reproducible

# Class label stored in 'y' for each entry of `transformations_names`.
# An unknown name raises KeyError instead of being silently labelled.
LABEL_BY_TRANSFORMATION = {
    "noise": "Noise",
    "scale": "Scaling",
    "negate": "Negation",
    "time_flip": "Time Flip",
}

# The random transforms draw from NumPy's global generator, so seed it once
# before any augmentation is produced.
np.random.seed(RANDOM_SEED)

user_dataset = {}

for i in range(NUM_USERS):
    user_id = f'P{i+1:03d}'

    df = pd.read_csv(f'{path}/{user_id}.csv.gz', compression='gzip', low_memory=False)
    # Only the three accelerometer axis columns are used.
    cols = ['x', 'y', 'z']
    data = df[cols].values.astype(np.float32)

    user_data = create_windows_np(data, WINDOW_SIZE, WINDOW_STRIDE)
    num_windows = user_data.shape[0]

    # Stack every augmented copy followed by the untouched windows in a single
    # concatenate call, which avoids copying the augmented part twice.
    # NOTE: with the transforms defined in this notebook the noise/scaling
    # outputs are float64, so the stacked array is float64 although `data`
    # is float32.
    transformed_data = np.concatenate(
        [transform_fn(user_data) for transform_fn in transformations] + [user_data],
        axis=0,
    )

    # One label per window, in the same block order as `transformed_data`:
    # each transformation's windows first, the original windows last.
    label_names = [LABEL_BY_TRANSFORMATION[name] for name in transformations_names]
    label_names.append("Original")
    transformed_labels = np.repeat(np.array(label_names), num_windows)

    assert len(transformed_labels) == len(transformed_data), \
        "labels and windows are out of step"

    transformed_user_data = {
        'X': transformed_data,
        'y': transformed_labels
    }
    file_path = f'{path}/{user_id}.obj'
    with open(file_path, 'wb') as file:
        pickle.dump(transformed_user_data, file)

    print(user_id, num_windows, transformed_data.shape)

    # Drop the large arrays before the next user's data is allocated.
    del transformed_user_data
    del transformed_data
    del transformed_labels


P001 100199 (500995, 200, 3)
P002 88199 (440995, 200, 3)
P003 97199 (485995, 200, 3)
P004 77916 (389580, 200, 3)
P005 98999 (494995, 200, 3)
P006 114299 (571495, 200, 3)
P007 100799 (503995, 200, 3)
P008 95854 (479270, 200, 3)
P009 73077 (365385, 200, 3)
P010 91799 (458995, 200, 3)


In [8]:
# Reload the P001 pickle to inspect it. It is read from `path`, the directory
# the processing loop writes to, rather than from a hard-coded absolute
# location that exists on only one machine.
# NOTE: pickle.load can execute arbitrary code. Only load files produced by
# this notebook or from an equally trusted source.
with open(f'{path}/P001.obj', 'rb') as p001file:
    # The `with` block closes the file handle even if unpickling fails.
    p001Dataset = pickle.load(p001file)

In [10]:
# Print the object loaded from the P001 pickle as a quick visual check.
print(p001Dataset)
# Remove the name so the loaded data can be reclaimed once nothing else refers to it.
del p001Dataset

{'X': array([[[-0.55305357, -0.51706906,  0.69314428],
        [-0.50472098, -0.56538143,  0.65664381],
        [-0.44077426, -0.53410961,  0.59784726],
        ...,
        [-0.47601569, -0.48092514,  0.68703596],
        [-0.46727538, -0.53516123,  0.74507716],
        [-0.43284447, -0.50125229,  0.6164534 ]],

       [[-0.47427553, -0.53181389,  0.56354433],
        [-0.43548332, -0.6421605 ,  0.69263543],
        [-0.45257494, -0.47719815,  0.6296225 ],
        ...,
        [-0.49897984, -0.51259347,  0.61318453],
        [-0.42932175, -0.53151222,  0.64826092],
        [-0.47932165, -0.53827612,  0.61613918]],

       [[-0.43956137, -0.50406777,  0.6121599 ],
        [-0.44457909, -0.50826508,  0.64317895],
        [-0.43738584, -0.51463917,  0.68245852],
        ...,
        [-0.42746603, -0.51584196,  0.71400161],
        [-0.48124537, -0.57680277,  0.71200579],
        [-0.42053982, -0.44312963,  0.61433704]],

       ...,

       [[ 0.0494156 , -0.79784578,  0.56569976],
     

In [None]:
# Re-read one raw recording into `df`.
# NOTE(review): this cell defines neither `path` nor `i`. It relies on the
# values left in the kernel by the processing loop, so `i` is whatever index
# that loop finished on. Confirm which file is intended.
csv_path = f'{path}/P{i+1:03d}.csv.gz'
df = pd.read_csv(csv_path, compression='gzip', low_memory=False)