In [2]:
import pandas as pd

# Load the raw dataset and print its first five rows as a quick sanity check.
df = pd.read_csv('dataset/dataset.csv')
print(df.head(5))


      acc_x     acc_y     acc_z    gyro_x    gyro_y    gyro_z  label
0  0.042693 -1.784441  0.336893  0.110804 -0.074376  0.102657      0
1 -0.001908 -1.579855 -0.242369  0.052294 -0.038513  0.108854      0
2 -0.023960 -1.397642 -0.722993 -0.008995 -0.023952  0.115826      0
3 -0.056374 -1.257657 -1.130061 -0.031175 -0.010533  0.100920      0
4 -0.247819 -1.177400 -1.608836  0.002271  0.025864  0.061671      0


In [3]:
import numpy as np

# Number of consecutive rows grouped into one window.
sequence_length = 50
features = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']

X = df[features].values
y = df['label'].values

# Keep only as many rows as fill whole windows; the incomplete tail is dropped.
num_full_windows = X.shape[0] // sequence_length
trimmed_X = X[:num_full_windows * sequence_length]

# Take the channel count from `features` rather than a literal, so editing the
# feature list cannot disagree with the reshape.
X_reshaped = trimmed_X.reshape(num_full_windows, sequence_length, len(features))

# One label per window: the label of the window's last row.
# NOTE(review): a window that straddles a label change gets the later label;
# confirm that label changes in the CSV fall on multiples of sequence_length.
y_reshaped = y[sequence_length - 1::sequence_length]

print(f"Reshaped X shape: {X_reshaped.shape}")
print(f"Reshaped y shape: {y_reshaped.shape}")




Reshaped X shape: (254, 50, 6)
Reshaped y shape: (254,)


In [None]:
# Scale the accelerometer channels (the first three features) to m/s^2.
# NOTE(review): assumes the raw accelerometer values are expressed in g; TODO confirm.
ACC_SCALE = 9.81  # m/s^2 per g (rounded)

# An in-place `*=` on the existing `X_reshaped` compounds every time this cell
# is re-run, and a reshape result can share memory with `trimmed_X`. Rebuild an
# independent copy from the unscaled `trimmed_X` first so the cell is idempotent.
X_reshaped = trimmed_X.reshape(num_full_windows, sequence_length, len(features)).copy()
X_reshaped[:, :, 0:3] *= ACC_SCALE

print(f"Normalized accelerometer data (acc_x, acc_y, acc_z): {X_reshaped[:, :, 0:3].min(), X_reshaped[:, :, 0:3].max()}")


Normalized accelerometer data (acc_x, acc_y, acc_z): (np.float64(-395.0247963938985), np.float64(537.1252566475667))


In [5]:
from sklearn.model_selection import train_test_split
import numpy as np

# Windowed inputs, shape (samples, 50, 6), and their per-window labels, shape (samples,).
X, y = X_reshaped, y_reshaped

# Stage 1: hold out 20% of the windows, stratified by label.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Stage 2: split the hold-out in half into validation and test, again stratified.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Write every split to a .npy file in the current working directory.
for npy_path, split_array in (
    ("X_train.npy", X_train),
    ("y_train.npy", y_train),
    ("X_val.npy", X_val),
    ("y_val.npy", y_val),
    ("X_test.npy", X_test),
    ("y_test.npy", y_test),
):
    np.save(npy_path, split_array)


In [6]:
from collections import Counter
# Per-class window counts for each split.
for split_tag, split_labels in (("Train:", y_train), ("Val:", y_val), ("Test:", y_test)):
    print(split_tag, Counter(split_labels))


Train: Counter({np.int64(1): 51, np.int64(3): 51, np.int64(2): 51, np.int64(0): 50})
Val: Counter({np.int64(0): 7, np.int64(3): 6, np.int64(2): 6, np.int64(1): 6})
Test: Counter({np.int64(2): 7, np.int64(3): 7, np.int64(0): 6, np.int64(1): 6})


In [7]:
import numpy as np
import random
from scipy.interpolate import interp1d

def add_noise(data, noise_factor=0.01):
    """Return `data` plus zero-mean Gaussian noise.

    Args:
        data: array whose `.shape` determines the shape of the noise drawn.
        noise_factor: standard deviation of the noise.

    Returns:
        A new array `data + noise`; `data` itself is not modified.

    The noise is drawn from NumPy's global `np.random` state.
    """
    perturbation = np.random.normal(loc=0, scale=noise_factor, size=data.shape)
    return data + perturbation

def time_warp(data, target_length, sampling_rate=60, warp_factor=0.2):
    """Randomly compress or stretch `data` along axis 0, returning `target_length` rows.

    A factor is drawn uniformly from [1 - warp_factor, 1 + warp_factor] using
    the `random` module. The sequence is linearly resampled to
    `int(len(data) * factor)` rows (at least 1) spanning the same start and end
    points, then forced to `target_length` rows:

    * fewer rows than `target_length` (compression): zero-padded at the end;
    * more rows than `target_length` (stretch): truncated to the first
      `target_length` rows.

    Args:
        data: 2-D array; axis 0 is the axis that is resampled.
        target_length: number of rows in the returned array.
        sampling_rate: unused; kept so existing callers keep working.
        warp_factor: half-width of the interval the random factor is drawn from.

    Returns:
        Float array with `target_length` rows and the same number of columns
        as `data`.
    """
    random_warp = random.uniform(1 - warp_factor, 1 + warp_factor)
    num_points = data.shape[0]

    # Deliberately not capped at target_length: capping would turn every
    # stretch (factor > 1) into an identity resample and make the truncation
    # branch below unreachable.
    new_num_points = max(int(num_points * random_warp), 1)

    if num_points >= 2:
        time_indices = np.linspace(0, num_points - 1, num_points)
        new_time_indices = np.linspace(0, num_points - 1, new_num_points)

        interpolator = interp1d(time_indices, data, axis=0, kind='linear', fill_value="extrapolate")
        warped_data = interpolator(new_time_indices)
    else:
        # With fewer than two rows there is nothing to interpolate between;
        # pass the data through and let the pad/crop step fix the length.
        warped_data = np.asarray(data, dtype=float)

    if warped_data.shape[0] < target_length:
        pad_size = target_length - warped_data.shape[0]
        warped_data = np.pad(warped_data, ((0, pad_size), (0, 0)), mode='constant', constant_values=0)
    elif warped_data.shape[0] > target_length:
        warped_data = warped_data[:target_length]

    return warped_data

def jitter_data(data, jitter_factor=0.01):
    """Return `data` with zero-mean Gaussian jitter added element-wise.

    Args:
        data: array whose `.shape` determines the shape of the jitter drawn.
        jitter_factor: standard deviation of the jitter.

    Returns:
        A new array; `data` itself is not modified.

    NOTE(review): this performs the same operation as `add_noise` in this
    notebook (Gaussian noise with a configurable standard deviation).
    """
    return data + np.random.normal(loc=0, scale=jitter_factor, size=data.shape)

def augment_data(X, y, noise_factor=0.01, warp_factor=0.2, jitter_factor=0.01):
    """Create three augmented variants of every sample in `X`.

    For each sample, in this order, the output contains a noisy copy
    (`add_noise`), a time-warped copy (`time_warp`, resized to `X.shape[1]`
    rows) and a jittered copy (`jitter_data`). The original samples are not
    included in the result.

    Args:
        X: array indexed as X[i] -> one sample; `X.shape[1]` is the window length.
        y: labels, indexed as y[i] for sample i.
        noise_factor: forwarded to `add_noise`.
        warp_factor: forwarded to `time_warp`.
        jitter_factor: forwarded to `jitter_data`.

    Returns:
        (augmented_X, augmented_y) as NumPy arrays holding three entries per
        input sample; each variant carries the label of its source sample.
    """
    window_length = X.shape[1]
    variants_out, labels_out = [], []

    for idx, window in enumerate(X):
        window_label = y[idx]

        # Keep this call order: it fixes the order in which random numbers are drawn.
        variants = [
            add_noise(window, noise_factor),
            time_warp(window, window_length, warp_factor=warp_factor),
            jitter_data(window, jitter_factor),
        ]

        variants_out.extend(variants)
        labels_out.extend([window_label] * len(variants))

    return np.array(variants_out), np.array(labels_out)


# Seed both random sources used by the augmentation helpers (`random` for the
# time-warp factor, `np.random` for noise and jitter) so the files written
# below are reproducible after a kernel restart.
AUGMENTATION_SEED = 42
random.seed(AUGMENTATION_SEED)
np.random.seed(AUGMENTATION_SEED)

augmented_X_train, augmented_y_train = augment_data(X_train, y_train)
# NOTE(review): the validation split is augmented too; confirm this is intended,
# since metrics on augmented validation data are not metrics on real recordings.
augmented_X_val, augmented_y_val = augment_data(X_val, y_val)

# Persist the augmented splits next to the un-augmented ones.
np.save('augmented_X_train.npy', augmented_X_train)
np.save('augmented_y_train.npy', augmented_y_train)
np.save('augmented_X_val.npy', augmented_X_val)
np.save('augmented_y_val.npy', augmented_y_val)



In [8]:
from collections import Counter
# Per-class window counts of the two held-out splits.
for caption, split_labels in (("Validation label counts:", y_val), ("Test label counts:", y_test)):
    print(caption, Counter(split_labels))


Validation label counts: Counter({np.int64(0): 7, np.int64(3): 6, np.int64(2): 6, np.int64(1): 6})
Test label counts: Counter({np.int64(2): 7, np.int64(3): 7, np.int64(0): 6, np.int64(1): 6})
