In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

from src.DataLoader import DataLoader

In [2]:
accel_S3 = DataLoader().load_ACC_data("S3", "evening")

accel_S3.head()

Unnamed: 0,X_acceleration,Y_acceleration,Z_acceleration,UNIX_time
0,-58.0,-12.0,30.0,1675172000.0
1,-59.0,-12.0,29.0,1675172000.0
2,-58.0,-12.0,29.0,1675172000.0
3,-58.0,-12.0,30.0,1675172000.0
4,-59.0,-12.0,30.0,1675172000.0


In [18]:
accel_S4 = DataLoader().load_ACC_data("S4", "morning")

accel_S4.head()

Unnamed: 0,X_acceleration,Y_acceleration,Z_acceleration,UNIX_time
0,-51.0,39.0,9.0,1675152000.0
1,-46.0,28.0,24.0,1675152000.0
2,-45.0,23.0,32.0,1675152000.0
3,-44.0,26.0,42.0,1675152000.0
4,-35.0,24.0,49.0,1675152000.0


In [34]:
'''This script defines a function that computes FFT features
(e.g. mean, standard deviation, etc.) from accelerometer data captured by an
Inertial Measurement Unit (IMU). The input is a Pandas DataFrame whose first
three columns represent X, Y, and Z axis accelerations. The function splits the
data into windows and calculates statistical parameters for each one,
consolidating them into a cohesive DataFrame at the end.'''

import numpy as np
import pandas as pd
from scipy.stats import entropy, kurtosis, skew

def windows_FFT_features(data, window_size = 5):
    '''Compute the FFT features of accelerometer data over windows of time
        Args:
            data (pd.DataFrame): Accelerometer data captured by an
                Inertial Measurement Unit (IMU). The first columns of the
                DataFrame should be labelled "X_acceleration", "Y_acceleration",
                respectively "Z_acceleration"
            windows_size (int): The size of the windows into which the input
                DataFrame will be split; measured in seconds
        Returns:
            pd.DataFrame: DataFrame containing the statistical features.'''

    # Declare the input data sampling rate
    sampling_rate = 32 # [Hz]
    # Transform the window size from seconds into number of data entries
    window_size *= sampling_rate
    # Compute the number of windows into which the input data is devided
    num_windows = len(data) // window_size

    # Initialize an empty DataFrame to store computed features
    columns = [
                # X-axis acceleration mean over the window interval
               'mean_x',
               # Y-axis acceleration mean over the window interval
               'mean_y',
               # Z-axis acceleration mean over the window interval
               'mean_z',
               # X-axis acceleration standard deviation over the window interval
               'std_x',
               # Y-axis acceleration standard deviation over the window interval
               'std_y',
               # Z-axis acceleration standard deviation over the window interval
               'std_z',
               # X-axis acceleration variation over the window interval
                'var_x',
               # Y-axis acceleration variation over the window interval
                'var_y',
               # Z-axis acceleration variation over the window interval
                'var_z',
               # Energy (Euclidean norm) over the window interval
                'energy',
               # Entropy over the window interval
            #    'entropy',
               # X-axis acceleration skew over the window interval
                'skew_x',
               # Y-axis acceleration skew over the window interval
                'skew_y',
               # Z-axis acceleration skew over the window interval
                'skew_z',
               # X-axis acceleration kurtosis over the window interval
                'kurt_x',
               # Y-axis acceleration kurtosis over the window interval
                'kurt_y',
               # Z-axis acceleration kurtosis over the window interval
                'kurt_z',
               # X-axis acceleration range over the window interval
                'range_x',
               # Y-axis acceleration range over the window interval
                'range_y',
               # Z-axis acceleration range over the window interval
                'range_z',
               # X-axis acceleration bottom quarter percentile over the window
               # interval
                'p25_x',
               # X-axis acceleration half percentile over the window interval
                'p50_x',
               # X-axis acceleration top quarter percentile over the window
               # interval
                'p75_x',
               # Y-axis acceleration bottom quarter percentile over the window
               # interval
                'p25_y',
               # Y-axis acceleration half percentile over the window interval
                'p50_y',
               # Y-axis acceleration top quarter percentile over the window
               # interval
                'p75_y',
               # Z-axis acceleration bottom quarter percentile over the window
               # interval
                'p25_z',
               # Z-axis acceleration half percentile over the window interval
                'p50_z',
               # Z-axis acceleration top quarter percentile over the window
               # interval
                'p75_z',
            #    'approx_entropy',
            #    'sample_entropy',
            #    'perm_entropy'
               ]

    # Aceleration axes labels
    axis_labels = ["X", "Y", "Z"]

    # Add cross-correlation columns
    for i in range(len(axis_labels)):
        for j in range(i + 1, len(axis_labels)):
            columns.append(f"cross_corr_{axis_labels[i]}_{axis_labels[j]}")

    # Create an empty Pandas DataFrame to store the features into
    features_df = pd.DataFrame(columns=columns)

    # Loop over the windows
    for i in range(num_windows):
        # Slice the DataFrame according to the window size
        window_data = data.iloc[i * window_size : (i + 1) * window_size]
        # Transform the DataFrame slice into a Numpy array
        if isinstance(window_data, pd.DataFrame):
            window_data = window_data.to_numpy()

        # Compute features within the window
        features = {
            # Compute the X-axis acceleration mean over the window interval
            'mean_x': np.mean(window_data[:, 0]),
            # Compute the Y-axis acceleration mean over the window interval
            'mean_y': np.mean(window_data[:, 1]),
            # Compute the Z-axis acceleration mean over the window interval
            'mean_z': np.mean(window_data[:, 2]),
            # Compute the X-axis acceleration standard deviation over the window
            # interval
            'std_x': np.std(window_data[:, 0]),
            # Compute the Y-axis acceleration standard deviation over the window
            # interval
            'std_y': np.std(window_data[:, 1]),
            # Compute the Z-axis acceleration standard deviation over the window
            # interval
            'std_z': np.std(window_data[:, 2]),
            # Compute the X-axis acceleration variance over the window interval
             'var_x': np.var(window_data[:, 0]),
            # Compute the Y-axis acceleration variance over the window interval
             'var_y': np.var(window_data[:, 1]),
            # Compute the Z-axis acceleration variance over the window interval
             'var_z': np.var(window_data[:, 2]),
            # Compute the energy (euclidean norm) along over the window interval
             'energy': np.linalg.norm(window_data[:, :3], axis = None),
            # Compute entropy over the window interval
            # 'entropy': -np.sum(np.abs(window_data) *
            #                    np.log2(np.abs(window_data))),
            # Compute the X-axis acceleration skew over the window interval
             'skew_x': skew(window_data[:, 0]),
            # Compute the Y-axis acceleration skew over the window interval
             'skew_y': skew(window_data[:, 1]),
            # Compute the Z-axis acceleration skew over the window interval
             'skew_z': skew(window_data[:, 2]),
            # Compute the X-axis acceleration kurtosis over the window interval
             'kurt_x': kurtosis(window_data[:, 0]),
            # Compute the Y-axis acceleration kurtosis over the window interval
             'kurt_y': kurtosis(window_data[:, 1]),
            # Compute the Z-axis acceleration kurtosis over the window interval
             'kurt_z': kurtosis(window_data[:, 2]),
            # Compute the X-axis acceleration range over the window interval
             'range_x': np.max(window_data[:, 0]) - np.min(window_data[:, 0]),
            # Compute the Y-axis acceleration range over the window interval
             'range_y': np.max(window_data[:, 1]) - np.min(window_data[:, 1]),
            # Compute the Z-axis acceleration range over the window interval
             'range_z': np.max(window_data[:, 2]) - np.min(window_data[:, 2]),
            # Compute the X-axis acceleration bottom quarter percentile over the
            # window interval
             'p25_x': np.percentile(window_data[:, 0], 25),
            # Compute the X-axis acceleration half percentile over the window
            # interval
             'p50_x': np.percentile(window_data[:, 0], 50),
            # Compute the  X-axis acceleration top quarter percentile over the
            # window interval
             'p75_x': np.percentile(window_data[:, 0], 75),
            # Compute the Y-axis acceleration bottom quarter percentile over the
            # window interval
             'p25_y': np.percentile(window_data[:, 1], 25),
            # Compute the Y-axis acceleration half percentile over the window
            # interval
             'p50_y': np.percentile(window_data[:, 1], 50),
            # Compute the  Y-axis acceleration top quarter percentile over the
            # window interval
             'p75_y': np.percentile(window_data[:, 1], 75),
            # Compute the Z-axis acceleration bottom quarter percentile over the
            # window interval
             'p25_z': np.percentile(window_data[:, 2], 25),
            # Compute the Z-axis acceleration half percentile over the window
            # interval
             'p50_z': np.percentile(window_data[:, 2], 50),
            # Compute the  Z-axis acceleration top quarter percentile over the
            # window interval
             'p75_z': np.percentile(window_data[:, 2], 75),
            # 'approx_entropy': app_entropy(window_data.flatten()),
            # 'sample_entropy': sample_entropy(window_data.flatten()),
            # 'perm_entropy': perm_entropy(window_data.flatten())
        }

        # Compute cross-correlation
        for i in range(3):
            for j in range(i + 1, 3):
                features[f"cross_corr_{axis_labels[i]}_{axis_labels[j]}"] = np.corrcoef(
                    window_data[:, i],
                    window_data[:, j]
                    )[0, 1]

        # Make a Pandas DataFrame from the features computed for each window
        features = pd.DataFrame(features, index = [0])

        # Append features to the DataFrame
        features_df = pd.concat([features_df, features], ignore_index=True)

    return features_df


In [35]:
windows_statistical_features(accel_S3, 5)

  features_df = pd.concat([features_df, features], ignore_index=True)


Unnamed: 0,mean_x,mean_y,mean_z,std_x,std_y,std_z,var_x,var_y,var_z,energy,...,p75_x,p25_y,p50_y,p75_y,p25_z,p50_z,p75_z,cross_corr_X_Y,cross_corr_X_Z,cross_corr_Y_Z
0,-57.40625,-11.70625,32.06250,1.454720,0.803095,2.474084,2.116211,0.644961,6.121094,845.638221,...,-56.00,-12.00,-12.0,-11.00,30.00,33.0,34.00,-0.331183,0.887483,-0.128772
1,-50.74375,-12.04375,41.13750,1.946943,0.839247,2.417353,3.790586,0.704336,5.843594,841.200333,...,-49.00,-13.00,-12.0,-11.00,40.00,41.0,43.00,0.515593,0.910138,0.659155
2,-52.74375,-9.78750,38.67500,4.574722,2.414507,5.940907,20.928086,5.829844,35.294375,842.425664,...,-49.00,-11.00,-10.0,-9.00,32.00,41.0,43.00,0.282512,0.876473,0.513725
3,-52.60000,-8.08750,37.25000,9.933026,6.507099,12.074767,98.665000,42.342344,145.800000,849.144275,...,-47.00,-12.00,-11.0,-6.75,29.00,32.0,43.00,0.272259,0.525111,0.740446
4,-53.46250,-4.26250,39.39375,2.959914,1.776540,3.598779,8.761094,3.156094,12.951211,844.098928,...,-51.00,-4.00,-4.0,-3.00,37.00,37.0,42.00,-0.522289,0.925956,-0.453069
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,-53.80000,25.88750,16.48125,8.754142,17.466392,15.313789,76.635000,305.074844,234.512148,844.033767,...,-48.75,14.00,17.0,35.00,11.75,22.0,27.00,0.512031,0.075787,0.205804
356,-58.12500,20.73125,8.87500,10.778886,18.463045,10.423381,116.184375,340.884023,108.646875,844.064571,...,-52.00,8.00,14.0,23.75,1.75,6.0,13.00,0.610193,0.643705,0.751297
357,-55.91875,25.40625,11.65000,11.224400,17.162931,10.636729,125.987148,294.566211,113.140000,843.023131,...,-49.00,15.75,19.0,31.25,4.75,8.0,20.00,0.675818,0.660831,0.675464
358,-54.75625,30.98125,7.68125,11.371976,14.834281,12.613669,129.321836,220.055898,159.104648,850.935368,...,-48.00,23.00,31.5,41.00,-1.00,5.0,12.25,0.557171,0.460526,0.292335


In [33]:
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
#from entropy import app_entropy, sample_entropy, perm_entropy

# Load your preprocessed accelerometer data (replace with your actual data)
# X_preprocessed: Preprocessed features (standardized and normalized)
# Example: X_preprocessed = load_preprocessed_data()

def compute_features_in_windows(X, window_size):
    num_windows = len(X) // window_size

    # Initialize an empty DataFrame to store computed features
    columns = ['mean_x', 'mean_y', 'mean_z', 'std_x', 'std_y', 'std_z',
               'var_x', 'var_y', 'var_z', 'energy', 'entropy',
               'skew_x', 'skew_y', 'skew_z', 'kurt_x', 'kurt_y', 'kurt_z',
               'range_x', 'range_y', 'range_z',
               'p25_x', 'p50_x', 'p75_x', 'p25_y', 'p50_y', 'p75_y', 'p25_z', 'p50_z', 'p75_z',
               #'approx_entropy',
               #'sample_entropy',
               #'perm_entropy'
               ]

    # Aceleration axes labels
    axis_labels = ["X", "Y", "Z"]

    # Add cross-correlation columns
    for i in range(len(axis_labels)):
        for j in range(i + 1, len(axis_labels)):
            columns.append(f"cross_corr_{axis_labels[i]}_{axis_labels[j]}")

    # Create an empty Pandas DataFrame to store the features into
    features_df = pd.DataFrame(columns=columns)

    for i in range(num_windows):
        window_data = X.iloc[i * window_size : (i + 1) * window_size]
        if isinstance(window_data, pd.DataFrame):
            window_data = window_data.to_numpy()

        # Compute features within the window
        features = {
            'mean_x': np.mean(window_data[:, 0]),
            'mean_y': np.mean(window_data[:, 1]),
            'mean_z': np.mean(window_data[:, 2]),
            'std_x': np.std(window_data[:, 0]),
            'std_y': np.std(window_data[:, 1]),
            'std_z': np.std(window_data[:, 2]),
            'var_x': np.var(window_data[:, 0]),
            'var_y': np.var(window_data[:, 1]),
            'var_z': np.var(window_data[:, 2]),
            'energy': np.linalg.norm(window_data[:, :3], axis = 0),
            'entropy': entropy(window_data.T),  # Compute entropy along columns
            'skew_x': skew(window_data[:, 0]),
            'skew_y': skew(window_data[:, 1]),
            'skew_z': skew(window_data[:, 2]),
            'kurt_x': kurtosis(window_data[:, 0]),
            'kurt_y': kurtosis(window_data[:, 1]),
            'kurt_z': kurtosis(window_data[:, 2]),
            'range_x': np.max(window_data[:, 0]) - np.min(window_data[:, 0]),
            'range_y': np.max(window_data[:, 1]) - np.min(window_data[:, 1]),
            'range_z': np.max(window_data[:, 2]) - np.min(window_data[:, 2]),
            'p25_x': np.percentile(window_data[:, 0], 25),
            'p50_x': np.percentile(window_data[:, 0], 50),
            'p75_x': np.percentile(window_data[:, 0], 75),
            'p25_y': np.percentile(window_data[:, 1], 25),
            'p50_y': np.percentile(window_data[:, 1], 50),
            'p75_y': np.percentile(window_data[:, 1], 75),
            'p25_z': np.percentile(window_data[:, 2], 25),
            'p50_z': np.percentile(window_data[:, 2], 50),
            'p75_z': np.percentile(window_data[:, 2], 75),
            #'approx_entropy': app_entropy(window_data.flatten()),
            #'sample_entropy': sample_entropy(window_data.flatten()),
            #'perm_entropy': perm_entropy(window_data.flatten())
        }

        # Compute cross-correlation
        for i in range(3):
            for j in range(i + 1, 3):
                features[f'cross_corr_{i}{j}'] = np.corrcoef(window_data[:, i], window_data[:, j])[0, 1]

        # Make a Pandas DataFrame from the features computed for each window
        features = pd.DataFrame(features, index = [0])

        # Append features to the DataFrame
        features_df = pd.concat([features_df, features], ignore_index=True)

    return features_df


In [83]:
compute_features_in_windows(accel_S3, 5)

  'skew_y': skew(window_data[:, 1]),
  'kurt_y': kurtosis(window_data[:, 1]),
  c /= stddev[:, None]
  c /= stddev[None, :]


ValueError: Length of values (3) does not match length of index (1)

In [29]:
def split_windows(X, window_size):
    num_windows = len(X) // window_size

    # Initialize an empty DataFrame to store computed features
    columns = ['mean_x', 'mean_y', 'mean_z', 'std_x', 'std_y', 'std_z',
               'var_x', 'var_y', 'var_z', 'energy', 'entropy',
               'skew_x', 'skew_y', 'skew_z', 'kurt_x', 'kurt_y', 'kurt_z',
               'range_x', 'range_y', 'range_z',
               'p25_x', 'p50_x', 'p75_x', 'p25_y', 'p50_y', 'p75_y', 'p25_z', 'p50_z', 'p75_z',
               'approx_entropy', 'sample_entropy', 'perm_entropy']

    # Add cross-correlation columns
    for i in range(3):
        for j in range(i + 1, 3):
            columns.append(f'cross_corr_{i}{j}')

    features_df = pd.DataFrame(columns=columns)

    for i in range(num_windows):
        window_data = X[i * window_size : (i + 1) * window_size]

    return window_data

In [88]:
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
#from entropy import app_entropy, sample_entropy, perm_entropy

# Load your preprocessed accelerometer data (replace with your actual data)
# X_preprocessed: Preprocessed features (standardized and normalized)
# Example: X_preprocessed = load_preprocessed_data()

def compute_means_in_windows(X, window_size):
    num_windows = len(X) // window_size

    # Initialize an empty DataFrame to store computed features
    columns = ['mean_x', 'mean_y', 'mean_z']

    # Create an empty Pandas DataFrame to store the features into
    features_df = pd.DataFrame(columns=columns)

    for i in range(num_windows):
        print(features_df)
        window_data = X.iloc[i * window_size : (i + 1) * window_size]

        # Compute features within the window
        features = {
            'mean_x': np.mean(window_data.iloc[:, 0]),
            'mean_y': np.mean(window_data.iloc[:, 1]),
            'mean_z': np.mean(window_data.iloc[:, 2])}


        # Make a Pandas DataFrame from the features computed for each window
        features = pd.DataFrame(features, index = [0])

        print(features)

        # Append features to the DataFrame
        features_df = pd.concat([features_df, features], ignore_index=True)

    return features_df

In [89]:
compute_means_in_windows(accel_S3 , 5)

Empty DataFrame
Columns: [mean_x, mean_y, mean_z]
Index: []
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
   mean_x  mean_y  mean_z
0   -58.8   -12.2    29.4
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
1   -58.8   -12.2    29.4
   mean_x  mean_y  mean_z
0   -58.8   -12.0    30.0
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
1   -58.8   -12.2    29.4
2   -58.8   -12.0    30.0
   mean_x  mean_y  mean_z
0   -58.8   -11.4    29.6
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
1   -58.8   -12.2    29.4
2   -58.8   -12.0    30.0
3   -58.8   -11.4    29.6
   mean_x  mean_y  mean_z
0   -59.0   -11.2    29.4
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
1   -58.8   -12.2    29.4
2   -58.8   -12.0    30.0
3   -58.8   -11.4    29.6
4   -59.0   -11.2    29.4
   mean_x  mean_y  mean_z
0   -58.8   -11.6    29.4
   mean_x  mean_y  mean_z
0   -58.4   -12.0    29.6
1   -58.8   -12.2    29.4
2   -58.8   -12.0    30.0
3   

  features_df = pd.concat([features_df, features], ignore_index=True)


   mean_x  mean_y  mean_z
0   -49.2   -11.2    43.6
    mean_x  mean_y  mean_z
0    -58.4   -12.0    29.6
1    -58.8   -12.2    29.4
2    -58.8   -12.0    30.0
3    -58.8   -11.4    29.6
4    -59.0   -11.2    29.4
..     ...     ...     ...
56   -49.4   -11.0    43.4
57   -49.0   -11.2    43.6
58   -49.0   -12.0    42.4
59   -49.0   -11.0    43.2
60   -49.2   -11.2    43.6

[61 rows x 3 columns]
   mean_x  mean_y  mean_z
0   -48.6   -11.8    43.2
    mean_x  mean_y  mean_z
0    -58.4   -12.0    29.6
1    -58.8   -12.2    29.4
2    -58.8   -12.0    30.0
3    -58.8   -11.4    29.6
4    -59.0   -11.2    29.4
..     ...     ...     ...
57   -49.0   -11.2    43.6
58   -49.0   -12.0    42.4
59   -49.0   -11.0    43.2
60   -49.2   -11.2    43.6
61   -48.6   -11.8    43.2

[62 rows x 3 columns]
   mean_x  mean_y  mean_z
0   -50.4   -12.4    41.2
    mean_x  mean_y  mean_z
0    -58.4   -12.0    29.6
1    -58.8   -12.2    29.4
2    -58.8   -12.0    30.0
3    -58.8   -11.4    29.6
4    -59.0   -1

Unnamed: 0,mean_x,mean_y,mean_z
0,-58.4,-12.0,29.6
1,-58.8,-12.2,29.4
2,-58.8,-12.0,30.0
3,-58.8,-11.4,29.6
4,-59.0,-11.2,29.4
...,...,...,...
11515,-6.8,23.2,-48.2
11516,-5.4,29.0,-56.6
11517,-5.2,29.4,-57.8
11518,-5.0,35.6,-59.4


In [90]:
accel_S3.shape

(57600, 4)