In [1]:
import pandas as pd
import os
from dotenv import load_dotenv

# Pull settings from a local .env file (python-dotenv) before reading them.
load_dotenv()

# Root folder of the raw recordings; the loading cell joins it with each key
# of `categories` and lists the files inside.
data_path = os.getenv('RAW_DATA_PATH')
if data_path is None:
    # Fail here with an actionable message instead of a TypeError raised by
    # os.path.join(None, ...) further down the notebook.
    raise RuntimeError(
        "RAW_DATA_PATH is not set; define it in the environment or in a .env file."
    )

# Sub-folder name -> activity label written to the 'action' column.
# Insertion order matters: it drives the order rows are loaded in.
categories = {'idle': 'idle',
              'running': 'run',
              'stairs': 'stair',
              'walking': 'walk'}

In [2]:
import re

def natural_sort_key(s):
    """Build a sort key that orders embedded ASCII numbers numerically.

    Args:
        s: The string to build a key for (e.g. a file name).

    Returns:
        A list alternating lower-cased text pieces and ints, e.g.
        ``'File10.csv' -> ['file', 10, '.csv']``, so that ``'f2'`` sorts
        before ``'f10'``.
    """
    # re.split with a capturing group always yields text, digits, text, ...
    # so the odd positions are exactly the [0-9]+ runs. Deciding by position
    # rather than str.isdigit() keeps non-ASCII "digit" characters (such as
    # superscripts) in the text slots, where they can neither crash int()
    # nor put an int where other keys hold a string.
    pieces = re.split('([0-9]+)', s)
    return [int(piece) if pos % 2 else piece.lower() for pos, piece in enumerate(pieces)]

def get_col_names(n_samples=30, axes=('x', 'y', 'z')):
    """Return the flattened column names for one recording window.

    Names are produced sample by sample, cycling through the axes within
    each sample: ``x_1, y_1, z_1, x_2, ...``.

    Args:
        n_samples: Number of samples per window; sample indices start at 1.
        axes: Axis prefixes, in the order they appear within one sample.

    Returns:
        A list of ``len(axes) * n_samples`` strings of the form
        ``'<axis>_<sample index>'``.
    """
    return [f'{axis}_{i}' for i in range(1, n_samples + 1) for axis in axes]

In [3]:
# The wide column layout is identical for every file, so build it once.
col_names = get_col_names()
dataframes = []

for category, label in categories.items():
    category_path = os.path.join(data_path, category)

    # Natural ordering keeps numbered files in numeric order, so the row
    # order of the final frame is reproducible.
    for file in sorted(os.listdir(category_path), key=natural_sort_key):
        file_path = os.path.join(category_path, file)
        raw_df = pd.read_csv(file_path)

        # Flatten the recording row by row into a single wide row.
        # NOTE(review): this assumes each CSV holds the samples as rows and
        # the axes as columns in x, y, z order -- confirm against the data.
        flat_values = raw_df.values.flatten()
        if len(flat_values) != len(col_names):
            # Same exception type pandas would raise, but naming the file.
            raise ValueError(
                f"{file_path}: expected {len(col_names)} values, "
                f"got {len(flat_values)} (CSV shape {raw_df.shape})"
            )

        temp_df = pd.DataFrame([flat_values], columns=col_names)
        temp_df['action'] = label

        dataframes.append(temp_df)

# One row per file, one 'action' label per row.
df = pd.concat(dataframes, ignore_index=True)

In [4]:
from scipy.stats import entropy, skew, kurtosis
import numpy as np

# Per-axis summary statistics, computed row-wise over the 30 samples of each
# window and appended to `df` as new columns (insertion order is preserved).
for axis_name in ['x', 'y', 'z']:
    sensor_columns = [f'{axis_name}_{i}' for i in range(1, 31)]

    # Intermediates shared by several features below.
    window = df[sensor_columns]
    row_mean = window.mean(axis=1)
    row_max = window.max(axis=1)
    row_min = window.min(axis=1)
    squared = window ** 2
    mean_square = squared.mean(axis=1)

    # Location and spread.
    df[f'{axis_name}_mean'] = row_mean
    df[f'{axis_name}_variance'] = window.var(axis=1)
    df[f'{axis_name}_std'] = window.std(axis=1)
    df[f'{axis_name}_median'] = window.median(axis=1)
    df[f'{axis_name}_range'] = row_max - row_min
    df[f'{axis_name}_max_value'] = row_max
    df[f'{axis_name}_min_value'] = row_min
    df[f'{axis_name}_rms'] = np.sqrt(mean_square)
    df[f'{axis_name}_signal_magnitude_area'] = np.abs(window).sum(axis=1)

    # 1-based sample index of the extreme values, parsed back out of the
    # '<axis>_<index>' column name returned by idxmax / idxmin.
    peak_columns = window.idxmax(axis=1)
    df[f'{axis_name}_index_max_value'] = peak_columns.apply(lambda name: int(name.split('_')[1]))
    trough_columns = window.idxmin(axis=1)
    df[f'{axis_name}_index_min_value'] = trough_columns.apply(lambda name: int(name.split('_')[1]))

    df[f'{axis_name}_power'] = mean_square
    df[f'{axis_name}_energy'] = squared.sum(axis=1)
    # Shift each row by |min| + 1 so every value is strictly positive before
    # scipy's entropy treats the row as an (unnormalised) distribution.
    df[f'{axis_name}_entropy'] = window.apply(
        lambda samples: entropy(samples + np.abs(samples.min()) + 1), axis=1
    )
    # NOTE(review): skewness comes from pandas while kurtosis comes from
    # scipy with default arguments; their default bias conventions differ
    # per the library docs -- confirm the mix is intended.
    df[f'{axis_name}_skewness'] = window.skew(axis=1)
    df[f'{axis_name}_kurtosis'] = window.apply(kurtosis, axis=1)
    upper_quartile = window.quantile(0.75, axis=1)
    lower_quartile = window.quantile(0.25, axis=1)
    df[f'{axis_name}_interquartile_range'] = upper_quartile - lower_quartile
    df[f'{axis_name}_mean_absolute_deviation'] = window.sub(row_mean, axis=0).abs().mean(axis=1)

In [6]:
def calculate_row_cross_correlation(row, sensor1, sensor2, n_samples=30):
    """Return the Pearson correlation between two axes' samples in one row.

    Despite the name, no lags are evaluated: the two sample sequences are
    paired position by position and passed to ``Series.corr``.

    Args:
        row: A Series holding ``'<sensor>_<i>'`` entries for both sensors.
        sensor1: Prefix of the first axis (e.g. ``'x'``).
        sensor2: Prefix of the second axis (e.g. ``'y'``).
        n_samples: Number of samples per axis; indices run 1..n_samples.

    Returns:
        The correlation coefficient as a float. ``Series.corr`` yields NaN
        when it cannot be computed, e.g. for a constant sequence.
    """
    first_cols = [f'{sensor1}_{i}' for i in range(1, n_samples + 1)]
    second_cols = [f'{sensor2}_{i}' for i in range(1, n_samples + 1)]

    # Rows handed over by df.apply(axis=1) are object dtype when the frame
    # also holds text columns, so cast to float explicitly. Wrapping the raw
    # arrays in fresh Series pairs the samples by position; the original
    # 'x_1' / 'y_1' labels would never align with each other.
    first = pd.Series(row[first_cols].to_numpy(dtype=float))
    second = pd.Series(row[second_cols].to_numpy(dtype=float))
    return first.corr(second)

# One correlation feature per axis pairing, evaluated row by row.
for sensor1, sensor2 in (('x', 'y'), ('y', 'z'), ('z', 'x')):
    df[f'cross_corr_{sensor1}_{sensor2}'] = df.apply(
        calculate_row_cross_correlation,
        axis=1,
        args=(sensor1, sensor2),
    )

In [13]:
# Integer class ids, numbered in order of first appearance in 'action'.
action_codes, action_names = pd.factorize(df['action'])
df['label'] = action_codes

# Persist the full feature table (raw samples, statistics, labels) without
# the row index.
df.to_csv('complete_ds_2.csv', index=False)