In [1]:
import numpy as np
import sys
sys.path.append('../')  # Adjust the path as necessary
import dataprocessing

import pandas as pd
from scipy.stats import kurtosis
from scipy.stats import skew
from collections import Counter



filepath = '../datasets/pos_A.csv'
filepath2 = '../datasets/Fredrik_funny_walk.csv'
categories = ['walking', 'standing', 'grazing', 'eating']
windows_ac = dataprocessing.import_and_downsample(filepath2, False, 'HP', None, True)  #
all_time_domain_signals = dataprocessing.process_window(windows_ac, None, False)
features_df = pd.DataFrame()



In [2]:
def get_majority_label(labels):
    label_counts = Counter(labels)
    majority_label = label_counts.most_common(1)[0][0]  # Get the most common label
    return majority_label

In [3]:
#RAW ACCELEROMETER DATA

# Initialize lists for all features
mean_acc_x, mean_acc_y, mean_acc_z = [], [], []
std_acc_x, std_acc_y, std_acc_z = [], [], []
max_acc_x, max_acc_y, max_acc_z = [], [], []
min_acc_x, min_acc_y, min_acc_z = [], [], []
Q5_x, Q5_y, Q5_z = [], [], []
Q95_x, Q95_y, Q95_z = [], [], []
average_intensity = []
label = []


# Process each window
for window in windows_ac:
    # Calculate means
    mean_acc_x.append(window['ax'].mean())
    mean_acc_y.append(window['ay'].mean())
    mean_acc_z.append(window['az'].mean())

    # Calculate standard deviations
    std_acc_x.append(window['ax'].std())
    std_acc_y.append(window['ay'].std())
    std_acc_z.append(window['az'].std())

    # Calculate maximums
    max_acc_x.append(window['ax'].max())
    max_acc_y.append(window['ay'].max())
    max_acc_z.append(window['az'].max())

    # Calculate minimums
    min_acc_x.append(window['ax'].min())
    min_acc_y.append(window['ay'].min())
    min_acc_z.append(window['az'].min())

    # Calculate 5th percentiles
    Q5_x.append(np.percentile(window['ax'], 5))
    Q5_y.append(np.percentile(window['ay'], 5))
    Q5_z.append(np.percentile(window['az'], 5))

    # Calculate 95th percentilesxw
    Q95_x.append(np.percentile(window['ax'], 95))
    Q95_y.append(np.percentile(window['ay'], 95))
    Q95_z.append(np.percentile(window['az'], 95))

    # Calculate average intensity
    amag = np.sqrt(window['ax']**2 + window['ay']**2 + window['az']**2)
    average_intensity.append(np.mean(amag))
    label.append(get_majority_label(window['label'].tolist()))
    
    
# Create a DataFrame from the features
features_df = pd.DataFrame({
    'mean_acc_x': mean_acc_x,
    'mean_acc_y': mean_acc_y,
    'mean_acc_z': mean_acc_z,
    'std_acc_x': std_acc_x,
    'std_acc_y': std_acc_y,
    'std_acc_z': std_acc_z,
    'max_acc_x': max_acc_x,
    'max_acc_y': max_acc_y,
    'max_acc_z': max_acc_z,
    'min_acc_x': min_acc_x,
    'min_acc_y': min_acc_y,
    'min_acc_z': min_acc_z,
    'Q5_x': Q5_x,
    'Q5_y': Q5_y,
    'Q5_z': Q5_z,
    'Q95_x': Q95_x,
    'Q95_y': Q95_y,
    'Q95_z': Q95_z,
    'average_intensity': average_intensity,
    'label': label
})

# Set the index name if needed
features_df.index.name = 'window_id'


KeyError: 'label'

In [7]:
print(features_df.head)

<bound method NDFrame.head of            mean_acc_x  mean_acc_y  mean_acc_z  std_acc_x  std_acc_y  \
window_id                                                             
0           -4.868100    8.187096   -3.573674   2.371603   2.150729   
1           -4.916078    8.199861   -3.460064   2.066944   1.923925   
2           -4.049131    8.661184   -3.410227   2.242223   2.357574   
3           -4.012356    8.888872   -3.522131   3.580163   3.324470   
4           -4.155064    8.694152   -3.251390   2.190101   2.050183   
...               ...         ...         ...        ...        ...   
3579        -5.123497    2.679978   -8.006552   1.801830   1.354730   
3580        -4.531545    4.545127   -6.743695   2.955829   2.663418   
3581        -5.952587    6.972496   -3.888684   1.498942   1.540738   
3582        -6.025417    6.680076   -4.449515   1.161416   1.029659   
3583        -6.250892    6.194609   -4.768592   1.192983   0.977037   

           std_acc_z  max_acc_x  max_acc_y  ma

In [8]:
#AC-COMPONENTS (TIME DOMAIN)

mean_ac_x, mean_ac_y, mean_ac_z = [], [], []
std_ac_x, std_ac_y, std_ac_z = [], [], []
max_ac_x, max_ac_y, max_ac_z = [], [], []
Q5_acx, Q5_acy, Q5_acz = [], [], []
Q95_acx, Q95_acy, Q95_acz = [], [], []
kurt_acx, kurt_acy, kurt_acz = [], [], []
skew_acx, skew_acy, skew_acz = [], [], []

# Iterate over each window and calculate features
for window in windows_ac:
    mean_ac_x.append(window['ac_ax'].mean())
    mean_ac_y.append(window['ac_ay'].mean())
    mean_ac_z.append(window['ac_az'].mean())
    
    std_ac_x.append(window['ac_ax'].std())
    std_ac_y.append(window['ac_ay'].std())
    std_ac_z.append(window['ac_az'].std())
    
    max_ac_x.append(window['ac_ax'].max())
    max_ac_y.append(window['ac_ay'].max())
    max_ac_z.append(window['ac_az'].max())

    Q5_acx.append(np.percentile(window['ac_ax'], 5))
    Q5_acy.append(np.percentile(window['ac_ay'], 5))
    Q5_acz.append(np.percentile(window['ac_az'], 5))

    Q95_acx.append(np.percentile(window['ac_ax'], 95))
    Q95_acy.append(np.percentile(window['ac_ay'], 95))
    Q95_acz.append(np.percentile(window['ac_az'], 95))
    
    kurt_acx.append(kurtosis(window['ac_ax'], fisher=False))
    kurt_acy.append(kurtosis(window['ac_ay'], fisher=False))
    kurt_acz.append(kurtosis(window['ac_az'], fisher=False))
    
    skew_acx.append(skew(window['ac_ax']))
    skew_acy.append(skew(window['ac_ay']))
    skew_acz.append(skew(window['ac_az']))

# Assuming you have a DataFrame called features_df to store the features
features_df['mean_ac_x'] = mean_ac_x
features_df['mean_ac_y'] = mean_ac_y
features_df['mean_ac_z'] = mean_ac_z

features_df['std_ac_x'] = std_ac_x
features_df['std_ac_y'] = std_ac_y
features_df['std_ac_z'] = std_ac_z

features_df['max_ac_x'] = max_ac_x
features_df['max_ac_y'] = max_ac_y
features_df['max_ac_z'] = max_ac_z

features_df['Q5_ac_x'] = Q5_acx
features_df['Q5_ac_y'] = Q5_acy
features_df['Q5_ac_z'] = Q5_acz

features_df['Q95_ac_x'] = Q95_acx
features_df['Q95_ac_y'] = Q95_acy
features_df['Q95_ac_z'] = Q95_acz

features_df['kurt_acx'] = kurt_acx
features_df['kurt_acy'] = kurt_acy
features_df['kurt_acz'] = kurt_acz

features_df['skew_acx'] = skew_acx
features_df['skew_acy'] = skew_acy
features_df['skew_acz'] = skew_acz

In [11]:
# Initialize a dictionary to hold the features for each frequency band and axis
# as well as for each window
features_dict = {}

# Iterate over each axis, window ID, and frequency band
for axis, windows in all_time_domain_signals.items():
    for windowID, bands in windows.items():
        for frequency_band, signal in bands.items():
            magnitudes = np.abs(signal)
            
            # Calculate the features
            rms_value = np.sqrt(np.mean(np.square(magnitudes)))
            std_value = np.std(magnitudes)
            min_val = np.min(magnitudes)
            max_val = np.max(magnitudes)
            
            # Generate feature names
            rms_feature_name = f'rms_{axis}_band{frequency_band}'
            std_feature_name = f'std_{axis}_band{frequency_band}'
            min_feature_name = f'min_{axis}_band{frequency_band}'
            max_feature_name = f'max_{axis}_band{frequency_band}'

            # Initialize nested dictionaries if not already present
            if windowID not in features_dict:
                features_dict[windowID] = {}
            
            # Store the features in the dictionary
            features_dict[windowID][rms_feature_name] = rms_value
            features_dict[windowID][std_feature_name] = std_value
            features_dict[windowID][min_feature_name] = min_val
            features_dict[windowID][max_feature_name] = max_val

# Convert the dictionary of features into a DataFrame where each key is a window ID
# and each value is another dictionary of features for that window
spectral_df = pd.DataFrame.from_dict(features_dict, orient='index')

# If your original DataFrame is indexed by 'window_id' and you want to merge
# the features into this DataFrame, you can do so as follows:
# Your existing DataFrame with window_id as index
features_df = features_df.join(spectral_df)

# Now combined_df will have one row per window, with all the original features
# plus the new RMS, STD, MIN, MAX features for each axis and frequency band
del all_time_domain_signals


In [12]:
features_df.head()

Unnamed: 0_level_0,mean_acc_x,mean_acc_y,mean_acc_z,std_acc_x,std_acc_y,std_acc_z,max_acc_x,max_acc_y,max_acc_z,min_acc_x,...,min_az_band3,max_az_band3,rms_az_band4,std_az_band4,min_az_band4,max_az_band4,rms_az_band5,std_az_band5,min_az_band5,max_az_band5
window_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-4.8681,8.187096,-3.573674,2.371603,2.150729,1.59097,2.18517,15.3848,-0.057505,-16.6691,...,0.000295,0.443718,0.15414,0.092312,0.002852,0.449108,0.121159,0.071889,0.001132,0.312782
1,-4.916078,8.199861,-3.460064,2.066944,1.923925,1.308362,3.62758,14.9751,0.436076,-12.7947,...,0.002495,0.406169,0.087877,0.057452,6.8e-05,0.24266,0.084332,0.05457,0.001012,0.225347
2,-4.049131,8.661184,-3.410227,2.242223,2.357574,1.696496,5.96849,18.0181,4.15949,-12.3587,...,0.000185,0.456392,0.153978,0.086714,0.000127,0.397306,0.121827,0.071723,0.004954,0.370656
3,-4.012356,8.888872,-3.522131,3.580163,3.32447,2.167673,10.0273,21.8206,2.96627,-15.5837,...,0.000111,0.437151,0.170174,0.103537,8.8e-05,0.396471,0.183026,0.106627,0.002232,0.537219
4,-4.155064,8.694152,-3.25139,2.190101,2.050183,1.531107,4.82319,18.3391,3.71383,-14.8865,...,0.00173,0.561088,0.134899,0.091594,0.000329,0.408065,0.111719,0.079405,0.000882,0.426578
