In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [129]:
import pandas as pd

# GOAL: simulate training dataset

# Set random seed for reproducibility
np.random.seed(123)

# Generate random data
data = np.random.randint(-100, 100, size=(10, 8))

df = pd.DataFrame(data, columns=['Acc-X', 'Acc-Y','Acc-Z', 
      'Gyro-X', 'Gyro-Y', 'Gyro-Z',
      'Flex1','Flex2'])

raw_df = df

df.head()

Unnamed: 0,Acc-X,Acc-Y,Acc-Z,Gyro-X,Gyro-Y,Gyro-Z,Flex1,Flex2
0,9,26,-34,-2,-83,-17,6,23
1,-43,-4,13,26,-53,-27,-68,74
2,11,53,-17,-22,64,-4,-32,-51
3,-45,95,-98,-16,-61,-34,-16,-53
4,89,76,35,5,-1,24,-8,80


In [130]:
import numpy as np
from scipy import stats, signal

# Define the feature extraction functions

def compute_mean(data):
    return np.mean(data)

def compute_variance(data):
    return np.var(data)

def compute_median_absolute_deviation(data):
    return stats.median_abs_deviation(data, axis=None)

def compute_root_mean_square(data):
    return np.sqrt(np.mean(np.square(data)))

def compute_interquartile_range(data):
    return stats.iqr(data)

def compute_percentile_75(data):
    return np.percentile(data, 75)

def compute_kurtosis(data):
    return stats.kurtosis(data)

def compute_min_max(data):
    return np.max(data) - np.min(data)

def compute_signal_magnitude_area(data):
    return np.sum(data) / len(data)

def compute_zero_crossing_rate(data):
    return ((data[:-1] * data[1:]) < 0).sum()

def compute_spectral_centroid(data):
    spectrum = np.abs(np.fft.rfft(data))
    normalized_spectrum = spectrum / np.sum(spectrum)
    normalized_frequencies = np.linspace(0, 1, len(spectrum))
    spectral_centroid = np.sum(normalized_frequencies * normalized_spectrum)
    return spectral_centroid

def compute_spectral_entropy(data):
    freqs, power_density = signal.welch(data)
    return stats.entropy(power_density)

def compute_spectral_energy(data):
    freqs, power_density = signal.welch(data)
    return np.sum(np.square(power_density))

def compute_principle_frequency(data):
    freqs, power_density = signal.welch(data)
    return freqs[np.argmax(np.square(power_density))]

In [139]:
# def compute_features(raw_df):
# Initialize array to hold processed data
processed_data = []

# Loop through each column and compute features
for column in raw_df.columns:
    column_data = raw_df[column]

    # Compute features for the column
    mean = compute_mean(column_data)
    variance = compute_variance(column_data)
    median_absolute_deviation = compute_median_absolute_deviation(column_data)
    root_mean_square = compute_root_mean_square(column_data)
    interquartile_range = compute_interquartile_range(column_data)
    percentile_75 = compute_percentile_75(column_data)
    kurtosis = compute_kurtosis(column_data)
    min_max = compute_min_max(column_data)
    signal_magnitude_area = compute_signal_magnitude_area(column_data)
    zero_crossing_rate = compute_zero_crossing_rate(column_data)
    spectral_centroid = compute_spectral_centroid(column_data)
    spectral_entropy = compute_spectral_entropy(column_data)
    spectral_energy = compute_spectral_energy(column_data)
    principle_frequency = compute_principle_frequency(column_data)

    # Store features in list
    processed_column_data = [mean, variance, median_absolute_deviation, root_mean_square, 
                              interquartile_range, percentile_75, kurtosis, min_max, 
                              signal_magnitude_area, zero_crossing_rate, spectral_centroid, 
                              spectral_entropy, spectral_energy, principle_frequency]
    print(processed_column_data)
    # Append processed column data to main processed data array
    processed_data.append(processed_column_data)

processed_data_arr = np.concatenate(processed_data)

# for i in processed_data_arr:
#   print(i)

# # Convert processed data array into Pandas DataFrame
# processed_df = pd.DataFrame(processed_data, columns=[
#                             f'{column}_mean' for column in raw_df.columns], index=[
#                             'mean', 'variance', 'median_absolute_deviation', 'root_mean_square', 
#                             'interquartile_range', 'percentile_75', 'kurtosis', 'min_max', 
#                             'signal_magnitude_area', 'zero_crossing_rate', 'spectral_centroid', 
#                             'spectral_entropy', 'spectral_energy', 'principle_frequency'])

    # return processed_df

[-6.6, 2208.44, 37.0, 47.45524207081869, 58.5, 14.0, -0.49290133604299546, 167, -6.6, 0, 0.6469936570460438, 1.566522455274587, 229625425.99012893, 0.5]
[43.5, 1185.25, 31.5, 55.47521969312064, 59.5, 70.5, -1.3435365383966005, 99, 43.5, 0, 0.23007946865090362, 1.1434679951023443, 77627060.5657832, 0.2]
[-16.4, 1942.0399999999995, 26.0, 47.02127178203499, 50.0, 16.75, -0.817978653645631, 133, -16.4, 0, 0.5183203529236047, 1.1612298511595784, 193747286.6024294, 0.30000000000000004]
[0.1, 2008.6899999999998, 24.0, 44.81852295647415, 41.25, 20.75, -0.1813965014631984, 169, 0.1, 0, 0.7530561700274069, 1.2408873340155018, 29496714.89008185, 0.4]
[-11.8, 2877.1600000000003, 55.0, 54.92176253544673, 101.0, 42.0, -1.4640505630164726, 147, -11.8, 0, 0.45779718780339573, 1.4523333601784467, 150778939.2933493, 0.4]
[17.8, 2700.56, 48.0, 54.93086564036653, 98.5, 74.0, -1.4760394561589305, 148, 17.8, 0, 0.4199837937129861, 1.5767906283714201, 224962369.93665552, 0.1]
[-15.9, 1845.2900000000002, 16.0



In [140]:
variables = ['Acc-X', 'Acc-Y', 'Acc-Z', 'Gyro-X', 'Gyro-Y', 'Gyro-Z', 'Flex1', 'Flex2']
factors = ['mean', 'variance', 'median_absolute_deviation', 'root_mean_square', 'interquartile_range',            
           'percentile_75', 'kurtosis', 'min_max', 'signal_magnitude_area', 'zero_crossing_rate',            
           'spectral_centroid', 'spectral_entropy', 'spectral_energy', 'principle_frequency']

headers = [f'{var}_{factor}' for var in variables for factor in factors]

print(headers)

['Acc-X_mean', 'Acc-X_variance', 'Acc-X_median_absolute_deviation', 'Acc-X_root_mean_square', 'Acc-X_interquartile_range', 'Acc-X_percentile_75', 'Acc-X_kurtosis', 'Acc-X_min_max', 'Acc-X_signal_magnitude_area', 'Acc-X_zero_crossing_rate', 'Acc-X_spectral_centroid', 'Acc-X_spectral_entropy', 'Acc-X_spectral_energy', 'Acc-X_principle_frequency', 'Acc-Y_mean', 'Acc-Y_variance', 'Acc-Y_median_absolute_deviation', 'Acc-Y_root_mean_square', 'Acc-Y_interquartile_range', 'Acc-Y_percentile_75', 'Acc-Y_kurtosis', 'Acc-Y_min_max', 'Acc-Y_signal_magnitude_area', 'Acc-Y_zero_crossing_rate', 'Acc-Y_spectral_centroid', 'Acc-Y_spectral_entropy', 'Acc-Y_spectral_energy', 'Acc-Y_principle_frequency', 'Acc-Z_mean', 'Acc-Z_variance', 'Acc-Z_median_absolute_deviation', 'Acc-Z_root_mean_square', 'Acc-Z_interquartile_range', 'Acc-Z_percentile_75', 'Acc-Z_kurtosis', 'Acc-Z_min_max', 'Acc-Z_signal_magnitude_area', 'Acc-Z_zero_crossing_rate', 'Acc-Z_spectral_centroid', 'Acc-Z_spectral_entropy', 'Acc-Z_spectral