In [1]:
import json
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis, entropy

In [2]:
# Load the JSON data.
# JSON text is UTF-8 by specification, so the encoding is pinned here instead
# of inheriting the platform's locale default (which is not UTF-8 everywhere).
with open('pc_samples.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
def rms(data):
    """Return the root-mean-square of ``data`` reduced over axis 0.

    Parameters
    ----------
    data : array_like
        Values with observations along axis 0. Plain (nested) lists are
        accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``sqrt(mean(data**2, axis=0))``: one value per column for 2-D
        input, a scalar for 1-D input.
    """
    # Python lists do not support ``**``; coerce first so any array-like works.
    values = np.asarray(data)
    return np.sqrt(np.mean(values**2, axis=0))

def energy(data):
    """Return the sum of squares of ``data`` reduced over axis 0.

    Parameters
    ----------
    data : array_like
        Values with observations along axis 0. Plain (nested) lists are
        accepted as well as NumPy arrays.

    Returns
    -------
    numpy.ndarray or numpy scalar
        ``sum(data**2, axis=0)``: one value per column for 2-D input, a
        scalar for 1-D input.
    """
    # Python lists do not support ``**``; coerce first so any array-like works.
    values = np.asarray(data)
    return np.sum(values**2, axis=0)

def safe_skew(data):
    """Column-wise skewness that yields NaN for columns without variability.

    Skewness is undefined for a constant series (its variance is zero), so
    every column whose values are all identical is reported as NaN and only
    the remaining columns are passed to ``scipy.stats.skew``. The decision is
    made per column: one flat axis does not affect the others, and the
    result always has one entry per input column.

    Parameters
    ----------
    data : array_like
        Observations along axis 0, one column per series. A 1-D input is
        treated as a single series.

    Returns
    -------
    numpy.ndarray or float
        Array with one skewness value per column (NaN where the column is
        constant or the input has no rows). For 1-D input a scalar is
        returned instead.
    """
    arr = np.asarray(data, dtype=float)
    was_1d = arr.ndim == 1
    if was_1d:
        arr = arr.reshape(-1, 1)

    # Start from all-NaN and fill in only the columns that can be computed.
    result = np.full(arr.shape[1:], np.nan)
    if arr.shape[0] > 0:
        # A column varies if any row differs from the first row in that column.
        varying = (arr != arr[0]).any(axis=0)
        if varying.any():
            result[varying] = skew(arr[:, varying], axis=0)

    return result[0] if was_1d else result

def safe_kurtosis(data):
    """Column-wise kurtosis that yields NaN for columns without variability.

    Kurtosis is undefined for a constant series (its variance is zero), so
    every column whose values are all identical is reported as NaN and only
    the remaining columns are passed to ``scipy.stats.kurtosis`` (called with
    its default settings). The decision is made per column: one flat axis
    does not affect the others, and the result always has one entry per
    input column.

    Parameters
    ----------
    data : array_like
        Observations along axis 0, one column per series. A 1-D input is
        treated as a single series.

    Returns
    -------
    numpy.ndarray or float
        Array with one kurtosis value per column (NaN where the column is
        constant or the input has no rows). For 1-D input a scalar is
        returned instead.
    """
    arr = np.asarray(data, dtype=float)
    was_1d = arr.ndim == 1
    if was_1d:
        arr = arr.reshape(-1, 1)

    # Start from all-NaN and fill in only the columns that can be computed.
    result = np.full(arr.shape[1:], np.nan)
    if arr.shape[0] > 0:
        # A column varies if any row differs from the first row in that column.
        varying = (arr != arr[0]).any(axis=0)
        if varying.any():
            result[varying] = kurtosis(arr[:, varying], axis=0)

    return result[0] if was_1d else result

In [4]:
# Build one row of summary features per entry in data['data_sets'].
#
# Column layout (identical to the hand-written version this replaces):
#   n_samples,
#   {gyro,acce}_{stat}_{x,y,z} for each stat in PER_AXIS_STATS (gyro first),
#   {gyro,acce}_corr_{xy,xz,yz},
#   label

AXES = ('x', 'y', 'z')
AXIS_PAIRS = ((0, 1), (0, 2), (1, 2))

# (column-name fragment, reducer) in output order. Each reducer collapses
# axis 0 of a readings matrix and returns one value per column.
PER_AXIS_STATS = (
    ('mean', lambda a: a.mean(axis=0)),
    ('std', lambda a: a.std(axis=0)),
    ('variance', lambda a: a.var(axis=0)),
    ('skew', safe_skew),
    ('kurt', safe_kurtosis),
    ('max', lambda a: a.max(axis=0)),
    ('min', lambda a: a.min(axis=0)),
    ('range', lambda a: a.max(axis=0) - a.min(axis=0)),
    ('rms', rms),
    ('energy', energy),
)


def _sensor_matrix(readings, sensor, sample_idx):
    """Stack a list of reading dicts into a 2-D array, one row per reading.

    Columns follow each dict's own key order, and the first three columns are
    labelled x, y, z downstream.
    NOTE(review): assumes every reading stores its axes in x, y, z order --
    confirm against the JSON schema.

    Raises
    ------
    ValueError
        If the readings do not form a 2-D array with at least ``len(AXES)``
        columns (e.g. an empty list), naming the offending sample and sensor
        instead of failing later with an opaque indexing error.
    """
    matrix = np.array([list(reading.values()) for reading in readings])
    if matrix.ndim != 2 or matrix.shape[1] < len(AXES):
        raise ValueError(
            f"sample {sample_idx}: '{sensor}' must be a non-empty list of readings "
            f"with at least {len(AXES)} values each, got array of shape {matrix.shape}"
        )
    return matrix


def _axis_corr(readings, i, j):
    """Pearson correlation between columns i and j; NaN with fewer than two rows."""
    if readings.shape[0] < 2:
        return np.nan
    return np.corrcoef(readings[:, i], readings[:, j])[0, 1]


# Reset on every run so re-executing the cell does not duplicate rows.
features = []

for sample_idx, sample in enumerate(data['data_sets']):
    gyro_data = _sensor_matrix(sample['gyroscope'], 'gyroscope', sample_idx)
    accel_data = _sensor_matrix(sample['accelerometer'], 'accelerometer', sample_idx)
    sensors = (('gyro', gyro_data), ('acce', accel_data))

    # n_samples reports the gyroscope reading count, as before.
    n_samples = gyro_data.shape[0]
    row = {'n_samples': n_samples}

    # Per-axis statistics, interleaved gyro/acce per stat to keep column order.
    for stat_name, reducer in PER_AXIS_STATS:
        for prefix, readings in sensors:
            values = reducer(readings)
            for axis_idx, axis in enumerate(AXES):
                row[f'{prefix}_{stat_name}_{axis}'] = values[axis_idx]

    # Correlation between each pair of axes. Each sensor is guarded by its
    # own row count, so a short gyroscope trace no longer blanks the
    # accelerometer correlations (or vice versa).
    for prefix, readings in sensors:
        for i, j in AXIS_PAIRS:
            row[f'{prefix}_corr_{AXES[i]}{AXES[j]}'] = _axis_corr(readings, i, j)

    row['label'] = sample['label']
    features.append(row)

In [5]:
# Turn the list of per-recording feature dicts into a table, then leave the
# frame as the cell's last expression so the notebook renders it.
df = pd.DataFrame(data=features)
df

Unnamed: 0,n_samples,gyro_mean_x,gyro_mean_y,gyro_mean_z,acce_mean_x,acce_mean_y,acce_mean_z,gyro_std_x,gyro_std_y,gyro_std_z,...,acce_energy_x,acce_energy_y,acce_energy_z,gyro_corr_xy,gyro_corr_xz,gyro_corr_yz,acce_corr_xy,acce_corr_xz,acce_corr_yz,label
0,13,4.815191,6.026918,4.603798,0.620774,2.388052,1.199669,3.328440,2.882618,3.112205,...,5.189465,107.940018,37.154712,0.361928,0.148264,0.237668,-0.442732,0.031785,0.383250,circle
1,11,3.830515,4.600585,5.543827,0.528498,3.001554,1.338157,3.536201,3.164839,3.122509,...,3.162875,118.852905,36.288021,0.821864,0.413533,0.216026,-0.583849,0.239175,0.261781,circle
2,11,5.026256,4.058207,6.074978,0.478871,3.010742,0.734086,3.436114,3.133476,2.817666,...,2.668988,121.908372,7.613957,0.078384,0.441364,-0.154925,-0.444553,-0.601073,0.251883,circle
3,12,5.664079,5.274837,5.638030,1.444621,3.811829,0.925944,3.350898,3.301901,3.040522,...,61.384326,174.572059,12.049456,-0.277734,0.280101,-0.073561,0.257627,0.464325,0.447814,circle
4,11,5.165162,5.362632,5.592880,0.676780,2.415661,0.953413,3.712826,3.177411,3.010405,...,17.425519,97.337498,12.038113,0.517413,0.264493,0.019544,0.310117,-0.031677,-0.072867,circle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
895,23,5.115281,5.667291,4.901557,1.098877,0.556959,0.813742,4.144368,3.721062,3.674045,...,66.524159,31.758997,16.056101,0.302193,0.558180,0.213285,-0.067749,0.611542,-0.261807,circle
896,23,6.890356,4.877647,5.220894,1.130191,1.137196,0.833761,3.538936,3.791139,3.629333,...,67.623633,92.669800,17.080751,0.277386,-0.112916,-0.238539,-0.216941,0.537293,-0.445964,circle
897,22,3.459345,4.059441,3.613884,1.016413,0.928977,0.806829,4.084874,3.782963,3.488527,...,53.125512,62.628655,14.979493,0.335411,0.316357,0.060558,-0.089276,0.599522,-0.212209,circle
898,20,2.645748,3.558622,4.525622,1.122803,0.901758,0.868542,3.663880,3.590053,3.799220,...,62.933942,60.085252,15.422572,0.631527,0.063396,0.098755,-0.186641,0.665846,-0.353082,circle


In [6]:
# Persist the feature table as CSV; index=False leaves the row index out of
# the file.
df.to_csv(path_or_buf='features2.csv', index=False)