In [16]:
import pandas as pd
import numpy as np
import pgmpy
from sklearn.cluster import KMeans


In [17]:
# Read the enhanced dataset into `df` and report its (rows, columns) shape.
df = pd.read_csv(filepath_or_buffer='../data/enhanced/data.csv')
print("Loaded: {}".format(df.shape))

Loaded: (30240, 77)


In [18]:

# Derive four engineered columns on `df` (Motion_Intensity, Gaze_Error_Angle,
# HRV, Eye_Blink_Rate). After each one, print its min/max as a sanity check.
print(f"\nOriginal data shape: {df.shape}")
print(f"Columns: {df.columns.tolist()[:10]}...")

# Euclidean norm of the three HMDVec components.
df['Motion_Intensity'] = np.sqrt(
    df['HMDVec_X']**2 +
    df['HMDVec_Y']**2 +
    df['HMDVec_Z']**2
)

print("\nComputed Motion_Intensity")
print(f"  Range: [{df['Motion_Intensity'].min():.2f}, {df['Motion_Intensity'].max():.2f}]")

# Euclidean norm of the three Combine_HMD2TargAng components.
df['Gaze_Error_Angle'] = np.sqrt(
    df['Combine_HMD2TargAng_X']**2 +
    df['Combine_HMD2TargAng_Y']**2 +
    df['Combine_HMD2TargAng_Z']**2
)

print("\nComputed Gaze_Error_Angle")
print(f"  Range: [{df['Gaze_Error_Angle'].min():.2f}, {df['Gaze_Error_Angle'].max():.2f}]")

# Rolling root-mean-square of row-to-row HR differences over `window_size` rows.
# NOTE(review): this is an RMSSD-style proxy built from the HR column, not from
# beat-to-beat intervals -- confirm that is the intended definition of "HRV".
# NOTE(review): diff() runs over the whole frame; if several recordings are
# concatenated, the difference at each boundary mixes two of them -- TODO confirm.
window_size = 10
hr_diff = df['HR'].diff()
df['HRV'] = hr_diff.rolling(window=window_size).apply(
    lambda x: np.sqrt(np.mean(x**2)), raw=True
)
# Rows without a complete window (at least the first `window_size` rows, since
# diff() makes row 0 NaN) are filled with the column mean.
# Assign the result back instead of `df['HRV'].fillna(..., inplace=True)`: the
# chained in-place form warns on pandas 2.x and does nothing under Copy-on-Write.
df['HRV'] = df['HRV'].fillna(df['HRV'].mean())

print("\n Computed HRV")
print(f"  Range: [{df['HRV'].min():.2f}, {df['HRV'].max():.2f}]")

# Mean eyelid openness of both eyes, mapped linearly so that openness 1 -> 0
# and openness 0 -> 30.
# NOTE(review): the original comment called this "blinks/min", but it is a
# per-row rescaling of openness, not a count of blink events -- confirm naming.
avg_openness = (df['Left_Openness'] + df['Right_Openness']) / 2
df['Eye_Blink_Rate'] = 30 * (1 - avg_openness)

print("\n Computed Eye_Blink_Rate")
print(f"  Range: [{df['Eye_Blink_Rate'].min():.2f}, {df['Eye_Blink_Rate'].max():.2f}]")



Original data shape: (30240, 77)
Columns: ['HMDVec_X', 'HMDVec_Y', 'HMDVec_Z', 'HMDPos_X', 'HMDPos_Y', 'HMDPos_Z', 'HMDRot_X', 'HMDRot_Y', 'HMDRot_Z', 'HMDRot_W']...

Computed Motion_Intensity
  Range: [0.02, 1.00]

Computed Gaze_Error_Angle
  Range: [11.85, 254.81]

 Computed HRV
  Range: [0.09, 46.37]

 Computed Eye_Blink_Rate
  Range: [0.00, 30.00]


In [19]:
# Column groups used to build the modelling table.

# System-side inputs.
system_features = ['Motion_Intensity']

# Physiological inputs (raw sensor columns plus engineered ones).
physio_features = [
    'HR', 'GSR',
    'Left_Diameter', 'Right_Diameter',
    'Left_Openness', 'Right_Openness',
    'Gaze_Error_Angle', 'HRV', 'Eye_Blink_Rate',
]

# Prediction target column.
target = ['fms']

# Final column order: system inputs, physiological inputs, then the target.
selected_features = [*system_features, *physio_features, *target]
print("Selected {}".format(len(selected_features)))

Selected 11


In [20]:
# Take an independent copy of just the selected columns, then show what we got.
df_selected = df.loc[:, selected_features].copy()
print("Shape: {}".format(df_selected.shape))
print("\nColumns:")
print(list(df_selected.columns))

Shape: (30240, 11)

Columns:
['Motion_Intensity', 'HR', 'GSR', 'Left_Diameter', 'Right_Diameter', 'Left_Openness', 'Right_Openness', 'Gaze_Error_Angle', 'HRV', 'Eye_Blink_Rate', 'fms']


In [21]:
# Summarise how many columns fall in each group; the target is counted separately.
n_system = len(system_features)
n_physio = len(physio_features)
n_inputs = len(selected_features) - 1
print(f"System features: {n_system}")
print(f"Physio features: {n_physio}")
print(f"Total: {n_inputs} + target")

System features: 1
Physio features: 9
Total: 10 + target


In [22]:
# Write the selected-feature table to disk without the index column.
# The confirmation message is derived from the path so it always names the
# file actually written (it previously printed a different file name).
output_path = '../data/processed/data_selected_features_final.csv'
df_selected.to_csv(output_path, index=False)
print(f"Saved: {output_path.rsplit('/', 1)[-1]}")

Saved: data_selected_features.csv
