In [1]:
import pandas as pd
import numpy as np
import pgmpy
from sklearn.cluster import KMeans


In [2]:
# Read the enhanced dataset into `df`; every later cell derives its columns from this frame.
raw_csv_path = '../data/enhanced/data.csv'
df = pd.read_csv(raw_csv_path)
print(f"Loaded: {df.shape}")

Loaded: (30240, 77)


In [3]:

# Derive four per-row features from the raw columns of `df` and add them as new columns:
# Motion_Intensity, Gaze_Error_Angle, HRV and Eye_Blink_Rate.
print(f"\nOriginal data shape: {df.shape}")
print(f"Columns: {df.columns.tolist()[:10]}...")

# Motion_Intensity: Euclidean norm of the (HMDVec_X, HMDVec_Y, HMDVec_Z) vector.
df['Motion_Intensity'] = np.sqrt(
    df['HMDVec_X']**2 +
    df['HMDVec_Y']**2 +
    df['HMDVec_Z']**2
)

print(f"\nComputed Motion_Intensity")
print(f"  Range: [{df['Motion_Intensity'].min():.2f}, {df['Motion_Intensity'].max():.2f}]")

# Gaze_Error_Angle: Euclidean norm of the three Combine_HMD2TargAng_* components.
df['Gaze_Error_Angle'] = np.sqrt(
    df['Combine_HMD2TargAng_X']**2 +
    df['Combine_HMD2TargAng_Y']**2 +
    df['Combine_HMD2TargAng_Z']**2
)

print(f"\nComputed Gaze_Error_Angle")
print(f"  Range: [{df['Gaze_Error_Angle'].min():.2f}, {df['Gaze_Error_Angle'].max():.2f}]")

# HRV: root-mean-square of successive HR differences over a rolling window of rows.
# NOTE(review): diff() runs over the whole frame in row order; if the file stacks
# several recordings, the first row of each one mixes two recordings -- confirm.
window_size = 10
hr_diff = df['HR'].diff()
hrv = hr_diff.rolling(window=window_size).apply(
    lambda x: np.sqrt(np.mean(x**2)), raw=True
)
# Leading rows have no complete window and come out as NaN; fill them with the mean.
# Assign the filled series back instead of calling fillna(inplace=True) on
# df['HRV']: that chained in-place form warns on pandas 2.x and does not modify
# `df` at all once Copy-on-Write is enabled.
df['HRV'] = hrv.fillna(hrv.mean())

print(f"\n Computed HRV")
print(f"  Range: [{df['HRV'].min():.2f}, {df['HRV'].max():.2f}]")

# Eye_Blink_Rate: 30 * (1 - mean eyelid openness). This is a linear rescaling of
# openness, not a count of blink events.
# NOTE(review): the [0, 30] range assumes openness lies in [0, 1] -- confirm.
avg_openness = (df['Left_Openness'] + df['Right_Openness']) / 2
df['Eye_Blink_Rate'] = 30 * (1 - avg_openness)  # Scale to blinks/min

print(f"\n Computed Eye_Blink_Rate")
print(f"  Range: [{df['Eye_Blink_Rate'].min():.2f}, {df['Eye_Blink_Rate'].max():.2f}]")



Original data shape: (30240, 77)
Columns: ['HMDVec_X', 'HMDVec_Y', 'HMDVec_Z', 'HMDPos_X', 'HMDPos_Y', 'HMDPos_Z', 'HMDRot_X', 'HMDRot_Y', 'HMDRot_Z', 'HMDRot_W']...

Computed Motion_Intensity
  Range: [0.02, 1.00]

Computed Gaze_Error_Angle
  Range: [11.85, 254.81]

 Computed HRV
  Range: [0.09, 46.37]

 Computed Eye_Blink_Rate
  Range: [0.00, 30.00]


In [8]:

# Adds three more derived columns to `df`: Temporal_Smoothness, Luminance, HOG_features.

def _row_step(column):
    """Row-to-row change of df[column]; the first row (no predecessor) is set to 0."""
    return df[column].diff().fillna(0)


# Temporal_Smoothness: reciprocal of the summed squared per-row change in head
# rotation (X, Y, Z components) and head position. The 1e-6 term avoids division
# by zero, so a row with no change at all evaluates to 1e6.
rot_dx, rot_dy, rot_dz = (
    _row_step(name) for name in ('HMDRot_X', 'HMDRot_Y', 'HMDRot_Z')
)
pos_dx, pos_dy, pos_dz = (
    _row_step(name) for name in ('HMDPos_X', 'HMDPos_Y', 'HMDPos_Z')
)

rot_energy = rot_dx**2 + rot_dy**2 + rot_dz**2
pos_energy = pos_dx**2 + pos_dy**2 + pos_dz**2
df['Temporal_Smoothness'] = 1.0 / (rot_energy + pos_energy + 1e-6)
print(f"  Temporal_Smoothness: [{df['Temporal_Smoothness'].min():.2f}, {df['Temporal_Smoothness'].max():.2f}]")

# Luminance: inverted min-max scaling of the mean pupil diameter, so the largest
# mean diameter maps to ~0 and the smallest to 1.
pupil_mean = (df['Left_Diameter'] + df['Right_Diameter']) / 2
pupil_lo = pupil_mean.min()
pupil_hi = pupil_mean.max()
df['Luminance'] = 1 - (pupil_mean - pupil_lo) / (pupil_hi - pupil_lo + 1e-6)
print(f"  Luminance: [{df['Luminance'].min():.2f}, {df['Luminance'].max():.2f}]")

# HOG_features: rolling standard deviation (10 rows) of the magnitude of the
# row-to-row change in gaze direction. Despite the column name, it is computed
# from gaze-direction columns only, not from image gradients.
gaze_dx, gaze_dy, gaze_dz = (
    _row_step(name).abs()
    for name in ('Combine_GazeDir_X', 'Combine_GazeDir_Y', 'Combine_GazeDir_Z')
)
gaze_step_norm = np.sqrt(gaze_dx**2 + gaze_dy**2 + gaze_dz**2)
# A one-row window has no standard deviation (NaN); report it as 0.
gaze_step_std = gaze_step_norm.rolling(window=10, min_periods=1).std()
df['HOG_features'] = gaze_step_std.fillna(0)
print(f"  HOG_features: [{df['HOG_features'].min():.2f}, {df['HOG_features'].max():.2f}]")

  Temporal_Smoothness: [0.00, 1000000.00]
  Luminance: [0.00, 1.00]
  HOG_features: [0.00, 1.05]


In [9]:
# Column groups used to build the exported table. Order matters: it becomes the
# column order of the selected frame.

# Columns grouped under the "system" label.
system_features = ['Motion_Intensity', 'Temporal_Smoothness', 'Luminance', 'HOG_features']

# Columns grouped under the "physiological" label.
physio_features = [
    'HR', 'GSR',
    'Left_Diameter', 'Right_Diameter',
    'Left_Openness', 'Right_Openness',
    'Gaze_Error_Angle', 'HRV', 'Eye_Blink_Rate',
]

# Target column, kept as a one-element list so it concatenates with the others.
target = ['fms']

# Final ordering: system columns, then physiological columns, then the target.
selected_features = [*system_features, *physio_features, *target]
print(f"Selected {len(selected_features)}")

Selected 14


In [13]:
# Take an independent copy of just the selected columns, so later changes to
# `df_selected` do not touch `df`.
df_selected = df.loc[:, selected_features].copy()
print(f"Shape: {df_selected.shape}")
print("\nColumns:")
print(list(df_selected.columns))

Shape: (30240, 14)

Columns:
['Motion_Intensity', 'Temporal_Smoothness', 'Luminance', 'HOG_features', 'HR', 'GSR', 'Left_Diameter', 'Right_Diameter', 'Left_Openness', 'Right_Openness', 'Gaze_Error_Angle', 'HRV', 'Eye_Blink_Rate', 'fms']


In [14]:
# Report the size of each feature group; the total excludes the single target column.
for group_label, group_columns in (
    ("System features:", system_features),
    ("Physio features:", physio_features),
):
    print(group_label, len(group_columns))
predictor_count = len(selected_features)-1
print("Total:", predictor_count, "+ target")

System features: 4
Physio features: 9
Total: 13 + target


In [15]:
# Write the selected columns to disk without the index column.
# The confirmation message is built from the same path that is written, so it
# cannot drift from the real file name (it previously named a different file).
output_path = '../data/processed/data_selected_features_final2.csv'
df_selected.to_csv(output_path, index=False)
print(f"Saved: {output_path}")

Saved: data_selected_features.csv
