In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('../data/processed/data_selected_features.csv')
print(f"Loaded: {df.shape}")

Loaded: (999, 22)


In [3]:
X = df.drop('fms', axis=1)
y = df['fms']
print(f"Features: {X.shape}")
print(f"Target: {y.shape}")

Features: (999, 21)
Target: (999,)


In [4]:
X_discretized = X.copy()
n_bins = 5

print(f"Discretizing {len(X.columns)} features into {n_bins} bins...")
print()

for col in X.columns:
    # K-means on raw values
    kmeans = KMeans(n_clusters=n_bins, random_state=42, n_init=10)
    bins = kmeans.fit_predict(X[[col]]) + 1  # +1 for 1-5 instead of 0-4
    X_discretized[col] = bins
    
    unique_vals = sorted(X_discretized[col].unique())
    print(f"  {col:30} → bins {unique_vals}")

print("\n Discretization complete!")

Discretizing 21 features into 5 bins...

  Luminance                      → bins [1, 2, 3, 4, 5]
  HOG_features                   → bins [1, 2, 3, 4, 5]
  Spectral_Entropy               → bins [1, 2, 3, 4, 5]
  Temporal_Smoothness            → bins [1, 2, 3, 4, 5]
  Optical_Flow                   → bins [1, 2, 3, 4, 5]
  Frame_Rate                     → bins [1, 2, 3, 4, 5]
  Scene_Complexity               → bins [1, 2, 3, 4, 5]
  Motion_Intensity               → bins [1, 2, 3, 4, 5]
  Visual_Contrast                → bins [1, 2, 3, 4, 5]
  FOV                            → bins [1, 2, 3, 4, 5]
  HR                             → bins [1, 2, 3, 4, 5]
  GSR                            → bins [1, 2, 3, 4, 5]
  Left_Diameter                  → bins [1, 2, 3, 4, 5]
  Right_Diameter                 → bins [1, 2, 3, 4, 5]
  Left_Openness                  → bins [1, 2, 3, 4, 5]
  Right_Openness                 → bins [1, 2, 3, 4, 5]
  Gaze_Error_Angle               → bins [1, 2, 3, 4, 5]
  React

In [5]:
print("\nValidation:")
for col in X_discretized.columns:
    unique = sorted(X_discretized[col].unique())
    all_valid = all(v in [1,2,3,4,5] for v in unique)
    status = "✓" if all_valid else "✗"
    print(f"{status} {col:30} → {unique}")


Validation:
✓ Luminance                      → [1, 2, 3, 4, 5]
✓ HOG_features                   → [1, 2, 3, 4, 5]
✓ Spectral_Entropy               → [1, 2, 3, 4, 5]
✓ Temporal_Smoothness            → [1, 2, 3, 4, 5]
✓ Optical_Flow                   → [1, 2, 3, 4, 5]
✓ Frame_Rate                     → [1, 2, 3, 4, 5]
✓ Scene_Complexity               → [1, 2, 3, 4, 5]
✓ Motion_Intensity               → [1, 2, 3, 4, 5]
✓ Visual_Contrast                → [1, 2, 3, 4, 5]
✓ FOV                            → [1, 2, 3, 4, 5]
✓ HR                             → [1, 2, 3, 4, 5]
✓ GSR                            → [1, 2, 3, 4, 5]
✓ Left_Diameter                  → [1, 2, 3, 4, 5]
✓ Right_Diameter                 → [1, 2, 3, 4, 5]
✓ Left_Openness                  → [1, 2, 3, 4, 5]
✓ Right_Openness                 → [1, 2, 3, 4, 5]
✓ Gaze_Error_Angle               → [1, 2, 3, 4, 5]
✓ Reaction_Time                  → [1, 2, 3, 4, 5]
✓ HRV                            → [1, 2, 3, 4, 5]
✓ Fixation_Duratio

In [6]:
df_discretized = X_discretized.copy()
df_discretized['fms'] = y
print(f"\nFinal shape: {df_discretized.shape}")
print(df_discretized.head())


Final shape: (999, 22)
   Luminance  HOG_features  Spectral_Entropy  Temporal_Smoothness  \
0          3             3                 2                    4   
1          5             1                 4                    4   
2          2             3                 4                    3   
3          4             2                 4                    1   
4          1             4                 1                    3   

   Optical_Flow  Frame_Rate  Scene_Complexity  Motion_Intensity  \
0             2           4                 2                 1   
1             2           2                 4                 4   
2             5           5                 3                 2   
3             4           4                 4                 2   
4             5           3                 3                 4   

   Visual_Contrast  FOV  ...  Left_Diameter  Right_Diameter  Left_Openness  \
0                4    2  ...              3               1              1   
1 

In [7]:
df_discretized.to_csv('../data/processed/data_discretized.csv', index=False)
print("Saved: data_discretized.csv")
print(f"Shape: {df_discretized.shape}")
print(f"All features discretized to bins [1-5]")

Saved: data_discretized.csv
Shape: (999, 22)
All features discretized to bins [1-5]
