In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('../data/processed/data_selected_features_final2.csv')
print(f"Loaded: {df.shape}")

Loaded: (30240, 14)


In [3]:
X = df.drop('fms', axis=1)
y = df['fms']
print(f"Features: {X.shape}")
print(f"Target: {y.shape}")

Features: (30240, 13)
Target: (30240,)


In [4]:
X_discretized = X.copy()
n_bins = 5

print(f"Discretizing {len(X.columns)} features into {n_bins} bins...")
print()

for col in X.columns:
    kmeans = KMeans(n_clusters=n_bins, random_state=42, n_init=10)
    bins = kmeans.fit_predict(X[[col]]) + 1  
    X_discretized[col] = bins
    
    unique_vals = sorted(X_discretized[col].unique())
    print(f"  {col:30} → bins {unique_vals}")

Discretizing 13 features into 5 bins...

  Motion_Intensity               → bins [1, 2, 3, 4, 5]
  Temporal_Smoothness            → bins [1, 2, 3, 4, 5]
  Luminance                      → bins [1, 2, 3, 4, 5]
  HOG_features                   → bins [1, 2, 3, 4, 5]
  HR                             → bins [1, 2, 3, 4, 5]
  GSR                            → bins [1, 2, 3, 4, 5]
  Left_Diameter                  → bins [1, 2, 3, 4, 5]
  Right_Diameter                 → bins [1, 2, 3, 4, 5]
  Left_Openness                  → bins [1, 2, 3, 4, 5]
  Right_Openness                 → bins [1, 2, 3, 4, 5]
  Gaze_Error_Angle               → bins [1, 2, 3, 4, 5]
  HRV                            → bins [1, 2, 3, 4, 5]
  Eye_Blink_Rate                 → bins [1, 2, 3, 4, 5]


In [5]:
print("\nValidation:")
for col in X_discretized.columns:
    unique = sorted(X_discretized[col].unique())
    all_valid = all(v in [1,2,3,4,5] for v in unique)
    print(f" {col:30} → {unique}")


Validation:
 Motion_Intensity               → [1, 2, 3, 4, 5]
 Temporal_Smoothness            → [1, 2, 3, 4, 5]
 Luminance                      → [1, 2, 3, 4, 5]
 HOG_features                   → [1, 2, 3, 4, 5]
 HR                             → [1, 2, 3, 4, 5]
 GSR                            → [1, 2, 3, 4, 5]
 Left_Diameter                  → [1, 2, 3, 4, 5]
 Right_Diameter                 → [1, 2, 3, 4, 5]
 Left_Openness                  → [1, 2, 3, 4, 5]
 Right_Openness                 → [1, 2, 3, 4, 5]
 Gaze_Error_Angle               → [1, 2, 3, 4, 5]
 HRV                            → [1, 2, 3, 4, 5]
 Eye_Blink_Rate                 → [1, 2, 3, 4, 5]


In [6]:
df_discretized = X_discretized.copy()
df_discretized['fms'] = y
print(f"\nFinal shape: {df_discretized.shape}")
print(df_discretized.head())


Final shape: (30240, 14)
   Motion_Intensity  Temporal_Smoothness  Luminance  HOG_features  HR  GSR  \
0                 2                    2          1             4   5    5   
1                 1                    1          1             2   3    5   
2                 4                    1          1             2   3    5   
3                 1                    1          1             4   3    5   
4                 2                    1          1             4   3    5   

   Left_Diameter  Right_Diameter  Left_Openness  Right_Openness  \
0              5               3              4               3   
1              5               3              4               3   
2              5               3              4               3   
3              5               3              4               3   
4              5               3              4               3   

   Gaze_Error_Angle  HRV  Eye_Blink_Rate  fms  
0                 4    1               1    1  
1     

In [7]:
df_discretized.to_csv('../data/processed/data_discretized_final2.csv', index=False)
print("Saved: data_discretized.csv")
print(f"Shape: {df_discretized.shape}")

Saved: data_discretized.csv
Shape: (30240, 14)
