In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('../data/processed/data_selected_features_1.csv')
print(f"Loaded: {df.shape}")

Loaded: (30240, 11)


In [4]:
X = df.drop('fms', axis=1)
y = df['fms']
print(f"Features: {X.shape}")
print(f"Target: {y.shape}")

Features: (30240, 10)
Target: (30240,)


In [5]:
X_discretized = X.copy()
n_bins = 5

print(f"Discretizing {len(X.columns)} features into {n_bins} bins...")
print()

for col in X.columns:
    # K-means on raw values
    kmeans = KMeans(n_clusters=n_bins, random_state=42, n_init=10)
    bins = kmeans.fit_predict(X[[col]]) + 1  # +1 for 1-5 instead of 0-4
    X_discretized[col] = bins
    
    unique_vals = sorted(X_discretized[col].unique())
    print(f"  {col:30} → bins {unique_vals}")

print("\n Discretization complete!")

Discretizing 10 features into 5 bins...

  Motion_Intensity               → bins [1, 2, 3, 4, 5]
  HR                             → bins [1, 2, 3, 4, 5]
  GSR                            → bins [1, 2, 3, 4, 5]
  Left_Diameter                  → bins [1, 2, 3, 4, 5]
  Right_Diameter                 → bins [1, 2, 3, 4, 5]
  Left_Openness                  → bins [1, 2, 3, 4, 5]
  Right_Openness                 → bins [1, 2, 3, 4, 5]
  Gaze_Error_Angle               → bins [1, 2, 3, 4, 5]
  HRV                            → bins [1, 2, 3, 4, 5]
  Eye_Blink_Rate                 → bins [1, 2, 3, 4, 5]

 Discretization complete!


In [6]:
print("\nValidation:")
for col in X_discretized.columns:
    unique = sorted(X_discretized[col].unique())
    all_valid = all(v in [1,2,3,4,5] for v in unique)
    status = "✓" if all_valid else "✗"
    print(f"{status} {col:30} → {unique}")


Validation:
✓ Motion_Intensity               → [1, 2, 3, 4, 5]
✓ HR                             → [1, 2, 3, 4, 5]
✓ GSR                            → [1, 2, 3, 4, 5]
✓ Left_Diameter                  → [1, 2, 3, 4, 5]
✓ Right_Diameter                 → [1, 2, 3, 4, 5]
✓ Left_Openness                  → [1, 2, 3, 4, 5]
✓ Right_Openness                 → [1, 2, 3, 4, 5]
✓ Gaze_Error_Angle               → [1, 2, 3, 4, 5]
✓ HRV                            → [1, 2, 3, 4, 5]
✓ Eye_Blink_Rate                 → [1, 2, 3, 4, 5]


In [7]:
df_discretized = X_discretized.copy()
df_discretized['fms'] = y
print(f"\nFinal shape: {df_discretized.shape}")
print(df_discretized.head())


Final shape: (30240, 11)
   Motion_Intensity  HR  GSR  Left_Diameter  Right_Diameter  Left_Openness  \
0                 2   5    5              5               3              4   
1                 1   3    5              5               3              4   
2                 4   3    5              5               3              4   
3                 1   3    5              5               3              4   
4                 2   3    5              5               3              4   

   Right_Openness  Gaze_Error_Angle  HRV  Eye_Blink_Rate  fms  
0               3                 4    1               1    1  
1               3                 4    1               1    1  
2               3                 4    1               1    1  
3               3                 4    1               1    1  
4               3                 4    1               1    1  


In [8]:
df_discretized.to_csv('../data/processed/data_discretized_1.csv', index=False)
print("Saved: data_discretized.csv")
print(f"Shape: {df_discretized.shape}")
print(f"All features discretized to bins [1-5]")

Saved: data_discretized.csv
Shape: (30240, 11)
All features discretized to bins [1-5]
