## 00. Imports

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import xgboost as xgb
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler

## 01. Loading the MEFAR Preprocessed Dataset

In [2]:
# Load the MEFAR_MID preprocessed CSV into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
MEFAR_MID = pd.read_csv(
    filepath_or_buffer='../raw_data//MEFAR_preprocessed/MEFAR_MID.csv',
)


## 02. Creating Time-Window Data

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Windowing configuration; window_size and stride are measured in rows.
sampling_rate = 32   # rows per second
window_seconds = 10  # length of one window, in seconds
window_size = window_seconds * sampling_rate  # 10 s * 32 rows/s = 320 rows
stride = int(0.5 * window_size)  # hop of half a window (50% overlap) = 160 rows


In [None]:

# Cut every session into fixed-length, overlapping windows.
#
# Three parallel lists are filled, one entry per window:
#   X_windows   - the feature rows of the window
#   y_windows   - a single 0/1 label for the window
#   session_ids - the session the window was taken from
#
# NOTE(review): session_map, X and y are not defined in the cells shown here;
# they must already exist when this cell runs.
X_windows = []
y_windows = []
session_ids = []

for _, row in session_map.iterrows():
    session_id = row['session_id']
    start, end = int(row['start_index']), int(row['end_index'])

    # iloc slicing is end-exclusive, so row `end` itself is not included.
    # TODO confirm this matches how end_index was produced in session_map.
    X_session = X.iloc[start:end].values
    y_session = y.iloc[start:end].values

    # Only full windows are produced: a session shorter than window_size yields
    # nothing, and trailing rows that cannot fill a whole window are dropped.
    last_start = len(X_session) - window_size
    for i in range(0, last_start + 1, stride):
        window_stop = i + window_size
        window = X_session[i:window_stop]
        label_window = y_session[i:window_stop]

        # Window label is 1 only when strictly more than half of its
        # per-row labels are 1; an exact 50/50 split is labelled 0.
        majority_label = 1 if label_window.mean() > 0.5 else 0

        session_ids.append(session_id)
        y_windows.append(majority_label)
        X_windows.append(window)

# Stack the per-window results into numpy arrays.
X_windows = np.array(X_windows)
y_windows = np.array(y_windows)
session_ids = np.array(session_ids)

# Optional experiment (disabled): min-max scale each window on its own.
# X_norm = np.array([MinMaxScaler().fit_transform(window) for window in X_windows])

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Sampling and windowing parameters (window_size and stride are in samples).
sampling_rate = 32    # samples per second
window_seconds = 10   # seconds covered by a single window
window_size = window_seconds * sampling_rate  # 320 samples per window
stride = int(window_size / 2)  # step of half a window -> 50% overlap (160 samples)

# Per-window accumulators; the three lists stay index-aligned.
X_windows = []
y_windows = []
session_ids = []

# Walk the sessions listed in session_map and slide a window over each one.
# NOTE(review): session_map, X and y are not defined in the cells shown here;
# they must already exist when this cell runs.
for _, row in session_map.iterrows():
    session_id = row['session_id']
    start = int(row['start_index'])   # first row of the session
    end = int(row['end_index'])       # slice bound; iloc excludes this row

    # Rows belonging to this session, as raw numpy values.
    X_session = X.iloc[start:end].values
    y_session = y.iloc[start:end].values

    # Window start offsets are relative to the session, not the full dataset.
    # Sessions shorter than window_size produce no windows at all, and any
    # tail that cannot fill a complete window is discarded.
    n_session_rows = len(X_session)
    for i in range(0, n_session_rows - window_size + 1, stride):
        window = X_session[i:i + window_size]
        label_window = y_session[i:i + window_size]

        # Majority vote: the window is labelled 1 (fatigued, per the original
        # note) only if strictly more than 50% of its row labels are 1.
        # A tie therefore resolves to 0.
        majority_label = 1 if label_window.mean() > 0.5 else 0

        session_ids.append(session_id)
        y_windows.append(majority_label)
        X_windows.append(window)

# Turn the accumulated lists into numpy arrays for modeling.
# Expected shapes when at least one window exists and X is a 2-D frame:
#   X_windows   -> (num_windows, window_size, num_features)
#   y_windows   -> (num_windows,)
#   session_ids -> (num_windows,)
X_windows = np.array(X_windows)
y_windows = np.array(y_windows)
session_ids = np.array(session_ids)

# Per-window min-max scaling (runs unconditionally in this cell).
# A fresh MinMaxScaler is fitted on each window, so every feature column is
# rescaled using that window's own minimum and maximum. The scaled copies are
# collected in X_norm; X_windows is left as it was.
X_norm = np.array([
    MinMaxScaler().fit(raw_window).transform(raw_window)
    for raw_window in X_windows
])