In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import os  # FIXED: Add import os for path joining

# FIXED: Load merged df (from 01 – or re-load and merge here)
PROJECT_ROOT = r"D:\Work\Stress_Level_Prediction\data"
TRAIN_TIME = os.path.join(PROJECT_ROOT, 'Train Data', 'Train Data Zip', 'time_domain_features_train.csv')
TRAIN_FREQ = os.path.join(PROJECT_ROOT, 'Train Data', 'Train Data Zip', 'frequency_domain_features_train.csv')

df_time = pd.read_csv(TRAIN_TIME)
df_freq = pd.read_csv(TRAIN_FREQ)
df = pd.merge(df_time, df_freq, on='uuid', how='inner')

# Sample for faster training (optional – remove if you want full 369k)
df = df.sample(n=10000, random_state=42).reset_index(drop=True)

# Feature engineering (example: daily_activity as SDRR + LF proxy for tasks)
df['daily_activity'] = df['SDRR'] + df['LF']

# UPDATED: Select features (HRV-based for sleep/activity/heart)
features = ['MEAN_RR', 'SDRR', 'LF', 'HF']  # MEAN_RR for sleep, SDRR/LF/HF for heart/activity
X = df[features]

# Target: HR for regression (heart rate as stress indicator)
y = df['HR'].values.reshape(-1, 1)

# Scaling
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"Sample X_train: {X_train[0]}")
print(f"Sample y_train: {y_train[0]}")

# Save processed
df_processed = df[features + ['HR']].copy()
df_processed.to_csv('../data/heart_stress_processed.csv', index=False)
print("Processed data saved.")

# Save scalers
import joblib
joblib.dump(scaler_X, '../models/scaler_X.pkl')
joblib.dump(scaler_y, '../models/scaler_y.pkl')
print("Scalers saved.")

X_train shape: (8000, 4)
y_train shape: (8000, 1)
Sample X_train: [0.39443202 0.04491381 0.17939958 0.03346833]
Sample y_train: [0.33817447]
Processed data saved.
Scalers saved.
