In [4]:
import os

data_dir = r'D:\laiba\Desktop\HRClaude\jsons'
print(f"Folder exists: {os.path.exists(data_dir)}")
print(f"Contents: {os.listdir(data_dir)}")

stress_path = os.path.join(data_dir, 'com.samsung.shealth.stress')
print(f"Stress folder exists: {os.path.exists(stress_path)}")

hrv_path = os.path.join(data_dir, 'com.samsung.health.hrv')
print(f"HRV folder exists: {os.path.exists(hrv_path)}")

Folder exists: True
Contents: ['com.samsung.health.hrv', 'com.samsung.shealth.stress']
Stress folder exists: True
HRV folder exists: True


In [77]:
loader = SamsungDataLoader()
stress_data, hrv_data, hr_data = loader.load_all_samsung_files(data_dir)

print(f"Loaded stress_data: {len(stress_data)} records")
print(f"Loaded hrv_data: {len(hrv_data)} records")

if stress_data:
    print(f"First stress record columns: {stress_data[0].keys()}")
    print(f"First stress record: {stress_data[0]}")
else:
    print("ERROR: stress_data is EMPTY!")

if hrv_data:
    print(f"First HRV record columns: {hrv_data[0].keys()}")
    print(f"First HRV record: {hrv_data[0]}")
else:
    print("ERROR: hrv_data is EMPTY!")

Found:
  Stress files: 119
  HRV files: 35
Loaded stress_data: 5091 records
Loaded hrv_data: 2807 records
First stress record columns: dict_keys(['score', 'score_max', 'score_min', 'flag', 'level', 'start_time', 'end_time'])
First stress record: {'score': 0, 'score_max': 0, 'score_min': 0, 'flag': 1, 'level': 1, 'start_time': 1767300300000, 'end_time': 1767300359000}
First HRV record columns: dict_keys(['start_time', 'end_time', 'sdnn', 'rmssd'])
First HRV record: {'start_time': 1767088810348, 'end_time': 1767089112018, 'sdnn': 44.36093, 'rmssd': 52.021973}


In [None]:
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import joblib
import glob
import pickle
from scipy.signal import find_peaks
import os


class SamsungProcessor:
    """handles samsung health data processing"""
    
    def __init__(self):
        self.data = None
    
    def loadAndProcess(self, hrvData, stressData):
        print("\nProcessing Samsung Health data...")
        
        # convert to dataframes
        hrvDf = pd.DataFrame(hrvData)
        stressDf = pd.DataFrame(stressData)
        
        # fix timestamps
        hrvDf['timestamp'] = pd.to_datetime(hrvDf['start_time'], unit='ms')
        stressDf['timestamp'] = pd.to_datetime(stressDf['start_time'], unit='ms')
        
        # create 3 stress categories - found this on stackoverflow
        stressDf['stressCategory'] = pd.qcut(stressDf['score'], q=3, labels=[0, 1, 2], duplicates='drop')
        stressDf['stressCategory'] = stressDf['stressCategory'].astype(int)
        
        print(f"Stress range: {stressDf['score'].min():.1f} - {stressDf['score'].max():.1f}")
        lowCount = (stressDf['stressCategory']==0).sum()
        moderateCount = (stressDf['stressCategory']==1).sum()
        highCount = (stressDf['stressCategory']==2).sum()
        print(f"Categories: Low={lowCount}, Moderate={moderateCount}, High={highCount}")
        
        # merge the data
        mergedData = pd.merge_asof(
            hrvDf.sort_values('timestamp'),
            stressDf.sort_values('timestamp'),
            on='timestamp',
            direction='nearest',
            tolerance=pd.Timedelta('1hr')
        )
        
        mergedData = mergedData.dropna(subset=['sdnn', 'rmssd', 'stressCategory'])
        mergedData['dataSource'] = 'samsung'
        
        print(f"Merged samples: {len(mergedData)}")
        lowCount = (mergedData['stressCategory']==0).sum()
        moderateCount = (mergedData['stressCategory']==1).sum()
        highCount = (mergedData['stressCategory']==2).sum()
        print(f"  Low: {lowCount}, Moderate: {moderateCount}, High: {highCount}")
        
        self.data = mergedData
        return mergedData


class WesadProcessor:
    """processes wesad dataset"""
    
    def __init__(self):
        self.data = None
    
    def computeHrv(self, ecgSignal):
        # peak detection algorithm
        peaks, _ = find_peaks(ecgSignal, distance=420, height=0)
        rrIntervals = np.diff(peaks) / 700 * 1000
        
        # filter bad values
        rrIntervals = rrIntervals[(rrIntervals > 300) & (rrIntervals < 2000)]
        
        if len(rrIntervals) < 10:
            return None, None
        
        # calculate metrics
        sdnnValue = np.std(rrIntervals)
        rmssdValue = np.sqrt(np.mean(np.diff(rrIntervals)**2))
        
        return sdnnValue, rmssdValue
    
    def loadWesad(self, wesadPath):
        print("\nLoading WESAD dataset...")
        
        allSamples = []
        subjectList = ['S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 
                      'S10', 'S11', 'S13', 'S14', 'S15', 'S16', 'S17']
        
        for subjId in subjectList:
            pklFile = f"{wesadPath}/{subjId}/{subjId}.pkl"
            
            if not os.path.exists(pklFile):
                continue
            
            try:
                with open(pklFile, 'rb') as f:
                    subjData = pickle.load(f, encoding='latin1')
                
                ecgData = subjData['signal']['chest']['ECG'].flatten()
                labelData = subjData['label']
                
                # process windows - 60 seconds each
                windowSize = 42000
                stepSize = 21000
                
                for i in range(0, len(ecgData) - windowSize, stepSize):
                    ecgWindow = ecgData[i:i+windowSize]
                    labelWindow = labelData[i:i+windowSize]
                    
                    try:
                        sdnn, rmssd = self.computeHrv(ecgWindow)
                        
                        if sdnn is None:
                            continue
                        
                        # get majority label
                        majorityLabel = np.bincount(labelWindow).argmax()
                        
                        # wesad labels: 
                        if majorityLabel == 1:
                            stressClass = 0  # baseline = low
                        elif majorityLabel == 2:
                            stressClass = 2  # stress = high
                        elif majorityLabel == 3:
                            stressClass = 1  # amusement = moderate
                        else:
                            continue
                        
                        allSamples.append({
                            'sdnn': sdnn,
                            'rmssd': rmssd,
                            'stressCategory': stressClass,
                            'dataSource': 'wesad'
                        })
                    except:
                        continue
                
                print(f"  Loaded {subjId}")
            except:
                continue
        
        wesadDf = pd.DataFrame(allSamples)
        print(f"WESAD samples: {len(wesadDf)}")
        lowCount = (wesadDf['stressCategory']==0).sum()
        moderateCount = (wesadDf['stressCategory']==1).sum()
        highCount = (wesadDf['stressCategory']==2).sum()
        print(f"Low: {lowCount}, Moderate: {moderateCount}, High: {highCount}")
        
        self.data = wesadDf
        return wesadDf


class StressClassifier:
    """trains xgboost model for stress classification"""
    
    def __init__(self):
        self.scaler = StandardScaler()
        self.model = None
        self.useHybrid = False
    
    def trainHybridModel(self, samsungData, wesadData):
        """train using both wesad and samsung data"""
        self.useHybrid = True
        print("\n=== Training Model ===")
        
        # combine datasets
        combinedData = pd.concat([wesadData, samsungData], ignore_index=True)
        print(f"Total samples: {len(combinedData)} (WESAD: {len(wesadData)}, Samsung: {len(samsungData)})")
        
        # prepare features
        allFeatures = combinedData[['sdnn', 'rmssd']].values
        allLabels = combinedData['stressCategory'].values
        sourceLabels = combinedData['dataSource'].values
        
        # split by data source
        isSamsung = sourceLabels == 'samsung'
        samsungFeatures = allFeatures[isSamsung]
        samsungLabels = allLabels[isSamsung]
        wesadFeatures = allFeatures[~isSamsung]
        wesadLabels = allLabels[~isSamsung]
        
        wesadLow = (wesadLabels==0).sum()
        wesadMod = (wesadLabels==1).sum()
        wesadHigh = (wesadLabels==2).sum()
        print(f"WESAD distribution - Low: {wesadLow}, Moderate: {wesadMod}, High: {wesadHigh}")
        
        samsungLow = (samsungLabels==0).sum()
        samsungMod = (samsungLabels==1).sum()
        samsungHigh = (samsungLabels==2).sum()
        print(f"Samsung distribution - Low: {samsungLow}, Moderate: {samsungMod}, High: {samsungHigh}")
        
        # split wesad for validation
        xTrain, xVal, yTrain, yVal = train_test_split(
            wesadFeatures, wesadLabels, test_size=0.2, random_state=42, stratify=wesadLabels
        )
        
        # scale the features
        xTrain = self.scaler.fit_transform(xTrain)
        xVal = self.scaler.transform(xVal)
        
        # train on wesad
        print("\nTraining on WESAD...")
        self.model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            eval_metric='mlogloss',
            num_class=3
        )
        
        self.model.fit(xTrain, yTrain, verbose=False)
        
        # check validation accuracy
        predVal = self.model.predict(xVal)
        valAcc = (predVal == yVal).mean()
        print(f"Validation accuracy: {valAcc:.3f}")
        
        #finetune on samsung 
        print("\nFine-tuning on Samsung...")
        samsungScaled = self.scaler.transform(samsungFeatures)
        
        self.model.learning_rate = 0.05
        self.model.n_estimators = 50
        existingModel = self.model.get_booster()
        self.model.fit(samsungScaled, samsungLabels, verbose=False, xgb_model=existingModel)
        
        # evaluate final model
        predSamsung = self.model.predict(samsungScaled)
        probaSamsung = self.model.predict_proba(samsungScaled)
        
        print("\n--- Final Results (Samsung) ---")
        uniqueClasses = np.unique(samsungLabels)
        
        if len(uniqueClasses) >= 2:
            classNames = ['Low', 'Moderate', 'High']
            targetNames = [classNames[int(i)] for i in uniqueClasses]
            print(classification_report(samsungLabels, predSamsung, target_names=targetNames, labels=uniqueClasses))
            print(f"\nConfusion Matrix:\n{confusion_matrix(samsungLabels, predSamsung)}")
            
            try:
                aucScore = roc_auc_score(samsungLabels, probaSamsung, multi_class='ovr', average='weighted')
                print(f"AUC: {aucScore:.3f}")
            except:
                pass
        else:
            print(f"Only {len(uniqueClasses)} class found")
            print(f"Accuracy: {(predSamsung==samsungLabels).mean():.3f}")
        
        print(f"\nFeature importance: SDNN={self.model.feature_importances_[0]:.3f}, RMSSD={self.model.feature_importances_[1]:.3f}")
    
    def trainSamsungOnly(self, samsungData):
        """train using only samsung data"""
        self.useHybrid = False
        print("\n=== Samsung-Only Training ===")
        
        features = samsungData[['sdnn', 'rmssd']].values
        labels = samsungData['stressCategory'].values
        
        lowCount = (labels==0).sum()
        moderateCount = (labels==1).sum()
        highCount = (labels==2).sum()
        print(f"Distribution - Low: {lowCount}, Moderate: {moderateCount}, High: {highCount}")
        
        uniqueClasses = np.unique(labels)
        if len(uniqueClasses) < 2:
            print("ERROR: Need at least 2 classes")
            return
        
        # split data
        xTrain, xTest, yTrain, yTest = train_test_split(
            features, labels, test_size=0.2, random_state=42, stratify=labels
        )
        
        # scale features
        xTrain = self.scaler.fit_transform(xTrain)
        xTest = self.scaler.transform(xTest)
        
        # train model
        self.model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            eval_metric='mlogloss',
            num_class=3
        )
        
        self.model.fit(xTrain, yTrain, verbose=False)
        
        # evaluate
        pred = self.model.predict(xTest)
        proba = self.model.predict_proba(xTest)
        
        print("\n--- Results ---")
        testClasses = np.unique(yTest)
        
        if len(testClasses) >= 2:
            classNames = ['Low', 'Moderate', 'High']
            targetNames = [classNames[int(i)] for i in testClasses]
            print(classification_report(yTest, pred, target_names=targetNames, labels=testClasses))
            print(f"\nConfusion Matrix:\n{confusion_matrix(yTest, pred)}")
            
            try:
                aucScore = roc_auc_score(yTest, proba, multi_class='ovr', average='weighted')
                print(f"AUC: {aucScore:.3f}")
            except:
                pass
        else:
            print(f"Only {len(testClasses)} class in test")
            print(f"Accuracy: {(pred==yTest).mean():.3f}")
        
        print(f"\nFeature importance: SDNN={self.model.feature_importances_[0]:.3f}, RMSSD={self.model.feature_importances_[1]:.3f}")
    
    def saveModel(self):
        """save model and scaler to disk"""
        if self.useHybrid:
            modelPath = 'stressModelHybrid.pkl'
            scalerPath = 'scalerHybrid.pkl'
        else:
            modelPath = 'stressModelSamsung.pkl'
            scalerPath = 'scalerSamsung.pkl'
        
        joblib.dump(self.model, modelPath)
        joblib.dump(self.scaler, scalerPath)
        
        print(f"\nSaved model: {modelPath}")
        print(f"Saved scaler: {scalerPath}")


# main execution
if __name__ == '__main__':
    print("="*70)
    print("STRESS DETECTION MODEL TRAINING")
    print("="*70)
    
    # paths
    dataDirectory = r'D:\laiba\Desktop\HRClaude\jsons'
    
    # search for WESAD directory
    def findWesadDirectory():
        print("\nSearching for WESAD directory...")
        
        # check common locations first
        possiblePaths = [
            r'D:\laiba\Desktop\HRClaude\WESAD',
            r'D:\laiba\Desktop\WESAD',
            './WESAD',
            '../WESAD',
            '../../WESAD',
            os.path.join(os.getcwd(), 'WESAD'),
        ]
        
        for path in possiblePaths:
            if os.path.exists(path):
                print(f"Found WESAD at: {path}")
                return path
        
        # if not found, search from desktop
        try:
            desktop = os.path.join(os.path.expanduser('~'), 'Desktop')
            if os.path.exists(desktop):
                for root, dirs, files in os.walk(desktop):
                    if 'WESAD' in dirs:
                        wesadPath = os.path.join(root, 'WESAD')
                        print(f"Found WESAD at: {wesadPath}")
                        return wesadPath
        except:
            pass
        
        print("WESAD directory not found")
        return None
    
    wesadDirectory = findWesadDirectory()
    
    # load samsung files
    print("\nLoading Samsung Health files...")
    
    def load_all_samsung_files(data_directory):
        # Search recursively for HRV files
        hrv_files = glob.glob(data_directory + '/com.samsung.health.hrv/**/*.json', recursive=True)
        
        # Search recursively for stress files (shealth NOT health)
        stress_files = glob.glob(data_directory + '/com.samsung.shealth.stress/**/*.json', recursive=True)
        
        hr_files = []
        
        print(f"Found:")
        print(f"  Stress files: {len(stress_files)}")
        print(f"  HRV files: {len(hrv_files)}")
        
        stress_data = []
        for file in stress_files:
            try:
                with open(file, 'r') as f:
                    data = json.load(f)
                    if isinstance(data, list):
                        stress_data.extend(data)
                    else:
                        stress_data.append(data)
            except Exception as e:
                print(f"Error: {e}")
        
        hrv_data = []
        for file in hrv_files:
            if 'stress' in file.lower() or 'heart_rate' in file.lower():
                continue
            try:
                with open(file, 'r') as f:
                    data = json.load(f)
                    if isinstance(data, list):
                        hrv_data.extend(data)
                    else:
                        hrv_data.append(data)
            except Exception as e:
                print(f"Error: {e}")
        
        return stress_data, hrv_data, hr_files
    
    stressRecords, hrvRecords, hrFiles = load_all_samsung_files(dataDirectory)
    
    print(f"Loaded {len(hrvRecords)} HRV records, {len(stressRecords)} stress records")
    
    # process samsung data
    samsungProcessor = SamsungProcessor()
    samsungDf = samsungProcessor.loadAndProcess(hrvRecords, stressRecords)
    
    # try to load wesad
    wesadDf = None
    if wesadDirectory and os.path.exists(wesadDirectory):
        wesadProcessor = WesadProcessor()
        wesadDf = wesadProcessor.loadWesad(wesadDirectory)
    else:
        print(f"\nWESAD directory not available")
    
    # train the model
    print("\n" + "="*70)
    print("TRAINING")
    print("="*70)
    
    classifier = StressClassifier()
    
    if wesadDf is not None and len(wesadDf) > 0:
        classifier.trainHybridModel(samsungDf, wesadDf)
    else:
        classifier.trainSamsungOnly(samsungDf)
    
    classifier.saveModel()
    
    print("\n" + "="*70)
    print("DONE!")
    print("="*70)

STRESS DETECTION MODEL TRAINING

Searching for WESAD directory...
Found WESAD at: ./WESAD

Loading Samsung Health files...
Found:
  Stress files: 119
  HRV files: 35
Loaded 2807 HRV records, 5091 stress records

Processing Samsung Health data...
Stress range: 0.0 - 100.0
Categories: Low=1711, Moderate=1705, High=1675
Merged samples: 855
  Low: 419, Moderate: 229, High: 207

Loading WESAD dataset...
WESAD samples: 0


KeyError: 'stressCategory'

In [70]:
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import joblib
from datetime import datetime
from pathlib import Path
import glob

class SamsungDataLoader:
    
    @staticmethod
    def load_all_samsung_files(data_directory):
        # Search recursively for HRV files
        hrv_files = glob.glob(data_directory + '/com.samsung.health.hrv/**/*.json', recursive=True)
        
        # Search recursively for stress files (shealth NOT health)
        stress_files = glob.glob(data_directory + '/com.samsung.shealth.stress/**/*.json', recursive=True)
        
        hr_files = []
        
        print(f"Found:")
        print(f"  Stress files: {len(stress_files)}")
        print(f"  HRV files: {len(hrv_files)}")
        
        stress_data = []
        for file in stress_files:
            try:
                with open(file, 'r') as f:
                    data = json.load(f)
                    if isinstance(data, list):
                        stress_data.extend(data)
                    else:
                        stress_data.append(data)
            except Exception as e:
                print(f"Error: {e}")
        
        hrv_data = []
        for file in hrv_files:
            if 'stress' in file.lower() or 'heart_rate' in file.lower():
                continue
            try:
                with open(file, 'r') as f:
                    data = json.load(f)
                    if isinstance(data, list):
                        hrv_data.extend(data)
                    else:
                        hrv_data.append(data)
            except Exception as e:
                print(f"Error: {e}")
        
        return stress_data, hrv_data, hr_files
    
    @staticmethod
    def process_stress_data(stress_data):
        df = pd.DataFrame(stress_data)
        df['timestamp'] = pd.to_datetime(df['start_time'], unit='ms')
        
        df['stress_normalized'] = df['score'] / df['score'].max()
        median = df['score'].median()
        df['stress_binary'] = (df['score'] > median).astype(int)
        
        print(f"Stress data:")
        print(f"  Samples: {len(df)}")
        print(f"  Score range: {df['score'].min():.1f} - {df['score'].max():.1f}")
        print(f"  High stress: {df['stress_binary'].sum()}")
        print(f"  Low stress: {(df['stress_binary'] == 0).sum()}")
        
        return df[['timestamp', 'score', 'stress_binary', 'stress_normalized']]
    
    @staticmethod
    def process_hrv_data(hrv_data):
        df = pd.DataFrame(hrv_data)
        df['timestamp'] = pd.to_datetime(df['start_time'], unit='ms')
        
        if 'sdnn' not in df.columns:
            df['sdnn'] = np.nan
        if 'rmssd' not in df.columns:
            df['rmssd'] = np.nan
        
        print(f"HRV data:")
        print(f"  Samples: {len(df)}")
        print(f"  SDNN range: {df['sdnn'].min():.1f} - {df['sdnn'].max():.1f}")
        print(f"  RMSSD range: {df['rmssd'].min():.1f} - {df['rmssd'].max():.1f}")
        
        return df[['timestamp', 'sdnn', 'rmssd']]
    
    @staticmethod
    def merge_stress_and_hrv(df_stress, df_hrv):
        df_stress['timestamp_rounded'] = df_stress['timestamp'].dt.round('1min')
        df_hrv['timestamp_rounded'] = df_hrv['timestamp'].dt.round('1min')
        
        merged = pd.merge_asof(
            df_hrv.sort_values('timestamp'),
            df_stress.sort_values('timestamp'),
            on='timestamp',
            direction='nearest',
            tolerance=pd.Timedelta('1hr')
        )
        
        merged = merged.dropna(subset=['stress_binary'])
        merged = merged.dropna(subset=['sdnn', 'rmssd'])
        
        print(f"Merged data:")
        print(f"  Matched samples: {len(merged)}")
        print(f"  Time span: {merged['timestamp'].min()} to {merged['timestamp'].max()}")
        print(f"  Duration: {(merged['timestamp'].max() - merged['timestamp'].min()).days} days")
        
        return merged[['timestamp', 'sdnn', 'rmssd', 'stress_binary', 'score']]


class SamsungStressClassifier:
    def __init__(self):
        self.scaler = StandardScaler()
        self.model = None
        self.feature_names = ['sdnn', 'rmssd']
    
    def train(self, X, y, test_size=0.2):
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42, stratify=y
        )
        
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        self.model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            eval_metric='logloss'
        )
        
        self.model.fit(X_train_scaled, y_train, verbose=False)
        
        y_pred = self.model.predict(X_test_scaled)
        y_pred_proba = self.model.predict_proba(X_test_scaled)[:, 1]
        
        print("\nMODEL EVALUATION")
        print("="*60)
        
        unique_classes = np.unique(y_test)
        if len(unique_classes) > 1:
            target_names = ['Low', 'Moderate', 'High']
            print(classification_report(y_test, y_pred, target_names=target_names, labels=unique_classes))
            print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")
            print(f"ROC-AUC Score: {roc_auc_score(y_test, y_pred_proba, multi_class='ovr'):.3f}")
        else:
            print(f"Warning: Only 1 class in test set")
            print(f"Accuracy: {(y_pred == y_test).mean():.3f}")
        
        print("\nFeature Importance:")
        for feat, imp in zip(self.feature_names, self.model.feature_importances_):
            print(f"  {feat}: {imp:.4f}")
        
        fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
        plt.figure(figsize=(8, 6))
        plt.plot(fpr, tpr, label=f'ROC Curve')
        plt.plot([0, 1], [0, 1], 'k--', label='Random')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve')
        plt.legend()
        plt.savefig('roc_curve.png', dpi=150)
        plt.close()
        print("\nROC curve saved")
        
        return self.model
    
    def predict(self, X_new):
        if self.model is None:
            raise ValueError("Model not trained!")
        X_scaled = self.scaler.transform(X_new)
        predictions = self.model.predict(X_scaled)
        probabilities = self.model.predict_proba(X_scaled)[:, 1]
        return predictions, probabilities
    
    def save_model(self, model_path='stress_model.pkl', scaler_path='scaler.pkl'):
        joblib.dump(self.model, model_path)
        joblib.dump(self.scaler, scaler_path)
        print(f"\nModel saved to {model_path}")
        print(f"Scaler saved to {scaler_path}")


if __name__ == '__main__':
    print("Samsung Health Stress Classification")
    print("="*60)
    
    loader = SamsungDataLoader()
    data_dir = r'D:\laiba\Desktop\HRClaude\jsons'    
    stress_data, hrv_data, hr_data = loader.load_all_samsung_files(data_dir)
    
    
    print("\nProcessing data...")
    df_stress = loader.process_stress_data(stress_data)
    df_hrv = loader.process_hrv_data(hrv_data)
    
    print("\nMerging...")
    df = loader.merge_stress_and_hrv(df_stress, df_hrv)
    
    print("\nTraining...")
    X = df[['sdnn', 'rmssd']].values
    y = df['stress_binary'].values
    
    classifier = SamsungStressClassifier()
    classifier.train(X, y)
    classifier.save_model()
    
    print("\nTRAINING COMPLETE!")

Samsung Health Stress Classification
Found:
  Stress files: 119
  HRV files: 35

Processing data...
Stress data:
  Samples: 5091
  Score range: 0.0 - 100.0
  High stress: 2512
  Low stress: 2579
HRV data:
  Samples: 2807
  SDNN range: 30.8 - 174.1
  RMSSD range: 25.6 - 133.1

Merging...
Merged data:
  Matched samples: 855
  Time span: 2025-12-26 06:48:02.825000 to 2026-01-02 00:36:18.909000
  Duration: 6 days

Training...

MODEL EVALUATION
              precision    recall  f1-score   support

         Low       0.81      0.89      0.85       117
    Moderate       0.69      0.54      0.60        54

    accuracy                           0.78       171
   macro avg       0.75      0.71      0.72       171
weighted avg       0.77      0.78      0.77       171

Confusion Matrix:
[[104  13]
 [ 25  29]]
ROC-AUC Score: 0.859

Feature Importance:
  sdnn: 0.4425
  rmssd: 0.5575

ROC curve saved

Model saved to stress_model.pkl
Scaler saved to scaler.pkl

TRAINING COMPLETE!


