## 1


In [7]:
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import joblib
from datetime import datetime, timedelta
from pathlib import Path
import glob
import pickle
from scipy import signal
import os

## 2


Run the cells in this section only when retraining the model (for example, after adding new data).

In [3]:
# WESAD dataset loader

class WesadDataLoader:
    """Turns WESAD subject pickles into windowed HRV features with a 3-level stress label.

    Every method is static; the class only serves as a namespace.
    """

    @staticmethod
    def calculateHrv(rrIntervals):
        """Compute time-domain HRV metrics from RR intervals expressed in milliseconds.

        Intervals outside the open range (300, 2000) are discarded before any
        metric is computed.

        Args:
            rrIntervals: list or array of RR intervals.

        Returns:
            Tuple ``(sdnn, rmssd, meanHr, pnn50)``; four ``None`` values when fewer
            than two intervals are available before or after filtering.
        """
        # Coerce to an ndarray so plain Python lists work with the boolean mask below.
        rrIntervals = np.asarray(rrIntervals, dtype=float).ravel()
        if len(rrIntervals) < 2:
            return None, None, None, None

        rrIntervals = rrIntervals[(rrIntervals > 300) & (rrIntervals < 2000)]
        if len(rrIntervals) < 2:
            return None, None, None, None

        sdnn = np.std(rrIntervals)
        diffRr = np.diff(rrIntervals)
        rmssd = np.sqrt(np.mean(diffRr ** 2))
        meanHr = 60000 / np.mean(rrIntervals)  # beats per minute from the mean interval in ms
        pnn50 = np.sum(np.abs(diffRr) > 50) / len(diffRr) * 100  # % of successive differences above 50 ms

        return sdnn, rmssd, meanHr, pnn50

    @staticmethod
    def getRrFromEcg(ecgSignal, fs=700):
        """Derive RR intervals (ms) from a raw ECG trace by peak picking.

        The signal is band-pass filtered (0.5-40 Hz, 4th-order Butterworth applied
        forwards and backwards), z-normalised, and peaks that are at least 0.4 s
        apart and above ``mean + 0.5 * std`` are treated as beats.

        Args:
            ecgSignal: 1-D ECG samples.
            fs: sampling rate in Hz.

        Returns:
            Array of successive peak-to-peak intervals in milliseconds, or ``None``
            when the filtered signal is flat or fewer than two peaks are found.
        """
        nyq = fs / 2
        b, a = signal.butter(4, [0.5 / nyq, 40 / nyq], btype='band')
        ecgFilt = signal.filtfilt(b, a, ecgSignal)

        # A flat (or non-finite) trace has no usable spread; normalising it would
        # divide by zero and yield NaNs instead of peaks.
        spread = np.std(ecgFilt)
        if not np.isfinite(spread) or spread == 0:
            return None
        ecgNorm = (ecgFilt - np.mean(ecgFilt)) / spread

        minDist = int(0.4 * fs)
        thresh = np.mean(ecgNorm) + 0.5 * np.std(ecgNorm)
        peaks = signal.find_peaks(ecgNorm, height=thresh, distance=minDist)[0]

        if len(peaks) < 2:
            return None

        # Sample offsets -> milliseconds.
        return np.diff(peaks) / fs * 1000

    @staticmethod
    def loadWesadSubject(subjectPath):
        """Load one subject pickle and pull out the chest ECG and label stream.

        Args:
            subjectPath: path to an ``S*.pkl`` file.

        Returns:
            ``(ecg, labels, fs)`` with ``fs`` fixed at 700, or ``None`` when the
            pickle lacks ``signal.chest``, ``label`` or an ECG channel.
        """
        # NOTE(security): pickle.load can execute arbitrary code embedded in the
        # file - only use it on dataset copies obtained from a trusted source.
        with open(subjectPath, 'rb') as f:
            data = pickle.load(f, encoding='latin1')

        if 'signal' not in data or 'chest' not in data['signal'] or 'label' not in data:
            return None

        chest = data['signal']['chest']
        labels = data['label']

        if isinstance(chest, dict):
            ecg = chest['ECG'].flatten() if 'ECG' in chest else None
        else:
            # Array layout: first column is taken as the ECG channel.
            ecg = chest[:, 0] if len(chest.shape) > 1 else chest

        if ecg is None:
            return None

        fs = 700

        return ecg, labels, fs

    @staticmethod
    def processWesadData(wesadDirectory):
        """Extract HRV features from every ``S*.pkl`` found under ``wesadDirectory``.

        Each recording is cut into 60-second windows with 50 % overlap. A window
        takes the most frequent label inside it, which is mapped to a stress class
        (1 -> 0, 2 -> 2, 3 -> 1, 4 -> 0; any other label is skipped).
        NOTE(review): these are presumably the WESAD protocol codes (baseline,
        stress, amusement, meditation) - confirm against the dataset readme.

        Args:
            wesadDirectory: root folder searched recursively for subject pickles.

        Returns:
            DataFrame with columns ``subject, sdnn, rmssd, meanHr, pnn50,
            stressClass, source``; ``None`` when no subject file is found. The
            frame is empty (but keeps its columns) when no window qualifies.
        """
        subjectFiles = glob.glob(f"{wesadDirectory}/**/S*.pkl", recursive=True)

        print(f"\nFound {len(subjectFiles)} WESAD subjects")

        if len(subjectFiles) == 0:
            return None

        columns = ['subject', 'sdnn', 'rmssd', 'meanHr', 'pnn50', 'stressClass', 'source']
        labelToStress = {1: 0, 2: 2, 3: 1, 4: 0}
        features = []

        for subjFile in subjectFiles:
            subj = Path(subjFile).stem
            print(f"Processing {subj}...")

            result = WesadDataLoader.loadWesadSubject(subjFile)
            if result is None:
                continue

            ecg, labels, fs = result

            winSize = 60 * fs
            step = winSize // 2

            # Only full windows are visited; a recording shorter than one window
            # yields an empty range.
            for start in range(0, len(ecg) - winSize + 1, step):
                end = start + winSize

                ecgWin = ecg[start:end]
                labelWin = labels[start:end]
                if len(labelWin) == 0:
                    # Label stream shorter than the ECG: nothing to vote on.
                    continue

                vals, counts = np.unique(labelWin, return_counts=True)
                label = vals[np.argmax(counts)]

                stress = labelToStress.get(label)
                if stress is None:
                    continue

                rr = WesadDataLoader.getRrFromEcg(ecgWin, fs)

                if rr is None or len(rr) < 10:
                    continue

                sdnn, rmssd, meanHr, pnn50 = WesadDataLoader.calculateHrv(rr)

                if sdnn is None:
                    continue

                features.append({
                    'subject': subj,
                    'sdnn': sdnn,
                    'rmssd': rmssd,
                    'meanHr': meanHr,
                    'pnn50': pnn50,
                    'stressClass': stress,
                    'source': 'WESAD'
                })

        # Passing the columns explicitly keeps the schema even when nothing was
        # extracted, so the summary below (and callers) never hit a KeyError.
        df = pd.DataFrame(features, columns=columns)

        print(f"\nWESAD data:")
        print(f"  Total samples: {len(df)}")
        if df.empty:
            return df

        print(f"  Low: {(df['stressClass'] == 0).sum()}")
        print(f"  Moderate: {(df['stressClass'] == 1).sum()}")
        print(f"  High: {(df['stressClass'] == 2).sum()}")
        print(f"  SDNN range: {df['sdnn'].min():.1f} - {df['sdnn'].max():.1f}")
        print(f"  RMSSD range: {df['rmssd'].min():.1f} - {df['rmssd'].max():.1f}")

        return df


In [5]:
# Samsung health data loader

class SamsungDataLoader:
    """Loads Samsung Health JSON exports and pairs HRV readings with stress scores.

    Every method is static; the class only serves as a namespace.
    """

    @staticmethod
    def _readJsonRecords(paths):
        """Read each JSON file and flatten list payloads into one list of records."""
        records = []
        for path in paths:
            with open(path, 'r', encoding='utf-8') as f:
                payload = json.load(f)
            if isinstance(payload, list):
                records.extend(payload)
            else:
                records.append(payload)
        return records

    @staticmethod
    def loadAllSamsungFiles(dataDirectory):
        """Collect raw stress and HRV records from an export directory.

        Stress files are searched under ``com.samsung.shealth.stress`` and HRV
        files under ``com.samsung.health.hrv`` (both recursively).

        Args:
            dataDirectory: root of the export.

        Returns:
            ``(stressData, hrvData)`` - two lists of decoded JSON records.
        """
        hrvFiles = glob.glob(dataDirectory + '/com.samsung.health.hrv/**/*.json', recursive=True)
        stressFiles = glob.glob(dataDirectory + '/com.samsung.shealth.stress/**/*.json', recursive=True)

        print(f"Found:")
        print(f"  Stress files: {len(stressFiles)}")
        print(f"  HRV files: {len(hrvFiles)}")

        stressData = SamsungDataLoader._readJsonRecords(stressFiles)

        # Skip HRV-folder files whose name hints at other data. The keywords are
        # matched against the path *below* dataDirectory so that a parent folder
        # that happens to contain "stress" does not discard every HRV file.
        keptHrvFiles = []
        for file in hrvFiles:
            relative = os.path.relpath(file, dataDirectory).lower()
            if 'stress' in relative or 'heart_rate' in relative:
                continue
            keptHrvFiles.append(file)
        hrvData = SamsungDataLoader._readJsonRecords(keptHrvFiles)

        return stressData, hrvData

    @staticmethod
    def processStressData(stressData):
        """Convert raw stress records into a labelled frame.

        ``start_time`` is parsed as epoch milliseconds. Scores are split into
        three classes by the 33rd and 66th percentiles of the loaded scores:
        ``<= p33`` -> 0, ``> p66`` -> 2, everything else -> 1.

        Args:
            stressData: list of records with ``start_time`` and ``score``.

        Returns:
            DataFrame with ``timestamp, score, stressClass, stressNormalized``.
        """
        df = pd.DataFrame(stressData)
        df['timestamp'] = pd.to_datetime(df['start_time'], unit='ms')

        df['stressNormalized'] = df['score'] / df['score'].max()

        lowThresh = df['score'].quantile(0.33)
        highThresh = df['score'].quantile(0.66)

        df['stressClass'] = 1
        df.loc[df['score'] <= lowThresh, 'stressClass'] = 0
        df.loc[df['score'] > highThresh, 'stressClass'] = 2

        print(f"Stress data:")
        print(f"  Samples: {len(df)}")
        print(f"  Score range: {df['score'].min():.1f} - {df['score'].max():.1f}")
        print(f"  Low stress: {(df['stressClass'] == 0).sum()}")
        print(f"  Moderate stress: {(df['stressClass'] == 1).sum()}")
        print(f"  High stress: {(df['stressClass'] == 2).sum()}")

        return df[['timestamp', 'score', 'stressClass', 'stressNormalized']]

    @staticmethod
    def processHrvData(hrvData):
        """Convert raw HRV records into a ``timestamp, sdnn, rmssd`` frame.

        ``start_time`` is parsed as epoch milliseconds; a missing ``sdnn`` or
        ``rmssd`` column is created and filled with NaN.
        """
        df = pd.DataFrame(hrvData)
        df['timestamp'] = pd.to_datetime(df['start_time'], unit='ms')

        if 'sdnn' not in df.columns:
            df['sdnn'] = np.nan
        if 'rmssd' not in df.columns:
            df['rmssd'] = np.nan

        print(f"HRV data:")
        print(f"  Samples: {len(df)}")
        print(f"  SDNN range: {df['sdnn'].min():.1f} - {df['sdnn'].max():.1f}")
        print(f"  RMSSD range: {df['rmssd'].min():.1f} - {df['rmssd'].max():.1f}")

        return df[['timestamp', 'sdnn', 'rmssd']]

    @staticmethod
    def mergeStressAndHrv(dfStress, dfHrv):
        """Attach to every HRV reading the nearest stress score within one hour.

        HRV rows with no stress sample inside the tolerance, or with missing
        ``sdnn``/``rmssd``, are dropped. The input frames are left untouched.

        Args:
            dfStress: output of ``processStressData``.
            dfHrv: output of ``processHrvData``.

        Returns:
            New DataFrame with ``timestamp, sdnn, rmssd, stressClass, score``.
        """
        merged = pd.merge_asof(
            dfHrv.sort_values('timestamp'),
            dfStress.sort_values('timestamp'),
            on='timestamp',
            direction='nearest',
            tolerance=pd.Timedelta(hours=1)
        )

        merged = merged.dropna(subset=['stressClass'])
        merged = merged.dropna(subset=['sdnn', 'rmssd'])

        print(f"Merged data:")
        print(f"  Matched samples: {len(merged)}")
        print(f"  Time span: {merged['timestamp'].min()} to {merged['timestamp'].max()}")
        print(f"  Duration: {(merged['timestamp'].max() - merged['timestamp'].min()).days} days")

        # Hand back an independent frame so callers can add columns without
        # triggering chained-assignment warnings.
        return merged[['timestamp', 'sdnn', 'rmssd', 'stressClass', 'score']].copy()




In [7]:
# model training class

class StressClassifier:
    """Standard-scaled XGBoost classifier for three stress classes (0 low, 1 moderate, 2 high)."""

    def __init__(self):
        self.scaler = StandardScaler()
        self.model = None
        # Column order expected in X; also used to label feature importances.
        self.features = ['sdnn', 'rmssd', 'meanHr', 'pnn50']  # add new features

    @staticmethod
    def _buildSampleWeights(yTrain, classes, classWeights, moderateBoost=2):
        """Expand per-class weights into one weight per training sample.

        Weights are looked up by class *label* (not by position), and the weight
        of class 1 is multiplied by ``moderateBoost`` when that class is present.
        """
        weightByLabel = dict(zip(np.asarray(classes).tolist(), classWeights))
        if 1 in weightByLabel:
            weightByLabel[1] = weightByLabel[1] * moderateBoost
        return np.array([weightByLabel[label] for label in np.asarray(yTrain).tolist()])

    def train(self, X, y, testSize=0.2):
        """Fit the scaler and model on a stratified split and print an evaluation.

        Side effects: prints a classification report, confusion matrix, macro
        one-vs-rest ROC-AUC and feature importances, and writes ``confusion.png``
        to the working directory.

        Args:
            X: feature matrix whose columns follow ``self.features``.
            y: class labels (0, 1 or 2).
            testSize: fraction of samples held out for evaluation.

        Returns:
            The fitted ``XGBClassifier`` (also stored on ``self.model``).
        """
        xTrain, xTest, yTrain, yTest = train_test_split(
            X, y, test_size=testSize, random_state=42, stratify=y
        )

        # The scaler is fitted on the training split only.
        xTrainScaled = self.scaler.fit_transform(xTrain)
        xTestScaled = self.scaler.transform(xTest)

        # Balanced class weights, with the moderate class weighted twice as much.
        from sklearn.utils.class_weight import compute_class_weight
        classes = np.unique(yTrain)
        classWeights = compute_class_weight('balanced', classes=classes, y=yTrain)
        sampleWeights = self._buildSampleWeights(yTrain, classes, classWeights)

        self.model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            num_class=3
        )

        self.model.fit(xTrainScaled, yTrain, sample_weight=sampleWeights, verbose=False)

        yPred = self.model.predict(xTestScaled)
        yProba = self.model.predict_proba(xTestScaled)

        print("\nEVALUATION")
        print("="*50)

        labels = ['Low', 'Moderate', 'High']
        classIds = [0, 1, 2]
        # Pin the label set so report and matrix keep one row per class even when
        # a class is absent from the test split or from the predictions.
        print(classification_report(yTest, yPred, labels=classIds, target_names=labels))

        cm = confusion_matrix(yTest, yPred, labels=classIds)
        print(f"\nConfusion Matrix:")
        print(cm)

        auc = roc_auc_score(yTest, yProba, multi_class='ovr', average='macro')
        print(f"\nROC-AUC: {auc:.3f}")

        print("\nFeature importance:")
        for f, imp in zip(self.features, self.model.feature_importances_):
            print(f"  {f}: {imp:.3f}")

        plt.figure(figsize=(8, 6))
        plt.imshow(cm, cmap='Blues')
        plt.colorbar()
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix')
        plt.xticks(classIds, labels)
        plt.yticks(classIds, labels)

        # Write the count into every cell of the heat map.
        for i in range(3):
            for j in range(3):
                plt.text(j, i, cm[i,j], ha='center', va='center')

        plt.savefig('confusion.png', dpi=150)
        plt.close()
        print("\nSaved confusion.png")

        return self.model

    def predict(self, xNew):
        """Scale ``xNew`` with the fitted scaler and return ``(classes, probabilities)``."""
        xScaled = self.scaler.transform(xNew)
        preds = self.model.predict(xScaled)
        proba = self.model.predict_proba(xScaled)
        return preds, proba

    def saveModel(self, modelFile='stress_model.pkl', scalerFile='scaler.pkl'):
        """Persist the model and the scaler with joblib to the given paths."""
        joblib.dump(self.model, modelFile)
        joblib.dump(self.scaler, scalerFile)
        print(f"\nSaved {modelFile} and {scalerFile}")


## 3


Train New Model

In [None]:
# main script

if __name__ == '__main__':
    # Training entry point: load the enabled data sources, combine them,
    # train a StressClassifier and save the model/scaler to disk.
    print("Stress Classification - 3 Classes")
    print("="*50)

    # NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
    samsungDir = r'D:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2\HRVModule\XGBoost\jsons'
    wesadDir = r'D:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2\HRVModule\XGBoost\WESAD'

    useSamsung = False
    useWesad = True

    allData = []

    if useSamsung:
        print("\nLoading Samsung data...")
        loader = SamsungDataLoader()
        stressData, hrvData = loader.loadAllSamsungFiles(samsungDir)

        if stressData and hrvData:
            dfStress = loader.processStressData(stressData)
            dfHrv = loader.processHrvData(hrvData)
            dfSamsung = loader.mergeStressAndHrv(dfStress, dfHrv)

            dfSamsung['source'] = 'Samsung'
            # NOTE(review): Samsung rows carry no meanHr/pnn50, so those columns
            # become NaN after the concat below (and are missing entirely when
            # Samsung is the only source) - confirm this is intended.
            allData.append(dfSamsung[['sdnn', 'rmssd', 'stressClass', 'source']])
            print(f"Added {len(dfSamsung)} Samsung samples")

    if useWesad:
        print("\nLoading WESAD data...")
        wesadLoader = WesadDataLoader()
        dfWesad = wesadLoader.processWesadData(wesadDir)

        if dfWesad is not None and len(dfWesad) > 0:
            allData.append(dfWesad[['sdnn', 'rmssd', 'meanHr', 'pnn50', 'stressClass', 'source']])

    # Fail with an actionable message instead of pandas' "No objects to
    # concatenate" when no source produced any rows.
    if not allData:
        raise ValueError(
            "No training data was loaded - check samsungDir/wesadDir and the "
            "useSamsung/useWesad flags"
        )

    print("\nCombining datasets...")
    df = pd.concat(allData, ignore_index=True)

    print(f"\nTotal samples: {len(df)}")
    for src in df['source'].unique():
        srcDf = df[df['source'] == src]
        print(f"\n{src}:")
        print(f"  Samples: {len(srcDf)}")
        print(f"  Low: {(srcDf['stressClass'] == 0).sum()}")
        print(f"  Moderate: {(srcDf['stressClass'] == 1).sum()}")
        print(f"  High: {(srcDf['stressClass'] == 2).sum()}")

    print("\nTraining model...")
    # Column order must match StressClassifier.features.
    X = df[['sdnn', 'rmssd', 'meanHr', 'pnn50']].values
    y = df['stressClass'].values

    clf = StressClassifier()
    clf.train(X, y)
    clf.saveModel()

    print("\nDONE!")

In [6]:
print("Looking for model files...")

# Search the tree two levels above the working directory for a saved
# model/scaler pair and load it into `model` and `scaler`.
startPath = os.path.abspath('../../')
modelPath = None
scalerPath = None

for root, dirs, files in os.walk(startPath):
    hasModel = 'stress_model.pkl' in files
    hasScaler = 'scaler.pkl' in files
    if hasModel:
        print(f"Found model at: {os.path.join(root, 'stress_model.pkl')}")
    if hasScaler:
        print(f"Found scaler at: {os.path.join(root, 'scaler.pkl')}")
    # Only accept a directory that holds BOTH files so the scaler always
    # belongs to the model it is loaded with. As before, the last match in
    # walk order wins.
    if hasModel and hasScaler:
        modelPath = os.path.join(root, 'stress_model.pkl')
        scalerPath = os.path.join(root, 'scaler.pkl')

if modelPath is None or scalerPath is None:
    raise FileNotFoundError(
        f"No directory under {startPath} contains both stress_model.pkl and scaler.pkl"
    )

print("Loading model...")
model = joblib.load(modelPath)
print("Model loaded")

print("Loading scaler...")
scaler = joblib.load(scalerPath)
print("Scaler loaded")

print("All files loaded successfully")

# show file locations
print("\n" + "="*60)
print("MODEL FILE LOCATIONS:")
print("="*60)
print(f"Model file: {modelPath}")
print(f"Scaler file: {scalerPath}")
print("="*60 + "\n")

Looking for model files...
Found model at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\X2\XGBoost\stress_model.pkl
Found scaler at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\X2\XGBoost\scaler.pkl
Found model at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\X2\XGBoost\Others\stress_model.pkl
Found scaler at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\X2\XGBoost\Others\scaler.pkl
Found model at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\XGBoost\stress_model.pkl
Found scaler at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\XGBoost\scaler.pkl
Found model at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\XGBoost\Others\stress_model.pkl
Found scaler at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver1 on git\HRVModule\XGBoost\Others\scaler.pkl
Found model at: d:\laiba\

## 4 

Predictor: this cell needs to be run before using the predictor.

In [55]:
#StressPredictor Class

class StressPredictor:
    """Scores HRV readings with a saved stress model and optionally writes results to Firestore.

    NOTE(review): ``runBatch`` references ``firestore.SERVER_TIMESTAMP`` but no
    ``firestore`` import is visible in this part of the notebook. The name must
    already exist in the kernel namespace, otherwise every save raises NameError
    and is counted as an error.
    """

    def __init__(self, modelFile='stress_model.pkl', scalerFile='scaler.pkl', db=None,
                 userId='QOyZROlPzUf25tKPvv0FWnd3NZw2', userEmail='test@email.com'):
        """Load the model and scaler with joblib.

        Args:
            modelFile: path of the saved classifier.
            scalerFile: path of the saved scaler.
            db: Firestore client used by ``runBatch``; ``None`` disables batch runs.
            userId: value written to the ``userId`` field of every saved prediction.
            userEmail: value written to the ``userEmail`` field of every saved prediction.
        """
        self.model = joblib.load(modelFile)
        self.scaler = joblib.load(scalerFile)
        self.db = db
        self.userId = userId
        self.userEmail = userEmail

        # Predictions below this top-class probability are flagged showToUser=False.
        self.minConfidence = 0.60
        # Accepted input ranges; values outside are rejected by predictSingle.
        self.minSdnn = 10
        self.maxSdnn = 200
        self.minRmssd = 5
        self.maxRmssd = 150

        print("Model loaded")

    def predictSingle(self, sdnn, rmssd, meanHr, pnn50, showDetails=True):
        """Classify one HRV reading.

        Args:
            sdnn, rmssd, meanHr, pnn50: features in the order the scaler expects.
            showDetails: print the input and the prediction when true.

        Returns:
            ``{'error': message}`` when ``sdnn`` or ``rmssd`` is out of range;
            otherwise a dict with ``stressLevel`` (int), ``stressLabel``,
            ``confidence`` (float), ``showToUser`` (bool) and ``probabilities``.
        """
        if not (self.minSdnn <= sdnn <= self.maxSdnn):
            return {'error': f'Invalid SDNN: {sdnn:.2f}'}
        if not (self.minRmssd <= rmssd <= self.maxRmssd):
            return {'error': f'Invalid RMSSD: {rmssd:.2f}'}

        X = np.array([[sdnn, rmssd, meanHr, pnn50]])
        xScaled = self.scaler.transform(X)

        pred = int(self.model.predict(xScaled)[0])
        proba = self.model.predict_proba(xScaled)[0]

        stressLabels = {0: 'LOW', 1: 'MODERATE', 2: 'HIGH'}
        confidence = float(proba.max())

        result = {
            'stressLevel': pred,
            'stressLabel': stressLabels[pred],
            'confidence': confidence,
            # Plain bool (not numpy.bool_) so the dict serialises cleanly.
            'showToUser': bool(confidence >= self.minConfidence),
            'probabilities': {
                'class_0_low': float(proba[0]),
                'class_1_medium': float(proba[1]),
                'class_2_high': float(proba[2])
            }
        }

        if showDetails:
            print(f"\nInput: SDNN={sdnn:.2f}, RMSSD={rmssd:.2f}, MeanHR={meanHr:.2f}, PNN50={pnn50:.2f}")
            print(f"Predicted: {result['stressLabel']} (confidence: {confidence:.1%})")
            print(f"Show to user: {'YES' if result['showToUser'] else 'NO'}")

        return result

    def calculateHrvFromIbi(self, ibiData):
        """Compute ``sdnn, rmssd, meanHr, pnn50`` from inter-beat intervals in ms.

        Intervals outside the open range (300, 2000) are discarded first.

        Returns:
            Dict of floats, or ``None`` when fewer than two intervals are
            available before or after filtering.
        """
        if len(ibiData) < 2:
            return None

        ibi = np.array(ibiData)
        ibi = ibi[(ibi > 300) & (ibi < 2000)]

        if len(ibi) < 2:
            return None

        diffIbi = np.diff(ibi)  # has at least one element because len(ibi) >= 2

        return {
            'sdnn': float(np.std(ibi)),
            'rmssd': float(np.sqrt(np.mean(diffIbi ** 2))),
            'meanHr': float(60000 / np.mean(ibi)),
            'pnn50': float(np.sum(np.abs(diffIbi) > 50) / len(diffIbi) * 100)
        }

    def _predictForRecord(self, data):
        """Produce a prediction for one ``heart_rate_data`` record, or ``None`` if unusable.

        Preference order: raw ``ibi`` list, then precomputed HRV fields, then a
        bare ``heart_rate`` value mapped to fixed placeholder HRV numbers.
        """
        if 'ibi' in data and isinstance(data['ibi'], list) and len(data['ibi']) > 0:
            hrv = self.calculateHrvFromIbi(data['ibi'])
            if hrv is None:
                return None
            return self.predictSingle(
                hrv['sdnn'], hrv['rmssd'], hrv['meanHr'], hrv['pnn50'],
                showDetails=False
            )

        if all(k in data for k in ['sdnn', 'rmssd', 'meanHr', 'pnn50']):
            return self.predictSingle(
                data['sdnn'], data['rmssd'], data['meanHr'], data['pnn50'],
                showDetails=False
            )

        if 'heart_rate' in data:
            hr = data['heart_rate']

            # No HRV available: substitute fixed values chosen by heart-rate band.
            if hr < 70:
                sdnn, rmssd, pnn50 = 80, 65, 30
            elif hr < 90:
                sdnn, rmssd, pnn50 = 48, 38, 15
            else:
                sdnn, rmssd, pnn50 = 15, 10, 5

            return self.predictSingle(sdnn, rmssd, hr, pnn50, showDetails=False)

        return None

    def runBatch(self, hours=1):
        """Predict stress for recent ``heart_rate_data`` documents and save the results.

        Documents whose ``timestamp`` is at or after ``now - hours`` are read and
        each successful prediction is added to ``stress_predictions2``.

        Returns:
            ``[]`` when no database client is configured; otherwise a dict with
            ``processed``, ``saved``, ``errors`` and ``timestamp``, or
            ``{'error': message}`` when the query itself fails.
        """
        if self.db is None:
            print("No Firebase connection")
            return []

        print(f"\n{'='*60}")
        print(f"BATCH PROCESSING - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*60}")

        # NOTE(review): naive local time - confirm how the Firestore client
        # interprets naive datetimes versus the stored `timestamp` values.
        cutoff = datetime.now() - timedelta(hours=hours)
        print(f"Processing data from last {hours} hour(s)")
        print(f"Cutoff time: {cutoff.strftime('%Y-%m-%d %H:%M:%S')}")

        try:
            wearableRef = self.db.collection('heart_rate_data')

            docs = wearableRef.where('timestamp', '>=', cutoff).stream()

            processed = 0
            saved = 0
            errors = 0

            for doc in docs:
                try:
                    prediction = self._predictForRecord(doc.to_dict())

                    # Unusable record or out-of-range input both count as errors.
                    if prediction is None or 'error' in prediction:
                        errors += 1
                        continue

                    self.db.collection('stress_predictions2').add({
                        'userId': self.userId,
                        'userEmail': self.userEmail,
                        'stress_probabilities': prediction['probabilities'],
                        'stressLevel': prediction['stressLevel'],
                        'stressLabel': prediction['stressLabel'],
                        'confidence': prediction['confidence'],
                        'prediction_timestamp': firestore.SERVER_TIMESTAMP,
                        'source': 'batch_prediction',
                        'source_doc_id': doc.id
                    })

                    saved += 1
                    processed += 1

                except Exception as e:
                    # Best effort: one bad document must not abort the batch.
                    errors += 1
                    print(f"Error processing document {doc.id}: {e}")
                    continue

            print(f"\nBATCH RESULTS:")
            print(f"  Processed: {processed}")
            print(f"  Saved: {saved}")
            print(f"  Errors: {errors}")
            print(f"{'='*60}\n")

            return {
                'processed': processed,
                'saved': saved,
                'errors': errors,
                'timestamp': datetime.now()
            }

        except Exception as e:
            print(f"Batch processing failed: {e}")
            import traceback
            traceback.print_exc()
            return {'error': str(e)}

    def fetchAndPredictFromFirebase(self, hours=1):
        """Alias for ``runBatch``."""
        return self.runBatch(hours=hours)

## 5

Firebase


In [12]:
"""
Stress Prediction System - NO USER ID REQUIRED
Processes ALL heart_rate_data without user filtering
"""

from datetime import datetime, timedelta
import joblib
import numpy as np


class StressPredictor:
    """Stress predictor - processes ALL data regardless of user.

    NOTE(review): an earlier cell of this notebook defines another class named
    ``StressPredictor`` (camelCase API); running this cell rebinds the name.
    NOTE(review): ``run_batch`` references ``firestore.SERVER_TIMESTAMP`` but no
    ``firestore`` import is visible in this cell; it must already exist in the
    kernel namespace.
    """

    def __init__(self, model_file, scaler_file, db, default_user_id='QOyZROlPzUf25tKPvv0FWnd3NZw2', default_email='user@healmind.com'):
        """Load the model and scaler with joblib and remember the Firestore client.

        ``default_user_id`` and ``default_email`` are written to every saved
        prediction.
        """
        self.model = joblib.load(model_file)
        self.scaler = joblib.load(scaler_file)
        self.db = db
        self.default_user_id = default_user_id
        self.default_email = default_email
        print("Model and scaler loaded successfully")
        print(f"Default user: {default_email} (ID: {default_user_id})")

    def calculate_hrv_from_ibi(self, ibi_data):
        """Calculate HRV metrics from IBI data.

        Args:
            ibi_data: list of inter-beat intervals in ms (anything that is not a
                list is rejected).

        Returns:
            Dict with float ``rmssd``, ``sdnn``, ``mean_hr`` and ``pnn50``, or
            ``None`` when fewer than two intervals remain after dropping values
            outside the open range (300, 2000).
        """
        if not isinstance(ibi_data, list) or len(ibi_data) < 2:
            return None

        ibi = np.array(ibi_data)

        # Filter valid IBI values (300-2000ms)
        ibi = ibi[(ibi > 300) & (ibi < 2000)]

        if len(ibi) < 2:
            return None

        # Calculate HRV metrics (4 features to match model training)
        rmssd = np.sqrt(np.mean(np.diff(ibi) ** 2))
        sdnn = np.std(ibi)
        mean_hr = 60000 / np.mean(ibi) if np.mean(ibi) > 0 else 0

        # pNN50: percentage of successive intervals that differ by more than 50ms
        diff_ibi = np.abs(np.diff(ibi))
        pnn50 = (np.sum(diff_ibi > 50) / len(diff_ibi) * 100) if len(diff_ibi) > 0 else 0

        return {
            'rmssd': float(rmssd),
            'sdnn': float(sdnn),
            'mean_hr': float(mean_hr),
            'pnn50': float(pnn50)
        }

    def predict_stress(self, hrv_metrics):
        """Predict stress level from HRV metrics.

        Args:
            hrv_metrics: dict produced by ``calculate_hrv_from_ibi`` (or ``None``).

        Returns:
            ``None`` for ``None`` input; otherwise a dict with ``stressLevel``,
            ``stressLabel``, ``confidence`` and ``probabilities``.
        """
        if hrv_metrics is None:
            return None

        # Feature order must match training: sdnn, rmssd, meanHr, pnn50.
        # (Feeding rmssd first would scale and score each value as the wrong feature.)
        features = np.array([[
            hrv_metrics['sdnn'],
            hrv_metrics['rmssd'],
            hrv_metrics['mean_hr'],
            hrv_metrics['pnn50']
        ]])

        # Scale and predict
        features_scaled = self.scaler.transform(features)
        probabilities = self.model.predict_proba(features_scaled)[0]
        prediction = self.model.predict(features_scaled)[0]

        stress_labels = ['LOW', 'MEDIUM', 'HIGH']

        return {
            'stressLevel': int(prediction),
            'stressLabel': stress_labels[int(prediction)],
            'confidence': float(max(probabilities)),
            'probabilities': {
                'class_0_low': float(probabilities[0]),
                'class_1_medium': float(probabilities[1]),
                'class_2_high': float(probabilities[2])
            }
        }

    def run_batch(self, hours=24, verbose=True):
        """Process ALL documents from heart_rate_data - NO USER FILTER.

        Documents whose ``timestamp`` is at or after ``now - hours`` are read;
        each one with usable ``ibi`` data gets a prediction that is added to
        ``stress_predictions2``.

        Args:
            hours: size of the look-back window.
            verbose: print the header, progress lines and per-document errors.

        Returns:
            Dict with ``processed``, ``skipped``, ``errors`` and ``total``, or
            ``{'error': message}`` when the batch fails as a whole.
        """
        # NOTE(review): naive local time - confirm how the Firestore client
        # interprets naive datetimes versus the stored `timestamp` values.
        cutoff = datetime.now() - timedelta(hours=hours)

        if verbose:
            print(f"\nBATCH PROCESSING - ALL DATA")
            print(f"Time window: Last {hours} hours")
            print(f"Cutoff: {cutoff}\n")

        try:
            # Get ALL documents - NO userId filter
            print(f"Querying heart_rate_data collection...")
            docs = self.db.collection('heart_rate_data')\
                .where('timestamp', '>=', cutoff)\
                .stream()

            # Materialise the stream so the total is known up front.
            docs_list = list(docs)
            total_docs = len(docs_list)

            print(f"Found {total_docs} documents\n")

            if total_docs == 0:
                print(f"NO DOCUMENTS FOUND in the last {hours} hours!")
                return {'processed': 0, 'skipped': 0, 'errors': 0, 'total': 0}

            processed = 0
            skipped = 0
            errors = 0

            for idx, doc in enumerate(docs_list, 1):
                try:
                    data = doc.to_dict()

                    # Extract IBI data
                    ibi_data = data.get('ibi', [])

                    if not ibi_data:
                        skipped += 1
                        continue

                    # Calculate HRV
                    hrv_metrics = self.calculate_hrv_from_ibi(ibi_data)

                    if hrv_metrics is None:
                        skipped += 1
                        continue

                    # Predict stress
                    prediction = self.predict_stress(hrv_metrics)

                    if prediction is None:
                        skipped += 1
                        continue

                    # Save prediction - use default userId and email
                    self.db.collection('stress_predictions2').add({
                        'userId': self.default_user_id,
                        'userEmail': self.default_email,
                        'stress_probabilities': prediction['probabilities'],
                        'stressLevel': prediction['stressLevel'],
                        'stressLabel': prediction['stressLabel'],
                        'confidence': prediction['confidence'],
                        'prediction_timestamp': firestore.SERVER_TIMESTAMP,
                        'source': 'batch_prediction',
                        'source_doc_id': doc.id,
                        'hrv_metrics': hrv_metrics
                    })

                    processed += 1

                    # Progress indicator
                    if verbose and processed % 50 == 0:
                        print(f"  Progress: {idx}/{total_docs} docs | {processed} processed | {skipped} skipped")

                except Exception as e:
                    # Best effort: one bad document must not abort the batch.
                    if verbose:
                        print(f"  Error on doc {doc.id}: {str(e)}")
                    errors += 1

            result = {
                'processed': processed,
                'skipped': skipped,
                'errors': errors,
                'total': total_docs
            }

            print(f"\nBATCH PROCESSING COMPLETE")
            print(f"  Successfully processed: {processed}")
            print(f"  Skipped (insufficient data): {skipped}")
            print(f"  Errors: {errors}")
            print(f"  Total documents checked: {total_docs}")
            print(f"  Success rate: {(processed/total_docs*100):.1f}%\n")

            if processed > 0:
                print(f"{processed} predictions saved to 'stress_predictions2' collection!")
            else:
                print(f"No predictions were saved. Check the skipped count above.")

            return result

        except Exception as e:
            print(f"\nFATAL ERROR in batch processing")
            print(f"   Error: {str(e)}")
            import traceback
            traceback.print_exc()
            return {'error': str(e)}


def quick_data_check(db, hours=24):
    """Quick check of what's in the database.

    Prints how many ``heart_rate_data`` documents fall inside the last ``hours``
    hours, how many of them carry at least two IBI values, a sample document,
    and a summary of the ``stress_predictions2`` collection.

    Args:
        db: Firestore client (anything exposing ``collection(...).where(...).stream()``).
        hours: size of the look-back window for ``heart_rate_data``.

    Returns:
        None; the report is printed.
    """
    print(f"\nQUICK DATA CHECK\n")

    # NOTE(review): naive local time - confirm how the Firestore client
    # interprets naive datetimes versus the stored `timestamp` values.
    cutoff = datetime.now() - timedelta(hours=hours)

    # Check heart_rate_data
    print(f"Checking heart_rate_data (last {hours} hours)...")
    docs = db.collection('heart_rate_data').where('timestamp', '>=', cutoff).stream()
    docs_list = list(docs)

    print(f"   Total documents: {len(docs_list)}")

    if len(docs_list) > 0:
        # Count documents with valid IBI data
        valid_ibi = 0
        total_ibi_values = 0

        for doc in docs_list:
            data = doc.to_dict()
            ibi = data.get('ibi', [])
            if isinstance(ibi, list) and len(ibi) >= 2:
                valid_ibi += 1
                total_ibi_values += len(ibi)

        print(f"   Documents with valid IBI (≥2 values): {valid_ibi}")
        print(f"   Total IBI values: {total_ibi_values}")
        # Average is taken over ALL documents, including those without valid IBI.
        print(f"   Average IBI per doc: {total_ibi_values/len(docs_list):.1f}")

        # Show sample
        print(f"\n   Sample document:")
        sample = docs_list[0].to_dict()
        print(f"      Fields: {list(sample.keys())}")
        print(f"      IBI count: {len(sample.get('ibi', []))}")
        if sample.get('ibi'):
            print(f"      IBI sample: {sample['ibi'][:10]}")

    # Check stress_predictions2
    print(f"\nChecking stress_predictions2...")
    pred_docs = db.collection('stress_predictions2').stream()
    pred_docs_list = list(pred_docs)

    print(f"   Total predictions: {len(pred_docs_list)}")

    if len(pred_docs_list) > 0:
        # NOTE(review): the stream is not ordered by time, so the last element
        # is not guaranteed to be the newest prediction.
        recent = pred_docs_list[-1].to_dict()
        confidence = recent.get('confidence')
        # A percent format spec only works on numbers; fall back to N/A otherwise.
        confidence_text = f"{confidence:.1%}" if isinstance(confidence, (int, float)) else 'N/A'
        print(f"\n   Most recent prediction:")
        print(f"      Stress level: {recent.get('stressLabel', 'N/A')}")
        print(f"      Confidence: {confidence_text}")
        print(f"      Source: {recent.get('source', 'N/A')}")

    print("\n")

In [None]:
"""
Stress Prediction System - NO USER ID REQUIRED
Processes ALL heart_rate_data without user filtering
"""

from datetime import datetime, timedelta
import joblib
import numpy as np


class StressPredictor:
    """Batch stress predictor over Firestore heart-rate documents.

    Loads a model and a scaler with joblib, derives four HRV features
    (RMSSD, SDNN, mean heart rate, pNN50) from each document's ``ibi`` list,
    and writes one prediction per usable document to the
    ``stress_predictions2`` collection. Documents are not filtered by user;
    every stored prediction is attributed to the default user id and email
    supplied at construction time.
    """

    # Inter-beat intervals (milliseconds) outside this open range are discarded
    # before any HRV feature is computed.
    IBI_MIN_MS = 300
    IBI_MAX_MS = 2000

    # Label for each predicted class index.
    STRESS_LABELS = ('LOW', 'MEDIUM', 'HIGH')

    def __init__(self, model_file, scaler_file, db, default_user_id='QOyZROlPzUf25tKPvv0FWnd3NZw2', default_email='user@healmind.com'):
        """Load the model and scaler and remember the database handle.

        Args:
            model_file: Path passed to ``joblib.load`` for the classifier.
            scaler_file: Path passed to ``joblib.load`` for the feature scaler.
            db: Database client exposing ``collection(name)``.
            default_user_id: ``userId`` written on every stored prediction.
            default_email: ``userEmail`` written on every stored prediction.
        """
        self.model = joblib.load(model_file)
        self.scaler = joblib.load(scaler_file)
        self.db = db
        self.default_user_id = default_user_id
        self.default_email = default_email
        print("Model and scaler loaded successfully")
        print(f"Default user: {default_email} (ID: {default_user_id})")

    def calculate_hrv_from_ibi(self, ibi_data):
        """Calculate HRV metrics from IBI data.

        Args:
            ibi_data: Sequence (list, tuple or ndarray) of inter-beat
                intervals in milliseconds.

        Returns:
            Dict with float values for ``rmssd``, ``sdnn``, ``mean_hr`` and
            ``pnn50``, or ``None`` when the input is not a sequence, cannot be
            read as numbers, or has fewer than two values inside the accepted
            range.
        """
        if not isinstance(ibi_data, (list, tuple, np.ndarray)):
            return None

        # Coerce to float up front: a None entry becomes NaN (and is removed by
        # the range filter below) instead of producing an object array whose
        # comparison raises; unparseable entries make the whole series unusable.
        try:
            ibi = np.asarray(ibi_data, dtype=float).ravel()
        except (TypeError, ValueError):
            return None

        ibi = ibi[(ibi > self.IBI_MIN_MS) & (ibi < self.IBI_MAX_MS)]

        if ibi.size < 2:
            return None

        # From here on there are at least two values, all above IBI_MIN_MS, so
        # the mean is positive and there is at least one successive difference.
        successive_diff = np.diff(ibi)

        rmssd = np.sqrt(np.mean(successive_diff ** 2))
        sdnn = np.std(ibi)  # population standard deviation (numpy default ddof=0)
        mean_hr = 60000 / np.mean(ibi)
        # pNN50: percentage of successive intervals differing by more than 50 ms
        pnn50 = np.sum(np.abs(successive_diff) > 50) / len(successive_diff) * 100

        return {
            'rmssd': float(rmssd),
            'sdnn': float(sdnn),
            'mean_hr': float(mean_hr),
            'pnn50': float(pnn50)
        }

    def predict_stress(self, hrv_metrics):
        """Predict stress level from HRV metrics.

        Args:
            hrv_metrics: Dict as returned by ``calculate_hrv_from_ibi``, or
                ``None``.

        Returns:
            ``None`` when ``hrv_metrics`` is ``None``; otherwise a dict with
            ``stressLevel`` (class index), ``stressLabel``, ``confidence``
            (largest class probability) and ``probabilities``.
        """
        if hrv_metrics is None:
            return None

        # NOTE(review): feature order must match the order used when the scaler
        # and model were fitted - confirm against the training cell.
        features = np.array([[
            hrv_metrics['rmssd'],
            hrv_metrics['sdnn'],
            hrv_metrics['mean_hr'],
            hrv_metrics['pnn50']
        ]])

        features_scaled = self.scaler.transform(features)
        probabilities = self.model.predict_proba(features_scaled)[0]
        prediction = self.model.predict(features_scaled)[0]

        # Indexing below assumes the model exposes exactly three classes (0, 1, 2).
        return {
            'stressLevel': int(prediction),
            'stressLabel': self.STRESS_LABELS[int(prediction)],
            'confidence': float(max(probabilities)),
            'probabilities': {
                'class_0_low': float(probabilities[0]),
                'class_1_medium': float(probabilities[1]),
                'class_2_high': float(probabilities[2])
            }
        }

    def _process_document(self, doc):
        """Predict for one heart-rate document and store the result.

        Returns ``True`` when a prediction was written, ``False`` when the
        document had no usable IBI data. Exceptions propagate to the caller.
        """
        data = doc.to_dict() or {}

        # A missing, empty or malformed 'ibi' field yields None metrics, and
        # predict_stress passes None straight through.
        hrv_metrics = self.calculate_hrv_from_ibi(data.get('ibi', []))
        prediction = self.predict_stress(hrv_metrics)

        if prediction is None:
            return False

        # `firestore` is resolved from the notebook's globals at call time; the
        # cell importing firebase_admin must have run before run_batch is used.
        # NOTE(review): add() creates a new document on every run, so running
        # the same window twice stores duplicate predictions.
        self.db.collection('stress_predictions2').add({
            'userId': self.default_user_id,
            'userEmail': self.default_email,
            'stress_probabilities': prediction['probabilities'],
            'stressLevel': prediction['stressLevel'],
            'stressLabel': prediction['stressLabel'],
            'confidence': prediction['confidence'],
            'prediction_timestamp': firestore.SERVER_TIMESTAMP,
            'source': 'batch_prediction',
            'source_doc_id': doc.id,
            'hrv_metrics': hrv_metrics
        })
        return True

    def run_batch(self, hours=24, verbose=True):
        """Process ALL documents from heart_rate_data - NO USER FILTER.

        Args:
            hours: Size of the look-back window, in hours.
            verbose: When true, also print the header, progress lines and
                per-document error messages.

        Returns:
            ``{'processed', 'skipped', 'errors', 'total'}`` counts, or
            ``{'error': message}`` when the query or the batch as a whole fails.
        """
        # Local import: the defining cell only imports datetime and timedelta.
        from datetime import timezone

        # Timezone-aware UTC cutoff: a naive local time does not denote the
        # same instant on machines whose clock is not set to UTC, which shifts
        # the query window by the local UTC offset.
        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)

        if verbose:
            print(f"\n---------------------------------------------------------------")
            print(f"BATCH PROCESSING - ALL DATA")
            print(f"Time window: Last {hours} hours")
            print(f"Cutoff: {cutoff}")
            print(f"---------------------------------------------------------------\n")

        try:
            # Get ALL documents - NO userId filter
            print(f"Querying heart_rate_data collection...")
            docs = self.db.collection('heart_rate_data')\
                .where('timestamp', '>=', cutoff)\
                .stream()

            docs_list = list(docs)
            total_docs = len(docs_list)

            print(f"Found {total_docs} documents\n")

            if total_docs == 0:
                print(f"NO DOCUMENTS FOUND in the last {hours} hours!")
                return {'processed': 0, 'skipped': 0, 'errors': 0, 'total': 0}

            processed = 0
            skipped = 0
            errors = 0

            for idx, doc in enumerate(docs_list, 1):
                try:
                    if not self._process_document(doc):
                        skipped += 1
                        continue

                    processed += 1

                    # Progress indicator
                    if verbose and processed % 50 == 0:
                        print(f"  Progress: {idx}/{total_docs} docs | {processed} processed | {skipped} skipped")

                except Exception as e:
                    # One bad document must not abort the whole batch.
                    if verbose:
                        print(f"  Error on doc {doc.id}: {str(e)}")
                    errors += 1

            result = {
                'processed': processed,
                'skipped': skipped,
                'errors': errors,
                'total': total_docs
            }

            print(f"\n---------------------------------------------------------------")
            print(f"BATCH PROCESSING COMPLETE")
            print(f"---------------------------------------------------------------")
            print(f"  Successfully processed: {processed}")
            print(f"  Skipped (insufficient data): {skipped}")
            print(f"  Errors: {errors}")
            print(f"  Total documents checked: {total_docs}")
            print(f"  Success rate: {(processed/total_docs*100):.1f}%")
            print(f"---------------------------------------------------------------\n")

            if processed > 0:
                print(f"{processed} predictions saved to 'stress_predictions2' collection!")
            else:
                print(f"No predictions were saved. Check the skipped count above.")

            return result

        except Exception as e:
            print(f"\nFATAL ERROR in batch processing")
            print(f"   Error: {str(e)}")
            import traceback
            traceback.print_exc()
            return {'error': str(e)}


# ------------------------------------------------------------
# DEBUGGING UTILITIES
# ------------------------------------------------------------

def quick_data_check(db, hours=24):
    """Print a quick summary of what is in the database.

    Reports how many ``heart_rate_data`` documents fall inside the last
    ``hours`` hours and how many of them carry a usable ``ibi`` list, shows one
    sample document, then reports the size of ``stress_predictions2`` and the
    newest prediction in it.

    Args:
        db: Database client exposing ``collection(name)``.
        hours: Size of the look-back window for ``heart_rate_data``, in hours.

    Note:
        Both collections are streamed in full to be counted, so every call
        costs one read per matching document.
    """
    # Local import: the defining cell only imports datetime and timedelta.
    from datetime import timezone

    print(f"\n---------------------------------------------------------------")
    print(f"QUICK DATA CHECK")
    print(f"---------------------------------------------------------------\n")

    # Timezone-aware UTC cutoff so the window does not shift with the local
    # UTC offset of the machine running the notebook.
    cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)

    # Check heart_rate_data
    print(f"Checking heart_rate_data (last {hours} hours)...")
    hr_query = db.collection('heart_rate_data').where('timestamp', '>=', cutoff)
    hr_records = [doc.to_dict() or {} for doc in hr_query.stream()]

    print(f"   Total documents: {len(hr_records)}")

    if hr_records:
        # Lengths of every IBI list that has at least two values
        ibi_lengths = [
            len(record['ibi'])
            for record in hr_records
            if isinstance(record.get('ibi'), list) and len(record['ibi']) >= 2
        ]
        valid_ibi = len(ibi_lengths)
        total_ibi_values = sum(ibi_lengths)

        print(f"   Documents with valid IBI (≥2 values): {valid_ibi}")
        print(f"   Total IBI values: {total_ibi_values}")
        print(f"   Average IBI per doc: {total_ibi_values/len(hr_records):.1f}")

        # Show sample
        print(f"\n   Sample document:")
        sample = hr_records[0]
        sample_ibi = sample.get('ibi')
        if not isinstance(sample_ibi, list):
            # Missing or oddly typed field: report it as empty instead of failing on len()
            sample_ibi = []
        print(f"      Fields: {list(sample.keys())}")
        print(f"      IBI count: {len(sample_ibi)}")
        if sample_ibi:
            print(f"      IBI sample: {sample_ibi[:10]}")

    # Check stress_predictions2
    print(f"\nChecking stress_predictions2...")
    pred_records = [doc.to_dict() or {} for doc in db.collection('stress_predictions2').stream()]

    print(f"   Total predictions: {len(pred_records)}")

    if pred_records:
        # The stream is not ordered by time, so choose the newest record by its
        # stored timestamp; fall back to the last streamed record when no
        # timestamps are available or they cannot be compared.
        dated = [r for r in pred_records if r.get('prediction_timestamp') is not None]
        try:
            recent = max(dated, key=lambda r: r['prediction_timestamp']) if dated else pred_records[-1]
        except TypeError:
            recent = pred_records[-1]

        # Only apply the percent format to real numbers; a missing value would
        # otherwise raise ValueError on the 'N/A' placeholder.
        confidence = recent.get('confidence')
        confidence_text = f"{confidence:.1%}" if isinstance(confidence, (int, float)) else 'N/A'

        print(f"\n   Most recent prediction:")
        print(f"      Stress level: {recent.get('stressLabel', 'N/A')}")
        print(f"      Confidence: {confidence_text}")
        print(f"      Source: {recent.get('source', 'N/A')}")

    print(f"\n---------------------------------------------------------------\n")

In [6]:
# imports 

import os
import firebase_admin
from firebase_admin import credentials, firestore
from datetime import datetime, timedelta
import joblib
import numpy as np

In [7]:
# find firebase credentials
# The service-account file is located by name anywhere under the directory two
# levels above the notebook; the first match wins.

print("Looking for Firebase credentials...")

startPath = os.path.abspath('../../')
credPath = None

for root, dirs, files in os.walk(startPath):
    if 'healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json' in files:
        credPath = os.path.join(root, 'healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json')
        break

print("cred path:", credPath)

# Fail here with a clear message instead of inside credentials.Certificate(None)
if credPath is None:
    raise FileNotFoundError(
        f"Firebase credentials file not found under {startPath}"
    )


# connect firebase

print("connecting to firebase...")

cred = credentials.Certificate(credPath)

try:
    firebase_admin.initialize_app(cred)
except ValueError:
    # Raised when the default app already exists, i.e. this cell was re-run in
    # the same kernel. Any other failure is allowed to surface.
    print("firebase app already initialised, reusing it")

db = firestore.client()
print("firebase connected")


# search for model + scaler
# No early exit here: when several copies exist under startPath, the last one
# visited by os.walk is the one that gets loaded.

print("searching for model files...")

startPath = os.path.abspath('../../')   # reused
modelPath = None
scalerPath = None

for root, dirs, files in os.walk(startPath):
    if 'stress_model.pkl' in files:
        modelPath = os.path.join(root, 'stress_model.pkl')
        print("model found:", modelPath)

    if 'scaler.pkl' in files:
        scalerPath = os.path.join(root, 'scaler.pkl')
        print("scaler found:", scalerPath)

# Report every missing artefact at once rather than failing inside joblib.load(None)
missingFiles = [
    name
    for name, path in (('stress_model.pkl', modelPath), ('scaler.pkl', scalerPath))
    if path is None
]
if missingFiles:
    raise FileNotFoundError(
        f"Could not find {', '.join(missingFiles)} under {startPath}"
    )


# load model stuff

print("loading model...")
model = joblib.load(modelPath)
print("model loaded")

print("loading scaler...")
scaler = joblib.load(scalerPath)
print("scaler loaded")


Looking for Firebase credentials...
cred path: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2 - Copy\HealMind_Ver3\healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json
connecting to firebase...
firebase connected
searching for model files...
model found: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2 - Copy\HealMind_Ver3\HRVModule\XG_Calc_predict\stress_model.pkl
scaler found: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2 - Copy\HealMind_Ver3\HRVModule\XG_Calc_predict\scaler.pkl
loading model...
model loaded
loading scaler...
scaler loaded


In [17]:
# predictor
# Builds the predictor from the model/scaler paths and Firestore client set up
# in the previous cell, then runs one batch over the last hour of data.

print("predictor...")

predictor = StressPredictor(
    model_file=modelPath,
    scaler_file=scalerPath,
    db=db,
    default_user_id='QOyZROlPzUf25tKPvv0FWnd3NZw2',
    default_email='test@email.com'
)

print("predictor ready")


print("SETUP DONE")


#  run it

print("running batch...")

result = predictor.run_batch(hours=1)

# run_batch reports a fatal failure by returning {'error': ...} instead of
# raising, so check for it before claiming success.
if 'error' in result:
    print("batch failed:", result['error'])
else:
    print("done")




predictor...
Model and scaler loaded successfully
Default user: test@email.com (ID: QOyZROlPzUf25tKPvv0FWnd3NZw2)
predictor ready
SETUP DONE
running batch...

BATCH PROCESSING - ALL DATA
Time window: Last 1 hours
Cutoff: 2026-01-20 03:00:36.478498

Querying heart_rate_data collection...

FATAL ERROR in batch processing
   Error: Timeout of 300.0s exceeded, last exception: 429 Quota exceeded.
done


Traceback (most recent call last):
  File "d:\laiba\anaconda3\envs\TF\lib\site-packages\google\api_core\grpc_helpers.py", line 169, in error_remapped_callable
    return _StreamingResponseIterator(
  File "d:\laiba\anaconda3\envs\TF\lib\site-packages\google\api_core\grpc_helpers.py", line 91, in __init__
    self._stored_first_result = next(self._wrapped)
  File "d:\laiba\anaconda3\envs\TF\lib\site-packages\grpc\_channel.py", line 538, in __next__
    return self._next()
  File "d:\laiba\anaconda3\envs\TF\lib\site-packages\grpc\_channel.py", line 962, in _next
    raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
	status = StatusCode.RESOURCE_EXHAUSTED
	details = "Quota exceeded."
	debug_error_string = "UNKNOWN:Error received from peer ipv6:%5B2404:6800:4001:818::200a%5D:443 {grpc_message:"Quota exceeded.", grpc_status:8}"
>

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
 

In [None]:
# continuous prediction loop (keeps running)
# Polls heart_rate_data for documents newer than the last one seen, predicts a
# stress level for each, and stores it. Runs until the kernel is interrupted.

import time
from datetime import timezone

print("starting continuous stress prediction...")

# keep track of last processed timestamp (timezone-aware UTC)
last_checked_time = datetime.now(timezone.utc) - timedelta(minutes=5)
print("initial last_checked_time:", last_checked_time)


def predict_and_store(doc_id, data):
    """Predict stress for one heart-rate document and store the result.

    Uses the predictor's own HRV and prediction methods and writes the same
    fields that StressPredictor.run_batch writes, so both paths produce
    documents of the same shape.

    Returns True when a prediction was stored, False when the IBI data was
    not usable.
    """
    hrv_metrics = predictor.calculate_hrv_from_ibi(data.get('ibi'))
    prediction = predictor.predict_stress(hrv_metrics)

    if prediction is None:
        return False

    db.collection('stress_predictions2').add({
        'userId': predictor.default_user_id,
        'userEmail': predictor.default_email,
        'stress_probabilities': prediction['probabilities'],
        'stressLevel': prediction['stressLevel'],
        'stressLabel': prediction['stressLabel'],
        'confidence': prediction['confidence'],
        'prediction_timestamp': firestore.SERVER_TIMESTAMP,
        'source': 'batch_prediction',
        'source_doc_id': doc_id,
        'hrv_metrics': hrv_metrics
    })
    return True


while True:

    try:
        print("\nchecking for new heart rate data...")

        # query only NEW documents
        docs = (
            db.collection('heart_rate_data')
            .where('timestamp', '>', last_checked_time)
            .order_by('timestamp')
            .stream()
        )

        docs = list(docs)
        print("new docs found:", len(docs))

        if len(docs) == 0:
            print("no new data, waiting...")
        else:
            for doc in docs:
                data = doc.to_dict() or {}

                # update last processed time
                ts = data.get('timestamp')
                if ts:
                    last_checked_time = max(last_checked_time, ts)

                # basic sanity check (also covers 'ibi' stored as None or a non-list)
                ibi = data.get('ibi')
                if not isinstance(ibi, list) or len(ibi) < 2:
                    print(f"skipping {doc.id} (invalid IBI)")
                    continue

                print(f"predicting stress for doc: {doc.id}")

                # Isolate failures per document so one bad record does not
                # abort the rest of this poll.
                try:
                    stored = predict_and_store(doc.id, data)
                except Exception as doc_error:
                    print(f"prediction failed for {doc.id}: {doc_error}")
                    continue

                if stored:
                    print(f"stored prediction for {doc.id}")
                else:
                    print(f"prediction skipped for {doc.id}")

        # wait before next poll
        time.sleep(30)   # adjust if needed

    except Exception as e:
        # Query-level failure (network, quota, ...): back off briefly and retry
        print("error occurred:", e)
        print("sleeping before retry...")
        time.sleep(10)


starting continuous stress prediction...
initial last_checked_time: 2026-01-19 18:35:05.347902+00:00

checking for new heart rate data...


KeyboardInterrupt: 