In [4]:
# Install dependencies
%pip install firebase-admin xgboost scikit-learn pandas numpy joblib schedule


Collecting xgboost
  Using cached xgboost-3.1.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.8.0-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting schedule
  Using cached schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Collecting threadpoolctl>=3.2.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached xgboost-3.1.3-py3-none-win_amd64.whl (72.0 MB)
Using cached scikit_learn-1.8.0-cp311-cp311-win_amd64.whl (8.1 MB)
Using cached schedule-1.2.2-py3-none-any.whl (12 kB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, schedule, xgboost, scikit-learn

   ---------------------------------------- 0/4 [threadpoolctl]
   -------------------- ------------------- 2/4 [xgboost]
   -------------------- ------------------- 2/4 [xgboost]
   -------------------- ------------------- 2/4 [xgboost]
   -------------------- ------------------- 2/4 [

How to firebase keyy??

Step by step to get Firebase service account key:

Go to: https://console.firebase.google.com

Click your existing project (the one with your 2 apps + website)

Click the ⚙️ gear icon (top left, next to project name)

Click "Project Settings"

Click the "Service Accounts" tab

Click the blue "Generate New Private Key" button

A JSON file downloads to your computer

In [5]:
#Import and setup fire base no need to upload like in canva
import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd
import numpy as np
import joblib
from datetime import datetime, timedelta
import schedule
import time
import os

def findFirebaseKey():
    startPath = os.path.abspath('../../')  # Go up 2 folders
    for root, dirs, files in os.walk(startPath):
        if 'healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json' in files:
            return os.path.join(root, 'healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json')
    raise FileNotFoundError("Firebase key not found!")

cred_path = findFirebaseKey()
cred = credentials.Certificate(cred_path)

try:
    firebase_admin.initialize_app(cred)
except ValueError:
    pass

db = firestore.client()
print(f"Firebase initialized! Found key at: {cred_path}")

Firebase initialized! Found key at: d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2 - Copy\HealMind_Ver3\healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json


In [6]:
import joblib
import os

# Find the model files
def findFiles(filename):
    startPath = os.path.abspath('../../')
    for root, dirs, files in os.walk(startPath):
        if filename in files:
            return os.path.join(root, filename)
    raise FileNotFoundError(f"{filename} not found!")

# Load model
try:
    modelPath = findFiles('stress_model.pkl')
    model = joblib.load(modelPath)
    print(f"loaded model from {modelPath}")
except Exception as e:
    print(f"error loading model: {e}")
    model = None

# Load scaler
try:
    scalerPath = findFiles('scaler.pkl')
    scaler = joblib.load(scalerPath)
    print(f"loaded scaler from {scalerPath}")
except Exception as e:
    print(f"error loading scaler: {e}")
    scaler = None

if model and scaler:
    print("Files loaded successfully.")
else:
    print("Error: Model or scaler not loaded properly.")

loaded model from d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2 - Copy\HealMind_Ver3\HRVModule\XGBoost\stress_model.pkl
loaded scaler from d:\laiba\Desktop\USM\CAT304W Drafts\Working\HealMind_ver2 - Copy\HealMind_Ver3\HRVModule\XGBoost\scaler.pkl
Files loaded successfully.


In [7]:
# Batch prediction class
class StressPredictor:
    def __init__(self, model, scaler, db):
        self.model = model
        self.scaler = scaler
        self.db = db
        self.feature_names = ['sdnn', 'rmssd']

    def fetch_unprocessed_data(self, hours=1):
        cutoff_time = datetime.utcnow() - timedelta(hours=hours)

        query = self.db.collection('heart_rate_data') \
            .where('timestamp', '>=', cutoff_time) \
            .stream()

        data_points = []
        for doc in query:
            data = doc.to_dict()
            data['doc_id'] = doc.id
            data_points.append(data)

        return pd.DataFrame(data_points) if data_points else pd.DataFrame()

    def group_by_window(self, df, window_minutes=5):
        if df.empty:
            return []

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp')

        windows = []
        for i in range(0, len(df), window_minutes * 12):
            window = df.iloc[i:i+window_minutes*12]
            if len(window) >= 3:
                windows.append(window)

        return windows

    def process_window(self, window):
        all_ibi = []
        for ibi_list in window['ibi'].dropna():
            if ibi_list:
                all_ibi.extend(ibi_list)

        if not all_ibi or len(all_ibi) < 2:
            return None

        ibi = np.array(all_ibi, dtype=float)
        sdnn = np.std(ibi)
        rmssd = np.sqrt(np.mean(np.diff(ibi) ** 2))

        X = np.array([[sdnn, rmssd]])
        X_scaled = self.scaler.transform(X)

        prediction = self.model.predict(X_scaled)[0]
        probability = self.model.predict_proba(X_scaled)[0]

        return {
            'stress_level': int(prediction),
            'stress_probabilities': {
                'class_0': float(probability[0]),
                'class_1': float(probability[1]) if len(probability) > 1 else 0.0
            },
            'sdnn': float(sdnn),
            'rmssd': float(rmssd),
            'window_start': window['timestamp'].min(),
            'window_end': window['timestamp'].max(),
            'prediction_timestamp': datetime.utcnow(),
            'num_samples': len(window)
        }

    def store_predictions(self, results):
        batch = self.db.batch()

        for result in results:
            doc_ref = self.db.collection('stress_predictions').document()
            batch.set(doc_ref, result)

        batch.commit()
        return len(results)

    def run_batch(self, hours=1):
        print(f"\n{'='*60}")
        print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*60}")

        try:
            print(f"Fetching data from last {hours} hour(s)...")
            df = self.fetch_unprocessed_data(hours=hours)

            if df.empty:
                print("No new data to process")
                return

            print(f"Loaded {len(df)} data points")

            windows = self.group_by_window(df)
            print(f"Created {len(windows)} time windows")

            results = []
            for i, window in enumerate(windows):
                result = self.process_window(window)
                if result:
                    results.append(result)

            print(f"Processed {len(results)} windows")

            if results:
                stored = self.store_predictions(results)
                print(f"Stored {stored} predictions to Firestore")

                stress_high = sum(1 for r in results if r['stress_level'] == 1)
                avg_prob = np.mean([r['stress_probabilities']['class_1'] for r in results])
                print(f"\nSummary:")
                print(f"  High stress: {stress_high}/{len(results)}")
                print(f"  Avg probability: {avg_prob:.2%}")

            print(f"{'='*60}\n")
            return results

        except Exception as e:
            print(f"Error: {str(e)}")
            return None


In [8]:
# Batch prediction class
class StressPredictor:
    def __init__(self, model, scaler, db):
        self.model = model
        self.scaler = scaler
        self.db = db
        self.feature_names = ['sdnn', 'rmssd']

    def fetch_unprocessed_data(self, hours=1):
        cutoff_time = datetime.utcnow() - timedelta(hours=hours)

        query = self.db.collection('heart_rate_data') \
            .where('timestamp', '>=', cutoff_time) \
            .stream()

        data_points = []
        for doc in query:
            data = doc.to_dict()
            data['doc_id'] = doc.id
            data_points.append(data)

        return pd.DataFrame(data_points) if data_points else pd.DataFrame()

    def group_by_window(self, df, window_minutes=5):
        if df.empty:
            return []

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp')

        windows = []
        for i in range(0, len(df), window_minutes * 12):
            window = df.iloc[i:i+window_minutes*12]
            if len(window) >= 3:
                windows.append(window)

        return windows

    def process_window(self, window):
        all_ibi = []
        for ibi_list in window['ibi'].dropna():
            if ibi_list:
                all_ibi.extend(ibi_list)

        if not all_ibi or len(all_ibi) < 2:
            return None

        ibi = np.array(all_ibi, dtype=float)
        sdnn = np.std(ibi)
        rmssd = np.sqrt(np.mean(np.diff(ibi) ** 2))

        X = np.array([[sdnn, rmssd]])
        X_scaled = self.scaler.transform(X)

        prediction = self.model.predict(X_scaled)[0]
        probability = self.model.predict_proba(X_scaled)[0]

        # Map prediction to stress level label
        stress_labels = {0: 'low', 1: 'medium', 2: 'high'}
        
        return {
            'stress_level': int(prediction),
            'stress_label': stress_labels[int(prediction)],
            'stress_probabilities': {
                'class_0_low': float(probability[0]),
                'class_1_medium': float(probability[1]) if len(probability) > 1 else 0.0,
                'class_2_high': float(probability[2]) if len(probability) > 2 else 0.0
            },
            'sdnn': float(sdnn),
            'rmssd': float(rmssd),
            'window_start': window['timestamp'].min(),
            'window_end': window['timestamp'].max(),
            'prediction_timestamp': datetime.utcnow(),
            'num_samples': len(window)
        }

    def store_predictions(self, results):
        batch = self.db.batch()

        for result in results:
            doc_ref = self.db.collection('stress_predictions').document()
            batch.set(doc_ref, result)

        batch.commit()
        return len(results)

    def run_batch(self, hours=1):
        print(f"\n{'='*60}")
        print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*60}")

        try:
            print(f"Fetching data from last {hours} hour(s)...")
            df = self.fetch_unprocessed_data(hours=hours)

            if df.empty:
                print("No new data to process")
                return

            print(f"Loaded {len(df)} data points")

            windows = self.group_by_window(df)
            print(f"Created {len(windows)} time windows")

            results = []
            for i, window in enumerate(windows):
                result = self.process_window(window)
                if result:
                    results.append(result)

            print(f"Processed {len(results)} windows")

            if results:
                stored = self.store_predictions(results)
                print(f"Stored {stored} predictions to Firestore")

                # Count each stress level
                stress_low = sum(1 for r in results if r['stress_level'] == 0)
                stress_medium = sum(1 for r in results if r['stress_level'] == 1)
                stress_high = sum(1 for r in results if r['stress_level'] == 2)
                
                # Average probabilities for each class
                avg_prob_low = np.mean([r['stress_probabilities']['class_0_low'] for r in results])
                avg_prob_medium = np.mean([r['stress_probabilities']['class_1_medium'] for r in results])
                avg_prob_high = np.mean([r['stress_probabilities']['class_2_high'] for r in results])
                
                print(f"\nSummary:")
                print(f"  Low stress:    {stress_low}/{len(results)} (avg prob: {avg_prob_low:.2%})")
                print(f"  Medium stress: {stress_medium}/{len(results)} (avg prob: {avg_prob_medium:.2%})")
                print(f"  High stress:   {stress_high}/{len(results)} (avg prob: {avg_prob_high:.2%})")

            print(f"{'='*60}\n")
            return results

        except Exception as e:
            print(f"Error: {str(e)}")
            return None

In [9]:
    def fetch_unprocessed_data(self, hours=1):
        cutoff_time = datetime.utcnow() - timedelta(hours=hours)

        # FIX THE WARNING: Use FieldFilter
        query = self.db.collection('heart_rate_data') \
            .where(filter=FieldFilter('timestamp', '>=', cutoff_time)) \
            .stream()

        data_points = []
        for doc in query:
            data = doc.to_dict()
            data['doc_id'] = doc.id
            data_points.append(data)

        return pd.DataFrame(data_points) if data_points else pd.DataFrame()

In [10]:
# Initialize and run 
predictor = StressPredictor(model, scaler, db)

print("Testing batch processor...")
predictor.run_batch(hours=120)

Testing batch processor...

BATCH JOB: 2026-01-15 05:48:49
Fetching data from last 120 hour(s)...


  return query.where(field_path, op_string, value)


Loaded 1426 data points
Created 24 time windows
Processed 17 windows
Stored 17 predictions to Firestore

Summary:
  Low stress:    14/17 (avg prob: 63.09%)
  Medium stress: 3/17 (avg prob: 28.04%)
  High stress:   0/17 (avg prob: 8.87%)



[{'stress_level': 0,
  'stress_label': 'low',
  'stress_probabilities': {'class_0_low': 0.925655722618103,
   'class_1_medium': 0.07093185186386108,
   'class_2_high': 0.0034124229568988085},
  'sdnn': 177.38187506055968,
  'rmssd': 228.4857763625561,
  'window_start': Timestamp('2026-01-11 13:46:37.004000+0000', tz='UTC'),
  'window_end': Timestamp('2026-01-11 13:46:46.096000+0000', tz='UTC'),
  'prediction_timestamp': datetime.datetime(2026, 1, 15, 5, 48, 52, 563846),
  'num_samples': 60},
 {'stress_level': 1,
  'stress_label': 'medium',
  'stress_probabilities': {'class_0_low': 0.46332642436027527,
   'class_1_medium': 0.5202280282974243,
   'class_2_high': 0.016445549204945564},
  'sdnn': 118.04339479850431,
  'rmssd': 181.07556903714996,
  'window_start': Timestamp('2026-01-11 13:46:46.107000+0000', tz='UTC'),
  'window_end': Timestamp('2026-01-11 13:47:22.427000+0000', tz='UTC'),
  'prediction_timestamp': datetime.datetime(2026, 1, 15, 5, 48, 52, 567846),
  'num_samples': 60},
 {

In [None]:
# Schedule to run every 30 minutes
def job():
    predictor.run_batch(hours=1)

schedule.every(5).minutes.do(job)

print("Scheduler started!")
print("Will run predictions every 30 minutes")
print("\nKeep this cell running to continue processing...")
job()
while True:
    schedule.run_pending()
    time.sleep(60)

Scheduler started!
Will run predictions every 30 minutes

Keep this cell running to continue processing...

BATCH JOB: 2026-01-15 05:48:58
Fetching data from last 1 hour(s)...
No new data to process


  return query.where(field_path, op_string, value)



BATCH JOB: 2026-01-15 05:49:58
Fetching data from last 1 hour(s)...


  return query.where(field_path, op_string, value)


Loaded 200 data points
Created 4 time windows
Processed 4 windows
Stored 4 predictions to Firestore

Summary:
  Low stress:    4/4 (avg prob: 98.60%)
  Medium stress: 0/4 (avg prob: 1.24%)
  High stress:   0/4 (avg prob: 0.16%)

