In [1]:
# Install dependencies
!pip install firebase-admin xgboost scikit-learn pandas numpy joblib schedule


Collecting schedule
  Downloading schedule-1.2.2-py3-none-any.whl.metadata (3.8 kB)
Downloading schedule-1.2.2-py3-none-any.whl (12 kB)
Installing collected packages: schedule
Successfully installed schedule-1.2.2


How to firebase keyy??

Step by step to get Firebase service account key:

Go to: https://console.firebase.google.com

Click your existing project (the one with your 2 apps + website)

Click the ⚙️ gear icon (top left, next to project name)

Click "Project Settings"

Click the "Service Accounts" tab

Click the blue "Generate New Private Key" button

A JSON file downloads to your computer

In [2]:
#Import and setup
import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd
import numpy as np
import joblib
from datetime import datetime, timedelta
import schedule
import time

print("Upload your Firebase service account key (JSON file)")
from google.colab import files
uploaded = files.upload()
key_filename = list(uploaded.keys())[0]

cred = credentials.Certificate(key_filename)
firebase_admin.initialize_app(cred)
db = firestore.client()

print("Firebase initialized!")

Upload your Firebase service account key (JSON file)


Saving healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json to healmind-2025-firebase-adminsdk-fbsvc-12242dbda6.json
Firebase initialized!


In [3]:
# Cell 3: Upload your model files
print("\nUpload your model files:")
print("1. stress_model.pkl")
print("2. scaler.pkl")

model_uploaded = files.upload()

model = joblib.load('stress_model.pkl')
scaler = joblib.load('scaler.pkl')

print("✓ Model and scaler loaded!")



Upload your model files:
1. stress_model.pkl
2. scaler.pkl


Saving scaler.pkl to scaler.pkl
Saving stress_model.pkl to stress_model.pkl
✓ Model and scaler loaded!


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

  setstate(state)


In [4]:
# Cell 4: Batch prediction class
class ColabStressPredictor:
    def __init__(self, model, scaler, db):
        self.model = model
        self.scaler = scaler
        self.db = db
        self.feature_names = ['sdnn', 'rmssd']

    def fetch_unprocessed_data(self, hours=1):
        cutoff_time = datetime.utcnow() - timedelta(hours=hours)

        query = self.db.collection('heart_rate_data') \
            .where('timestamp', '>=', cutoff_time) \
            .stream()

        data_points = []
        for doc in query:
            data = doc.to_dict()
            data['doc_id'] = doc.id
            data_points.append(data)

        return pd.DataFrame(data_points) if data_points else pd.DataFrame()

    def group_by_window(self, df, window_minutes=5):
        if df.empty:
            return []

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.sort_values('timestamp')

        windows = []
        for i in range(0, len(df), window_minutes * 12):
            window = df.iloc[i:i+window_minutes*12]
            if len(window) >= 3:
                windows.append(window)

        return windows

    def process_window(self, window):
        all_ibi = []
        for ibi_list in window['ibi'].dropna():
            if ibi_list:
                all_ibi.extend(ibi_list)

        if not all_ibi or len(all_ibi) < 2:
            return None

        ibi = np.array(all_ibi, dtype=float)
        sdnn = np.std(ibi)
        rmssd = np.sqrt(np.mean(np.diff(ibi) ** 2))

        X = np.array([[sdnn, rmssd]])
        X_scaled = self.scaler.transform(X)

        prediction = self.model.predict(X_scaled)[0]
        probability = self.model.predict_proba(X_scaled)[0]

        return {
            'stress_level': int(prediction),
            'stress_probabilities': {
                'class_0': float(probability[0]),
                'class_1': float(probability[1]) if len(probability) > 1 else 0.0
            },
            'sdnn': float(sdnn),
            'rmssd': float(rmssd),
            'window_start': window['timestamp'].min(),
            'window_end': window['timestamp'].max(),
            'prediction_timestamp': datetime.utcnow(),
            'num_samples': len(window)
        }

    def store_predictions(self, results):
        batch = self.db.batch()

        for result in results:
            doc_ref = self.db.collection('stress_predictions').document()
            batch.set(doc_ref, result)

        batch.commit()
        return len(results)

    def run_batch(self, hours=1):
        print(f"\n{'='*60}")
        print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"{'='*60}")

        try:
            print(f"Fetching data from last {hours} hour(s)...")
            df = self.fetch_unprocessed_data(hours=hours)

            if df.empty:
                print("✓ No new data to process")
                return

            print(f"✓ Loaded {len(df)} data points")

            windows = self.group_by_window(df)
            print(f"✓ Created {len(windows)} time windows")

            results = []
            for i, window in enumerate(windows):
                result = self.process_window(window)
                if result:
                    results.append(result)

            print(f"✓ Processed {len(results)} windows")

            if results:
                stored = self.store_predictions(results)
                print(f"✓ Stored {stored} predictions to Firestore")

                stress_high = sum(1 for r in results if r['stress_level'] == 1)
                avg_prob = np.mean([r['stress_probabilities']['class_1'] for r in results])
                print(f"\nSummary:")
                print(f"  High stress: {stress_high}/{len(results)}")
                print(f"  Avg probability: {avg_prob:.2%}")

            print(f"{'='*60}\n")
            return results

        except Exception as e:
            print(f"❌ Error: {str(e)}")
            return None


In [7]:
# Cell 5: Initialize and run
predictor = ColabStressPredictor(model, scaler, db)

print("Testing batch processor...")
predictor.run_batch(hours=160)

Testing batch processor...

BATCH JOB: 2026-01-04 07:10:47
Fetching data from last 160 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ Loaded 80 data points
✓ Created 2 time windows
✓ Processed 2 windows


  'prediction_timestamp': datetime.utcnow(),
  'prediction_timestamp': datetime.utcnow(),


✓ Stored 2 predictions to Firestore

Summary:
  High stress: 0/2
  Avg probability: 13.06%



[{'stress_level': 0,
  'stress_probabilities': {'class_0': 0.8060089349746704,
   'class_1': 0.1939910650253296},
  'sdnn': 76.47809333613502,
  'rmssd': 55.91937717988324,
  'window_start': Timestamp('2026-01-02 09:00:15.664000+0000', tz='UTC'),
  'window_end': Timestamp('2026-01-02 09:00:26.578000+0000', tz='UTC'),
  'prediction_timestamp': datetime.datetime(2026, 1, 4, 7, 10, 48, 98079),
  'num_samples': 60},
 {'stress_level': 0,
  'stress_probabilities': {'class_0': 0.9327389001846313,
   'class_1': 0.06726108491420746},
  'sdnn': 59.73159888472302,
  'rmssd': 61.93157514547809,
  'window_start': Timestamp('2026-01-02 09:00:26.588000+0000', tz='UTC'),
  'window_end': Timestamp('2026-01-02 09:00:26.896000+0000', tz='UTC'),
  'prediction_timestamp': datetime.datetime(2026, 1, 4, 7, 10, 48, 101020),
  'num_samples': 20}]

In [None]:
# Cell 6: Schedule to run every 30 minutes
def job():
    predictor.run_batch(hours=1)

schedule.every(30).minutes.do(job)

print("✓ Scheduler started!")
print("Will run predictions every 30 minutes")
print("\nKeep this Colab cell running to continue processing...")

while True:
    schedule.run_pending()
    time.sleep(60)

✓ Scheduler started!
Will run predictions every 30 minutes

Keep this Colab cell running to continue processing...

BATCH JOB: 2026-01-04 07:40:49
Fetching data from last 1 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ No new data to process

BATCH JOB: 2026-01-04 07:40:51
Fetching data from last 1 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ No new data to process

BATCH JOB: 2026-01-04 08:10:51
Fetching data from last 1 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ No new data to process

BATCH JOB: 2026-01-04 08:10:52
Fetching data from last 1 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ No new data to process

BATCH JOB: 2026-01-04 08:40:53
Fetching data from last 1 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ No new data to process

BATCH JOB: 2026-01-04 08:40:54
Fetching data from last 1 hour(s)...


  print(f"BATCH JOB: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
  cutoff_time = datetime.utcnow() - timedelta(hours=hours)
  return query.where(field_path, op_string, value)


✓ No new data to process
