In [2]:
#bootstrpping repo root + .env

import sys
from pathlib import Path
from dotenv import load_dotenv
import os

# Find repo root (walk up until .env found)
root = Path().resolve()
while root != root.parent and not (root / ".env").exists():
    root = root.parent

root_dir = str(root)
print("Root dir:", root_dir)

if root_dir not in sys.path:
    sys.path.append(root_dir)

load_dotenv(Path(root_dir) / ".env")

assert os.getenv("HOPSWORKS_API_KEY"), "Missing HOPSWORKS_API_KEY in .env"
print("Loaded .env successfully")

Root dir: /Users/sreenijaveladri/Downloads/llm_project_starter/scalable-ml-project
Loaded .env successfully


In [4]:
#loadin models w the cols

import json, joblib
import pandas as pd
import numpy as np
from pathlib import Path

ARTIFACTS_DIR = Path(root_dir) / "artifacts" / "models"

def load_model(key: str):
    model_dir = ARTIFACTS_DIR / key
    model = joblib.load(model_dir / "model.joblib")
    # col indices / training schema shld match btwn the model and inference input df
    cols = json.loads((model_dir / "feature_columns.json").read_text(encoding="utf-8"))
    return model, cols

In [5]:
# handling user input in one row
def build_row_modeA(
    phase: str,
    is_weekend: int,
    sleep_duration_minutes: float,
    resting_hr: float,
    cramps_num: float | None = None,
    headaches_num: float | None = None,
    sleepissue_num: float | None = None,
    stress_num: float | None = None,
):
    # these df col names (inference input schema) == training feature schema/cols names
    row = {
        "phase": phase,
        "is_weekend": is_weekend,
        "sleep_duration_minutes": sleep_duration_minutes,
        "resting_heart_rate__value": resting_hr,
        "cramps_num": cramps_num,
        "headaches_num": headaches_num,
        "sleepissue_num": sleepissue_num,
        "stress_num": stress_num,
    }
    return pd.DataFrame([row])

In [14]:
#same mapping user inputs as trained feature inputs
def one_hot_phase(df: pd.DataFrame) -> pd.DataFrame:
    return pd.get_dummies(df, columns=["phase"], dummy_na=True)

#NaN-ing the missing cols
def align_to_training_columns(X: pd.DataFrame, feature_cols: list[str]) -> pd.DataFrame:
    # add missing cols
    for c in feature_cols:
        if c not in X.columns:
            X[c] = np.nan
    # drop extra cols
    X = X[feature_cols]
    return X

In [9]:
def predict_energy(inputs: dict):
    # Route
    use_modeB = "lag1_energy" in inputs and inputs["lag1_energy"] is not None
    key = "energy_modeB" if use_modeB else "energy_modeA"

    model, feature_cols = load_model(key)

    X = build_row_modeA(
        phase=inputs["phase"],
        is_weekend=inputs["is_weekend"],
        sleep_duration_minutes=inputs["sleep_duration_minutes"],
        resting_hr=inputs["resting_heart_rate__value"],
        cramps_num=inputs.get("cramps_num"),
        headaches_num=inputs.get("headaches_num"),
        sleepissue_num=inputs.get("sleepissue_num"),
        stress_num=inputs.get("stress_num"),
    )

    if use_modeB:
        X["lag1_energy"] = inputs["lag1_energy"]

    X = one_hot_phase(X)
    X = align_to_training_columns(X, feature_cols)

    pred = int(model.predict(X)[0])
    proba = model.predict_proba(X)[0].tolist()
    return {"model_used": key, "pred_class": pred, "proba": proba}

def predict_mood(inputs: dict):
    use_modeB = "lag1_mood" in inputs and inputs["lag1_mood"] is not None
    key = "mood_modeB" if use_modeB else "mood_modeA"

    model, feature_cols = load_model(key)

    X = build_row_modeA(
        phase=inputs["phase"],
        is_weekend=inputs["is_weekend"],
        sleep_duration_minutes=inputs["sleep_duration_minutes"],
        resting_hr=inputs["resting_heart_rate__value"],
        cramps_num=inputs.get("cramps_num"),
        headaches_num=inputs.get("headaches_num"),
        sleepissue_num=inputs.get("sleepissue_num"),
        stress_num=inputs.get("stress_num"),
    )

    if use_modeB:
        X["lag1_mood"] = inputs["lag1_mood"]

    X = one_hot_phase(X)
    X = align_to_training_columns(X, feature_cols)

    pred = int(model.predict(X)[0])
    proba = model.predict_proba(X)[0].tolist()
    return {"model_used": key, "pred_class": pred, "proba": proba}

In [13]:
sample_inputs = {
    "phase": "Luteal",
    "is_weekend": 0,
    "sleep_duration_minutes": 420,
    "resting_heart_rate__value": 62,
    "cramps_num": 2,
    "stress_num": 3,
    # optional mode B:
    # "lag1_energy": 1,
    # "lag1_mood": 2,
}

print(predict_energy(sample_inputs))
print(predict_mood(sample_inputs))

{'model_used': 'energy_modeA', 'pred_class': 1, 'proba': [0.29911513397532824, 0.6779867723609673, 0.02289809366370569]}
{'model_used': 'mood_modeA', 'pred_class': 1, 'proba': [0.11272786425905848, 0.46987821338131797, 0.41739392235962336]}
