In [6]:
# Cell 0: Install dependencies
!pip install pymongo


Collecting pymongo
  Using cached pymongo-4.13.2-cp312-cp312-win_amd64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Using cached dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Using cached pymongo-4.13.2-cp312-cp312-win_amd64.whl (903 kB)
Using cached dnspython-2.7.0-py3-none-any.whl (313 kB)
Installing collected packages: dnspython, pymongo
Successfully installed dnspython-2.7.0 pymongo-4.13.2



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Cell 1: Imports and setup
import pandas as pd
import numpy as np
from pymongo import MongoClient
import sys
import os
sys.path.append(os.path.abspath(".."))  # Go up one level to root project folder


from behavioral_alerts.core.utils import *
from behavioral_alerts.core.threshold_adjustment import *
from behavioral_alerts.core.incident_prediction import *
from behavioral_alerts.core.profiling import *


# Connection settings for the MongoDB instance this notebook talks to
MONGO_URI = 'mongodb://localhost:27017/'
MONGO_DB = 'hydatis'   # or your DB_NAME

# Open the client and select the working database
client = MongoClient(MONGO_URI)
db = client[MONGO_DB]

# Collection handles are built by the project helpers, which are called
# without arguments (presumably they manage their own connection -- verify
# in behavioral_alerts.core.utils)
users_collection = setup_users_collection()
ts_collection = setup_timeseries_collection()

# User id exercised by the test cells below
test_user = 'user1'


Collection exists or error: collection user_locations_ts already exists


In [2]:
# Cell 2: Test Threshold Model
# Trains a threshold model for `test_user`, persists it locally and to MongoDB,
# reloads it from both places and prints a prediction for the first feature row.

# Prepare data
thresh_features, thresh_target = prepare_threshold_data(ts_collection, test_user)

if thresh_features is None:
    # prepare_threshold_data signals "not enough history" with None features
    print(f"Insufficient data for threshold model for {test_user}")
else:
    # Train model
    thresh_model = train_threshold_model(thresh_features, thresh_target)
    # Save locally and to DB
    save_threshold_model(
        test_user,
        thresh_model,
        save_to_mongo=True,
        users_collection=users_collection,
        save_local=True
    )
    # Load back from local and from DB
    local_thresh_model = load_threshold_model(test_user)
    # find_one() yields None when no document matches; fall back to an empty
    # dict so the lookups below cannot raise AttributeError.
    db_thresh_doc = users_collection.find_one({"user_id": test_user}) or {}
    db_thresh_blob = (db_thresh_doc.get('threshold_model') or {}).get('model')
    # Report the outcome of both reloads so the round trip is actually checked
    print("Local threshold model reloaded:", local_thresh_model is not None)
    print("Threshold model blob found in DB:", db_thresh_blob is not None)
    # Predict threshold on the same features
    predicted = predict_threshold(thresh_model, thresh_features[0])
    print("Predicted dynamic threshold:", predicted)


[✓] Saved threshold model for user1 to MongoDB
[✓] Saved threshold model locally for user1
Predicted dynamic threshold: 0.049999999999999906


In [None]:
# Cell 3: Test Incident Model

import os
import sys

import joblib

# Make the project root (one level above the notebook folder) importable.
# Guarded so that re-running this cell does not keep appending duplicates.
_project_root = os.path.abspath("..")
if _project_root not in sys.path:
    sys.path.append(_project_root)

print("Current working directory:", os.getcwd())

def load_incident_model(user_id, models_dir=None):
    """Load the locally saved incident-prediction model and scaler for a user.

    Args:
        user_id: Identifier used both as the sub-folder name and as the
            file-name prefix of the saved artifacts.
        models_dir: Directory holding the per-user model folders. Defaults to
            ``../behavioral_alerts/models`` relative to the current working
            directory, which is the location the notebook originally used.

    Returns:
        ``(model, scaler)`` when both files exist, otherwise ``(None, None)``
        after printing the two paths that were looked up.
    """
    if models_dir is None:
        models_dir = os.path.join("..", "behavioral_alerts", "models")

    user_dir = os.path.join(models_dir, user_id)
    model_path = os.path.join(user_dir, f"{user_id}_xgboost_incident_pred.pkl")
    scaler_path = os.path.join(user_dir, f"{user_id}_xgboost_incident_pred_scaler.pkl")

    try:
        # Detect missing artifacts before touching the unpickler so the
        # "not found" branch does not depend on joblib's own error behavior.
        if not (os.path.isfile(model_path) and os.path.isfile(scaler_path)):
            raise FileNotFoundError(model_path)
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load artifacts produced by this project.
        model = joblib.load(model_path)
        scaler = joblib.load(scaler_path)
        return model, scaler
    except FileNotFoundError:
        print(f"[✗] Could not find model or scaler at:\n{model_path}\n{scaler_path}")
        return None, None


# Load the incident model/scaler pair from local disk for the user under test
# (uses `test_user` so this agrees with the DB load below).
model, scaler = load_incident_model(test_user)
print("Model:", model)
print("Scaler:", scaler)

# Load the same pair from MongoDB
loaded_db_model, loaded_db_scaler = load_incident_model_from_db(test_user, users_collection)

# Predict incident probability for a sample anomaly
sample_loc_score = 0.8
sample_time_score = 0.7

# load_incident_model returns (None, None) when the files are missing; skip
# the prediction in that case instead of failing inside predict_incident.
if model is None or scaler is None:
    print(f"Local incident model unavailable for {test_user}; skipping local prediction.")
else:
    prob_local = predict_incident(model, scaler, sample_loc_score, sample_time_score)
    print(f"Incident probability (local model): {prob_local:.2f}")

# Same guard for the DB copy (assumes the DB loader can also yield None
# entries when nothing is stored -- TODO confirm).
if loaded_db_model is None or loaded_db_scaler is None:
    print(f"DB incident model unavailable for {test_user}; skipping DB prediction.")
else:
    prob_db = predict_incident(loaded_db_model, loaded_db_scaler, sample_loc_score, sample_time_score)
    print(f"Incident probability (DB model): {prob_db:.2f}")



Current working directory: e:\Solution_securite_perso\notebooks
Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='logloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=None, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=None,
              n_jobs=None, num_parallel_tree=None, random_state=42, ...)
Scaler: StandardScaler()
Incident probability (local model): 0.10
Incident probability (DB model): 0.10


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [15]:
# Cell 4: Edge Cases Testing

# Exercise both data-preparation helpers with a user id that is expected
# to have no recorded history
dummy_user = 'new_user'
print("\nTesting with insufficient data for user:", dummy_user)

# Threshold data preparation: keep only the features half of the result
threshold_prep = prepare_threshold_data(ts_collection, dummy_user)
dummy_thresh, _ = threshold_prep
print("Threshold preparation result:", dummy_thresh)

# Incident data preparation: same check against the users collection
incident_prep = prepare_incident_data(users_collection, dummy_user)
dummy_inc_feat, _ = incident_prep
print("Incident preparation result:", dummy_inc_feat)



Testing with insufficient data for user: new_user
Threshold preparation result: None
Incident preparation result: None


The system gracefully handles edge cases in which a user has insufficient or no data.

=> We can use this behavior to skip training/scoring for new users until enough data is available:
if thresh_features is None:
    print(f"User {user_id} does not have enough behavioral history yet. Skipping threshold training.")
else:
    # train/save model
    ...

