In [1]:
# Cell 0: Install dependencies
!pip install pymongo





[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Cell 1: Imports and setup
import pandas as pd
import numpy as np
from pymongo import MongoClient
import sys
import os
sys.path.append(os.path.abspath(".."))  # Go up one level to root project folder


from behavioral_alerts.core.utils import *
from behavioral_alerts.core.threshold_adjustment import *
from behavioral_alerts.core.incident_prediction import *
from behavioral_alerts.core.profiling import *
from behavioral_alerts.core.retrain_controller import retrain_user_profile

"""

"""

# Connect to MongoDB and collections
# NOTE(review): `client` and `db` are not passed to the setup_* helpers below and are
# not referenced again in the cells shown here; presumably the helpers open their own
# connection. Confirm before relying on (or removing) these two handles.
client = MongoClient('mongodb://localhost:27017/')
db = client['hydatis']   # or your DB_NAME
# Both collection handles come from project helpers brought in by the wildcard
# imports above; later cells read `users_collection` and `ts_collection`.
users_collection = setup_users_collection()
ts_collection = setup_timeseries_collection()

# Set user_id for testing; the following cells all operate on this user.
test_user = 'user1'


Collection exists or error: collection user_locations_ts already exists


In [3]:
# Before training/predicting anything: retrain the stored profile for the test user.
# Judging by the log lines this call prints, it also saves an ML model and a threshold
# model, each to MongoDB and to local disk.
retrain_user_profile(test_user, ts_collection, users_collection)

✅ Retraining behavioral profile for user1
[✓] Saved ML model for user1 in MongoDB
[✓] Saved model locally for user1
[✓] Cached profile in DB for user1 at 2025-07-11 09:31:16 CET
[✓] Saved threshold model for user1 to MongoDB
[✓] Saved threshold model locally for user1
✅ Threshold model updated for user1


In [4]:
# Cell 2: Test Threshold Model

# Bind the name up front so that later cells see an explicit None, rather than a
# NameError or a stale model from a previous run, when there is not enough data to train.
thresh_model = None

# Prepare data
thresh_features, thresh_target = prepare_threshold_data(ts_collection, test_user)

if thresh_features is None:
    print(f"Insufficient data for threshold model for {test_user}")
else:
    # Train model
    thresh_model = train_threshold_model(thresh_features, thresh_target)
    # Save locally and to DB
    save_threshold_model(
        test_user,
        thresh_model,
        save_to_mongo=True,
        users_collection=users_collection,
        save_local=True
    )
    # Load back from local and from DB
    local_thresh_model = load_threshold_model(test_user)
    # find_one returns None when no document matches, and the stored field may itself
    # be None; fall back to {} at both levels so the lookup cannot raise AttributeError.
    db_thresh_doc = users_collection.find_one({"user_id": test_user}) or {}
    db_thresh_blob = (db_thresh_doc.get('threshold_model') or {}).get('model')

    # Predict threshold using the first element of the prepared features
    predicted = predict_threshold(thresh_model, thresh_features[0])
    print("Predicted dynamic threshold:", predicted)


[✓] Saved threshold model for user1 to MongoDB
[✓] Saved threshold model locally for user1
Predicted dynamic threshold: 0.049999999999999906


In [5]:
# Cell 3: Test Incident Model

# os and joblib are used by load_incident_model below; each is imported exactly once.
# The project root was already appended to sys.path in the setup cell, so it is not
# appended a second time here.
import os
import joblib

# Relative model paths below are resolved against this directory.
print("Current working directory:", os.getcwd())

def load_incident_model(user_id, models_root=None):
    """Load a user's locally saved incident model together with its scaler.

    Args:
        user_id: Identifier used both as the per-user directory name and as the
            file-name prefix of the two pickles.
        models_root: Directory that holds one sub-directory per user. Defaults to
            ``../behavioral_alerts/models``, which is resolved against the current
            working directory.

    Returns:
        ``(model, scaler)`` on success, or ``(None, None)`` when either file is
        missing (a message listing both expected paths is printed in that case).
    """
    if models_root is None:
        models_root = os.path.join("..", "behavioral_alerts", "models")
    user_dir = os.path.join(models_root, user_id)
    model_path = os.path.join(user_dir, f"{user_id}_xgboost_incident_pred.pkl")
    scaler_path = os.path.join(user_dir, f"{user_id}_xgboost_incident_pred_scaler.pkl")

    def _report_missing():
        # Single place for the "not found" outcome so both checks report identically.
        print(f"[✗] Could not find model or scaler at:\n{model_path}\n{scaler_path}")
        return None, None

    # Check both files up front so a missing scaler is reported before the model
    # is deserialized.
    if not (os.path.isfile(model_path) and os.path.isfile(scaler_path)):
        return _report_missing()

    try:
        # NOTE: joblib.load unpickles arbitrary objects; only load files from a trusted source.
        model = joblib.load(model_path)
        scaler = joblib.load(scaler_path)
    except FileNotFoundError:
        # A file can still disappear between the check above and the load.
        return _report_missing()
    return model, scaler


# Load the locally saved model/scaler for the user under test (same user as the other cells).
incident_model, scaler = load_incident_model(test_user)
print("Model:", incident_model)
print("Scaler:", scaler)

# Load the model/scaler pair through the DB loader as well.
loaded_db_model, loaded_db_scaler = load_incident_model_from_db(test_user, users_collection)

# Predict incident probability for a sample anomaly
sample_loc_score = 0.8
sample_time_score = 0.7

# Only score with a model/scaler pair that actually loaded: load_incident_model returns
# (None, None) on failure, and predicting unconditionally would hand those Nones to
# predict_incident.
if incident_model and scaler:
    prob_local = predict_incident(incident_model, scaler, sample_loc_score, sample_time_score)
    print(f"✅ Incident probability (local model): {prob_local:.2f}")
if loaded_db_model and loaded_db_scaler:
    prob_db = predict_incident(loaded_db_model, loaded_db_scaler, sample_loc_score, sample_time_score)
    print(f"✅ Incident probability (DB model): {prob_db:.2f}")
    


Current working directory: e:\Solution_securite_perso\notebooks


configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Model: XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='logloss',
              feature_types=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=None, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=None,
              n_jobs=None, num_parallel_tree=None, random_state=42, ...)
Scaler: StandardScaler()
✅ Incident probability (local model): 0.10
✅ Incident probability (DB model): 0.10


In [6]:
# Cell 4: Edge Cases Testing

# Test with insufficient data user
# 'new_user' is assumed to have no stored history; TODO confirm against the database.
dummy_user = 'new_user'
print("\nTesting with insufficient data for user:", dummy_user)

# Threshold: only the first element of the returned pair is inspected; the second is discarded.
dummy_thresh, _ = prepare_threshold_data(ts_collection, dummy_user)
print("Threshold preparation result:", dummy_thresh)

# Incident: same check for the incident-data preparation, which is given
# users_collection rather than ts_collection.
dummy_inc_feat, _ = prepare_incident_data(users_collection, dummy_user)
print("Incident preparation result:", dummy_inc_feat)



Testing with insufficient data for user: new_user
Threshold preparation result: None
Incident preparation result: None


The system gracefully handles edge cases where a user has insufficient or no data.

We can rely on this behavior to skip training/scoring for new users until enough data is available:
if thresh_features is None:
    print(f"User {user_id} does not have enough behavioral history yet. Skipping threshold training.")
else:
    # train/save model



In [7]:

import sys
import os
# NOTE(review): this puts the core package directory itself on sys.path, in addition to
# the project root added in the setup cell. The package-qualified import below should
# only need the root; confirm whether scoring.py relies on this entry before removing it.
sys.path.append(os.path.abspath("../behavioral_alerts/core"))
from behavioral_alerts.core.scoring import evaluate_user_behavior

# Score one hand-picked pair of location/time scores, reusing the incident model, scaler
# and threshold model objects created in the earlier cells.
result = evaluate_user_behavior(
    user_id=test_user,
    location_score=0.7,
    time_score=0.6,
    ts_collection=ts_collection,
    incident_model=incident_model,
    scaler=scaler,
    threshold_model=thresh_model
)
# `result` is read as a mapping: an "error" key signals failure, otherwise the three
# keys printed below are expected to be present.
if "error" in result:
    print(result["error"])
else:
    print("Probability:", result["incident_probability"])
    print("Threshold:", result["dynamic_threshold"])
    print("Anomaly?", result["anomaly"])



Probability: 0.10366619
Threshold: 0.049999999999999906
Anomaly? True
