In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sklearn.ensemble import IsolationForest
import joblib
import os

# Database connection settings.
#
# The connection URL is taken from the DATABASE_URL environment variable so
# that no password is committed with the notebook. The fallback targets the
# local `postgres` database as user `postgres` and deliberately carries no
# password: the PostgreSQL client library (libpq, used by psycopg2) picks the
# password up from the PGPASSWORD environment variable or a ~/.pgpass file.
DATABASE_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://postgres@localhost/postgres",
)
# create_engine() is lazy: no connection is attempted until the first query.
engine = create_engine(DATABASE_URL)

print("--- [AXON ANOMALY TRAINER] Notebook Initialized ---")

--- [AXON ANOMALY TRAINER] Notebook Initialized ---


In [2]:
print("\nLoading process data from local PostgreSQL database...")
# One result row per element of each snapshot's details->'processes' JSON
# array, keeping only that element's 'name' field as `process_name`.
query = "SELECT json_array_elements(details->'processes')->>'name' as process_name FROM snapshots"
try:
    df_local = pd.read_sql(sql=query, con=engine)
    print(f"✅ Successfully loaded {len(df_local)} process events from the database.")
except Exception as load_error:
    # Best-effort load: report the failure and fall back to an empty frame so
    # the later cells (all guarded by `df_local.empty`) skip their work.
    print(f"ERROR: Could not load data from database. Is it running? Details: {load_error}")
    df_local = pd.DataFrame()


Loading process data from local PostgreSQL database...
ERROR: Could not load data from database. Is it running? Details: (psycopg2.OperationalError) connection to server at "localhost" (::1), port 5432 failed: FATAL:  password authentication failed for user "postgres"

(Background on this error at: https://sqlalche.me/e/20/e3q8)


In [3]:
if not df_local.empty:
    print("\nPerforming feature engineering...")
    # JSON array elements that have no 'name' key are returned by the query as
    # NULL, which pandas loads as NaN. value_counts() skips NaN, so mapping the
    # counts back would leave a NaN 'frequency' on those rows and hand NaN
    # features to the model. Drop them before computing anything.
    # .copy() gives an independent frame so the column assignment below does
    # not raise SettingWithCopyWarning on older pandas versions.
    df_local = df_local.dropna(subset=['process_name']).copy()

# Re-check: every row may have been dropped above.
if not df_local.empty:
    # Share of all process events that each process name accounts for (0..1).
    process_counts = df_local['process_name'].value_counts(normalize=True)
    # Attach that share to every event; this is the single model feature.
    df_local['frequency'] = df_local['process_name'].map(process_counts)
    # Double brackets keep a 2-D DataFrame, as scikit-learn estimators expect.
    features = df_local[['frequency']]
    print("✅ Feature 'frequency' calculated for all processes.")

In [4]:
if not df_local.empty:
    print("\nTraining Isolation Forest model...")
    # contamination=0.01: treat roughly 1% of process events as anomalous.
    # random_state is pinned so repeated runs build the same forest.
    forest_params = {"contamination": 0.01, "random_state": 42}
    anomaly_model = IsolationForest(**forest_params)
    # Fit on the single 'frequency' feature built in the previous cell.
    anomaly_model.fit(features)
    print("✅ Anomaly detection model training complete.")

In [5]:
if not df_local.empty:
    print("\nSaving the model and feature data...")
    # Persist the fitted estimator and the name -> frequency lookup; both are
    # written to the current working directory.
    joblib.dump(value=anomaly_model, filename='anomaly_model.joblib')
    joblib.dump(value=process_counts, filename='process_frequencies.joblib')
    # Confirmation messages are emitted only after both files are written.
    for message in (
        "✅ Anomaly model saved as 'anomaly_model.joblib'.",
        "✅ Feature data saved as 'process_frequencies.joblib'.",
        "--- [AXON ANOMALY TRAINER] Notebook finished ---",
    ):
        print(message)