In [4]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from preprocess import preprocess_data
import joblib
import os

In [5]:
def train_model(
    contamination=0.05,
    n_estimators=100,
    test_size=0.2,
    random_state=42,
    model_path="models/model.pkl",
):
    """Train an Isolation Forest, save it to disk, and print an evaluation report.

    The data comes from ``preprocess_data()``; the ``"y"`` column is used as the
    evaluation label and ``"y"`` and ``"time"`` are excluded from the features.
    The forest is fitted on the training features only (labels are not used for
    fitting), dumped with joblib to ``model_path``, and then scored on the
    held-out split with ``classification_report``.

    All defaults reproduce the previously hard-coded configuration, so calling
    ``train_model()`` with no arguments behaves as before.

    Args:
        contamination: Expected proportion of outliers, forwarded to
            ``IsolationForest``.
            NOTE(review): the default 0.05 is kept for backward compatibility;
            confirm it matches the real anomaly rate of the dataset.
        n_estimators: Number of trees in the forest.
        test_size: Fraction (or absolute count) of rows held out for evaluation,
            forwarded to ``train_test_split``.
        random_state: Seed used for both the split and the forest.
        model_path: Destination file for the serialized model. Its parent
            directory is created if it does not exist.

    Returns:
        The fitted ``IsolationForest`` instance.
    """
    df = preprocess_data()
    X = df.drop(columns=["y", "time"])  # features: everything but target & timestamp
    # Assumes y encodes anomalies as 1 and normal rows as 0, to match the
    # prediction mapping below - TODO confirm against preprocess_data.
    y = df["y"]

    # The training labels are not needed: Isolation Forest is fitted unsupervised.
    X_train, X_test, _, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model = IsolationForest(
        n_estimators=n_estimators,
        contamination=contamination,
        random_state=random_state,
    )
    model.fit(X_train)

    # Create the target directory derived from model_path; skip when the path
    # is a bare filename, because os.makedirs("") raises FileNotFoundError.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    joblib.dump(model, model_path)

    # IsolationForest.predict returns -1 for outliers and 1 for inliers;
    # remap to 1 = anomaly, 0 = normal so it is comparable with y.
    raw_pred = model.predict(X_test)
    y_pred = (raw_pred == -1).astype(int)

    print(classification_report(y_test, y_pred))
    return model

# Train only when this code runs as the main module (which is the case inside a
# notebook kernel and when the exported script is executed directly), not when
# it is imported. The fitted model returned by train_model() is kept in `model`.
if __name__ == "__main__":
    model = train_model()


              precision    recall  f1-score   support

           0       1.00      0.95      0.97      3658
           1       0.03      0.23      0.05        22

    accuracy                           0.95      3680
   macro avg       0.51      0.59      0.51      3680
weighted avg       0.99      0.95      0.97      3680



In [6]:
# Export this notebook to a plain Python script (model.py) using nbconvert.
# NOTE(review): nbconvert exports shell escapes like this one as
# get_ipython().system(...) calls, so the generated script presumably contains
# this line too and would fail under plain `python` - confirm and consider
# excluding this cell from the export.
!jupyter nbconvert --to script "model.ipynb"

[NbConvertApp] Converting notebook model.ipynb to script
[NbConvertApp] Writing 1008 bytes to model.py
