In [1]:
# 03_testing_and_deployment.ipynb


"""Objective: Evaluate saved models on the test set and provide a minimal deployment example.
Contents:
- Load preprocessor and best model
- Evaluate on test set (classification report, confusion matrix, ROC-AUC, PR curve)
- Threshold tuning
- Minimal Flask app example
"""

'Objective: Evaluate saved models on the test set and provide a minimal deployment example.\nContents:\n- Load preprocessor and best model\n- Evaluate on test set (classification report, confusion matrix, ROC-AUC, PR curve)\n- Threshold tuning\n- Minimal Flask app example\n'

In [3]:
import os
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_curve, auc


# Artifact locations. The defaults are the original author's local paths; set
# the IDS_PREP_DIR / IDS_MODELS_DIR environment variables to run the notebook
# on another machine without editing this cell.
PREP_DIR = os.environ.get(
    "IDS_PREP_DIR",
    "C:\\Users\\HarshaSri\\Desktop\\IDS_PROJECT\\data\\processed",
)
MODELS_DIR = os.environ.get(
    "IDS_MODELS_DIR",
    "C:\\Users\\HarshaSri\\Desktop\\IDS_PROJECT\\models",
)

In [7]:
# Restore the saved preprocessor and the saved data splits from PREP_DIR
preprocessor_file = os.path.join(PREP_DIR, 'preprocessor.joblib')
splits_file = os.path.join(PREP_DIR, 'data_splits.joblib')

preprocessor = joblib.load(preprocessor_file)
# data_splits.joblib must unpack into exactly four objects, in this order
X_train_bal, y_train_bal, X_test_t, y_test = joblib.load(splits_file)

In [9]:
# Find models
# os.listdir returns entries in arbitrary order; sort so the evaluation order
# (and the printed list) is the same on every run and platform.
models = sorted(f for f in os.listdir(MODELS_DIR) if f.endswith('.joblib'))
print('Models found:', models)

Models found: ['logistic.joblib', 'rf.joblib', 'rf_best.joblib', 'xgboost.joblib']


In [32]:
# Evaluate every saved model found in MODELS_DIR on the test set.
# For each model: print its file name, then either the metrics (when test
# labels exist) or write its predictions to a per-model CSV (when they don't).
# `model`, `probs` and `preds` keep the values of the last model evaluated.

# Probability cut-off used to turn predict_proba scores into class labels.
DECISION_THRESHOLD = 0.5

if not models:
    print('No models found in', MODELS_DIR)

# Iterate over the discovered files rather than a fixed range(4), so the cell
# works for any number of saved models.
for model_name in models:
    print()
    print(model_name)
    print()
    model_path = os.path.join(MODELS_DIR, model_name)
    # NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
    model = joblib.load(model_path)

    # Predict
    # Only fall back to hard labels when the estimator really has no
    # predict_proba; any other prediction error should surface, not be hidden.
    if hasattr(model, 'predict_proba'):
        probs = model.predict_proba(X_test_t)[:, 1]  # column 1 of the probability output
        preds = (probs >= DECISION_THRESHOLD).astype(int)
    else:
        preds = model.predict(X_test_t)
        probs = None

    # If test labels exist
    if y_test is not None:
        print('Classification report:')
        print(classification_report(y_test, preds, digits=4))
        print('Confusion matrix:\n', confusion_matrix(y_test, preds))
        if probs is not None:
            print('ROC-AUC:', roc_auc_score(y_test, probs))
    else:
        print('Test labels not available; outputting predictions for test set')
        out = pd.DataFrame({'pred': preds})
        if probs is not None:
            out['prob'] = probs
        # One file per model: a shared file name would be overwritten on every
        # iteration, leaving only the last model's predictions on disk.
        model_stem = os.path.splitext(model_name)[0]
        out_name = 'test_predictions_' + model_stem + '.csv'
        out.to_csv(os.path.join(MODELS_DIR, out_name), index=False)
        print('Saved', out_name)
    print()


logistic.joblib

Classification report:
              precision    recall  f1-score   support

           0     0.9863    0.9900    0.9881      2690
           1     0.9885    0.9842    0.9863      2349

    accuracy                         0.9873      5039
   macro avg     0.9874    0.9871    0.9872      5039
weighted avg     0.9873    0.9873    0.9873      5039

Confusion matrix:
 [[2663   27]
 [  37 2312]]
ROC-AUC: 0.9977093788229114


rf.joblib

Classification report:
              precision    recall  f1-score   support

           0     0.9952    0.9996    0.9974      2690
           1     0.9996    0.9945    0.9970      2349

    accuracy                         0.9972      5039
   macro avg     0.9974    0.9970    0.9972      5039
weighted avg     0.9972    0.9972    0.9972      5039

Confusion matrix:
 [[2689    1]
 [  13 2336]]
ROC-AUC: 0.9999283884149073


rf_best.joblib

Classification report:
              precision    recall  f1-score   support

           0     0.9952  