In [1]:
# utility
import sys
sys.path.append("..")

import utility

# imports
import numpy as np
import json

# modelling
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.utils import shuffle
from sklearn.model_selection import ParameterGrid

# Tree Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# misc
import os




In [2]:
train_data, test_data, train_labels, test_labels, test_true_labels = utility.preprocess_data(False)

In [3]:
# Define parameter grid
param_grid = {
    'n_estimators': [64, 128, 256, 512],
    'contamination': [0.0004],
    'max_features': [10, 20, 50, 100, 0.1, 0.2, 0.5, 0.8, 5, 75, 200, 250, 500, 784],  # Use floats in the range (0.0, 1.0]
    'max_samples': [0.1, 0.2, 0.3, 0.5, 1, 10, 25, 50, 100, 250, 500],
    'random_state': [42],
}


In [4]:
# Store results in a list
results = []

for params in ParameterGrid(param_grid):
    model = IsolationForest(**params)
    train_data_generator = utility.data_generator(train_data)

    for batch in train_data_generator:
        model.fit(batch)
    scores_anom = model.decision_function(test_data)
    
    # Calculate AUC score using roc_auc_score
    auc_score = roc_auc_score(test_true_labels, scores_anom)
    results.append({'params': params, 'auc_score': auc_score})
    

# Sort results from best to worst
results = sorted(results, key=lambda x: x['auc_score'], reverse=True)

# Print results
for i, result in enumerate(results, start=1):
    print(f"Rank {i}: AUC Score {result['auc_score']:.4f} - Parameters: {result['params']}")

ValueError: multi_class must be in ('ovo', 'ovr')

In [None]:
# Rank 1: AUC Score 0.2955 - Parameters: {'contamination': 0.1, 'max_features': 250, 'n_estimators': 64, 'random_state': 42}
# Rank 2: AUC Score 0.2937 - Parameters: {'contamination': 0.1, 'max_features': 784, 'n_estimators': 64, 'random_state': 42}
# Rank 3: AUC Score 0.2832 - Parameters: {'contamination': 0.1, 'max_features': 250, 'n_estimators': 128, 'random_state': 42}
# Rank 4: AUC Score 0.2804 - Parameters: {'contamination': 0.1, 'max_features': 50, 'n_estimators': 64, 'random_state': 42}
# Rank 5: AUC Score 0.2742 - Parameters: {'contamination': 0.1, 'max_features': 100, 'n_estimators': 128, 'random_state': 42}
# Rank 6: AUC Score 0.2726 - Parameters: {'contamination': 0.1, 'max_features': 250, 'n_estimators': 512, 'random_state': 42}
# Rank 7: AUC Score 0.2725 - Parameters: {'contamination': 0.1, 'max_features': 784, 'n_estimators': 512, 'random_state': 42}
# Rank 8: AUC Score 0.2719 - Parameters: {'contamination': 0.1, 'max_features': 250, 'n_estimators': 256, 'random_state': 42}
# Rank 9: AUC Score 0.2709 - Parameters: {'contamination': 0.1, 'max_features': 50, 'n_estimators': 128, 'random_state': 42}
# Rank 10: AUC Score 0.2700 - Parameters: {'contamination': 0.1, 'max_features': 0.5, 'n_estimators': 64, 'random_state': 42}
# Rank 11: AUC Score 0.2690 - Parameters: {'contamination': 0.1, 'max_features': 75, 'n_estimators': 64, 'random_state': 42}
# Rank 12: AUC Score 0.2686 - Parameters: {'contamination': 0.1, 'max_features': 784, 'n_estimators': 128, 'random_state': 42}
# Rank 13: AUC Score 0.2667 - Parameters: {'contamination': 0.1, 'max_features': 784, 'n_estimators': 256, 'random_state': 42}
# Rank 14: AUC Score 0.2659 - Parameters: {'contamination': 0.1, 'max_features': 100, 'n_estimators': 64, 'random_state': 42}
# Rank 15: AUC Score 0.2655 - Parameters: {'contamination': 0.1, 'max_features': 100, 'n_estimators': 256, 'random_state': 42}
# Rank 16: AUC Score 0.2653 - Parameters: {'contamination': 0.1, 'max_features': 500, 'n_estimators': 128, 'random_state': 42}
# Rank 17: AUC Score 0.2651 - Parameters: {'contamination': 0.1, 'max_features': 200, 'n_estimators': 256, 'random_state': 42}
# Rank 18: AUC Score 0.2637 - Parameters: {'contamination': 0.1, 'max_features': 0.1, 'n_estimators': 64, 'random_state': 42}
# Rank 19: AUC Score 0.2634 - Parameters: {'contamination': 0.1, 'max_features': 200, 'n_estimators': 512, 'random_state': 42}
# Rank 20: AUC Score 0.2634 - Parameters: {'contamination': 0.1, 'max_features': 200, 'n_estimators': 128, 'random_state': 42}
# Rank 21: AUC Score 0.2618 - Parameters: {'contamination': 0.1, 'max_features': 0.8, 'n_estimators': 64, 'random_state': 42}
# Rank 22: AUC Score 0.2612 - Parameters: {'contamination': 0.1, 'max_features': 0.5, 'n_estimators': 128, 'random_state': 42}
# Rank 23: AUC Score 0.2591 - Parameters: {'contamination': 0.1, 'max_features': 500, 'n_estimators': 256, 'random_state': 42}
# Rank 24: AUC Score 0.2591 - Parameters: {'contamination': 0.1, 'max_features': 0.1, 'n_estimators': 512, 'random_state': 42}
# Rank 25: AUC Score 0.2589 - Parameters: {'contamination': 0.1, 'max_features': 100, 'n_estimators': 512, 'random_state': 42}
# Rank 26: AUC Score 0.2587 - Parameters: {'contamination': 0.1, 'max_features': 500, 'n_estimators': 512, 'random_state': 42}
# Rank 27: AUC Score 0.2583 - Parameters: {'contamination': 0.1, 'max_features': 0.5, 'n_estimators': 512, 'random_state': 42}
# Rank 28: AUC Score 0.2577 - Parameters: {'contamination': 0.1, 'max_features': 0.8, 'n_estimators': 256, 'random_state': 42}
# Rank 29: AUC Score 0.2577 - Parameters: {'contamination': 0.1, 'max_features': 200, 'n_estimators': 64, 'random_state': 42}
# Rank 30: AUC Score 0.2576 - Parameters: {'contamination': 0.1, 'max_features': 0.1, 'n_estimators': 128, 'random_state': 42}
# Rank 31: AUC Score 0.2571 - Parameters: {'contamination': 0.1, 'max_features': 0.5, 'n_estimators': 256, 'random_state': 42}
# Rank 32: AUC Score 0.2569 - Parameters: {'contamination': 0.1, 'max_features': 75, 'n_estimators': 128, 'random_state': 42}
# Rank 33: AUC Score 0.2561 - Parameters: {'contamination': 0.1, 'max_features': 0.1, 'n_estimators': 256, 'random_state': 42}
# Rank 34: AUC Score 0.2553 - Parameters: {'contamination': 0.1, 'max_features': 75, 'n_estimators': 512, 'random_state': 42}
# Rank 35: AUC Score 0.2553 - Parameters: {'contamination': 0.1, 'max_features': 20, 'n_estimators': 64, 'random_state': 42}
# Rank 36: AUC Score 0.2524 - Parameters: {'contamination': 0.1, 'max_features': 0.8, 'n_estimators': 128, 'random_state': 42}
# Rank 37: AUC Score 0.2521 - Parameters: {'contamination': 0.1, 'max_features': 0.2, 'n_estimators': 256, 'random_state': 42}
# Rank 38: AUC Score 0.2513 - Parameters: {'contamination': 0.1, 'max_features': 75, 'n_estimators': 256, 'random_state': 42}
# Rank 39: AUC Score 0.2512 - Parameters: {'contamination': 0.1, 'max_features': 50, 'n_estimators': 512, 'random_state': 42}
# Rank 40: AUC Score 0.2508 - Parameters: {'contamination': 0.1, 'max_features': 50, 'n_estimators': 256, 'random_state': 42}
# Rank 41: AUC Score 0.2501 - Parameters: {'contamination': 0.1, 'max_features': 0.2, 'n_estimators': 512, 'random_state': 42}
# Rank 42: AUC Score 0.2482 - Parameters: {'contamination': 0.1, 'max_features': 20, 'n_estimators': 128, 'random_state': 42}
# Rank 43: AUC Score 0.2475 - Parameters: {'contamination': 0.1, 'max_features': 0.8, 'n_estimators': 512, 'random_state': 42}
# Rank 44: AUC Score 0.2450 - Parameters: {'contamination': 0.1, 'max_features': 500, 'n_estimators': 64, 'random_state': 42}
# Rank 45: AUC Score 0.2433 - Parameters: {'contamination': 0.1, 'max_features': 0.2, 'n_estimators': 64, 'random_state': 42}
# Rank 46: AUC Score 0.2420 - Parameters: {'contamination': 0.1, 'max_features': 20, 'n_estimators': 512, 'random_state': 42}
# Rank 47: AUC Score 0.2419 - Parameters: {'contamination': 0.1, 'max_features': 10, 'n_estimators': 64, 'random_state': 42}
# Rank 48: AUC Score 0.2391 - Parameters: {'contamination': 0.1, 'max_features': 20, 'n_estimators': 256, 'random_state': 42}
# Rank 49: AUC Score 0.2385 - Parameters: {'contamination': 0.1, 'max_features': 0.2, 'n_estimators': 128, 'random_state': 42}
# Rank 50: AUC Score 0.2257 - Parameters: {'contamination': 0.1, 'max_features': 10, 'n_estimators': 512, 'random_state': 42}
# Rank 51: AUC Score 0.2229 - Parameters: {'contamination': 0.1, 'max_features': 5, 'n_estimators': 64, 'random_state': 42}
# Rank 52: AUC Score 0.2229 - Parameters: {'contamination': 0.1, 'max_features': 10, 'n_estimators': 128, 'random_state': 42}
# Rank 53: AUC Score 0.2201 - Parameters: {'contamination': 0.1, 'max_features': 5, 'n_estimators': 256, 'random_state': 42}
# Rank 54: AUC Score 0.2135 - Parameters: {'contamination': 0.1, 'max_features': 10, 'n_estimators': 256, 'random_state': 42}
# Rank 55: AUC Score 0.2085 - Parameters: {'contamination': 0.1, 'max_features': 5, 'n_estimators': 512, 'random_state': 42}
# Rank 56: AUC Score 0.1998 - Parameters: {'contamination': 0.1, 'max_features': 5, 'n_estimators': 128, 'random_state': 42}