In [2]:
# utility
import sys
sys.path.append("..")

import utility

# imports
import numpy as np
import json

# modelling
from sklearn.neighbors import LocalOutlierFactor
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.utils import shuffle
from sklearn.model_selection import ParameterGrid

# Tree Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# misc
import os




In [3]:
train_data, test_data, train_labels, test_labels, test_true_labels, anom_data = utility.preprocess_data(False, n_anomalies=345, n_samples=1000)

345
Splitting data
normalize data
Reshape data
Convert data
Add anomalies


In [4]:
# Define parameter grid
param_grid = {
    'n_neighbors': [5, 10, 15],
    'algorithm': ['auto', 'brute', 'ball_tree'],
    'leaf_size': [10, 30, 50],
    'metric': ['euclidean', 'manhattan', 'minkowski'],
    'contamination': [0.0004]
}

In [5]:
# Store results in a list
results = []

for params in ParameterGrid(param_grid):
    model = LocalOutlierFactor(**params, novelty=True)

    train_data_generator = utility.data_generator(train_data)

    for batch in train_data_generator:
        model.fit(batch)

    scores_anom = model.decision_function(test_data)
    
    # Calculate AUC score using roc_auc_score
    auc_score = roc_auc_score(test_true_labels, scores_anom)
    results.append({'params': params, 'auc_score': auc_score})
    

# Sort results from best to worst
results = sorted(results, key=lambda x: x['auc_score'], reverse=True)

# Print results
for i, result in enumerate(results, start=1):
    print(f"Rank {i}: AUC Score {result['auc_score']:.4f} - Parameters: {result['params']}")

KeyboardInterrupt: 

In [None]:
# Rank 1: AUC Score 0.4780 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 2: AUC Score 0.4780 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 3: AUC Score 0.4780 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 4: AUC Score 0.4780 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 5: AUC Score 0.4780 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 6: AUC Score 0.4780 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 7: AUC Score 0.4780 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 8: AUC Score 0.4780 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 9: AUC Score 0.4780 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 10: AUC Score 0.4780 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 11: AUC Score 0.4780 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 12: AUC Score 0.4780 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 13: AUC Score 0.4780 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 14: AUC Score 0.4780 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 15: AUC Score 0.4780 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 16: AUC Score 0.4780 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 17: AUC Score 0.4780 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 5}
# Rank 18: AUC Score 0.4780 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 5}
# Rank 19: AUC Score 0.4105 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 20: AUC Score 0.4105 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 21: AUC Score 0.4105 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 22: AUC Score 0.4105 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 23: AUC Score 0.4105 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 24: AUC Score 0.4105 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 25: AUC Score 0.4105 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 26: AUC Score 0.4105 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 27: AUC Score 0.4105 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 5}
# Rank 28: AUC Score 0.4068 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 29: AUC Score 0.4068 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 30: AUC Score 0.4068 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 31: AUC Score 0.4068 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 32: AUC Score 0.4068 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 33: AUC Score 0.4068 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 34: AUC Score 0.4068 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 35: AUC Score 0.4068 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 36: AUC Score 0.4068 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 37: AUC Score 0.4068 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 38: AUC Score 0.4068 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 39: AUC Score 0.4068 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 40: AUC Score 0.4068 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 41: AUC Score 0.4068 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 42: AUC Score 0.4068 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 43: AUC Score 0.4068 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 44: AUC Score 0.4068 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 10}
# Rank 45: AUC Score 0.4068 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 10}
# Rank 46: AUC Score 0.3771 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 47: AUC Score 0.3771 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 48: AUC Score 0.3771 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 49: AUC Score 0.3771 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 50: AUC Score 0.3771 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 51: AUC Score 0.3771 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 52: AUC Score 0.3771 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 53: AUC Score 0.3771 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 54: AUC Score 0.3771 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 55: AUC Score 0.3771 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 56: AUC Score 0.3771 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 57: AUC Score 0.3771 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 58: AUC Score 0.3771 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 59: AUC Score 0.3771 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 60: AUC Score 0.3771 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 61: AUC Score 0.3771 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 62: AUC Score 0.3771 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'euclidean', 'n_neighbors': 15}
# Rank 63: AUC Score 0.3771 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'minkowski', 'n_neighbors': 15}
# Rank 64: AUC Score 0.3360 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 65: AUC Score 0.3360 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 66: AUC Score 0.3360 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 67: AUC Score 0.3360 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 68: AUC Score 0.3360 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 69: AUC Score 0.3360 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 70: AUC Score 0.3360 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 71: AUC Score 0.3360 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 72: AUC Score 0.3360 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 10}
# Rank 73: AUC Score 0.3084 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 74: AUC Score 0.3084 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 75: AUC Score 0.3084 - Parameters: {'algorithm': 'auto', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 76: AUC Score 0.3084 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 77: AUC Score 0.3084 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 78: AUC Score 0.3084 - Parameters: {'algorithm': 'brute', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 79: AUC Score 0.3084 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 80: AUC Score 0.3084 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 30, 'metric': 'manhattan', 'n_neighbors': 15}
# Rank 81: AUC Score 0.3084 - Parameters: {'algorithm': 'ball_tree', 'contamination': 0.1, 'leaf_size': 50, 'metric': 'manhattan', 'n_neighbors': 15}