<a href="https://colab.research.google.com/github/Ramesh-Arvind/AI/blob/master/ensemble_anomaly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
# Importing necessary packages
import numpy as np
from numpy import percentile
import matplotlib.pyplot as plt
import matplotlib.font_manager
from scipy.optimize import minimize

In [16]:
# Importing all models
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.ocsvm import OCSVM
from pyod.models.deep_svdd import DeepSVDD
from pyod.models.auto_encoder import AutoEncoder
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
from sklearn import metrics


In [8]:
# Importing cardio.npz data set
cardio = np.load('/content/cardio.npz')
lst = cardio.files
x,tx,ty=cardio['x'],cardio['tx'],cardio['ty']
# for item in lst:
#     print(item)
#     print(data[item])

In [23]:
# Collecting Train and Test Scores
n_clf = 5
train_scores = np.zeros([len(cardio['x']), n_clf])
test_scores = np.zeros([len(cardio['tx']), n_clf])

In [24]:
# Fitting Auto-Encoder
ae = AutoEncoder(epochs=30, contamination=0.1, hidden_neurons=[60,10,60])
ae.fit(cardio['x'])
ae_train_pred = ae.labels_
ae_train_scores = ae.decision_scores_
ae_test_pred = ae.predict(cardio['tx'])  # outlier labels (0 or 1)
ae_test_scores = ae.decision_function(cardio['tx'])  # outlier scores
train_scores[:, 3] = ae_train_scores
test_scores[:, 3] = ae_test_scores
print("AE ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], ae_test_scores),4))

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 21)                462       
                                                                 
 dropout_5 (Dropout)         (None, 21)                0         
                                                                 
 dense_9 (Dense)             (None, 21)                462       
                                                                 
 dropout_6 (Dropout)         (None, 21)                0         
                                                                 
 dense_10 (Dense)            (None, 60)                1320      
                                                                 
 dropout_7 (Dropout)         (None, 60)                0         
                                                                 
 dense_11 (Dense)            (None, 10)               

In [25]:
# Fitting Deep One-Class Classification
dsvdd = DeepSVDD(use_ae=False, epochs=5, contamination=0.1, random_state=10)
dsvdd.fit(cardio['x'])
dsvdd_train_pred = dsvdd.labels_
dsvdd_train_scores = dsvdd.decision_scores_
dsvdd_test_pred = dsvdd.predict(cardio['tx'])  # outlier labels (0 or 1)
dsvdd_test_scores = dsvdd.decision_function(cardio['tx'])  # outlier scores
train_scores[:, 3] = dsvdd_train_scores
test_scores[:, 3] = dsvdd_test_scores
print("DSVDD ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], dsvdd_test_scores),4))

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 21)]              0         
                                                                 
 dense_15 (Dense)            (None, 64)                1344      
                                                                 
 net_output (Dense)          (None, 32)                2048      
                                                                 
 tf.math.subtract_3 (TFOpLam  (None, 32)               0         
 bda)                                                            
                                                                 
 tf.math.pow_3 (TFOpLambda)  (None, 32)                0         
                                                                 
 tf.math.reduce_sum_3 (TFOpL  (None,)                  0         
 ambda)                                                    

In [26]:
# Fitting Isolation Forest
iof = IForest()
iof.fit(cardio['x'])
iof_train_pred = iof.labels_
iof_train_scores = iof.decision_scores_
iof_test_pred = iof.predict(cardio['tx'])  # outlier labels (0 or 1)
iof_test_scores = iof.decision_function(cardio['tx'])  # outlier scores
train_scores[:, 2] = iof_train_scores
test_scores[:, 2] = iof_test_scores
print("Isolation Forest ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], iof_test_scores),4))

Isolation Forest ROC AUC score 0.9508


In [27]:
# Fitting K Nearest Neighbors
knn = KNN()
knn.fit(cardio['x'])
knn_train_pred = knn.labels_
knn_train_scores = knn.decision_scores_
knn_test_pred = knn.predict(cardio['tx'])  # outlier labels (0 or 1)
knn_test_scores = knn.decision_function(cardio['tx'])  # outlier scores
train_scores[:, 1] = knn_train_scores
test_scores[:, 1] = knn_test_scores
print("KNN ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], knn_test_scores),4))

KNN ROC AUC score 0.927


In [28]:
# Fitting One-class Support Vector Machine
ovm = OCSVM()
ovm.fit(cardio['x'])
ovm_train_pred = ovm.labels_
ovm_train_scores = ovm.decision_scores_
ovm_test_pred = ovm.predict(cardio['tx'])  # outlier labels (0 or 1)
ovm_test_scores = ovm.decision_function(cardio['tx'])  # outlier scores
train_scores[:, 3] = ovm_train_scores
test_scores[:, 3] = ovm_test_scores
print("OCSVM ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], ovm_test_scores),4))

OCSVM ROC AUC score 0.9415


In [29]:
# Averaging scores
avg_test_scores = (np.array(ae_test_scores) + np.array(dsvdd_test_scores) + np.array(iof_test_scores)
                   + np.array(knn_test_scores) + np.array(ovm_test_scores)) / 5.0
print("Average ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], avg_test_scores),4))

Average ROC AUC score 0.944


In [30]:
# Maximizing scores
max_test_scores = np.fmax((np.fmax((np.fmax(np.array(ae_test_scores), np.array(dsvdd_test_scores))),
                          (np.fmax(np.array(iof_test_scores), np.array(knn_test_scores))))),
                            (np.array(ovm_test_scores)))
print("Max ROC AUC score", round(metrics.roc_auc_score(cardio['ty'], max_test_scores),4))

Max ROC AUC score 0.9415


In [31]:
# Creating Score Matrix for each data point corresponding to each algorithm
model_scores = np.stack((ae_test_scores, dsvdd_test_scores, iof_test_scores, knn_test_scores,  ovm_test_scores))
np.array(model_scores).shape

(5, 352)

In [73]:
n_iterations = 100
best_weights_overall = None
best_roc_auc_overall = -np.inf

def objective(weights):
    # Linear combination of the test scores using the weights
    combined_test_scores = np.dot(weights.T,model_scores)
    # ROC AUC score of the combined test scores
    roc_auc = metrics.roc_auc_score(cardio['ty'], combined_test_scores)
    return -roc_auc

for iteration in range(n_iterations):
    initial_weights = np.random.rand(n_clf)
    initial_weights = initial_weights / np.sum(initial_weights)
    bounds = [(0, 1) for _ in range(len(initial_weights))]
    result = minimize(objective, initial_weights, bounds=bounds, constraints={"type": "eq", "fun": lambda w: 1 - sum(w)}, method='SLSQP')
    if -result.fun > best_roc_auc_overall:
        best_weights_overall = result.x
        best_roc_auc_overall = -result.fun
    print("Iteration {}: Best Weights: {} Best ROC_AUC: {}".format(iteration,best_weights_overall,best_roc_auc_overall))

best_algo_index = np.argmax(best_weights_overall)
print("The best algorithm is: Algorithm {} with weight: {}".format(best_algo_index+1, best_weights_overall[best_algo_index]))


Iteration 0: Best Weights: [0.01538423 0.28505202 0.1135283  0.27027257 0.31576288] Best ROC_AUC: 0.9427298553719009
Iteration 1: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 2: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 3: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 4: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 5: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 6: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 7: Best Weights: [0.28832167 0.00183928 0.39026448 0.21401127 0.1055633 ] Best ROC_AUC: 0.9454093491735537
Iteration 8: Best Weights: [0.28832167 0.00183928 0.39026448 0.2