## Intro
This notebook walks through the complete workflow of our proposed ADANS method. For the Kyoto 2006+ dataset, the Anomaly Detector in ADANS is an AutoEncoder (AE) anomaly detection model.


In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib notebook
import sys
sys.path.append('../moudles/')
sys.path.append('../')
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from moudles import AE
from moudles.ShiftDetector import ShiftDetector
from moudles.DANN import DANN
import myutils as utils
import random
from moudles.RepSampleSelector import Screener

## Prepare AutoEncoder model and data

In [2]:
# Train the AutoEncoder on label-0 (benign) samples from the 2008 data only.
utils.set_random_seed()
# np.load on an .npz returns a lazily-read NpzFile that keeps the archive open;
# the context manager closes it once both arrays have been read into memory.
with np.load('data/2008.npz') as feat:
    X, y = feat['X'], feat['y']
X_ben = X[y==0]
train_num=50000
X_train = X_ben[:train_num]
# Fit the scaler on the training split only, then scale that split with it.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
feature_size=X_train.shape[-1]
# AE.train returns the trained model together with its detection threshold.
model,thres= AE.train(X_train,feature_size)

epoch:0/10 |Loss: 0.08840341866016388
epoch:1/10 |Loss: 0.08830832690000534
epoch:2/10 |Loss: 0.08753897249698639
epoch:3/10 |Loss: 0.08715357631444931
epoch:4/10 |Loss: 0.08621897548437119
epoch:5/10 |Loss: 0.085350900888443
epoch:6/10 |Loss: 0.084992915391922
epoch:7/10 |Loss: 0.08500108867883682
epoch:8/10 |Loss: 0.08406953513622284
epoch:9/10 |Loss: 0.0839557871222496
max AD score 0.39479694


In [3]:
# Load the 2008 and 2011 data and scale both with the already-fitted `scaler`.
# Each NpzFile is closed as soon as its arrays have been read, so no file
# handle is left open for the rest of the kernel session.
with np.load('data/2008.npz') as FEAT_0:
    X_0, y_0 = scaler.transform(FEAT_0['X']), FEAT_0['y']
with np.load('data/2011.npz') as FEAT_1:
    X_1, y_1 = scaler.transform(FEAT_1['X']), FEAT_1['y']

## See how AE performs on new data (data where normality shifts occur) and old data (data where normality shifts do not occur)

In [4]:
# Baseline: score the 2008 data with the freshly trained AutoEncoder and
# report the detection metrics at the training-time threshold.
print(
    '****************************** Before Normality Shift occurs ******************************'
)
y_pred_0, y_prob_0 = AE.test(model, thres, X_0)
utils.TPR_FPR(y_prob_0, y_0, thres)


****************************** Before Normality Shift occurs ******************************
*********************** The relevant test indicators are as follows ***********************
FPR (False Positive Rate): 0.01045333333333333
TP: 20583, FP: 3136, TN: 296864, FN: 79417
[Anomaly Class] Precision: 0.8677853197858221, Recall: 0.2058299999999998, F1: 0.3327378979474907
--------------------
Macro Precision: 0.8283638131002213
Macro Recall   : 0.597688333333333
Macro F1-Score : 0.6053344085730953


In [5]:
# Same model and threshold, now scored on the 2011 data to expose the
# effect of the normality shift on the detection metrics.
print(
    '****************************** After Normality Shift occurs ******************************'
)
y_pred_1, y_prob_1 = AE.test(model, thres, X_1)
utils.TPR_FPR(y_prob_1, y_1, thres)

****************************** After Normality Shift occurs ******************************
*********************** The relevant test indicators are as follows ***********************
FPR (False Positive Rate): 0.060506666666666646
TP: 7825, FP: 18152, TN: 281848, FN: 92175
[Anomaly Class] Precision: 0.3012280093929234, Recall: 0.07824999999999992, F1: 0.12422902590056827
--------------------
Macro Precision: 0.5273929728347846
Macro Recall   : 0.5088716666666664
Macro F1-Score : 0.48027235026941495


**Clearly, the new data shows a 14% decrease in the AUC metric, and the performance of the anomaly detection model degrades significantly: the anomaly-class F1 falls from about 0.33 to 0.12, and the false positive rate rises from about 1.0% to 6.1%.
Next, let's use ADANS to address the normality shift faced by the anomaly detection model.**

## Let's use ADANS!

In [6]:
# The old and new data each contain 300,000 normal samples and 100,000
# anomalous samples.
vali_num = 100000
print(len(X_0))

# Keep only the label-0 (normal) rows of the old data; the mask is computed
# once and reused for the features, the labels and the anomaly scores.
normal_mask_0 = (y_0 == 0)
X_0_normal = X_0[normal_mask_0]
print(len(X_0_normal))
y_0_normal = y_0[normal_mask_0]
y_prob_0_normal = y_prob_0[normal_mask_0]

utils.set_random_seed()
# Randomly select vali_num samples from each year: the old-data sample holds
# only normal samples, the new-data sample is a mix of normal and anomalous.
random_sequence_o = random.sample(range(0, len(X_0_normal)), vali_num)
rmse_o = y_prob_0_normal[random_sequence_o]
X_o_normal = X_0_normal[random_sequence_o]
y_o_normal = y_0_normal[random_sequence_o]

random_sequence_n = random.sample(range(0, len(X_1)), vali_num)
X_n = X_1[random_sequence_n]
rmse_n = y_prob_1[random_sequence_n]
y_n = y_1[random_sequence_n]

# Number of anomalous samples in the old-data sample. Counted over the whole
# array rather than a hard-coded range(100000), so the count stays correct if
# vali_num is changed.
num_anom_old = int(np.count_nonzero(y_o_normal == 1))
print(num_anom_old)
# Number of anomalous samples in the new-data sample.
num_anom_new = int(np.count_nonzero(y_n == 1))
print(num_anom_new)


400000
300000
0
24788


## Normality Shift Detector

In [None]:

# Inputs to the shift test: the sampled old (normal-only) data and the
# sampled new data.
X_o_rep_nor_np = X_o_normal
X_n_rep_nor_np = X_n

utils.set_random_seed()
sd = ShiftDetector()

# Draw as many old samples as there are new samples, so both sets passed to
# the detector have the same length.
random_sequence_o = random.sample(
    range(len(X_o_rep_nor_np)),
    len(X_n_rep_nor_np),
)
X_o_rep_nor_np_len = X_o_rep_nor_np[random_sequence_o]

# Threshold for the p-value, read from the 'ShiftDetector' parameter group.
t = utils.get_params('ShiftDetector')['test_thres']
p_value, observed_SCMD = sd.Monte_Carlo_Encoder(
    model.encoder,
    X_o_rep_nor_np_len,
    X_n_rep_nor_np,
)

# A p-value at or above the threshold is reported as "no shift".
if p_value >= t:
    print("No normality shift!", p_value)
else:
    print('Shift! P-value is', p_value)


SCMD为: 1.501585602760315
KL 散度（直方图）: 13.897589294978436
Z_x 均值: tensor([-0.2027,  0.0737, -0.0512, -0.1065])
Z_y 均值: tensor([-0.2032,  0.0718, -0.0519, -0.1058])
Z_x 方差: tensor([1.0796e-05, 6.3963e-05, 1.5110e-05, 5.5867e-06])
Z_y 方差: tensor([2.3154e-01, 1.8806e-04, 1.8880e-01, 3.8912e-02])
Z_x 协方差矩阵:
 tensor([[ 1.0796e-05, -2.1084e-06, -1.0251e-05, -6.2678e-06],
        [-2.1084e-06,  6.3961e-05, -1.7852e-06,  3.7115e-06],
        [-1.0251e-05, -1.7852e-06,  1.5109e-05,  7.0569e-06],
        [-6.2678e-06,  3.7115e-06,  7.0569e-06,  5.5864e-06]])
Z_y 协方差矩阵:
 tensor([[ 2.3152e-01, -4.2277e-03, -2.0906e-01, -9.4896e-02],
        [-4.2277e-03,  1.8804e-04,  3.8045e-03,  1.7394e-03],
        [-2.0906e-01,  3.8045e-03,  1.8878e-01,  8.5688e-02],
        [-9.4896e-02,  1.7394e-03,  8.5688e-02,  3.8902e-02]])


## Normality Shift Screener

In [None]:

# Sample budgets handed to the Screener, plus the ratio of label_num to the
# validation sample size (printed here and reused by the adapter cell).
old_num = 50000
label_num = 10000
labeling_probability = label_num / vali_num
print(labeling_probability)

# NOTE(review): the meaning of each positional argument is defined by
# Screener.__init__ in moudles/RepSampleSelector — confirm the order there.
screener = Screener(
    model,
    X_o_normal,
    X_n,
    y_n,
    old_num,
    label_num,
    X_1,
    observed_SCMD,
)
result = screener.select_samples()

In [None]:
# Pick the two sample sets out of the Screener result (positions 0 and 2)
# and show the container type and shape of each.
X_o_rep_nor, X_n_rep_nor = result[0], result[2]

for rep_samples in (X_o_rep_nor, X_n_rep_nor):
    print(type(rep_samples))
    print(rep_samples.shape)

## Normality Shift Adapter

In [None]:

# Re-seed so the adaptation run is repeatable, then update the AutoEncoder
# with the samples selected by the Screener.
utils.set_random_seed()
# NOTE(review): 0.18 is an unexplained scaling factor applied to the
# detection threshold before it is handed to DANN — confirm its origin.
dann = DANN(
    model,
    X_o_rep_nor,
    X_n_rep_nor,
    feature_size,
    thres * 0.18,
    labeling_probability,
)
dann.update_AE()


## Re-testing the performance of the anomaly detection model (AE) on new and old data

In [None]:
%matplotlib inline
print('After ADANS Adaptation @2011:')
y_pred, y_prob = AE.test(dann.updated_AE,thres, X_1)
utils.TPR_FPR(y_prob, y_1, thres)

In [None]:
%matplotlib inline
print('After ADANS Adaptation @2008:')
y_pred, y_prob = AE.test(dann.updated_AE,thres, X_0)
utils.TPR_FPR(y_prob, y_0, thres)