### CUSUM of AUCs using Detecta package

This is an implementation of detecting a sudden drift scenario using the CUSUM algorithm from the detecta package.


Days 0-99 are samples from one distribution with classification AUC 0.86, and days 100-199 are samples from another distribution with classification AUC 0.80.


In [1]:
# Imports
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm # Colormaps
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1 import make_axes_locatable
import seaborn as sns

# Use seaborn's dark grid for every plot, and fix NumPy's global seed so the
# np.random.choice draws made in later cells are repeatable.
sns.set_style(style='darkgrid')
np.random.seed(seed=42)

In [2]:
# Load the two datasets from the working directory:
# data86 comes from 'd0-AUC.86.npy' and data80 from 'd5-AUC.80.npy'.
data86, data80 = (
    np.load(npy_path) for npy_path in ('d0-AUC.86.npy', 'd5-AUC.80.npy')
)

In [3]:
#print(data86.shape)
#print(data80.shape)

In [None]:
import sklearn
#print(samples.shape)
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV
import warnings

# Silence every warning for the rest of the session. (The original also
# installed an 'always' filter first, but the 'ignore' filter added right after
# it takes precedence, so that call had no effect and is dropped.)
# NOTE(review): this also hides any warning raised while fitting the model.
warnings.filterwarnings('ignore')

#--------------------------------------------------------
# Train Classifier - MLP
#--------------------------------------------------------
# Columns 0 and 1 of data86 are used as input features, column 2 as the label.
samples = data86[:, [0, 1]]
labels = data86[:, 2]

# 80/20 split; the 20% part (X_test / y_test) is what later cells sample from.
X_train, X_test, y_train, y_test = train_test_split(
    samples, labels, test_size=0.20, random_state=5
)
print("Train", X_train.shape)
print("Test", X_test.shape)

mlp = MLPClassifier(
    hidden_layer_sizes=(2, 4, 4, 1),
    activation='relu',
    solver='adam',
    max_iter=2000,
    learning_rate_init=0.001,
    learning_rate="constant",
    random_state=4,
    shuffle=True,
    batch_size=8,
)
mlp.fit(X_train, y_train)
predict_train = mlp.predict(X_train)
#predict_test = mlp.predict(X_test)

# Evaluate the model on the training split. The confusion matrix is computed
# once and reused for both the printout and the specificity.
train_confusion = confusion_matrix(y_train, predict_train)
print(train_confusion)
print(classification_report(y_train, predict_train))

# Unpacking four cells assumes a 2x2 (binary) confusion matrix.
tn, fp, fn, tp = train_confusion.ravel()
specificity = tn / (tn + fp)

# NOTE(review): roc_auc_score receives hard class predictions from `predict`
# rather than scores (e.g. `predict_proba`), so the ROC has a single operating
# point. Left unchanged so the value matches how AUC is computed in the
# later cells - confirm whether score-based AUC was intended.
AUC = roc_auc_score(y_train, predict_train)
print("Specificity:", specificity)
print("AUC:", AUC)
#print(predict_train.shape)

Train (16800, 2)
Test (4200, 2)


In [None]:
import random

# Scratch check of the row-sampling approach: build an empty (0, 2) int array,
# then draw 10 distinct row indices from the held-out split and look at the
# shape of the selected rows.
arr = np.empty(shape=(0, 2), dtype=int)
print(arr.shape)

number_of_rows = len(X_test)
random_indices = np.random.choice(number_of_rows, size=10, replace=False)
X_test[random_indices, :].shape

In [None]:
# Random samples from test set (day0-day99)
# import modules
import random

# One simulated "day" = 50 distinct rows drawn (without replacement) from the
# held-out split X_test / y_test, scored with the already-fitted `mlp`.
# Per-day values are collected in a list and converted once at the end,
# instead of re-allocating the array with np.append on every iteration.
daily_aucs = []
for test_days in range(100):
    random_indices = np.random.choice(X_test.shape[0], size=50, replace=False)
    test_samples = X_test[random_indices, :]
    test_labels = y_test[random_indices]

    predict_test = mlp.predict(test_samples)

    # NOTE(review): AUC is computed from hard class predictions, not scores;
    # kept as-is so all days in the series are measured the same way.
    AUC = roc_auc_score(test_labels, predict_test)
    daily_aucs.append(AUC)

test_AUC = np.array(daily_aucs)

# The day 100-199 cell continues counting from this value.
test_days = 100

In [None]:
# Sanity check: one AUC per simulated day, so (100,) is expected after the
# day 0-99 loop has run once.
test_AUC.shape

In [None]:
# Samples from the second distribution -  (day100-199)
# Columns 0 and 1 of data80 are used as input features, column 2 as the label.
new_samples = data80[:, [0, 1]]
new_labels = data80[:, 2]

# Only the held-out 20% is sampled below. The training part gets its own
# "80"-suffixed names so that X_train / y_train (the data `mlp` was fitted on)
# are not overwritten by this split.
X_train80, X_test80, y_train80, y_test80 = train_test_split(
    new_samples, new_labels, test_size=0.20, random_state=5
)

# `test_days` continues from the day 0-99 cell; the loop runs until day 200.
# Per-day values are gathered in a list and appended to test_AUC in one go.
drift_aucs = []
while test_days < 200:
    random_indices80 = np.random.choice(X_test80.shape[0], size=50, replace=False)
    test_samples80 = X_test80[random_indices80, :]
    test_labels80 = y_test80[random_indices80]

    # The classifier is NOT refitted: it is the model trained on data86.
    predict_test80 = mlp.predict(test_samples80)

    # NOTE(review): AUC is computed from hard class predictions, not scores;
    # kept as-is so all days in the series are measured the same way.
    AUC80 = roc_auc_score(test_labels80, predict_test80)
    drift_aucs.append(AUC80)
    test_days += 1

test_AUC = np.append(test_AUC, drift_aucs)

In [None]:
# Sanity check: (200,) is expected when the two day-loops above have each been
# run once, in order (100 values per loop).
test_AUC.shape

In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

import detecta
from detecta import detect_cusum

# Run CUSUM change detection on the daily AUC series, estimating change
# endings as well and plotting the result.
# Outputs, per the detecta documentation: ta = alarm indices, tai = change
# start indices, taf = change end indices, amp = change amplitudes.
ta, tai, taf, amp = detect_cusum(
    test_AUC,
    threshold=0.05,
    drift=0.08,
    ending=True,
    show=True,
)

In [None]:
# Simulate repeated runs of the scenario above: in every run, days 0-99 are
# sampled from the A86 held-out split and days 100-199 from the A80 held-out
# split. All runs are concatenated into a single AUC series.
# NOTE(review): the original header said 1000 runs but the loop executed 10;
# the executed value is kept - confirm which one was intended.
N_RUNS = 10
DAYS_PER_PHASE = 100


def _daily_aucs(model, features, labels, n_days, sample_size=50):
    """Return one AUC per simulated day for `n_days` days.

    Each day draws `sample_size` distinct rows from `features` / `labels`
    using NumPy's global random state, predicts them with `model` and scores
    the predictions with `roc_auc_score`.

    NOTE(review): the score is computed from hard class predictions
    (`model.predict`), not from probabilities; kept to match the other cells.
    """
    aucs = []
    for _ in range(n_days):
        picked = np.random.choice(features.shape[0], size=sample_size, replace=False)
        predicted = model.predict(features[picked, :])
        aucs.append(roc_auc_score(labels[picked], predicted))
    return aucs


all_aucs = []
for _ in range(N_RUNS):
    all_aucs.extend(_daily_aucs(mlp, X_test, y_test, DAYS_PER_PHASE))      # day0-99 from A86
    all_aucs.extend(_daily_aucs(mlp, X_test80, y_test80, DAYS_PER_PHASE))  # day100-199 from A80

# Replaces (does not extend) any test_AUC series built by earlier cells.
test_AUC = np.array(all_aucs)

In [None]:
# Sanity check: 10 runs x 200 days gives an expected shape of (2000,).
test_AUC.shape
# Write test_AUCs to npy
#np.save('test_AUC-Day0-2000.npy', test_AUC)

In [None]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

import detecta
from detecta import detect_cusum

# Run CUSUM change detection on the concatenated multi-run AUC series,
# estimating change endings as well and plotting the result.
# Outputs, per the detecta documentation: ta = alarm indices, tai = change
# start indices, taf = change end indices, amp = change amplitudes.
ta, tai, taf, amp = detect_cusum(
    test_AUC,
    threshold=0.09,
    drift=0.08,
    ending=True,
    show=True,
)