In [1]:
from detectors.FEDD import FEDD
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
def insert_value_to_array(array, value):
    '''
    Slide a fixed-length window one step: drop its oldest sample and append a new one.

    The window may be given either as a flat sequence of n samples or as a
    single-row 2-D array of shape (1, n) (the shape this function returns), so
    the result of one call can be fed straight into the next call.

    The input is never modified; a new array is always returned.

    :param: array: the current window (1-D sequence, or 2-D array with exactly one row)
    :param: value: the new sample to append at the end of the window
    :return: a new numpy array of shape (1, n) holding the shifted window
    :raises IndexError: if the window is empty
    :raises ValueError: if the window is neither 1-D nor a single row
    '''
    series = np.asarray(array)

    # Accept the (1, n) shape produced by a previous call.
    if series.ndim == 2 and series.shape[0] == 1:
        series = series[0]
    elif series.ndim != 1:
        raise ValueError(
            "window must be 1-D or have shape (1, n), got shape %s" % (series.shape,)
        )

    if series.size == 0:
        raise IndexError("cannot insert a value into an empty window")

    # Allocate a fresh buffer so the caller's data is left untouched, and
    # promote the dtype so a float sample is not truncated by an integer window.
    shifted = np.empty(series.size, dtype=np.result_type(series, value))
    shifted[:-1] = series[1:]
    shifted[-1] = value

    return shifted.reshape(1, -1)

# Reading Data

Training data is the initial reference data.

In [3]:
# Load the reference (training) series and keep every column whose name
# does not start with "Unnamed".
data_train = (
    pd.read_csv('./datasets/train/real_53.csv')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)
data_train

Unnamed: 0,timestamp,value,is_anomaly
0,1,39.234298,0
1,2,39.191655,0
2,3,39.322198,0
3,4,40.124975,0
4,5,39.485182,0
...,...,...,...
725,726,38.147664,0
726,727,37.778743,0
727,728,37.720224,0
728,729,37.856228,0


Testing data is the data that we compare against the reference data.

In [4]:
# Load the test series and keep every column whose name does not start
# with "Unnamed".
data_test = (
    pd.read_csv('./datasets/test/real_53.csv')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)
data_test

Unnamed: 0,timestamp,value,is_anomaly
0,731,35.613821,0
1,732,34.576222,0
2,733,34.067086,0
3,734,32.971614,0
4,735,32.782703,0
...,...,...,...
726,1457,21.622908,1
727,1458,23.093519,1
728,1459,22.667180,1
729,1460,23.114132,1


Extract the time-series (TS) values used for testing.

Testing values -> stream

In [5]:
# The test samples, in order, as a plain Python list; this is the stream
# that Execute() feeds to the detector one sample at a time.
stream = list(data_test['value'])

In [6]:
# Sanity check: number of samples in the test stream.
print('TEST DATA LEN',len(stream))

TEST DATA LEN 731


In [7]:
# Adjustable parameters
# Lambda, w and c are passed, in this order, to FEDD(Lambda, w, c) inside Execute().
Lambda = 0.2 # higher values give more importance to recent values, but are more vulnerable to noise and outliers
w = 0.25 # presumably the warning (alert) threshold level -- TODO confirm against detectors.FEDD
c = 0.25 # presumably the change (drift) threshold level -- TODO confirm against detectors.FEDD
n = 300 # number of stream samples to keep sliding the window after a drift before the detector is re-initialised (see Execute)

In [8]:
def _init_detector(series, margin=3):
    '''
    Create a fresh FEDD detector and initialise it from a reference series.

    The reference window is `series` without its last `margin` samples. Its
    feature vector is compared with the feature vectors of the same-length
    windows shifted forward by 1 .. margin-1 samples; the mean and standard
    deviation of those distances are stored in the detector together with
    the reference feature vector.

    Reads the module-level parameters Lambda, w and c.

    :param: series: the reference time series (pandas Series or 1-D numpy array)
    :param: margin: number of trailing samples left out of the reference window
    :return: the initialised FEDD detector
    '''
    fedd = FEDD(Lambda, w, c)
    length = len(series)

    # features of the reference window
    feature_vector_0 = fedd.FE(series[:length - margin])

    # distance from the reference window to each shifted window
    distances_array = [
        fedd.compute_distance(
            feature_vector_0,
            fedd.FE(series[offset:length - margin + offset]),
        )
        for offset in range(1, margin)
    ]

    # initializing M0
    fedd.storing_agv_dev(feature_vector_0, np.mean(distances_array), np.std(distances_array))

    return fedd


def Execute(grafico=None):
    '''
    Run FEDD drift detection over the test stream, one sample at a time.

    Reads the module-level names data_train (initial reference data), stream
    (test samples), n (samples to wait after a drift before re-initialising)
    and, through _init_detector, Lambda, w and c.

    :param: grafico: when truthy, print a line for every warning and every detected change
    :return: a tuple (indexes_drifts, indexes_alarms) with the stream indexes
             at which a change and a warning were reported, respectively
    '''
    final = len(data_train)

    print(final)

    # Detector initialization on the initial reference data
    fedd = _init_detector(data_train.value)

    drift_occured = False # indicates that a drift was detected and re-initialisation is pending

    reference_data = list(data_train.value) # the sliding window, seeded with the reference data

    indexes_drifts = [] # stream indexes at which a change was reported
    indexes_alarms = [] # stream indexes at which a warning was reported

    # this method works in an online fashion (it has a sliding window of one sample)
    # NOTE(review): the loop starts at index 1, so stream[0] is never fed to the window -- confirm this is intended
    for i in tqdm(range(1, len(stream))):

        if not drift_occured:

            # Sliding window: append the new sample and forget the oldest one
            reference_data = insert_value_to_array(reference_data, stream[i])

            # extracting features from the current window
            current_feature_vector = fedd.FE(reference_data[0])

            # distance between the reference features and the current features
            distance = fedd.compute_distance(fedd.initial_feature_vector, current_feature_vector)

            # updating the distance average and standard deviation of ewma
            # NOTE(review): the step passed here keeps counting from the start of the
            # stream even after a re-initialisation -- confirm update_ewma expects that
            fedd.update_ewma(distance, i+1)

            # monitor changes
            string_fedd = fedd.monitoring()

            if string_fedd == fedd.alert:
                if grafico:
                    print("[%d] Warning" % (i))
                indexes_alarms.append(i)

            if string_fedd == fedd.change:
                if grafico:
                    print("[%d] Change detected" % (i))
                indexes_drifts.append(i)

                drift_occured = True

        elif i < indexes_drifts[-1] + n:

            # after a drift, keep sliding the window for n samples without monitoring,
            # to avoid including too much of the old concept in the new reference
            reference_data = insert_value_to_array(reference_data, stream[i])

        else:

            # re-initialise the detector on the current window and resume monitoring
            # NOTE(review): stream[i] is not inserted into the window on this iteration -- confirm this is intended
            fedd = _init_detector(reference_data[0])

            drift_occured = False

    return indexes_drifts, indexes_alarms

In [12]:
# Run the detector over the test stream, printing each warning / change as it is found.
detected, alarms = Execute(grafico = True)

730


 31%|██████████████████████████████████████████████████████▍                                                                                                                         | 226/730 [00:00<00:00, 655.77it/s]

[120] Change detected


 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                        | 627/730 [00:00<00:00, 931.93it/s]

[426] Change detected


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 730/730 [00:00<00:00, 783.26it/s]


In [13]:
# Stream indexes at which Execute() reported a change.
detected

[120, 426]

In [15]:
# Stream indexes at which Execute() reported a warning.
alarms

[]