### Import libraries

In [6]:
import time
import json
from capymoa.stream import NumpyStream
from src import iids_util
from typing import List, Dict, Union, Literal
from base import util
import pandas as pd
import os




### Loading sample datasets

In [11]:
# Check whether 'precalc/edge_fridge_StandardScaler_60.npz' is present on disk
# (presumably a cached, pre-normalized dataset — confirm against iids_util).
os.path.exists('precalc/edge_fridge_StandardScaler_60.npz')

False

In [5]:
# Number of distinct elements of x that do not occur in y.
# NOTE(review): x and y are not defined in the visible part of the notebook.
len(set(x) - set(y))

0

In [None]:
# Load the 'modbus' edge dataset through the project helper.
# NOTE(review): sample_size=.1 presumably selects a 10% sample — confirm
# against iids_util.loading_edge_dataset.
feature, target, header = iids_util.loading_edge_dataset(
    base_device='modbus',
    all_in_fusion=True,
    load_all=False,
    low_memory=False,
    sample_size=.1,
    random_seed=80,
)

# Normalize the features with the project's online normalizer
# (StandardScaler, window of 60).
scaled = iids_util.online_normalization(
    data=feature,
    window_size=60,
    scaler_model='StandardScaler',
)

# Map the labels to a binary target, with 'normal' as class 0.
target = iids_util.map_as_binary_class(ntarget=target, class_0='normal')

feature dtype:  float64


### Convert the dataset into a stream

In [3]:

# Wrap the normalized features and the binary targets in a CapyMOA NumpyStream
# so the simulation functions can consume them one instance at a time.
stream_cls = NumpyStream(X=scaled, y=target)


### Define the simulation functions

In [None]:
# Function for Classifier Models
def run_classifier(stream_cls: NumpyStream,
                   model: str,
                   random_seed: int = 1,
                   time_limit: int = 120):
    """Evaluate one classifier on the stream with a test-then-train loop.

    For every instance the learner first predicts and is then trained on
    that same instance. When the learner returns ``None``, the majority
    class among the predictions recorded so far is used instead (ties go
    to class 0). An instance is recorded only when prediction and training
    both succeed; otherwise it is reported, counted as an error and
    skipped, so ``instance_seen`` always equals the number of evaluated
    labels.

    Args:
        stream_cls: Stream to evaluate on; it is restarted before the loop.
        model: Name handed to ``iids_util.load_classifier_model`` as
            ``method``.
        random_seed: Seed forwarded to the model loader.
        time_limit: Wall-clock budget in seconds for the loop. It is
            checked after each instance, so the run may overshoot by the
            duration of one instance.

    Returns:
        The result of ``iids_util.evaluation_metrics`` for the recorded
        labels, extended with the keys ``'instance_seen'`` and
        ``'runtime'`` (seconds, rounded to 3 decimals).
    """
    learner = iids_util.load_classifier_model(stream=stream_cls,
                                              method=model,
                                              random_seed=random_seed)

    stream_cls.restart()
    instance_seen = 0  # instances that were predicted AND trained on
    err_instances = 0  # instances skipped because of an exception
    class_0_count = 0  # recorded predictions of class 0
    class_1_count = 0  # recorded predictions of class 1
    y_true = []  # ground-truth labels
    y_pred = []  # labels predicted by the model
    start = time.time()  # the clock starts with the test-then-train loop

    while stream_cls.has_more_instances():
        curr_ins = stream_cls.next_instance()
        try:
            # test
            predict = learner.predict(curr_ins)
            if predict is None:
                # No prediction available: fall back to the majority class.
                predict = 1 if class_1_count > class_0_count else 0
            label = curr_ins.y_index
            # train
            learner.train(curr_ins)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still stop the run instead of being counted as
            # faulty instances.
            print("Error Instance:", curr_ins, end='\r', flush=True)
            err_instances += 1
        else:
            # Record the result only after both steps succeeded.
            if predict == 0:
                class_0_count += 1
            elif predict == 1:
                class_1_count += 1
            y_true.append(label)
            y_pred.append(predict)
            instance_seen += 1

        if instance_seen % 100 == 0 or not stream_cls.has_more_instances():
            meter = instance_seen / stream_cls._len
            msg = util.progress_meter(progress=meter)
            print(f"{msg}. {model}: instance_seen:{instance_seen:,}. Error instances: {err_instances:,}", end='\r', flush=True)

        # Stop early once the time budget is exhausted.
        if time.time() - start > time_limit:
            break

    runtime = time.time() - start
    evaluator = iids_util.evaluation_metrics(y_pred=y_pred, y_true=y_true)
    evaluator.update({'instance_seen': instance_seen})
    evaluator.update({'runtime': round(runtime, 3)})

    return evaluator

# Function for Anomaly Detector
def run_detector(stream_cls: NumpyStream,
                 model: str,
                 random_seed: int = 1,
                 time_limit: int = 120):
    """Evaluate one anomaly detector on the stream with a test-then-train loop.

    For every instance the detector first scores it, the score is turned
    into a label through ``iids_util.proba_prediction_rules`` and
    ``iids_util.voting_decision``, and the detector is then trained on the
    instance. An instance that raises during any of these steps is
    reported, counted as an error and skipped.

    Args:
        stream_cls: Stream to evaluate on; it is restarted before the loop.
        model: Name handed to ``iids_util.load_anomaly_model`` as
            ``method``.
        random_seed: Seed forwarded to the model loader.
        time_limit: Wall-clock budget in seconds for the loop. It is
            checked after each instance, so the run may overshoot by the
            duration of one instance.

    Returns:
        The result of ``iids_util.evaluation_metrics`` for the recorded
        labels, extended with the keys ``'instance_seen'`` and
        ``'runtime'`` (seconds, rounded to 3 decimals).
    """
    learner = iids_util.load_anomaly_model(stream=stream_cls,
                                           method=model,
                                           random_seed=random_seed)
    stream_cls.restart()
    instance_seen = 0  # instances that were scored AND trained on
    err_instances = 0  # instances skipped because of an exception
    y_true = []  # ground-truth labels
    y_pred = []  # labels predicted by the model
    start = time.time()  # the clock starts with the test-then-train loop

    while stream_cls.has_more_instances():
        # Fetch outside the try block so the error handler below always
        # refers to the instance that actually failed (never an unbound or
        # stale one).
        curr_ins = stream_cls.next_instance()
        try:
            # test
            score = learner.score_instance(curr_ins)
            # The same score is passed three times.
            # NOTE(review): presumably proba_prediction_rules expects one
            # score per rule/model — confirm against iids_util.
            y_scores = [score, score, score]
            y_models = iids_util.proba_prediction_rules(nscore=y_scores)
            y_predict = iids_util.voting_decision(npredicts=y_models)
            label = curr_ins.y_index
            # train
            learner.train(curr_ins)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still stop the run instead of being counted as
            # faulty instances.
            print("Error Instance:", curr_ins, end='\r', flush=True)
            err_instances += 1
        else:
            # update results
            y_true.append(label)
            y_pred.append(y_predict)
            instance_seen += 1

        if instance_seen % 100 == 0 or not stream_cls.has_more_instances():
            meter = instance_seen / stream_cls._len
            msg = util.progress_meter(progress=meter)
            print(f"{msg}. {model}: instance_seen:{instance_seen:,}. Error Instances: {err_instances:,}", end='\r', flush=True)

        # Stop early once the time budget is exhausted.
        if time.time() - start > time_limit:
            break

    runtime = time.time() - start
    evaluator = iids_util.evaluation_metrics(y_pred=y_pred, y_true=y_true)
    evaluator.update({'instance_seen': instance_seen})
    evaluator.update({'runtime': round(runtime, 3)})

    return evaluator

### Run simulations - Classifier Models
The available models are inherited from CapyMOA.classifier.

In [None]:
# Names of the classifiers to benchmark; each one is passed to
# run_classifier as `model`.
classifier_models = [
    "AdaptiveRandomForestClassifier", "DynamicWeightedMajority", "EFDT",
    "HoeffdingAdaptiveTree", "KNN", "LeveragingBagging", "NaiveBayes",
    "OnlineAdwinBagging", "OnlineBagging", "OnlineSmoothBoost",
    "OzaBoost", "PassiveAggressiveClassifier", "SGDClassifier",
    "StreamingGradientBoostedTrees", "StreamingRandomPatches",
    "HoeffdingTree",
]

# Create the output directory up front: each model may run for up to
# `time_limit` seconds, so a missing directory must not be discovered only
# when the results are written.
os.makedirs('output', exist_ok=True)

# Maps model name -> evaluation metrics returned by run_classifier.
classifier_evaluator = {}

for model in classifier_models:
    classifier_evaluator[model] = run_classifier(stream_cls=stream_cls,
                                                 model=model,
                                                 random_seed=80,
                                                 time_limit=120)

# Persist the raw metrics as JSON.
eval_fname = 'output/Classifier_Evaluation_Table.json'
with open(eval_fname, 'w') as file:
    json.dump(classifier_evaluator, file)

# Build a table with one row per model and one column per metric.
c_table = pd.DataFrame()
for model, model_metrics in classifier_evaluator.items():
    for metrics, value in model_metrics.items():
        c_table.loc[model, metrics] = value

# Rank the classifiers by their 'MCC' column, best first.
c_table = c_table.sort_values('MCC', ascending=False).reset_index(names='Classifier')
c_table

### Run simulations - Anomaly Models
The available models are inherited from CapyMOA.anomaly.

In [None]:
# Names of the anomaly detectors to benchmark; each one is passed to
# run_detector as `model`.
anomaly_models = [
    "Autoencoder", "HalfSpaceTrees", "OnlineIsolationForest",
    "StreamRHF", "StreamingIsolationForest", "RobustRandomCutForest",
    "AdaptiveIsolationForest",
]

# Create the output directory up front: each model may run for up to
# `time_limit` seconds, so a missing directory must not be discovered only
# when the results are written.
os.makedirs('output', exist_ok=True)

# Maps model name -> evaluation metrics returned by run_detector.
detector_evaluator = {}
stream_cls.restart()

for model in anomaly_models:
    detector_evaluator[model] = run_detector(stream_cls=stream_cls,
                                             model=model,
                                             random_seed=80,
                                             time_limit=120)

# Persist the raw metrics as JSON.
eval_fname = 'output/Anomaly_Evaluation_Table.json'
with open(eval_fname, 'w') as file:
    json.dump(detector_evaluator, file)

# Build a table with one row per model and one column per metric.
a_table = pd.DataFrame()
for model, model_metrics in detector_evaluator.items():
    for metrics, value in model_metrics.items():
        a_table.loc[model, metrics] = value

# Rank the detectors by their 'MCC' column, best first.
a_table = a_table.sort_values('MCC', ascending=False).reset_index(names='Detector')
a_table