## ARF Evaluations

In [None]:
from prequential import run_prequential

from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import FIRES, OFS           # online feature methods
from skmultiflow.data import FileStream             # create stream from file

import pandas as pd

In [None]:
# Wrap the merged_cesnet.csv file in a scikit-multiflow FileStream. This single
# stream object is the data source for every evaluation in this notebook.
DATASET_PATH = '../merged_cesnet.csv'
data_loader = FileStream(filepath=DATASET_PATH)

In [None]:
# Pull the first 50 rows of features to use as the reference sample that is
# handed to FIRES and OFS through their 'ref_sample' argument; the second
# element returned by next_sample() is not needed here.
ref_sample, _ = data_loader.next_sample(50)

# Rewind the stream so the evaluations start again from the first row.
# restart() only moves the read position back to the beginning, whereas the
# generic reset() re-runs the constructor (and so presumably re-reads the CSV).
data_loader.restart()

In [None]:
# Keyword arguments for AdaptiveRandomForestClassifier. Individual runs override
# some of these entries (e.g. disabling the detectors) with a dict merge.
arf_params = {
    # Ensemble size and per-split feature sub-sampling.
    'n_estimators': 6,
    'max_features': "auto",
    # Detectors used inside the forest.
    # NOTE(review): warning and drift detection use the same ADWIN argument
    # (0.9), so both presumably trigger at the same time - confirm intended.
    'drift_detection_method': ADWIN(0.9),
    'warning_detection_method': ADWIN(0.9),
    # Settings of the individual trees.
    'grace_period': 25,
    'split_criterion': "gini",
    'split_confidence': 0.01,
    'tie_threshold': 0.01,
    'leaf_prediction': "nba",
}

# Settings that FIRES and OFS have in common: the dimensionality of the stream
# and the baseline configuration built from the reference sample.
_selector_shared = {
    'n_total_features': data_loader.n_features,
    'baseline': "gaussian",
    'ref_sample': ref_sample,
}

# Keyword arguments for the FIRES feature selector (keeps 10 features).
fires_params = {
    'penalty_s': 0.1,
    'penalty_r': 1,
    'lr_mu': 0.025,
    'lr_sigma': 0.1,
    'n_selected_features': 10,
    'classes': data_loader.target_values,
    **_selector_shared,
}

# Keyword arguments for the OFS feature selector (keeps 5 features).
ofs_params = {
    'n_selected_features': 5,
    **_selector_shared,
}

### Without ADWIN

In [None]:
# Baseline evaluations: ARF with its internal drift/warning detectors switched
# off and no external drift detector, run once without feature selection, once
# with FIRES and once with OFS.
detectors_off = {'drift_detection_method': None, 'warning_detection_method': None}

# One row per evaluation:
# (label, extra ARF overrides, feature-selector class, selector kwargs).
# The runs that use a feature selector also set 'max_features' to None.
runs = [
    ("ARF", {}, None, None),
    ("ARF + FIRES", {'max_features': None}, FIRES, fires_params),
    ("ARF + OFS", {'max_features': None}, OFS, ofs_params),
]

for label, extra_overrides, selector_cls, selector_kwargs in runs:
    print(f"EVALUATING: {label}")

    # All runs share one stream object. Rewind it explicitly so that
    # n_remaining_samples() below covers the whole file no matter where the
    # previous run left the read position (run_prequential's own handling of
    # the stream position is not visible from this notebook).
    data_loader.restart()

    # A fresh selector per run; None means "no feature selection".
    feature_selector = None if selector_cls is None else selector_cls(**selector_kwargs)

    # run_prequential returns more values when a feature selector is used
    # (7 without, 9 with, going by the original call sites); only the first
    # six are reported here, so the remainder is discarded.
    accuracy, precision, recall, f1, auc, avg_processing_time, *_ = run_prequential(
        classifier=AdaptiveRandomForestClassifier(
            **(arf_params | detectors_off | extra_overrides)
        ),
        stream=data_loader,
        feature_selector=feature_selector,
        drift_detection=None,
        preq_samples=data_loader.n_remaining_samples(),
    )
    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
    print(f"Average processing time: {avg_processing_time}")

### With ADWIN

In [None]:
# Evaluations with drift detection enabled: ARF keeps the detectors configured
# in arf_params and run_prequential additionally receives a fresh ADWIN
# instance. Run once without feature selection, once with FIRES, once with OFS.
#
# NOTE(review): unlike the "Without ADWIN" runs, 'max_features' is left at the
# arf_params value ("auto") for the FIRES/OFS runs here - confirm intended.

# One row per evaluation: (label, feature-selector class, selector kwargs).
runs = [
    ("ARF + ADWIN", None, None),
    ("ARF + ADWIN + FIRES", FIRES, fires_params),
    ("ARF + ADWIN + OFS", OFS, ofs_params),
]

for label, selector_cls, selector_kwargs in runs:
    print(f"EVALUATING: {label}")

    # All runs share one stream object. Rewind it explicitly so that
    # n_remaining_samples() below covers the whole file no matter where the
    # previous run left the read position (run_prequential's own handling of
    # the stream position is not visible from this notebook).
    data_loader.restart()

    # A fresh selector per run; None means "no feature selection".
    feature_selector = None if selector_cls is None else selector_cls(**selector_kwargs)

    # run_prequential returns more values when a feature selector is used
    # (7 without, 9 with, going by the original call sites). The six metrics
    # come first and the list of detected drift indices comes last in both
    # cases; anything in between is discarded.
    (
        accuracy, precision, recall, f1, auc, avg_processing_time,
        *_, drift_idx_list,
    ) = run_prequential(
        classifier=AdaptiveRandomForestClassifier(**arf_params),
        stream=data_loader,
        feature_selector=feature_selector,
        drift_detection=ADWIN(),
        preq_samples=data_loader.n_remaining_samples(),
    )
    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
    print(f"Average processing time: {avg_processing_time}")
    print(drift_idx_list)