## ARF Evaluations

In [1]:
from prequential import run_prequential

from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import FIRES, OFS           # online feature methods
from skmultiflow.data import FileStream                 # create stream from file

import pandas as pd

In [2]:
# Location of the input CSV, relative to the directory the notebook runs from.
DATA_PATH = '../merged_cesnet.csv'

# Stream object that every evaluation below reads its samples from.
data_loader = FileStream(filepath=DATA_PATH)

In [3]:
# Pull the first 50 feature rows; they are handed to FIRES/OFS as `ref_sample`
# for their "gaussian" baseline. The labels returned alongside are not needed.
ref_sample = data_loader.next_sample(50)[0]

# Put the stream back into its initial state so these 50 rows are not consumed
# (assumes reset() rewinds the stream — confirm against the scikit-multiflow API).
data_loader.reset()

In [4]:
# Fixed seed for every forest built from `arf_params`; without it the forest's
# random number generator is unseeded and no run can be reproduced exactly.
SEED = 42

# Adaptive Random Forest settings shared by all experiments. The forest's own
# drift/warning detectors are disabled here; cells that need them override
# these two keys on a copy of this dict.
arf_params = {
    'n_estimators': 6,
    'max_features': "auto",
    'drift_detection_method': None,
    'warning_detection_method': None,
    'grace_period': 25,
    'split_criterion': "gini",
    'split_confidence': 0.01,
    'tie_threshold': 0.01,
    'leaf_prediction': "nba",
    'random_state': SEED,
}

# FIRES feature selector settings. Feature count and class labels are read
# from the stream; `ref_sample` feeds the "gaussian" baseline.
fires_params = {
    'penalty_s': 0.1,
    'penalty_r': 1,
    'lr_mu': 0.025,
    'lr_sigma': 0.1,
    'n_total_features': data_loader.n_features,
    'n_selected_features': 10,
    'classes': data_loader.target_values,
    'baseline': "gaussian",
    'ref_sample': ref_sample,
}

# OFS feature selector settings.
# NOTE(review): OFS keeps 5 features while FIRES keeps 10 — confirm this
# asymmetry is intended, since the two selectors are compared side by side.
ofs_params = {
    'n_selected_features': 5,
    'n_total_features': data_loader.n_features,
    'baseline': "gaussian",
    'ref_sample': ref_sample,
}

### Without ADWIN

In [None]:
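# NOTE: commented-out draft, superseded by the live baseline run in the next cell; kept for reference only.
# # no feature selection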
# print("EVALUATING: ARF")
# accuracy, precision, recall, f1, auc, avg_processing_time, _ = run_prequential(
#     classifier=AdaptiveRandomForestClassifier(**arf_params), 
#     stream=data_loader, 
#     feature_selector=None, 
#     drift_detection=None, 
#     preq_samples=20000
# )
# print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
# print(f"Average processing time: {avg_processing_time}")

In [5]:
# Compare plain ARF against ARF preceded by each online feature selector,
# with no external drift detector.
# NOTE(review): all runs share the same `data_loader`; this assumes
# run_prequential rewinds the stream itself — confirm, otherwise each run is
# scored on a different slice of the data.

# Forest settings used whenever a feature selector runs in front of the
# classifier: identical to `arf_params` except that max_features is None
# instead of "auto".
new_arf_params = {**arf_params, 'max_features': None}

# (label, forest parameters, selector class or None, selector keyword arguments)
experiments = [
    ("ARF", arf_params, None, {}),
    ("ARF + FIRES", new_arf_params, FIRES, fires_params),
    ("ARF + OFS", new_arf_params, OFS, ofs_params),
]

for label, clf_params, selector_cls, selector_kwargs in experiments:
    print(f"EVALUATING: {label}")
    # run_prequential returns 7 values without a selector and 9 with one; only
    # the first six are reported here, so `*_` absorbs whatever follows.
    accuracy, precision, recall, f1, auc, avg_processing_time, *_ = run_prequential(
        classifier=AdaptiveRandomForestClassifier(**clf_params),
        stream=data_loader,
        # A fresh selector is built for each run, after the classifier.
        feature_selector=selector_cls(**selector_kwargs) if selector_cls is not None else None,
        drift_detection=None,
        preq_samples=20000
    )
    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
    print(f"Average processing time: {avg_processing_time}")

EVALUATING: ARF
Accuracy: 0.999050, Precision: 0.999100, Recall: 0.999000, F1: 0.999050, AUC: 0.999480
Average processing time: 0.0033508421700000133
EVALUATING: ARF + FIRES


  self._mu += self._lr_mu * np.mean(nabla_mu / marginal, axis=1)
  self._sigma += self._lr_sigma * np.mean(nabla_sigma / marginal, axis=1)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max 

Accuracy: 0.997650, Precision: 0.999298, Recall: 0.996000, F1: 0.997646, AUC: 0.998549
Average processing time: 0.01990865944999998
EVALUATING: ARF + OFS




Accuracy: 0.998450, Precision: 0.998899, Recall: 0.998000, F1: 0.998449, AUC: 0.999371
Average processing time: 0.019304809370000037


### With ADWIN

In [5]:
# Same three configurations as the "Without ADWIN" section, now with ADWIN
# enabled inside the forest (drift + warning detectors) and a separate ADWIN
# instance passed to run_prequential as the external drift detector.
# NOTE(review): all runs share the same `data_loader`; this assumes
# run_prequential rewinds the stream itself — confirm.
# NOTE(review): 0.9 is passed as ADWIN's first positional argument
# (presumably its delta); that is an unusually large value — confirm intent.

# Forest settings for the run without a feature selector.
adwin_arf_params = {
    **arf_params,
    'drift_detection_method': ADWIN(0.9),
    'warning_detection_method': ADWIN(0.9),
}

# Forest settings for the runs with a feature selector: max_features is None
# instead of "auto", and the forest gets its own fresh pair of detectors.
new_arf_params = {
    **arf_params,
    'max_features': None,
    'drift_detection_method': ADWIN(0.9),
    'warning_detection_method': ADWIN(0.9),
}

# (label, forest parameters, selector class or None, selector keyword arguments)
experiments = [
    ("ARF + ADWIN", adwin_arf_params, None, {}),
    ("ARF + ADWIN + FIRES", new_arf_params, FIRES, fires_params),
    ("ARF + ADWIN + OFS", new_arf_params, OFS, ofs_params),
]

for label, clf_params, selector_cls, selector_kwargs in experiments:
    print(f"EVALUATING: {label}")
    # run_prequential returns 7 values without a selector and 9 with one. The
    # first six are the reported metrics and the last is the list of detected
    # drift indices; `*_` absorbs anything in between.
    accuracy, precision, recall, f1, auc, avg_processing_time, *_, drift_idx_list = run_prequential(
        classifier=AdaptiveRandomForestClassifier(**clf_params),
        stream=data_loader,
        # A fresh selector is built for each run, after the classifier.
        feature_selector=selector_cls(**selector_kwargs) if selector_cls is not None else None,
        # A new external detector per run so no state carries over between runs.
        drift_detection=ADWIN(),
        preq_samples=20000
    )
    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
    print(f"Average processing time: {avg_processing_time}")
    print(drift_idx_list)

EVALUATING: ARF + ADWIN
Accuracy: 0.999500, Precision: 0.999600, Recall: 0.999400, F1: 0.999500, AUC: 0.999791
Average processing time: 0.0035860976350000195
[]
EVALUATING: ARF + ADWIN + FIRES


  self._mu += self._lr_mu * np.mean(nabla_mu / marginal, axis=1)
  self._sigma += self._lr_sigma * np.mean(nabla_sigma / marginal, axis=1)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max 

Accuracy: 0.998550, Precision: 0.999199, Recall: 0.997900, F1: 0.998549, AUC: 0.999296
Average processing time: 0.020878821550000173
[]
EVALUATING: ARF + ADWIN + OFS




Accuracy: 0.998700, Precision: 0.998999, Recall: 0.998400, F1: 0.998700, AUC: 0.999253
Average processing time: 0.02152077224999913
[]
