In [1]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import FIRES           # online feature methods
from skmultiflow.data import FileStream             # create stream from file
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

In [2]:
data_loader = FileStream(filepath='../out.csv')

In [3]:
# base arf classifier
arf = AdaptiveRandomForestClassifier()

In [4]:
ref_sample, _ = data_loader.next_sample(50)
data_loader.reset()

fires_fs = FIRES(n_total_features=data_loader.n_features,
                n_selected_features=10,
                classes=data_loader.target_values,
                ref_sample=ref_sample,
                baseline='gaussian')

print(ref_sample)

[[5.4865e+04 3.0000e+00 2.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.5054e+04 1.0900e+02 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.5055e+04 5.2000e+01 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 ...
 [8.6900e+03 1.7000e+01 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [8.6940e+03 1.2300e+02 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.7683e+04 4.0000e+00 2.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]]


In [8]:
parameters = {
    'penalty_s': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],    # default=0.01
    'penalty_r': [0.01, 0.1, 1],                # default=0.01
    'lr_mu': [0.01, 0.025, 0.1, 1],         # default=0.01
    'lr_sigma': [0.01, 0.025, 0.1, 1],      # default=0.01    
}

fires_grid = ParameterGrid(parameters)

In [None]:
best_params = None
best_score = 0
best_metrics = []

for params in fires_grid:
    print(f"Evaluating parameters: {params}")
    data_loader.restart()

    accuracy, precision, recall, f1 = run_prequential(
        classifier=arf,
        stream=data_loader,
        feature_selector = FIRES(
                **params,
                n_total_features=data_loader.n_features,
                n_selected_features=10,
                classes=data_loader.target_values,
                ref_sample=ref_sample
        ),
        n_pretrain=200
    )

    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    if (accuracy + precision + recall + f1) / 4 > best_score:
        best_params = params
        best_score = (accuracy + precision + recall + f1) / 4
        best_metrics = [accuracy, precision, recall, f1]
        

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

In [10]:
print(best_params)

{'baseline': 'gaussian', 'lr_mu': 0.01, 'lr_sigma': 0.01, 'penalty_r': 0.1, 'penalty_s': 0.0001}


In [11]:
print(best_metrics)

[0.99846, 0.9984742898999174, 0.9984395316250042, 0.9984567656765417]


In [14]:
print(len(fires_grid))

288
