In [3]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import FIRES           # online feature methods
from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

import pandas as pd

In [4]:
# Stream the merged dataset from disk; every later cell reads its samples,
# feature count and target values from this FileStream.
data_loader = FileStream(
    filepath='../merged.csv',
)

In [5]:
# Take the first 50 feature rows of the stream as the reference sample that is
# later handed to FIRES (`ref_sample=`); the matching labels are discarded.
ref_sample, _ = data_loader.next_sample(50)

# Rewind the stream so those 50 samples are not consumed for later runs.
# `restart()` is the same stream-rewind call the grid-search loop uses before
# every evaluation; using it here keeps the notebook on one rewind mechanism.
data_loader.restart()

In [None]:
# FIRES search space: 5 values of penalty_s x 3 values of penalty_r, with both
# learning rates pinned to a single value each (15 combinations in total).
parameters = dict(
    penalty_s=[0.00001, 0.0001, 0.001, 0.01, 0.1],  # default=0.01
    penalty_r=[0.01, 0.1, 1],                       # default=0.01
    lr_mu=[1],                                      # default=0.01
    lr_sigma=[0.1],                                 # default=0.01
)

# Iterable over every combination of the values above, one dict per combination.
fires_grid = ParameterGrid(param_grid=parameters)

In [7]:
# Warm-up: one very short prequential run (10 samples) with default FIRES
# hyperparameters, executed before any memory measurement is taken.
# NOTE(review): presumably this keeps first-call overhead out of the measured
# runs -- confirm against how run_prequential is profiled.
run_prequential(
    classifier=AdaptiveRandomForestClassifier(),
    stream=data_loader,
    feature_selector=FIRES(
        n_total_features=data_loader.n_features,
        n_selected_features=10,
        classes=data_loader.target_values,
        baseline='gaussian',
        ref_sample=ref_sample,
    ),
    preq_samples=10,
)

(1.0, 0.0, 0.0, 0.0, 0.00858628999976645)

In [None]:
# Exhaustive grid search over the FIRES hyperparameter combinations in
# `fires_grid`. Every combination is evaluated with run_prequential using a
# fresh AdaptiveRandomForestClassifier and a fresh FIRES selector, and ranked
# by the unweighted mean of accuracy, precision, recall and F1.
best_params = None
# Start below any attainable score so the first evaluated combination is always
# recorded as the current best, even when all of its metrics are 0.
best_score = float("-inf")
best_metrics = []

# One record per evaluated combination; written to CSV in a later cell.
results = []

for params in fires_grid:
    print(f"Evaluating parameters: {params}")
    # Rewind so each combination starts from the beginning of the stream.
    data_loader.restart()

    # Memory profiling is not enabled here. To measure it, import tracemalloc,
    # call tracemalloc.start() before run_prequential and read
    # tracemalloc.get_traced_memory() (then tracemalloc.stop()) after it.
    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=AdaptiveRandomForestClassifier(),
        stream=data_loader,
        feature_selector=FIRES(
            **params,
            n_total_features=data_loader.n_features,
            n_selected_features=10,
            classes=data_loader.target_values,
            baseline='gaussian',
            ref_sample=ref_sample,
        ),
        preq_samples=50000,
    )

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
    print(f"Average processing time: {avg_processing_time}")

    # Single ranking score: plain average of the four classification metrics.
    avg_score = (accuracy + precision + recall + f1) / 4

    # Strict '>' keeps the earliest combination when two scores tie.
    if avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]

    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time,
    })

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

Evaluating parameters: {'lr_mu': 1, 'lr_sigma': 0.01, 'penalty_r': 0.01, 'penalty_s': 1e-05}
Accuracy: 0.998670, Precision: 0.998820, Recall: 0.998520, F1: 0.998670
Average processing time: 0.0075901929559990455
Evaluating parameters: {'lr_mu': 1, 'lr_sigma': 0.01, 'penalty_r': 0.01, 'penalty_s': 0.0001}
Accuracy: 0.998630, Precision: 0.998820, Recall: 0.998440, F1: 0.998630
Average processing time: 0.0075471270030049165
Evaluating parameters: {'lr_mu': 1, 'lr_sigma': 0.01, 'penalty_r': 0.01, 'penalty_s': 0.001}
Accuracy: 0.998770, Precision: 0.998780, Recall: 0.998760, F1: 0.998770
Average processing time: 0.007136718330997883
Evaluating parameters: {'lr_mu': 1, 'lr_sigma': 0.01, 'penalty_r': 0.01, 'penalty_s': 0.01}
Accuracy: 0.998910, Precision: 0.999120, Recall: 0.998700, F1: 0.998910
Average processing time: 0.0070604685289998815
Evaluating parameters: {'lr_mu': 1, 'lr_sigma': 0.01, 'penalty_r': 0.01, 'penalty_s': 0.1}
Accuracy: 0.998640, Precision: 0.998760, Recall: 0.998520, F1:

In [None]:
# Persist the grid-search records: one row per evaluated parameter combination,
# written without the DataFrame index column.
results_df = pd.DataFrame(data=results)
results_df.to_csv(
    path_or_buf="fires-tuning.csv",
    index=False,
)