In [None]:
import copy

from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import OFS           # online feature methods
from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

import pandas as pd
import numpy as np

In [None]:
# Stream the dataset from a CSV file; the path is relative to the notebook's
# working directory. This stream object is reused by the grid-search cell.
# NOTE(review): which column(s) FileStream treats as the target is left at the
# library default here — confirm it matches the layout of merged_cesnet.csv.
data_loader = FileStream(filepath='../merged_cesnet.csv')

In [None]:
# Base Adaptive Random Forest classifier, built with the library's default
# hyperparameters (no arguments are overridden). It is the classifier used by
# the OFS grid search in this notebook.
arf = AdaptiveRandomForestClassifier()

In [None]:
# Search space for OFS: only the number of selected features is varied.
# Sizes go 5..30 in steps of 5, followed by 40, 50, 60 and 70.
parameters = dict(
    n_selected_features=[*range(5, 31, 5), 40, 50, 60, 70],
)

# Expand the search space into the list of concrete parameter combinations.
ofs_grid = ParameterGrid(param_grid=parameters)

In [None]:
# Grid search over the OFS settings in `ofs_grid`.
#
# Every configuration is evaluated with `run_prequential` and scored by the
# unweighted mean of accuracy, precision, recall and F1. The best-scoring
# configuration is tracked while looping, and one record per configuration is
# collected in `results` (in grid order).
best_params = None
best_score = 0
best_metrics = []

results = []

for params in ofs_grid:
    print(f"Evaluating parameters: {params}")

    # Read the first 50 samples of the stream as the reference sample handed to
    # OFS, then reset the stream before the actual evaluation.
    # NOTE(review): reset() is presumably what puts the stream back at its
    # first sample for the evaluation run — confirm against the stream API.
    data_loader.restart()
    ref_sample, _ = data_loader.next_sample(50)
    data_loader.reset()

    # Evaluate on a fresh copy of the base classifier. Passing `arf` itself
    # would let whatever it learned under one configuration carry over into the
    # next (unless run_prequential resets it internally), which would make the
    # configurations incomparable and the cell non-reproducible on re-run.
    classifier = copy.deepcopy(arf)

    feature_selector = OFS(
        **params,
        n_total_features=data_loader.n_features,
        ref_sample=ref_sample,
        baseline='gaussian'
    )

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=classifier,
        stream=data_loader,
        feature_selector=feature_selector,
        preq_samples=75000
    )

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
    print(f"Average processing time: {avg_processing_time}")

    # Single scalar used to rank configurations.
    avg_score = (accuracy + precision + recall + f1) / 4

    # Strict '>' keeps the earliest configuration when scores tie.
    if avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]

    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

In [None]:
# Turn the per-configuration records into a table and write it to 'ofs.csv'
# in the current working directory, without the index column.
results = pd.DataFrame(data=results)
results.to_csv(path_or_buf='ofs.csv', index=False)