In [1]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import OFS           # online feature methods
from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

import pandas as pd
import numpy as np

In [2]:
data_loader = FileStream(filepath='../merged.csv')

In [3]:
# base arf classifier
arf = AdaptiveRandomForestClassifier()

In [4]:
parameters = {
    'n_selected_features': [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 
}

ofs_grid = ParameterGrid(parameters)

In [5]:
best_params = None
best_score = 0
best_metrics = []

results = []

for params in ofs_grid:
    print(f"Evaluating parameters: {params}")
    data_loader.restart()
    ref_sample, _ = data_loader.next_sample(50)
    data_loader.reset()

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=arf,
        stream=data_loader,
        feature_selector = OFS(
                **params,
                n_total_features=data_loader.n_features,
                ref_sample=ref_sample,
                baseline='gaussian'
        ),
        preq_samples=100000
    )

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
    print(f"Average processing time: {avg_processing_time}")

    avg_score = (accuracy + precision + recall + f1) / 4

    if avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]
    
    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

Evaluating parameters: {'n_selected_features': 5}




Accuracy: 0.998870, Precision: 0.999179, Recall: 0.998560, F1: 0.998870
Average processing time: 0.006568743401000002
Evaluating parameters: {'n_selected_features': 10}




Accuracy: 0.998560, Precision: 0.998660, Recall: 0.998460, F1: 0.998560
Average processing time: 0.00666841868599971
Evaluating parameters: {'n_selected_features': 15}




Accuracy: 0.998590, Precision: 0.998680, Recall: 0.998500, F1: 0.998590
Average processing time: 0.006730426233999974
Evaluating parameters: {'n_selected_features': 20}




Accuracy: 0.998680, Precision: 0.998740, Recall: 0.998620, F1: 0.998680
Average processing time: 0.006619215756001508
Evaluating parameters: {'n_selected_features': 25}




Accuracy: 0.998660, Precision: 0.998959, Recall: 0.998360, F1: 0.998660
Average processing time: 0.006703484196999825
Evaluating parameters: {'n_selected_features': 30}




Accuracy: 0.998480, Precision: 0.998919, Recall: 0.998040, F1: 0.998479
Average processing time: 0.006751294878000267
Evaluating parameters: {'n_selected_features': 40}




Accuracy: 0.998500, Precision: 0.998699, Recall: 0.998300, F1: 0.998500
Average processing time: 0.006642826368999149
Evaluating parameters: {'n_selected_features': 50}




Accuracy: 0.998280, Precision: 0.998559, Recall: 0.998000, F1: 0.998280
Average processing time: 0.00671149655299977
Evaluating parameters: {'n_selected_features': 60}




Accuracy: 0.998420, Precision: 0.998540, Recall: 0.998300, F1: 0.998420
Average processing time: 0.006570774163001624
Evaluating parameters: {'n_selected_features': 70}




Accuracy: 0.998340, Precision: 0.998420, Recall: 0.998260, F1: 0.998340
Average processing time: 0.006577888462001028
Grid Search Completed.
Best Parameters: {'n_selected_features': 5}
Best Metrics: [0.99887, 0.9991794912845965, 0.99856, 0.9988696495913733]


In [6]:
results = pd.DataFrame(results)
results.to_csv('ofs.csv', index=False)