In [4]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import OFS           # online feature methods
from skmultiflow.data import FileStream             # create stream from file
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

In [6]:
data_loader = FileStream(filepath='../out.csv')

In [7]:
# base arf classifier
arf = AdaptiveRandomForestClassifier()

In [16]:
ref_sample, _ = data_loader.next_sample(50)
data_loader.reset()

ofs_fs = OFS(n_total_features=data_loader.n_features,
                n_selected_features=7,
                ref_sample=ref_sample,
                baseline='gaussian')

print(ref_sample)

[[5.4865e+04 3.0000e+00 2.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.5054e+04 1.0900e+02 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.5055e+04 5.2000e+01 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 ...
 [8.6900e+03 1.7000e+01 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [8.6940e+03 1.2300e+02 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.7683e+04 4.0000e+00 2.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]]


In [12]:
parameters = {
    'n_selected_features': [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 
}

fires_grid = ParameterGrid(parameters)

In [18]:
best_params = None
best_score = 0
best_metrics = []

for params in fires_grid:
    print(f"Evaluating parameters: {params}")
    data_loader.restart()

    accuracy, precision, recall, f1 = run_prequential(
        classifier=arf,
        stream=data_loader,
        feature_selector = OFS(
                **params,
                n_total_features=data_loader.n_features,
                ref_sample=ref_sample,
                baseline='gaussian'
        ),
        n_pretrain=200
    )

    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    if (accuracy + precision + recall + f1) / 4 > best_score:
        best_params = params
        best_score = (accuracy + precision + recall + f1) / 4
        best_metrics = [accuracy, precision, recall, f1]
        

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

Evaluating parameters: {'n_selected_features': 5}




Accuracy: 0.9982, Precision: 0.9980, Recall: 0.9982, F1: 0.9981
Evaluating parameters: {'n_selected_features': 10}




Accuracy: 0.9985, Precision: 0.9982, Recall: 0.9988, F1: 0.9985
Evaluating parameters: {'n_selected_features': 15}




Accuracy: 0.9983, Precision: 0.9985, Recall: 0.9980, F1: 0.9983
Evaluating parameters: {'n_selected_features': 20}




Accuracy: 0.9988, Precision: 0.9990, Recall: 0.9986, F1: 0.9988
Evaluating parameters: {'n_selected_features': 25}




Accuracy: 0.9989, Precision: 0.9992, Recall: 0.9986, F1: 0.9989
Evaluating parameters: {'n_selected_features': 30}




Accuracy: 0.9990, Precision: 0.9999, Recall: 0.9979, F1: 0.9989
Evaluating parameters: {'n_selected_features': 40}


  self.mdbl_width += self.width


Accuracy: 0.9993, Precision: 0.9999, Recall: 0.9987, F1: 0.9993
Evaluating parameters: {'n_selected_features': 50}


  incremental_variance = node.get_variance(0) + n1 * self._width * (u1 - self._total / self._width) * \


Accuracy: 0.9994, Precision: 0.9999, Recall: 0.9987, F1: 0.9993
Evaluating parameters: {'n_selected_features': 60}




Accuracy: 0.9997, Precision: 1.0000, Recall: 0.9994, F1: 0.9997
Evaluating parameters: {'n_selected_features': 70}


  incremental_variance = node.get_variance(0) + n1 * self._width * (u1 - self._total / self._width) * \


Accuracy: 0.9998, Precision: 1.0000, Recall: 0.9995, F1: 0.9998
Grid Search Completed.
Best Parameters: {'n_selected_features': 70}
Best Metrics: [0.99978, 1.0, 0.9995390739576786, 0.9997694838533917]


In [19]:
print(best_params)
print(best_metrics)
print(len(fires_grid))

{'n_selected_features': 70}
[0.99978, 1.0, 0.9995390739576786, 0.9997694838533917]
10
