In [1]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

In [2]:
# Create the data stream from the CSV file at '../out.csv'; the cells below
# draw their samples from this `data_loader` object.
data_loader = FileStream(filepath='../out.csv')

In [3]:
# Request 50 samples from the stream and keep only the first element of the
# returned pair (presumably the feature rows, with the targets discarded —
# TODO confirm against FileStream.next_sample).
# NOTE(review): `ref_sample` is not referenced in the cells visible here —
# confirm it is still needed.
ref_sample, _ = data_loader.next_sample(50)
# Put the stream back to its starting state after consuming those samples.
# NOTE(review): the grid-search loop rewinds with `restart()` rather than
# `reset()` — confirm that `reset()` is the intended call here.
data_loader.reset()

In [4]:
# Search space for the grid search. The only dimension explored is the drift
# detector handed to the classifier: either none at all, or an ADWIN instance
# built with its default settings (ADWIN's `delta` is not searched).
parameters = dict(
    drift_detection_method=[None, ADWIN()],
)

# Expand the search space into the iterable of concrete parameter combinations.
adwin_grid = ParameterGrid(parameters)

In [5]:
# Grid search over the combinations in `adwin_grid`.
#
# Every combination is evaluated with `run_prequential` on `data_loader`, and
# the combination with the highest mean of accuracy, precision, recall and F1
# is remembered in `best_params` / `best_score` / `best_metrics`.
best_params = None
best_score = 0
best_metrics = []

# One record per evaluated combination, in evaluation order, so that all runs
# (not only the winner) can be inspected or tabulated afterwards.
results = []

for params in adwin_grid:
    print(f"Evaluating parameters: {params}")
    # Restart the stream before each run; intended so that every combination
    # is evaluated from the beginning of the data.
    data_loader.restart()

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=AdaptiveRandomForestClassifier(
            drift_detection_method=params['drift_detection_method'],
            warning_detection_method=None
        ),
        stream=data_loader,
        feature_selector=None,
        preq_samples=100000
    )

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
    print(f"Average processing time: {avg_processing_time}")

    # Single ranking score: the unweighted mean of the four quality metrics.
    avg_score = (accuracy + precision + recall + f1) / 4

    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

    # The first evaluated combination always becomes the incumbent, so a
    # winner is reported even when every score is 0; afterwards only a
    # strictly better score replaces it (ties keep the earlier combination).
    if best_params is None or avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

Evaluating parameters: {'drift_detection_method': None}
Accuracy: 0.999510, Precision: 0.999788, Recall: 0.999412, F1: 0.999600
Average processing time: 0.004015435868
Evaluating parameters: {'drift_detection_method': ADWIN(delta=0.002)}
Accuracy: 0.999490, Precision: 0.999820, Recall: 0.999347, F1: 0.999583
Average processing time: 0.005582031401000078
Grid Search Completed.
Best Parameters: {'drift_detection_method': None}
Best Metrics: [0.99951, 0.9997875330958063, 0.9994118511983532, 0.9995996568487274]
