In [1]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential
import pandas as pd

In [2]:
# Wrap the merged CSV in a scikit-multiflow FileStream so later cells can pull
# samples from it incrementally.
stream_path = '../merged_cesnet.csv'
data_loader = FileStream(filepath=stream_path)

In [3]:
# Pull the first 50 samples as a reference batch (the second return value is
# discarded), then rewind the stream so later cells start from the first sample.
# restart() is the same rewind call the grid-search loop issues before every
# run, so the stream is handled one way throughout the notebook.
# NOTE(review): reset() appears to be the generic estimator re-initialiser rather
# than a stream rewind — confirm against the scikit-multiflow docs.
ref_sample, _ = data_loader.next_sample(50)
data_loader.restart()

In [4]:
# Candidate delta values handed to the two ADWIN detectors of the forest.
# ParameterGrid expands them into every (drift_delta, warning_delta) pairing,
# i.e. 6 x 6 = 36 configurations.
parameters = dict(
    drift_delta=[1e-3, 1e-2, 1e-1, 0.5, 0.7, 0.9],
    warning_delta=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.5],
)

adwin_grid = ParameterGrid(param_grid=parameters)

In [None]:
# Grid search over the ADWIN deltas used by the Adaptive Random Forest's drift
# and warning detectors. Each combination in `adwin_grid` is scored with
# run_prequential on the rewound stream; the combination with the highest mean
# of accuracy, precision, recall and F1 is remembered, and one record per
# combination is appended to `results`.

PREQ_SAMPLES = 75000   # value passed as preq_samples for every configuration
SEED = 42              # fixed forest seed so configurations are compared like for like

best_params = None
best_score = float('-inf')   # any real score beats this, so the first run always registers
best_metrics = []

results = []

for params in adwin_grid:
    print(f"Evaluating parameters: drift_detection=ADWIN({params['drift_delta']}), warning_detection=ADWIN({params['warning_delta']})")
    # Rewind so every configuration is evaluated from the first sample.
    data_loader.restart()

    accuracy, precision, recall, f1, auc, avg_processing_time, drift_idx = run_prequential(
        classifier=AdaptiveRandomForestClassifier(
            drift_detection_method=ADWIN(params['drift_delta']),
            warning_detection_method=ADWIN(params['warning_delta']),
            # Without a seed each grid point gets a different random forest,
            # which confounds the comparison and makes the run irreproducible.
            random_state=SEED
        ),
        stream=data_loader,
        feature_selector=None,
        preq_samples=PREQ_SAMPLES
    )

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
    print(f"Average processing time: {avg_processing_time}")
    print(f"drift list: {drift_idx}")

    # Selection criterion: unweighted mean of the four headline metrics.
    avg_score = (accuracy + precision + recall + f1) / 4

    # Strict '>' keeps the earliest configuration when scores tie.
    if avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]

    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time,
        # Previously unpacked but dropped; appended last so existing column
        # positions are unchanged.
        # NOTE(review): assumed to be a scalar like the other metrics — confirm
        # against run_prequential.
        'auc': auc
    })

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

0
DRIFT detected: 3
1
DRIFT detected: 3
2
DRIFT detected: 3
3
DRIFT detected: 4
4
DRIFT detected: 3
5
DRIFT detected: 2
6
DRIFT detected: 2
7
DRIFT detected: 2
8
DRIFT detected: 2
9
DRIFT detected: 4
Accuracy: 0.998453, Precision: 0.998518, Recall: 0.998177, F1: 0.998347
Average processing time: 0.0062088566346666825
drift list: [320, 322, 961, 1027, 1538, 20169, 30169, 30185, 30537, 30702, 30706, 31527, 39327, 40132, 40178, 40356, 40676, 41380, 47616, 57135, 60355, 60721, 60760, 70309, 70539, 70589, 75033]

0
DRIFT detected: 1
1
DRIFT detected: 1
2
DRIFT detected: 0
3
DRIFT detected: 0
4
DRIFT detected: 1
5
DRIFT detected: 1
6
DRIFT detected: 1
7
DRIFT detected: 0
8
DRIFT detected: 0
9
DRIFT detected: 1
Accuracy: 0.998907, Precision: 0.997804, Recall: 0.999500, F1: 0.998651
Average processing time: 0.005978715178498948
drift list: [320, 321, 21982]


In [None]:
# Turn the per-configuration records into a table and persist it as CSV without
# the index column.
# NOTE: `results` is rebound here from a list of dicts to a DataFrame.
results = pd.DataFrame(data=results)
results.to_csv(path_or_buf='adwin2.csv', index=False)