In [6]:
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential
import pandas as pd

In [7]:
data_loader = FileStream(filepath='../merged.csv')

In [8]:
ref_sample, _ = data_loader.next_sample(50)
data_loader.reset()

In [9]:
parameters = {
    'delta': [0.00001, 0.0001, 0.001, 0.01, 0.1, 0.5],
    'warning_detection_method': [None, 'ADWIN']
}

adwin_grid = ParameterGrid(parameters)

In [10]:
best_params = None
best_score = 0
best_metrics = []

results = []

for params in adwin_grid:
    print(f"Evaluating parameters: drift_detection=ADWIN({params['delta']}), warning_detection={params['warning_detection_method']}, delta={params['delta']}")
    data_loader.restart()

    # tracemalloc.start()
    warning_detector = ADWIN(params['delta']) if params['warning_detection_method'] == 'ADWIN' else None

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=AdaptiveRandomForestClassifier(
            drift_detection_method=ADWIN(params['delta']),
            warning_detection_method=warning_detector
        ),
        stream=data_loader,
        # drift_detector=ADWIN(params['delta']),
        feature_selector = None,
        preq_samples=225000
    )

    # current_mem, peak_mem = tracemalloc.get_traced_memory()
    # tracemalloc.stop()

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
    print(f"Average processing time: {avg_processing_time}")
    # print(f"Current memory usage (KB): {current_mem / 1024}")
    # print(f"Peak memory usage (KB): {peak_mem / 1024}")

    avg_score = (accuracy + precision + recall + f1) / 4

    if avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]
    
    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")

Accuracy: 0.998191, Precision: 0.998182, Recall: 0.998118, F1: 0.998150
Average processing time: 0.004428005983110998
Accuracy: 0.998182, Precision: 0.998154, Recall: 0.998127, F1: 0.998141
Average processing time: 0.004851558767999969
Accuracy: 0.998160, Precision: 0.998236, Recall: 0.998000, F1: 0.998118
Average processing time: 0.00441936693955573
Accuracy: 0.998036, Precision: 0.998081, Recall: 0.997900, F1: 0.997991
Average processing time: 0.004909408173777213
Accuracy: 0.998164, Precision: 0.998127, Recall: 0.998118, F1: 0.998123
Average processing time: 0.004489656885778548
Accuracy: 0.998324, Precision: 0.998318, Recall: 0.998255, F1: 0.998286
Average processing time: 0.004878239623111331
Accuracy: 0.998382, Precision: 0.998436, Recall: 0.998255, F1: 0.998345
Average processing time: 0.004429317245777222
Accuracy: 0.998493, Precision: 0.998600, Recall: 0.998318, F1: 0.998459
Average processing time: 0.004873985200887817
Accuracy: 0.998751, Precision: 0.998691, Recall: 0.998755

In [11]:
results = pd.DataFrame(results)
results.to_csv('adwin2.csv', index=False)

In [14]:
data_loader.restart()

# tracemalloc.start()

accuracy, precision, recall, f1, avg_processing_time = run_prequential(
    classifier=AdaptiveRandomForestClassifier(
        drift_detection_method=ADWIN(0.6),
        warning_detection_method=ADWIN(0.6)
    ),
    stream=data_loader,
    # drift_detector=ADWIN(params['delta']),
    feature_selector = None,
    preq_samples=225000
)

# current_mem, peak_mem = tracemalloc.get_traced_memory()
# tracemalloc.stop()

print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
print(f"Average processing time: {avg_processing_time}")

Accuracy: 0.999027, Precision: 0.998918, Recall: 0.999091, F1: 0.999005
Average processing time: 0.004998293041774741


In [7]:
data_loader.restart()

# tracemalloc.start()

accuracy, precision, recall, f1, avg_processing_time = run_prequential(
    classifier=AdaptiveRandomForestClassifier(
        drift_detection_method=None,
        warning_detection_method=None
    ),
    stream=data_loader,
    # drift_detector=ADWIN(params['delta']),
    feature_selector = None,
    preq_samples=300000
)

# current_mem, peak_mem = tracemalloc.get_traced_memory()
# tracemalloc.stop()

print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
print(f"Average processing time: {avg_processing_time}")

Accuracy: 0.858913, Precision: 0.853451, Recall: 0.866640, F1: 0.859995
Average processing time: 0.007894888422667102


In [8]:
data_loader.restart()

# tracemalloc.start()

accuracy, precision, recall, f1, avg_processing_time = run_prequential(
    classifier=AdaptiveRandomForestClassifier(
        drift_detection_method=ADWIN(0.1),
        warning_detection_method=ADWIN(0.1)
    ),
    stream=data_loader,
    # drift_detector=ADWIN(params['delta']),
    feature_selector = None,
    preq_samples=300000
)

# current_mem, peak_mem = tracemalloc.get_traced_memory()
# tracemalloc.stop()

print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}")
print(f"Average processing time: {avg_processing_time}")

Accuracy: 0.998707, Precision: 0.998773, Recall: 0.998640, F1: 0.998707
Average processing time: 0.004958556739668287
