In [14]:
from skmultiflow.meta import AdaptiveRandomForestClassifier

from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

import pandas as pd

In [15]:
data_loader = FileStream(filepath='../merged.csv')

In [16]:
ref_sample, _ = data_loader.next_sample(50)
data_loader.reset()


print(ref_sample)

[[4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 9.61118521e-02
  9.44903759e-02 8.92843113e-03]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 9.67718844e-02
  2.34557365e-02 5.50171572e-04]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 4.74462999e-01
  3.11841899e-01 9.72453702e-02]
 ...
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 7.80520998e-01
  1.14503742e-02 1.31111070e-04]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 1.79963453e-01
  1.60146083e-01 2.56467678e-02]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 5.24419081e-02
  1.01438357e-01 1.02897402e-02]]


In [17]:
parameters = {
    'n_estimators': [6, 9],             # default=10
    'max_features': ['None'],                  # default=auto
    'drift_detection_method': ['ADWIN(0.001)'],   
    'grace_period': [25, 50],    # default=50
    'split_criterion': ['info_gain', 'gini'],  # default=info_gain
    'split_confidence': [0.05, 0.1],     # default=0.01
    'tie_threshold': [0.05, 0.2],        # default=0.05
    'leaf_prediction': ['mc', 'nba'],    # default=nba
}

arf_grid = ParameterGrid(parameters)

In [18]:
best_params = None
best_score = 0
best_metrics = []

results = []

for params in arf_grid:
    print(f"Evaluating parameters: {params}")
    data_loader.restart()
    train_data, _ = data_loader.next_sample(200)
    test_data, _ = data_loader.next_sample(200)

    # Update the classifier with the current parameters
    arf = AdaptiveRandomForestClassifier(**params)

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=arf,
        stream=data_loader,
        feature_selector=None,  # No feature selector
        n_pretrain=200
    )
    print(f"Accuracy: {accuracy:.8f}, Precision: {precision:.8f}, Recall: {recall:.8f}, F1: {f1:.8f}")
    print(f"Average processing time: {avg_processing_time}")
    
    avg_score = (accuracy + precision + recall + f1) / 4
    
    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

    if (accuracy + precision + recall + f1) / 4 > best_score:
        best_params = params
        best_score = (accuracy + precision + recall + f1) / 4
        best_metrics = [accuracy, precision, recall, f1]
        

Evaluating parameters: {'drift_detection_method': 'ADWIN(0.001)', 'grace_period': 25, 'leaf_prediction': 'mc', 'max_features': 'None', 'n_estimators': 6, 'split_confidence': 0.05, 'split_criterion': 'info_gain', 'tie_threshold': 0.05}
Accuracy: 0.99098000, Precision: 0.98803934, Recall: 0.98965347, F1: 0.98884574
Average processing time: 0.0013176736440010427
Evaluating parameters: {'drift_detection_method': 'ADWIN(0.001)', 'grace_period': 25, 'leaf_prediction': 'mc', 'max_features': 'None', 'n_estimators': 6, 'split_confidence': 0.05, 'split_criterion': 'info_gain', 'tie_threshold': 0.2}
Accuracy: 0.99284000, Precision: 0.99036180, Recall: 0.99193069, F1: 0.99114563
Average processing time: 0.0010916348359853146
Evaluating parameters: {'drift_detection_method': 'ADWIN(0.001)', 'grace_period': 25, 'leaf_prediction': 'mc', 'max_features': 'None', 'n_estimators': 6, 'split_confidence': 0.05, 'split_criterion': 'gini', 'tie_threshold': 0.05}
Accuracy: 0.99540000, Precision: 0.99420907, Re

In [19]:
# Save results to CSV
df = pd.DataFrame(results)
df.to_csv("arf-tuning-2.csv", index=False)

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")
print("Results saved to arf-tuning-2.csv")


Grid Search Completed.
Best Parameters: {'drift_detection_method': 'ADWIN(0.001)', 'grace_period': 50, 'leaf_prediction': 'nba', 'max_features': 'None', 'n_estimators': 9, 'split_confidence': 0.05, 'split_criterion': 'gini', 'tie_threshold': 0.2}
Best Metrics: [0.9977, 0.9973750680996484, 0.9969306930693069, 0.99715283107623]
Results saved to arf-tuning-2.csv
