In [1]:
from skmultiflow.meta import AdaptiveRandomForestClassifier

from skmultiflow.data import FileStream             # create stream from file
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations

from prequential import run_prequential

In [2]:
data_loader = FileStream(filepath='../out.csv')

In [3]:
ref_sample, _ = data_loader.next_sample(50)
data_loader.reset()


print(ref_sample)

[[5.4865e+04 3.0000e+00 2.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.5054e+04 1.0900e+02 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.5055e+04 5.2000e+01 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 ...
 [8.6900e+03 1.7000e+01 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [8.6940e+03 1.2300e+02 1.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [5.7683e+04 4.0000e+00 2.0000e+00 ... 0.0000e+00 0.0000e+00 0.0000e+00]]


In [None]:
parameters = {
    'n_estimators': [3, 6, 9],             # default=10
    'max_features': ['None'],                  # default=auto
    'drift_detection_method': ['ADWIN(0.001)'],   
    'grace_period': [10, 25, 50, 100, 200],    # default=50
    'split_criterion': ['info_gain', 'gini'],  # default=info_gain
    'split_confidence': [0.01, 0.05, 0.1, 0.2, 0.25, 0.001],     # default=0.01
    'tie_threshold': [0.01, 0.05, 0.1, 0.2, 0.001],        # default=0.05
    'leaf_prediction': ['mc', 'nb', 'nba'],    # default=nba
}

arf_grid = ParameterGrid(parameters)

In [None]:
best_params = None
best_score = 0
best_metrics = []

results = []

for params in arf_grid:
    print(f"Evaluating parameters: {params}")
    data_loader.restart()
    train_data, _ = data_loader.next_sample(200)
    test_data, _ = data_loader.next_sample(200)

    # Update the classifier with the current parameters
    arf = AdaptiveRandomForestClassifier(**params)

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=arf,
        stream=data_loader,
        feature_selector=None,  # No feature selector
        n_pretrain=200
    )
    print(f"Accuracy: {accuracy:.8f}, Precision: {precision:.8f}, Recall: {recall:.8f}, F1: {f1:.8f}")
    print(f"Average processing time: {avg_processing_time}")
    
    avg_score = (accuracy + precision + recall + f1) / 4
    
    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

    if (accuracy + precision + recall + f1) / 4 > best_score:
        best_params = params
        best_score = (accuracy + precision + recall + f1) / 4
        best_metrics = [accuracy, precision, recall, f1]

Accuracy: 0.99770000, Precision: 0.99740303, Recall: 0.99777917, F1: 0.99759107
Accuracy: 0.99832000, Precision: 0.99857436, Recall: 0.99790488, F1: 0.99823951
Accuracy: 0.99858000, Precision: 0.99890967, Recall: 0.99811439, F1: 0.99851187
Accuracy: 0.99864000, Precision: 0.99895165, Recall: 0.99819820, F1: 0.99857478
Accuracy: 0.99596000, Precision: 0.99618369, Recall: 0.99534884, F1: 0.99576609
Accuracy: 0.99708000, Precision: 0.99706610, Recall: 0.99681542, F1: 0.99694074
Accuracy: 0.99774000, Precision: 0.99777852, Recall: 0.99748586, F1: 0.99763217
Accuracy: 0.99796000, Precision: 0.99828056, Recall: 0.99744396, F1: 0.99786208
Accuracy: 0.99872000, Precision: 0.99903552, Recall: 0.99828200, F1: 0.99865862
Accuracy: 0.99860000, Precision: 0.99899342, Recall: 0.99807249, F1: 0.99853274
Accuracy: 0.99670000, Precision: 0.99623116, Recall: 0.99685732, F1: 0.99654414
Accuracy: 0.99698000, Precision: 0.99669068, Recall: 0.99698303, F1: 0.99683684
Accuracy: 0.99828000, Precision: 0.99853

KeyboardInterrupt: 

In [None]:
# Save results to CSV
df = pd.DataFrame(results)
df.to_csv("arf-tuning.csv", index=False)

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Metrics: {best_metrics}")
print("Results saved to arf-tuning.csv")