# Hyperparameter Tuning

In [1]:
# classifiers
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.lazy import KNNClassifier, KNNADWINClassifier

from float.feature_selection import OFS, FIRES      # online feature methods
from skmultiflow.data import FileStream             # create stream from file
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations
import numpy as np
import time

In [2]:
# Path of the CSV file that backs the evaluation stream.
STREAM_CSV_PATH = 'out.csv'

# File-backed stream, built once and shared by the cells that follow.
data_loader = FileStream(filepath=STREAM_CSV_PATH)

## Classifiers

### Adaptive Random Forest (ARF)

In [3]:
# Candidate ARF hyperparameter values; the trailing comments give the library defaults.
arf_param_grid = dict(
    n_estimators=[5, 10, 20, 30],     # default=10
    grace_period=[50, 100],           # default=50
    split_confidence=[0.01, 0.05],    # default=0.01
    leaf_prediction=['mc', 'nba'],    # default='nba'
)

# Every combination of the value lists above (4 * 2 * 2 * 2 = 32 settings).
arf_grid = ParameterGrid(arf_param_grid)

In [4]:
def evaluate_model(params, stream, n_pretrain=200, max_samples=50000, model_cls=None):
    """Evaluate one hyperparameter setting with a test-then-train pass over a stream.

    The model is first fitted on ``n_pretrain`` samples, then every following
    sample is predicted *before* it is used for an incremental update.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to the model constructor.
    stream : object
        Data stream exposing ``next_sample``, ``has_more_samples`` and
        ``target_values``. The caller is responsible for restarting it.
    n_pretrain : int, default=200
        Number of samples drawn for the initial ``partial_fit`` call.
    max_samples : int, default=50000
        Upper bound on the number of samples evaluated after pretraining.
    model_cls : callable, optional
        Class (or factory) called as ``model_cls(**params)``. Defaults to
        ``AdaptiveRandomForestClassifier`` when omitted.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, processing_time)``. Precision,
        recall and F1 are macro-averaged. ``processing_time`` is the mean
        number of seconds spent in ``predict`` plus ``partial_fit`` per
        evaluated sample. All five values are ``0.0`` when the stream holds
        no samples beyond the pretraining batch.
    """
    # Fall back to ARF so existing two-argument calls behave as before.
    if model_cls is None:
        model_cls = AdaptiveRandomForestClassifier
    model = model_cls(**params)

    # Initial fit; the full class list is handed over with this first call.
    X_pretrain, y_pretrain = stream.next_sample(n_pretrain)
    model.partial_fit(X_pretrain, y_pretrain, classes=stream.target_values)

    n_samples = 0
    correct_predictions = 0
    total_time = 0.0  # accumulated predict + partial_fit time over all samples

    true_labels = []
    pred_labels = []

    while n_samples < max_samples and stream.has_more_samples():
        X, y = stream.next_sample()

        # Time only the model work (test, then train), not the bookkeeping.
        start = time.perf_counter()
        y_pred = model.predict(X)
        model.partial_fit(X, [y[0]])
        total_time += time.perf_counter() - start

        true_labels.append(y[0])
        pred_labels.append(y_pred[0])

        # Compare scalars rather than relying on the truthiness of an array.
        if y_pred[0] == y[0]:
            correct_predictions += 1

        n_samples += 1

    # Nothing left after pretraining: report zeros instead of dividing by zero.
    if n_samples == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0

    accuracy = correct_predictions / n_samples
    precision = precision_score(true_labels, pred_labels, average='macro', zero_division=0)
    recall = recall_score(true_labels, pred_labels, average='macro', zero_division=0)
    f1 = f1_score(true_labels, pred_labels, average='macro', zero_division=0)

    # Average over all evaluated samples, not just the last loop iteration.
    processing_time = total_time / n_samples
    return accuracy, precision, recall, f1, processing_time

In [5]:
# Exhaustive search: score every ARF configuration and keep the most accurate one.
best_params, best_score = None, 0

for params in arf_grid:
    print(f"Evaluating parameters: {params}")

    # Restart the shared stream before each run so no configuration starts mid-stream.
    data_loader.restart()
    accuracy, precision, recall, f1, processing_time = evaluate_model(params, data_loader)

    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
    print(f"Time per instance: {processing_time:.8f}")

    # Strict '>' means the earliest configuration wins on ties.
    if accuracy > best_score:
        best_params, best_score = params, accuracy

print("Grid Search Completed.")
print(f"Best Parameters: {best_params}")
print(f"Best Accuracy: {best_score:.4f}")

Evaluating parameters: {'grace_period': 50, 'leaf_prediction': 'mc', 'n_estimators': 5, 'split_confidence': 0.01}
Accuracy: 0.9970, Precision: 0.9970, Recall: 0.9970, F1: 0.9970
Time per instance: 0.00103050
Evaluating parameters: {'grace_period': 50, 'leaf_prediction': 'mc', 'n_estimators': 5, 'split_confidence': 0.05}
Accuracy: 0.9972, Precision: 0.9972, Recall: 0.9972, F1: 0.9972
Time per instance: 0.00117830
Evaluating parameters: {'grace_period': 50, 'leaf_prediction': 'mc', 'n_estimators': 10, 'split_confidence': 0.01}
Accuracy: 0.9969, Precision: 0.9969, Recall: 0.9969, F1: 0.9969
Time per instance: 0.00197520
Evaluating parameters: {'grace_period': 50, 'leaf_prediction': 'mc', 'n_estimators': 10, 'split_confidence': 0.05}
Accuracy: 0.9970, Precision: 0.9970, Recall: 0.9970, F1: 0.9970
Time per instance: 0.00217810
Evaluating parameters: {'grace_period': 50, 'leaf_prediction': 'mc', 'n_estimators': 20, 'split_confidence': 0.01}
Accuracy: 0.9970, Precision: 0.9969, Recall: 0.9970

## Dynamic Feature Selection