In [27]:
import pandas as pd
from sklearn.model_selection import ParameterGrid   # hyperparameter combinations
from skmultiflow.data import FileStream             # create stream from file
from skmultiflow.lazy import KNNClassifier          # classifier tuned in this notebook
from skmultiflow.meta import AdaptiveRandomForestClassifier

from prequential import run_prequential

In [28]:
# Build the data stream from the merged CSV (path is relative to this notebook).
MERGED_CSV_PATH = '../merged.csv'
data_loader = FileStream(filepath=MERGED_CSV_PATH)

In [29]:
# Pull the first 50 samples from the stream for a quick visual sanity check.
# next_sample returns a pair; the second element is discarded here.
ref_sample, _ = data_loader.next_sample(50)

# Rewind the stream so the 50 samples consumed above are not skipped later.
# restart() is the same rewind call the grid-search loop uses, keeping the
# notebook consistent about how the stream is put back to its start.
data_loader.restart()

print(ref_sample)

[[4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 9.61118521e-02
  9.44903759e-02 8.92843113e-03]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 9.67718844e-02
  2.34557365e-02 5.50171572e-04]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 4.74462999e-01
  3.11841899e-01 9.72453702e-02]
 ...
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 7.80520998e-01
  1.14503742e-02 1.31111070e-04]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 1.79963453e-01
  1.60146083e-01 2.56467678e-02]
 [4.43000000e+02 1.33350000e+04 1.00000000e+00 ... 5.24419081e-02
  1.01438357e-01 1.02897402e-02]]


In [30]:
# Search space for the KNN classifier. Each key is a constructor argument and
# each list holds the candidate values to try for it.
parameters = dict(
    n_neighbors=[5, 10],                # default = 5
    max_window_size=[250, 500, 1000],   # default = 1000
    leaf_size=[5, 70, 100],             # default = 30
    metric=['euclidean'],               # default = 'euclidean'
)

# Expand the search space into every combination of the values above.
knn_grid = ParameterGrid(parameters)


In [31]:
# Grid search: run a prequential evaluation of a fresh KNN classifier for every
# parameter combination and remember the combination with the highest mean of
# accuracy, precision, recall and F1.
best_params = None
best_score = 0
best_metrics = []

results = []  # one dict per evaluated combination (parameters + metrics)

for params in knn_grid:
    print(f"Evaluating parameters: {params}")

    # Rewind the stream so every candidate starts from the same position.
    data_loader.restart()
    # NOTE(review): these two calls only advance the stream by 400 samples; the
    # data they return was never used, so it is no longer bound to a name. The
    # calls themselves are kept so the stream position handed to
    # run_prequential is unchanged -- confirm whether run_prequential restarts
    # the stream itself, in which case both calls can be removed.
    data_loader.next_sample(200)
    data_loader.next_sample(200)

    # Build a new, untrained classifier from the current parameter combination.
    knn = KNNClassifier(**params)

    accuracy, precision, recall, f1, avg_processing_time = run_prequential(
        classifier=knn,
        stream=data_loader,
        feature_selector=None,  # No feature selector
        n_pretrain=200
    )
    print(f"Accuracy: {accuracy:.8f}, Precision: {precision:.8f}, Recall: {recall:.8f}, F1: {f1:.8f}")
    print(f"Average processing time: {avg_processing_time}")

    # Unweighted mean of the four metrics; computed once and reused for both
    # the stored record and the best-candidate comparison.
    avg_score = (accuracy + precision + recall + f1) / 4

    results.append({
        **params,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'avg_score': avg_score,
        'avg_processing_time': avg_processing_time
    })

    # Strict '>' means the earliest combination wins when scores tie.
    if avg_score > best_score:
        best_params = params
        best_score = avg_score
        best_metrics = [accuracy, precision, recall, f1]

Evaluating parameters: {'leaf_size': 5, 'max_window_size': 250, 'metric': 'euclidean', 'n_neighbors': 5}
Accuracy: 0.98954000, Precision: 0.98867531, Recall: 0.98539604, F1: 0.98703295
Average processing time: 0.0015393870919883192
Evaluating parameters: {'leaf_size': 5, 'max_window_size': 250, 'metric': 'euclidean', 'n_neighbors': 10}
Accuracy: 0.98950000, Precision: 0.98969585, Recall: 0.98425743, F1: 0.98696915
Average processing time: 0.0014842931459957617
Evaluating parameters: {'leaf_size': 5, 'max_window_size': 500, 'metric': 'euclidean', 'n_neighbors': 5}
Accuracy: 0.97978000, Precision: 0.97812329, Recall: 0.97168317, F1: 0.97489259
Average processing time: 0.002814194650020363
Evaluating parameters: {'leaf_size': 5, 'max_window_size': 500, 'metric': 'euclidean', 'n_neighbors': 10}
Accuracy: 0.97966000, Precision: 0.97983891, Recall: 0.96960396, F1: 0.97469457
Average processing time: 0.003216634084004909
Evaluating parameters: {'leaf_size': 5, 'max_window_size': 1000, 'metric

In [32]:
# Write every evaluated combination to disk, one CSV row each (no index column).
df = pd.DataFrame(results)
df.to_csv("knn-tuning-2.csv", index=False)

# Summarise the search: the winning parameters, their metrics
# ([accuracy, precision, recall, f1]) and where the full table was saved.
summary_lines = (
    "Grid Search Completed.",
    f"Best Parameters: {best_params}",
    f"Best Metrics: {best_metrics}",
    "Results saved to knn-tuning-2.csv",
)
print(*summary_lines, sep="\n")


Grid Search Completed.
Best Parameters: {'leaf_size': 5, 'max_window_size': 250, 'metric': 'euclidean', 'n_neighbors': 5}
Best Metrics: [0.98954, 0.9886753091938608, 0.9853960396039604, 0.9870329506855429]
Results saved to knn-tuning-2.csv
