## KNN Evaluations

In [1]:
from prequential import run_prequential

from skmultiflow.lazy import KNNClassifier, KNNADWINClassifier
from skmultiflow.drift_detection.adwin import ADWIN

from float.feature_selection import FIRES, OFS           # online feature methods
from skmultiflow.data import FileStream             # create stream from file

import pandas as pd

In [2]:
# Build the stream that every evaluation below reads from, backed by the CSV file.
csv_path = 'merged_cesnet.csv'
data_loader = FileStream(filepath=csv_path)

In [3]:
# Draw 50 samples from the stream to serve as the reference sample handed to the
# feature selectors. Only the first element of the returned pair is kept
# (presumably the feature matrix -- confirm against the stream API).
first_batch = data_loader.next_sample(50)
ref_sample, _ = first_batch

# Reset the loader afterwards so that drawing the reference batch does not
# leave the stream advanced for the evaluations that follow.
data_loader.reset()

In [4]:
# Keyword arguments shared by every KNN-based classifier built in this notebook.
knn_params = dict(
    n_neighbors=5,
    leaf_size=5,
    max_window_size=250,
    metric='euclidean',
)

# Keyword arguments for the FIRES feature selector. The feature count and the
# class labels are read from the stream; the reference sample comes from the
# batch drawn earlier.
fires_params = dict(
    penalty_s=0.1,
    penalty_r=1,
    lr_mu=0.025,
    lr_sigma=0.1,
    n_total_features=data_loader.n_features,
    n_selected_features=10,
    classes=data_loader.target_values,
    baseline="gaussian",
    ref_sample=ref_sample,
)

# Keyword arguments for the OFS feature selector. Note that it keeps 5 features
# whereas FIRES above keeps 10.
ofs_params = dict(
    n_selected_features=5,
    n_total_features=data_loader.n_features,
    baseline="gaussian",
    ref_sample=ref_sample,
)

### Without ADWIN

In [5]:
# Baseline run: plain KNN with neither feature selection nor drift detection,
# limited to 20000 prequential samples.
print("EVALUATING: KNN")
baseline_knn = KNNClassifier(**knn_params)
baseline_result = run_prequential(
    classifier=baseline_knn,
    stream=data_loader,
    feature_selector=None,
    drift_detection=None,
    preq_samples=20000,
)

# Seven values come back for this configuration; the last one is not used here.
accuracy, precision, recall, f1, auc, avg_processing_time, _ = baseline_result

print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
print(f"Average processing time: {avg_processing_time}")

EVALUATING: KNN
Accuracy: 0.992750, Precision: 0.992110, Recall: 0.993400, F1: 0.992755, AUC: 0.997417
Average processing time: 0.0032435930250000297


In [None]:
# Evaluate KNN without ADWIN in three configurations: no feature selection,
# FIRES, and OFS. Each entry pairs the printed label with a factory so that a
# fresh selector (or None) is built for every run.
selector_setups = [
    ("KNN", lambda: None),                              # no feature selection
    ("KNN + FIRES", lambda: FIRES(**fires_params)),     # FIRES
    ("KNN + OFS", lambda: OFS(**ofs_params)),           # OFS
]

for label, make_selector in selector_setups:
    # Reset the shared stream before every run. All runs read from the same
    # data_loader, and each one is asked to consume every remaining sample, so
    # without this the later configurations would see a (nearly) exhausted
    # stream and would not be comparable with the first.
    # NOTE(review): harmless if run_prequential already rewinds the stream.
    data_loader.reset()

    print(f"EVALUATING: {label}")
    results = run_prequential(
        classifier=KNNClassifier(**knn_params),
        stream=data_loader,
        feature_selector=make_selector(),
        drift_detection=None,
        preq_samples=data_loader.n_remaining_samples()
    )

    # The result tuple has 7 entries without a feature selector and 9 with one;
    # the first six are the metrics printed below in both cases.
    accuracy, precision, recall, f1, auc, avg_processing_time = results[:6]

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
    print(f"Average processing time: {avg_processing_time}")

### With ADWIN

In [None]:
# Evaluate the ADWIN-based KNN classifier in three configurations: no feature
# selection, FIRES, and OFS. Each entry pairs the printed label with a factory
# so that a fresh selector (or None) is built for every run.
adwin_setups = [
    ("KNN + ADWIN", lambda: None),                              # no feature selection
    ("KNN + ADWIN + FIRES", lambda: FIRES(**fires_params)),     # FIRES
    ("KNN + ADWIN + OFS", lambda: OFS(**ofs_params)),           # OFS
]

for label, make_selector in adwin_setups:
    # Reset the shared stream before every run. All runs read from the same
    # data_loader, and each one is asked to consume every remaining sample, so
    # without this the later configurations would see a (nearly) exhausted
    # stream and would not be comparable with the first.
    # NOTE(review): harmless if run_prequential already rewinds the stream.
    data_loader.reset()

    # A new classifier per run so that no learned state carries over. Its
    # detector is replaced with an ADWIN built with 0.9 as the first positional
    # argument (presumably ADWIN's delta -- confirm the value is intentional).
    knn_adwin = KNNADWINClassifier(**knn_params)
    knn_adwin.adwin = ADWIN(0.9)

    print(f"EVALUATING: {label}")
    results = run_prequential(
        classifier=knn_adwin,
        stream=data_loader,
        feature_selector=make_selector(),
        preq_samples=data_loader.n_remaining_samples()
    )

    # The result tuple has 7 entries without a feature selector and 9 with one.
    # The first six are the metrics; the last entry is the drift index list in
    # both cases.
    accuracy, precision, recall, f1, auc, avg_processing_time = results[:6]
    drift_idx_list = results[-1]

    print(f"Accuracy: {accuracy:.6f}, Precision: {precision:.6f}, Recall: {recall:.6f}, F1: {f1:.6f}, AUC: {auc:.6f}")
    print(f"Average processing time: {avg_processing_time}")
    print(drift_idx_list)