# Dependencies

In [14]:
from skmultiflow.data import FileStream
from skmultiflow.bayes import NaiveBayes
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.evaluation import EvaluatePrequential

# Fetch dataset as data stream

In [15]:
# fetch data from GitHub as data stream 
path = "https://raw.githubusercontent.com/scikit-multiflow/streaming-datasets/master/elec.csv"
stream = FileStream(path)

# Preprare Classifiers and Evaluator

In [16]:
# create object for Naive Bayes, K-nearest Neighbor Classifier, and Adaptive Random Forest Classifier
nb = NaiveBayes()
knn = KNNADWINClassifier()
arf = AdaptiveRandomForestClassifier()

# evaluator
size = stream.n_remaining_samples()
evaluator = EvaluatePrequential(batch_size=200, 
                                pretrain_size=1000, 
                                max_samples=size, 
                                metrics=['accuracy','precision','recall', 'f1'],
                                show_plot=False, 
                                data_points_for_classification=False)


# Training Incremental Model

## Single K-nearest Neighbor Model

In [17]:
# Run evalation for single K-nearest Neighbor model
evaluator.evaluate(stream = stream, 
                   model = knn, 
                   model_names =['KNN'])


Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 1000 sample(s).
Evaluating...
 #################### [100%] [157.47s]
Processed samples: 45400
Mean performance:
KNN - Accuracy     : 0.7223
KNN - Precision: 0.6853
KNN - Recall: 0.6351
KNN - F1 score: 0.6593


[KNNADWINClassifier(leaf_size=30, max_window_size=1000, metric='euclidean',
                    n_neighbors=5)]

## Single Naive Bayes Model

In [18]:
# # Run evalation for single Naive Bayes model
evaluator.evaluate(stream = stream, model = nb, model_names =['NB'])

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 1000 sample(s).
Evaluating...
 #################### [100%] [8.64s]
Processed samples: 45400
Mean performance:
NB - Accuracy     : 0.7319
NB - Precision: 0.8336
NB - Recall: 0.4574
NB - F1 score: 0.5907


[NaiveBayes(nominal_attributes=None)]

## Adaptive Random Forest Model

In [6]:
evaluator.evaluate(stream = stream, 
                   model = arf, 
                   model_names = ["RF"])

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 1000 sample(s).
Evaluating...


  y_proba = np.asarray(y_proba)


 #################### [100%] [827.70s]
Processed samples: 45400
Mean performance:
RF - Accuracy     : 0.7887
RF - Precision: 0.7745
RF - Recall: 0.7061
RF - F1 score: 0.7387


[AdaptiveRandomForestClassifier(binary_split=False, disable_weighted_vote=False,
                                drift_detection_method=ADWIN(delta=0.001),
                                grace_period=50, lambda_value=6,
                                leaf_prediction='nba', max_byte_size=33554432,
                                max_features=2, memory_estimate_period=2000000,
                                n_estimators=10, nb_threshold=0,
                                no_preprune=False, nominal_attributes=None,
                                performance_metric='acc', random_state=None,
                                remove_poor_atts=False, split_confidence=0.01,
                                split_criterion='info_gain',
                                stop_mem_management=False, tie_threshold=0.05,

## Compare among K-nearest Neighbor, Naive Bayes and Adaptive Random Forest classifier

In [7]:
# # Run evaluation and compare two model
evaluator.evaluate(stream=stream, 
                   model=[knn, nb, arf], 
                   model_names=['KNN','NB','RF'])

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 1000 sample(s).
Evaluating...
 #################### [100%] [1167.83s]
Processed samples: 45400
Mean performance:
KNN - Accuracy     : 0.7223
KNN - Precision: 0.6853
KNN - Recall: 0.6351
KNN - F1 score: 0.6593
NB - Accuracy     : 0.7235
NB - Precision: 0.8919
NB - Recall: 0.3941
NB - F1 score: 0.5466
RF - Accuracy     : 0.7855
RF - Precision: 0.7684
RF - Recall: 0.7056
RF - F1 score: 0.7357


[KNNADWINClassifier(leaf_size=30, max_window_size=1000, metric='euclidean',
                    n_neighbors=5),
 NaiveBayes(nominal_attributes=None),
 AdaptiveRandomForestClassifier(binary_split=False, disable_weighted_vote=False,
                                drift_detection_method=ADWIN(delta=0.001),
                                grace_period=50, lambda_value=6,
                                leaf_prediction='nba', max_byte_size=33554432,
                                max_features=2, memory_estimate_period=2000000,
                                n_estimators=10, nb_threshold=0,
                                no_preprune=False, nominal_attributes=None,
                                performance_metric='acc', random_state=None,
                                remove_poor_atts=False, split_confidence=0.01,
                                split_criterion='info_gain',
                                stop_mem_management=False, tie_threshold=0.05,

## Training Naive Bayes model without built-in evaluator

In [8]:
# Setup variables to control loop and track performance
n_samples = 0
correct_cnt = 0
max_samples = size

# Train the estimator with the samples provided by the data stream
while n_samples < max_samples and stream.has_more_samples():
    X, y = stream.next_sample()
    y_pred = nb.predict(X)
    if y[0] == y_pred[0]:
        correct_cnt += 1
    nb.partial_fit(X, y)
    n_samples += 1

# Display results
print('Performance of Naive Bayes classifer')
print('{} samples analyzed.'.format(n_samples))
print('Accuracy: {}'.format(correct_cnt / n_samples))

Performance of Naive Bayes classifer
45312 samples analyzed.
Accuracy: 0.72704802259887
