In [125]:
%matplotlib notebook
import IPython
from IPython.display import display, HTML
from matplotlib.backends.backend_agg import FigureCanvasAgg
import matplotlib.pyplot as plt
from skmultiflow.data import FileStream
from skmultiflow.data import SEAGenerator
from skmultiflow.trees import HoeffdingAdaptiveTreeClassifier
# from skmultiflow.trees import HAT
# from skmultiflow.meta import AdaptiveRandomForestClassifier
from skmultiflow.core import Pipeline
from skmultiflow.lazy import KNNADWINClassifier
from skmultiflow.evaluation import EvaluatePrequential
from skmultiflow.drift_detection import ADWIN
# from sklearn.linear_model import SGDClassifier
import numpy as np
import pandas as pd
import joblib
import pickle

In [104]:
model_info = {
    'model_name': 'Concept Adaptive Very Fast Decision Tree',
    'training_file': 'MLM_CVFDT.ipynb',
    'file_name': 'model_cvfdt.pkl',
    'scaler_file': 'model_cvfdt_scaler.pkl',
    'prepared_by': 'Digital Khalid',
    'library': 'skmultiflow',
    'classifier': 'Hoeffding Adaptive Tree Classifier',
    'normalization': 'Standard Scaler (Z-Score)',
    'data_file': 'flows.csv',
    'features': ['src_port', 'dst_port', 'protocol', 'first_pkt_size'],
    'target': ['elephant'],
}

In [None]:
# Save model information as json file
with open(f'model_info_{model_info["model_name"]}.json', "w") as outfile:
    json.dump(model_info, outfile)

In [14]:
# Read data file
input_file = model_info['data_file']
input_file
flows = pd.read_csv(input_file)

In [70]:
features = model_info['features']
target = model_info['target']

X = flows[features]
y = flows[target]

In [107]:
# Feed data stream from file
stream = FileStream(model_info['data_file'] , 6)

In [106]:
# stream.prepare_for_use()

New instances of the Stream class are now ready to use after instantiation.


In [108]:
hat_classifier = HoeffdingAdaptiveTreeClassifier(
    grace_period = 400,
    split_criterion = 'gini',
    split_confidence = 1e-5,
    tie_threshold = 0.005,
    binary_split = True,
    stop_mem_management = False,
    remove_poor_atts = False,
    no_preprune = False,
    leaf_prediction = 'nba',
    nb_threshold = 0,
)

# model = AdaptiveRandomForestClassifier(memory_estimate_period=1000000)
# model = KNNADWINClassifier()]

In [123]:
pipe = Pipeline([('test', hat_classifier)])

In [113]:
evaluator = EvaluatePrequential(
    max_samples = 100000,
    batch_size = 1,
    n_wait = 200,
    pretrain_size = 500,
    restart_stream = True,
    data_points_for_classification = True,   
    metrics = ['precision', 'Recall', 'Accuracy', 'Kappa', 'f1'],
    show_plot = False,
    output_file = 'log_CVFDT_training.csv',
)

In [114]:
evaluator.evaluate(stream=stream, model=pipe)
# display(HTML('<script>document.title = "scikit-multiflow Evaluation";</script>'))
# display(eval)

Prequential Evaluation
Evaluating 1 target(s).
Pre-training on 500 sample(s).
Evaluating...
 #################### [100%] [31.90s]
Processed samples: 100000
Mean performance:
M0 - Accuracy     : 0.9961
M0 - Kappa        : 0.5120
M0 - Precision: 0.3865
M0 - Recall: 0.7658


[HoeffdingAdaptiveTreeClassifier(binary_split=True, bootstrap_sampling=True,
                                 grace_period=400, leaf_prediction='nba',
                                 max_byte_size=33554432,
                                 memory_estimate_period=1000000, nb_threshold=0,
                                 no_preprune=False, nominal_attributes=None,
                                 random_state=None, remove_poor_atts=False,
                                 split_confidence=1e-05, split_criterion='gini',
                                 stop_mem_management=False, tie_threshold=0.005)]

In [128]:
pickle.dump(evaluator, open('hat.pkl', 'wb'))

PicklingError: Can't pickle <class 'skmultiflow.evaluation.evaluate_prequential.EvaluatePrequential'>: import of module 'skmultiflow.evaluation.evaluate_prequential' failed

In [131]:
joblib.dump(hat_classifier,'hat.pkl')

TypeError: argument of type 'builtin_function_or_method' is not iterable

In [132]:
model = joblib.load('hat.pkl')

EOFError: 

In [116]:
type(eval)

skmultiflow.evaluation.evaluate_prequential.EvaluatePrequential

In [117]:
model.get_info()

"HoeffdingAdaptiveTreeClassifier(binary_split=False, bootstrap_sampling=True,\n                                grace_period=200, leaf_prediction='nba',\n                                max_byte_size=33554432,\n                                memory_estimate_period=1000000, nb_threshold=0,\n                                no_preprune=False, nominal_attributes=None,\n                                random_state=None, remove_poor_atts=False,\n                                split_confidence=1e-07,\n                                split_criterion='info_gain',\n                                stop_mem_management=False, tie_threshold=0.05)"

In [120]:
model.measure_tree_depth()

0