In [6]:
from main import train_and_test_with, Summarizer, train_with_tensorflow
from utils import *

# Handy list of all the files. Every evaluate_ml(...) call further down
# iterates over this list, so each algorithm is run on the same files.
# NOTE(review): get_binetflow_files is not imported by name here, so it
# presumably comes from the `from utils import *` star import -- confirm.
binet_files = get_binetflow_files()

def print_result(file, data):
    """ Print the metrics collected for one file as a small text report.

        Output layout: a "For <file>" header, a rule made of 30 '='
        characters, one "<key> = <value>" row per entry of `data` in
        sorted order, and finally an empty line that separates this
        report from the next one.

        file -- label printed in the header line
        data -- mapping of metric name -> value (anything with .items())
    """
    print("For %s" % file)
    print("=" * 30)
    # Each sorted item is already a (key, value) pair, so it can be fed
    # to the two-slot format string directly.
    for entry in sorted(data.items()):
        print("%s \t\t= %s" % entry)
    print("")

def evaluate_ml(interval, files, ml):
    """ Given the parameters, run the machine learning algorithm on every
        file and print out the metrics.

        interval -- forwarded unchanged to get_saved_data
        files    -- iterable of file names; each one is processed in order
        ml       -- algorithm selector. 'tf' trains with
                    train_with_tensorflow and reports its return value
                    under the single key 'accuracy'; any other value is
                    forwarded to train_and_test_with, whose return value
                    is printed as-is (print_result calls .items() on it,
                    so presumably a dict of metrics -- confirm in main).

        Returns None; the results are only printed via print_result.
    """
    # The selector does not change between files, so test it only once.
    use_tensorflow = (ml == 'tf')

    for file in files:
        # Same loading steps for every algorithm: start time -> saved
        # data for this interval -> (features, labels).
        start_time = get_start_time_for(file)
        saved_data = get_saved_data(interval, start_time, file)
        features, labels = get_feature_labels(saved_data)

        if use_tensorflow:
            result = {'accuracy': train_with_tensorflow(features, labels)}
        else:
            result = train_and_test_with(features, labels, ml)

        print_result(file, result)

# Evaluating Machine Learning Algorithms

The point of this notebook is to compare how different machine learning algorithms perform on all the files. Change the **interval** argument to see how each algorithm performs at other intervals, but leave the machine learning algorithm unchanged within each section.


## Decision Trees

In [7]:
# Decision tree run: algorithm code 'dt', interval argument 1, all files.
evaluate_ml(1, binet_files, 'dt')

For capture20110816.binetflow
0 		= 3372, 3436
1 		= 434, 371
accuracy 		= 0.864197530864
attacks 		= 805
f1 score 		= 0.364083640836
normal count 		= 6808
precision 		= 0.334841628959
recall 		= 0.398921832884
training size 		= 3806

For capture20110818-2.binetflow
0 		= 384, 391
1 		= 62, 55
accuracy 		= 0.939461883408
attacks 		= 117
f1 score 		= 0.761061946903
normal count 		= 775
precision 		= 0.741379310345
recall 		= 0.781818181818
training size 		= 446

For capture20110815.binetflow
0 		= 6969, 6995
1 		= 555, 530
accuracy 		= 0.865514950166
attacks 		= 1085
f1 score 		= 0.15242881072
normal count 		= 13964
precision 		= 0.137048192771
recall 		= 0.171698113208
training size 		= 7524

For capture20110815-3.binetflow
0 		= 15999, 15812
1 		= 13465, 13653
accuracy 		= 0.866757169523
attacks 		= 27118
f1 score 		= 0.856411381757
normal count 		= 31811
precision 		= 0.855285265542
recall 		= 0.857540467297
training size 		= 29464

For capture20110811.binetflow
0 		= 5053, 5085
1 		

## Random Forest

In [8]:
# Random forest run: algorithm code 'rf', interval argument 1, all files.
evaluate_ml(1, binet_files, 'rf')

For capture20110816.binetflow
0 		= 3372, 3436
1 		= 434, 371
accuracy 		= 0.909377462569
attacks 		= 805
f1 score 		= 0.241758241758
normal count 		= 6808
precision 		= 0.654761904762
recall 		= 0.148247978437
training size 		= 3806

For capture20110818-2.binetflow
0 		= 384, 391
1 		= 62, 55
accuracy 		= 0.959641255605
attacks 		= 117
f1 score 		= 0.82
normal count 		= 775
precision 		= 0.911111111111
recall 		= 0.745454545455
training size 		= 446

For capture20110815.binetflow
0 		= 6969, 6995
1 		= 555, 530
accuracy 		= 0.92877076412
attacks 		= 1085
f1 score 		= 0.056338028169
normal count 		= 13964
precision 		= 0.421052631579
recall 		= 0.0301886792453
training size 		= 7524

For capture20110815-3.binetflow
0 		= 15999, 15812
1 		= 13465, 13653
accuracy 		= 0.732869506194
attacks 		= 27118
f1 score 		= 0.679949579149
normal count 		= 31811
precision 		= 0.764259597806
recall 		= 0.612392880686
training size 		= 29464

For capture20110811.binetflow
0 		= 5053, 5085
1 		= 2373, 2

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


## Tensorflow

In [9]:
# Tensorflow run: 'tf' selects train_with_tensorflow inside evaluate_ml,
# so only an accuracy value is printed per file. Interval argument is 1.
evaluate_ml(1, binet_files, 'tf')

For capture20110816.binetflow
accuracy 		= 0.89426

For capture20110818-2.binetflow
accuracy 		= 0.868834

For capture20110815.binetflow
accuracy 		= 0.927902

For capture20110815-3.binetflow
accuracy 		= 0.539819

For capture20110811.binetflow
accuracy 		= 0.682602

For capture20110812.binetflow
accuracy 		= 0.914524

For capture20110815-2.binetflow
accuracy 		= 0.712618

For capture20110819.binetflow
accuracy 		= 0.867031

For capture20110816-3.binetflow
accuracy 		= 0.965017

For capture20110818.binetflow
accuracy 		= 0.931943

For capture20110810.binetflow
accuracy 		= 0.601852

For capture20110817.binetflow
accuracy 		= 0.641582

For capture20110816-2.binetflow
accuracy 		= 0.981437

