# Perfomance Analysis

## The UNSW-NB15 data set

In [1]:
import pandas as pd

In [2]:
unsw = pd.read_csv('UNSW_NB15.csv')

In [3]:
unsw.columns

Index(['id', 'dur', 'proto', 'service', 'state', 'spkts', 'dpkts', 'sbytes',
       'dbytes', 'rate', 'sttl', 'dttl', 'sload', 'dload', 'sloss', 'dloss',
       'sinpkt', 'dinpkt', 'sjit', 'djit', 'swin', 'stcpb', 'dtcpb', 'dwin',
       'tcprtt', 'synack', 'ackdat', 'smean', 'dmean', 'trans_depth',
       'response_body_len', 'ct_srv_src', 'ct_state_ttl', 'ct_dst_ltm',
       'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm',
       'is_ftp_login', 'ct_ftp_cmd', 'ct_flw_http_mthd', 'ct_src_ltm',
       'ct_srv_dst', 'is_sm_ips_ports', 'attack_cat', 'label'],
      dtype='object')

## Data Preprocessing

In [4]:
cols_to_norm = ['dur','spkts','dpkts','sbytes','dbytes','rate','sttl','dttl','sload','dload','sloss','dloss','sinpkt','dinpkt','synack','ackdat','smean','dmean']

## Data Normalisation

In [5]:
unsw[cols_to_norm] = unsw[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max()-x.min() ))

In [6]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [7]:
dur = tf.feature_column.numeric_column('dur')
spkts = tf.feature_column.numeric_column('spkts')
dpkts = tf.feature_column.numeric_column('dpkts')
sbytes = tf.feature_column.numeric_column('sbytes')
dbytes = tf.feature_column.numeric_column('dbytes')
rate = tf.feature_column.numeric_column('rate')
sttl = tf.feature_column.numeric_column('sttl')
dttl = tf.feature_column.numeric_column('dttl')
sload = tf.feature_column.numeric_column('sload')
dload = tf.feature_column.numeric_column('dload')
sloss = tf.feature_column.numeric_column('sloss')
dloss = tf.feature_column.numeric_column('dloss')
sinpkt = tf.feature_column.numeric_column('sinpkt')
dinpkt = tf.feature_column.numeric_column('dinpkt')
synack = tf.feature_column.numeric_column('synack')
ackdat = tf.feature_column.numeric_column('ackat')
smean = tf.feature_column.numeric_column('smean')
dmean = tf.feature_column.numeric_column('dmean')

In [8]:
feat_cols = [dur,spkts,dpkts, sbytes, dbytes, rate, sttl, sload, dload, sloss, dloss, sinpkt,dinpkt, smean, dmean]

In [9]:
x_data = unsw.drop('label', axis = 1)

In [10]:
labels = unsw['label']

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
X_train, x_test, y_train, y_test = train_test_split(x_data, labels, test_size = 0.3, random_state = 101)

## Linear Classifier

In [13]:
input_func = tf.estimator.inputs.pandas_input_fn(x = X_train, y = y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [14]:
model = tf.estimator.LinearClassifier(feature_columns=feat_cols,n_classes=2,)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ADi\\AppData\\Local\\Temp\\tmpdjpfzohi', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000000056A5D30>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [15]:
model.train(input_fn=input_func, steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\ADi\AppData\Local\Temp\tmpdjpfzohi\model.ckpt.
INFO:tensorflow:loss = 6.931472, step = 0
INFO:tensorflow:global_step/sec: 146.628
INFO:tensorflow:loss = 8.141283, step = 100 (0.682 sec)
INFO:tensorflow:global_step/sec: 195.695
INFO:tensorflow:loss = 3.7199895, step = 200 (0.511 sec)
INFO:tensorflow:global_step/sec: 191.571
INFO:tensorflow:loss = 6.0483665, step = 300 (0.522 sec)
INFO:tensorflow:global_step/sec: 196.464
INFO:tensorflow:loss = 3.8651543, step = 400 (0.509 sec)
INFO:tensorflow:global_step/sec: 190.476
INFO:tensorflow:loss = 4.469162, step = 500 (0.524 sec)
INFO:tensorflow:global_step/sec: 184.502
INFO:tensorflow:loss = 3.1802914, step = 600 (0.543 sec)
INFO:tensorflow:global_step/sec: 

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0xad7ba58>

In [16]:
cols_to_norm = ['dur', 'spkts', 'dpkts', 'sbytes', 'dbytes', 'rate',
       'sttl', 'dttl', 'sload', 'dload', 'sloss', 'dloss', 'sinpkt', 'dinpkt',
       'sjit', 'djit', 'swin', 'stcpb', 'dtcpb', 'dwin', 'synack', 'ackdat',
       'smean', 'dmean']
len(cols_to_norm)

24

In [17]:
x_data[cols_to_norm] = x_data[cols_to_norm].apply(lambda x: (x - x.min())/(x.max()-x.min()))

In [18]:
eval_input_function = tf.estimator.inputs.pandas_input_fn(x = x_test, y = y_test, batch_size=10, num_epochs=1,shuffle=False)

In [19]:
results = model.evaluate(eval_input_function)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-23-08:09:57
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\ADi\AppData\Local\Temp\tmpdjpfzohi\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-23-08:10:28
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.8478984, accuracy_baseline = 0.6797141, auc = 0.91823685, auc_precision_recall = 0.9435493, average_loss = 0.35497555, global_step = 1000, label/mean = 0.6797141, loss = 3.5492833, precision = 0.90218526, prediction/mean = 0.6876889, recall = 0.8706195
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\ADi\AppData\Local\Temp\tmpdjpfzohi\model.ckpt-1000


In [20]:
results

{'accuracy': 0.8478984,
 'accuracy_baseline': 0.6797141,
 'auc': 0.91823685,
 'auc_precision_recall': 0.9435493,
 'average_loss': 0.35497555,
 'label/mean': 0.6797141,
 'loss': 3.5492833,
 'precision': 0.90218526,
 'prediction/mean': 0.6876889,
 'recall': 0.8706195,
 'global_step': 1000}

## DNN classifier

In [21]:
x_data = unsw.drop('attack_cat', axis = 1)
labels = unsw['label']
X_train, x_test, y_train, y_test = train_test_split(x_data, labels, test_size = 0.3, random_state = 101)

In [22]:
assigned_grp = tf.feature_column.categorical_column_with_vocabulary_list('label', [0, 1])

In [23]:
X_train.columns

Index(['id', 'dur', 'proto', 'service', 'state', 'spkts', 'dpkts', 'sbytes',
       'dbytes', 'rate', 'sttl', 'dttl', 'sload', 'dload', 'sloss', 'dloss',
       'sinpkt', 'dinpkt', 'sjit', 'djit', 'swin', 'stcpb', 'dtcpb', 'dwin',
       'tcprtt', 'synack', 'ackdat', 'smean', 'dmean', 'trans_depth',
       'response_body_len', 'ct_srv_src', 'ct_state_ttl', 'ct_dst_ltm',
       'ct_src_dport_ltm', 'ct_dst_sport_ltm', 'ct_dst_src_ltm',
       'is_ftp_login', 'ct_ftp_cmd', 'ct_flw_http_mthd', 'ct_src_ltm',
       'ct_srv_dst', 'is_sm_ips_ports', 'label'],
      dtype='object')

In [24]:
embedded_group_col = tf.feature_column.embedding_column(assigned_grp, dimension=2)

In [25]:
feat_cols = [dur,spkts,dpkts, sbytes, dbytes, rate, sttl, sload, dload, sloss, dloss, sinpkt,dinpkt, smean, dmean, embedded_group_col]

In [26]:
dnn_input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [27]:
dnn_model = tf.estimator.DNNClassifier(hidden_units=[3,5,3], feature_columns=feat_cols, n_classes=2, optimizer = 'Adagrad')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ADi\\AppData\\Local\\Temp\\tmpkj1syduz', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000000021F2A630>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [28]:
dnn_model.train(input_fn=dnn_input_func, steps = 1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\ADi\AppData\Local\Temp\tmpkj1syduz\model.ckpt.
INFO:tensorflow:loss = 7.203273, step = 0
INFO:tensorflow:global_step/sec: 126.582
INFO:tensorflow:loss = 5.7038803, step = 100 (0.788 sec)
INFO:tensorflow:global_step/sec: 168.919
INFO:tensorflow:loss = 6.789616, step = 200 (0.592 sec)
INFO:tensorflow:global_step/sec: 171.527
INFO:tensorflow:loss = 7.4685135, step = 300 (0.583 sec)
INFO:tensorflow:global_step/sec: 168.35
INFO:tensorflow:loss = 4.049603, step = 400 (0.594 sec)
INFO:tensorflow:global_step/sec: 161.551
INFO:tensorflow:loss = 6.8674345, step = 500 (0.619 sec)
INFO:tensorflow:global_step/sec: 151.745
INFO:tensorflow:loss = 6.857594, step = 600 (0.659 sec)
INFO:tensorflow:global_step/sec: 17

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x21f2a160>

In [29]:
dnn_eval_input_func = tf.estimator.inputs.pandas_input_fn(x = x_test, y = y_test, num_epochs=1, batch_size=10, shuffle=False)

In [30]:
results = dnn_model.evaluate(dnn_eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-23-08:10:45
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\ADi\AppData\Local\Temp\tmpkj1syduz\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-23-08:11:22
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.6797141, accuracy_baseline = 0.6797141, auc = 0.5, auc_precision_recall = 0.83985704, average_loss = 0.6270794, global_step = 1000, label/mean = 0.6797141, loss = 6.2699594, precision = 0.6797141, prediction/mean = 0.68255645, recall = 1.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\ADi\AppData\Local\Temp\tmpkj1syduz\model.ckpt-1000


In [31]:
results

{'accuracy': 0.6797141,
 'accuracy_baseline': 0.6797141,
 'auc': 0.5,
 'auc_precision_recall': 0.83985704,
 'average_loss': 0.6270794,
 'label/mean': 0.6797141,
 'loss': 6.2699594,
 'precision': 0.6797141,
 'prediction/mean': 0.68255645,
 'recall': 1.0,
 'global_step': 1000}

## Baseline Classifier

In [32]:
labels = unsw['label']
X_train, x_test, y_train, y_test = train_test_split(x_data, labels, test_size = 0.3, random_state = 101)

In [33]:
bc_input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [34]:
bc_model = tf.estimator.BaselineClassifier(n_classes=2,optimizer=lambda: tf.train.AdamOptimizer(
    learning_rate=0.01))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ADi\\AppData\\Local\\Temp\\tmp42lst1tq', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000001C0B36A0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [35]:
bc_model.train(input_fn=bc_input_func, steps = 1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\ADi\AppData\Local\Temp\tmp42lst1tq\model.ckpt.
INFO:tensorflow:loss = 6.931472, step = 0
INFO:tensorflow:global_step/sec: 166.945
INFO:tensorflow:loss = 5.660366, step = 100 (0.600 sec)
INFO:tensorflow:global_step/sec: 259.067
INFO:tensorflow:loss = 7.5537105, step = 200 (0.386 sec)
INFO:tensorflow:global_step/sec: 213.22
INFO:tensorflow:loss = 7.6454997, step = 300 (0.469 sec)
INFO:tensorflow:global_step/sec: 220.264
INFO:tensorflow:loss = 5.349785, step = 400 (0.454 sec)
INFO:tensorflow:global_step/sec: 250.627
INFO:tensorflow:loss = 8.502277, step = 500 (0.399 sec)
INFO:tensorflow:global_step/sec: 255.754
INFO:tensorflow:loss = 5.330124, step = 600 (0.391 sec)
INFO:tensorflow:global_step/sec: 255

<tensorflow.python.estimator.canned.baseline.BaselineClassifier at 0x1bdfe588>

In [36]:
bc_eval_input_func = tf.estimator.inputs.pandas_input_fn(x = x_test, y = y_test, num_epochs=1, batch_size=10, shuffle=False)

In [37]:
bcresults = bc_model.evaluate(bc_eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-23-08:11:37
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\ADi\AppData\Local\Temp\tmp42lst1tq\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-23-08:12:02
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.6797141, accuracy_baseline = 0.6797141, auc = 0.5, auc_precision_recall = 0.83985704, average_loss = 0.6274951, global_step = 1000, label/mean = 0.6797141, loss = 6.274116, precision = 0.6797141, prediction/mean = 0.6661532, recall = 1.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\ADi\AppData\Local\Temp\tmp42lst1tq\model.ckpt-1000


In [38]:
bcresults

{'accuracy': 0.6797141,
 'accuracy_baseline': 0.6797141,
 'auc': 0.5,
 'auc_precision_recall': 0.83985704,
 'average_loss': 0.6274951,
 'label/mean': 0.6797141,
 'loss': 6.274116,
 'precision': 0.6797141,
 'prediction/mean': 0.6661532,
 'recall': 1.0,
 'global_step': 1000}

## DNNLinearCombined Classifier

In [39]:
labels = unsw['label']
X_train, x_test, y_train, y_test = train_test_split(x_data, labels, test_size = 0.3, random_state = 101)

In [40]:
assigned_grp = tf.feature_column.categorical_column_with_vocabulary_list('label', [0, 1])

In [41]:
embedded_group_col = tf.feature_column.embedding_column(assigned_grp, dimension=2)

In [42]:
dnn_feat_cols = [dur,spkts,dpkts, sbytes, dbytes, rate, sttl, sload, dload, sloss, dloss, sinpkt,dinpkt, smean, dmean, embedded_group_col]

In [43]:
lin_feat_cols = [dur,spkts,dpkts, sbytes, dbytes, rate, sttl, sload, dload, sloss, dloss, sinpkt,dinpkt, smean, dmean]

In [44]:
dnnlc_input_func = tf.estimator.inputs.pandas_input_fn(x=X_train,y=y_train, batch_size=10, num_epochs=1000, shuffle=True)

In [45]:
dnnlc_model = tf.estimator.DNNLinearCombinedClassifier(n_classes=2, linear_feature_columns=lin_feat_cols, linear_optimizer='Ftrl', dnn_feature_columns=dnn_feat_cols,dnn_hidden_units=[200,350,300], dnn_optimizer=tf.train.ProximalAdagradOptimizer(
    learning_rate=0.01,
    l1_regularization_strength=0.001,
    l2_regularization_strength=0.001))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\ADi\\AppData\\Local\\Temp\\tmptlr_1q3i', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000001E7E1390>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [46]:
dnnlc_model.train(input_fn=dnnlc_input_func, steps = 1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\ADi\AppData\Local\Temp\tmptlr_1q3i\model.ckpt.
INFO:tensorflow:loss = 6.9837313, step = 0
INFO:tensorflow:global_step/sec: 82.7815
INFO:tensorflow:loss = 0.015594535, step = 100 (1.209 sec)
INFO:tensorflow:global_step/sec: 91.9118
INFO:tensorflow:loss = 0.0116810305, step = 200 (1.088 sec)
INFO:tensorflow:global_step/sec: 97.2763
INFO:tensorflow:loss = 0.010240341, step = 300 (1.027 sec)
INFO:tensorflow:global_step/sec: 92.8505
INFO:tensorflow:loss = 0.011735291, step = 400 (1.078 sec)
INFO:tensorflow:global_step/sec: 114.025
INFO:tensorflow:loss = 0.0071750716, step = 500 (0.876 sec)
INFO:tensorflow:global_step/sec: 100.2
INFO:tensorflow:loss = 0.012724978, step = 600 (0.998 sec)
INFO:tensorflow:gl

<tensorflow.python.estimator.canned.dnn_linear_combined.DNNLinearCombinedClassifier at 0x1e7e14e0>

In [47]:
dnnlc_eval_input_func = tf.estimator.inputs.pandas_input_fn(x = x_test, y = y_test, num_epochs=1, batch_size=10, shuffle=False)

In [48]:
dnnlcresults = dnnlc_model.evaluate(dnnlc_eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-23-08:12:29
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\ADi\AppData\Local\Temp\tmptlr_1q3i\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-23-08:13:08
INFO:tensorflow:Saving dict for global step 1000: accuracy = 1.0, accuracy_baseline = 0.6797141, auc = 1.0, auc_precision_recall = 1.0, average_loss = 0.0012238203, global_step = 1000, label/mean = 0.6797141, loss = 0.012236575, precision = 1.0, prediction/mean = 0.67962176, recall = 1.0
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1000: C:\Users\ADi\AppData\Local\Temp\tmptlr_1q3i\model.ckpt-1000


In [49]:
dnnlcresults

{'accuracy': 1.0,
 'accuracy_baseline': 0.6797141,
 'auc': 1.0,
 'auc_precision_recall': 1.0,
 'average_loss': 0.0012238203,
 'label/mean': 0.6797141,
 'loss': 0.012236575,
 'precision': 1.0,
 'prediction/mean': 0.67962176,
 'recall': 1.0,
 'global_step': 1000}